/*
 * Copyright (C) 2005, 2006
 * Avishay Traeger (avishay@gmail.com)
 * Copyright (C) 2008, 2009
 * Boaz Harrosh <ooo@electrozaur.com>
 *
 * Copyrights for code taken from ext2:
 *     Copyright (C) 1992, 1993, 1994, 1995
 *     Remy Card (card@masi.ibp.fr)
 *     Laboratoire MASI - Institut Blaise Pascal
 *     Universite Pierre et Marie Curie (Paris VI)
 *     from
 *     linux/fs/minix/inode.c
 *     Copyright (C) 1991, 1992 Linus Torvalds
 *
 * This file is part of exofs.
 *
 * exofs is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation.  Since it is based on ext2, and the only
 * valid version of GPL for the Linux kernel is version 2, the only valid
 * version of GPL for exofs is version 2.
 *
 * exofs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with exofs; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <linux/slab.h>

#include "exofs.h"

#define EXOFS_DBGMSG2(M...) do {} while (0)

unsigned exofs_max_io_pages(struct ore_layout *layout,
			    unsigned expected_pages)
{
	unsigned pages = min_t(unsigned, expected_pages,
			       layout->max_io_length / PAGE_SIZE);

	return pages;
}

struct page_collect {
	struct exofs_sb_info *sbi;
	struct inode *inode;
	unsigned expected_pages;
	struct ore_io_state *ios;

	struct page **pages;
	unsigned alloc_pages;
	unsigned nr_pages;
	unsigned long length;
	loff_t pg_first; /* keep 64bit also on 32-bit arches */
	bool read_4_write; /* This means two things: that the read is sync
			    * and the pages should not be unlocked.
			    */
	struct page *that_locked_page;
};

static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
		       struct inode *inode)
{
	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;

	pcol->sbi = sbi;
	pcol->inode = inode;
	pcol->expected_pages = expected_pages;

	pcol->ios = NULL;
	pcol->pages = NULL;
	pcol->alloc_pages = 0;
	pcol->nr_pages = 0;
	pcol->length = 0;
	pcol->pg_first = -1;
	pcol->read_4_write = false;
	pcol->that_locked_page = NULL;
}

static void _pcol_reset(struct page_collect *pcol)
{
	pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages);

	pcol->pages = NULL;
	pcol->alloc_pages = 0;
	pcol->nr_pages = 0;
	pcol->length = 0;
	pcol->pg_first = -1;
	pcol->ios = NULL;
	pcol->that_locked_page = NULL;

	/* this is probably the end of the loop but in writes
	 * it might not end here. don't be left with nothing
	 */
	if (!pcol->expected_pages)
		pcol->expected_pages =
			exofs_max_io_pages(&pcol->sbi->layout, ~0);
}
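/*
 * NOTE (editor's sketch): pcol_try_alloc() below sizes pcol->pages[] to at
 * most one ORE I/O worth of pages, then halves the request on each kmalloc()
 * failure instead of failing outright, e.g. (hypothetical numbers)
 * 1024 -> 512 -> 256 entries. A smaller array only makes the collect loop
 * split the request sooner; correctness is unaffected.
 */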
static int pcol_try_alloc(struct page_collect *pcol)
{
	unsigned pages;

	/* TODO: easily support bio chaining */
	pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages);

	for (; pages; pages >>= 1) {
		pcol->pages = kmalloc(pages * sizeof(struct page *),
				      GFP_KERNEL);
		if (likely(pcol->pages)) {
			pcol->alloc_pages = pages;
			return 0;
		}
	}

	EXOFS_ERR("Failed to kmalloc expected_pages=%u\n",
		  pcol->expected_pages);
	return -ENOMEM;
}

static void pcol_free(struct page_collect *pcol)
{
	kfree(pcol->pages);
	pcol->pages = NULL;

	if (pcol->ios) {
		ore_put_io_state(pcol->ios);
		pcol->ios = NULL;
	}
}

static int pcol_add_page(struct page_collect *pcol, struct page *page,
			 unsigned len)
{
	if (unlikely(pcol->nr_pages >= pcol->alloc_pages))
		return -ENOMEM;

	pcol->pages[pcol->nr_pages++] = page;
	pcol->length += len;
	return 0;
}

enum {PAGE_WAS_NOT_IN_IO = 17};
static int update_read_page(struct page *page, int ret)
{
	switch (ret) {
	case 0:
		/* Everything is OK */
		SetPageUptodate(page);
		if (PageError(page))
			ClearPageError(page);
		break;
	case -EFAULT:
		/* In this case we were trying to read something that wasn't on
		 * disk yet - return a page full of zeroes. This should be OK,
		 * because the object should be empty (if there was a write
		 * before this read, the read would be waiting with the page
		 * locked */
		clear_highpage(page);

		SetPageUptodate(page);
		if (PageError(page))
			ClearPageError(page);
		EXOFS_DBGMSG("recovered read error\n");
		/* fall through */
	case PAGE_WAS_NOT_IN_IO:
		ret = 0; /* recovered error */
		break;
	default:
		SetPageError(page);
	}
	return ret;
}

static void update_write_page(struct page *page, int ret)
{
	if (unlikely(ret == PAGE_WAS_NOT_IN_IO))
		return; /* don't pass start don't collect $200 */

	if (ret) {
		mapping_set_error(page->mapping, ret);
		SetPageError(page);
	}
	end_page_writeback(page);
}

/* Called at the end of reads, to optionally unlock pages and update their
 * status.
 */
static int __readpages_done(struct page_collect *pcol)
{
	int i;
	u64 good_bytes;
	u64 length = 0;
	int ret = ore_check_io(pcol->ios, NULL);

	if (likely(!ret)) {
		good_bytes = pcol->length;
		ret = PAGE_WAS_NOT_IN_IO;
	} else {
		good_bytes = 0;
	}

	EXOFS_DBGMSG2("readpages_done(0x%lx) good_bytes=0x%llx"
		      " length=0x%lx nr_pages=%u\n",
		      pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
		      pcol->nr_pages);

	for (i = 0; i < pcol->nr_pages; i++) {
		struct page *page = pcol->pages[i];
		struct inode *inode = page->mapping->host;
		int page_stat;

		if (inode != pcol->inode)
			continue; /* osd might add more pages at end */

		if (likely(length < good_bytes))
			page_stat = 0;
		else
			page_stat = ret;

		EXOFS_DBGMSG2("	readpages_done(0x%lx, 0x%lx) %s\n",
			      inode->i_ino, page->index,
			      page_stat ? "bad_bytes" : "good_bytes");

		ret = update_read_page(page, page_stat);
		if (!pcol->read_4_write)
			unlock_page(page);
		length += PAGE_SIZE;
	}

	pcol_free(pcol);
	EXOFS_DBGMSG2("readpages_done END\n");
	return ret;
}

/* callback of async reads */
static void readpages_done(struct ore_io_state *ios, void *p)
{
	struct page_collect *pcol = p;

	__readpages_done(pcol);
	atomic_dec(&pcol->sbi->s_curr_pending);
	kfree(pcol);
}

static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
{
	int i;

	for (i = 0; i < pcol->nr_pages; i++) {
		struct page *page = pcol->pages[i];

		if (rw == READ)
			update_read_page(page, ret);
		else
			update_write_page(page, ret);

		unlock_page(page);
	}
}

static int _maybe_not_all_in_one_io(struct ore_io_state *ios,
	struct page_collect *pcol_src, struct page_collect *pcol)
{
	/* length was wrong or offset was not page aligned */
	BUG_ON(pcol_src->nr_pages < ios->nr_pages);

	if (pcol_src->nr_pages > ios->nr_pages) {
		struct page **src_page;
		unsigned pages_less = pcol_src->nr_pages - ios->nr_pages;
		unsigned long len_less = pcol_src->length - ios->length;
		unsigned i;
		int ret;

		/* This IO was trimmed */
		pcol_src->nr_pages = ios->nr_pages;
		pcol_src->length = ios->length;

		/* Left over pages are passed to the next io */
		pcol->expected_pages += pages_less;
		pcol->nr_pages = pages_less;
		pcol->length = len_less;
		src_page = pcol_src->pages + pcol_src->nr_pages;
		pcol->pg_first = (*src_page)->index;

		ret = pcol_try_alloc(pcol);
		if (unlikely(ret))
			return ret;

		for (i = 0; i < pages_less; ++i)
			pcol->pages[i] = *src_page++;

		EXOFS_DBGMSG("Length was adjusted nr_pages=0x%x "
			"pages_less=0x%x expected_pages=0x%x "
			"next_offset=0x%llx next_len=0x%lx\n",
			pcol_src->nr_pages, pages_less, pcol->expected_pages,
			pcol->pg_first * PAGE_SIZE, pcol->length);
	}
	return 0;
}

static int read_exec(struct page_collect *pcol)
{
	struct exofs_i_info *oi = exofs_i(pcol->inode);
	struct ore_io_state *ios;
	struct page_collect *pcol_copy = NULL;
	int ret;

	if (!pcol->pages)
		return 0;

	if (!pcol->ios) {
		int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, true,
					   pcol->pg_first << PAGE_CACHE_SHIFT,
					   pcol->length, &pcol->ios);

		if (ret)
			return ret;
	}

	ios = pcol->ios;
	ios->pages = pcol->pages;

	if (pcol->read_4_write) {
		ore_read(pcol->ios);
		return __readpages_done(pcol);
	}

	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
	if (!pcol_copy) {
		ret = -ENOMEM;
		goto err;
	}

	*pcol_copy = *pcol;
	ios->done = readpages_done;
	ios->private = pcol_copy;

	/* pages ownership was passed to pcol_copy */
	_pcol_reset(pcol);

	ret = _maybe_not_all_in_one_io(ios, pcol_copy, pcol);
	if (unlikely(ret))
		goto err;

	EXOFS_DBGMSG2("read_exec(0x%lx) offset=0x%llx length=0x%llx\n",
		pcol->inode->i_ino, _LLU(ios->offset), _LLU(ios->length));

	ret = ore_read(ios);
	if (unlikely(ret))
		goto err;

	atomic_inc(&pcol->sbi->s_curr_pending);

	return 0;

err:
	if (!pcol_copy) /* Failed before ownership transfer */
		pcol_copy = pcol;
	_unlock_pcol_pages(pcol_copy, ret, READ);
	pcol_free(pcol_copy);
	if (pcol_copy != pcol) /* don't kfree the caller's on-stack pcol */
		kfree(pcol_copy);

	return ret;
}

/* readpage_strip is called either directly from readpage() or by the VFS from
 * within read_cache_pages(), to add one more page to be read. It will try to
 * collect as many contiguous pages as possible. If a discontinuity is
 * encountered, or it runs out of resources, it will submit the previous
 * segment and will start a new collection. Eventually the caller must submit
 * the last segment if present.
 */
static int readpage_strip(void *data, struct page *page)
{
	struct page_collect *pcol = data;
	struct inode *inode = pcol->inode;
	struct exofs_i_info *oi = exofs_i(inode);
	loff_t i_size = i_size_read(inode);
	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	size_t len;
	int ret;

	BUG_ON(!PageLocked(page));

	/* FIXME: Just for debugging, will be removed */
	if (PageUptodate(page))
		EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino,
			  page->index);

	pcol->that_locked_page = page;

	if (page->index < end_index)
		len = PAGE_CACHE_SIZE;
	else if (page->index == end_index)
		len = i_size & ~PAGE_CACHE_MASK;
	else
		len = 0;

	if (!len || !obj_created(oi)) {
		/* this will be out of bounds, or doesn't exist yet.
		 * Current page is cleared and the request is split
		 */
		clear_highpage(page);

		SetPageUptodate(page);
		if (PageError(page))
			ClearPageError(page);

		if (!pcol->read_4_write)
			unlock_page(page);
		EXOFS_DBGMSG("readpage_strip(0x%lx) empty page len=%zx "
			     "read_4_write=%d index=0x%lx end_index=0x%lx "
			     "splitting\n", inode->i_ino, len,
			     pcol->read_4_write, page->index, end_index);

		return read_exec(pcol);
	}

try_again:

	if (unlikely(pcol->pg_first == -1)) {
		pcol->pg_first = page->index;
	} else if (unlikely((pcol->pg_first + pcol->nr_pages) !=
		   page->index)) {
		/* Discontinuity detected, split the request */
		ret = read_exec(pcol);
		if (unlikely(ret))
			goto fail;
		goto try_again;
	}

	if (!pcol->pages) {
		ret = pcol_try_alloc(pcol);
		if (unlikely(ret))
			goto fail;
	}

	if (len != PAGE_CACHE_SIZE)
		zero_user(page, len, PAGE_CACHE_SIZE - len);

	EXOFS_DBGMSG2("	readpage_strip(0x%lx, 0x%lx) len=0x%zx\n",
		      inode->i_ino, page->index, len);

	ret = pcol_add_page(pcol, page, len);
	if (ret) {
		EXOFS_DBGMSG2("Failed pcol_add_page pages[i]=%p "
			      "this_len=0x%zx nr_pages=%u length=0x%lx\n",
			      page, len, pcol->nr_pages, pcol->length);

		/* split the request, and start again with current page */
		ret = read_exec(pcol);
		if (unlikely(ret))
			goto fail;

		goto try_again;
	}

	return 0;

fail:
	/* SetPageError(page); ??? */
	unlock_page(page);
	return ret;
}
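/*
 * NOTE (editor's sketch): for an async read, read_exec() above transfers
 * ownership of the collected pages to a kmalloc'ed copy of pcol and resets
 * the original, roughly:
 *
 *	*pcol_copy = *pcol;
 *	_pcol_reset(pcol);				// pcol now empty
 *	_maybe_not_all_in_one_io(ios, pcol_copy, pcol);	// trimmed tail -> pcol
 *	ore_read(ios);					// submits pcol_copy
 *
 * This is also why exofs_readpages() below calls read_exec() twice: the
 * second call submits any pages the ORE trimmed off the first I/O (and is a
 * no-op when pcol->pages is NULL).
 */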
static int exofs_readpages(struct file *file, struct address_space *mapping,
			   struct list_head *pages, unsigned nr_pages)
{
	struct page_collect pcol;
	int ret;

	_pcol_init(&pcol, nr_pages, mapping->host);

	ret = read_cache_pages(mapping, pages, readpage_strip, &pcol);
	if (ret) {
		EXOFS_ERR("read_cache_pages => %d\n", ret);
		return ret;
	}

	ret = read_exec(&pcol);
	if (unlikely(ret))
		return ret;

	return read_exec(&pcol);
}

static int _readpage(struct page *page, bool read_4_write)
{
	struct page_collect pcol;
	int ret;

	_pcol_init(&pcol, 1, page->mapping->host);

	pcol.read_4_write = read_4_write;
	ret = readpage_strip(&pcol, page);
	if (ret) {
		EXOFS_ERR("_readpage => %d\n", ret);
		return ret;
	}

	return read_exec(&pcol);
}

/*
 * We don't need the file
 */
static int exofs_readpage(struct file *file, struct page *page)
{
	return _readpage(page, false);
}

/* Callback for osd_write. All writes are asynchronous */
static void writepages_done(struct ore_io_state *ios, void *p)
{
	struct page_collect *pcol = p;
	int i;
	u64 good_bytes;
	u64 length = 0;
	int ret = ore_check_io(ios, NULL);

	atomic_dec(&pcol->sbi->s_curr_pending);

	if (likely(!ret)) {
		good_bytes = pcol->length;
		ret = PAGE_WAS_NOT_IN_IO;
	} else {
		good_bytes = 0;
	}

	EXOFS_DBGMSG2("writepages_done(0x%lx) good_bytes=0x%llx"
		      " length=0x%lx nr_pages=%u\n",
		      pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
		      pcol->nr_pages);

	for (i = 0; i < pcol->nr_pages; i++) {
		struct page *page = pcol->pages[i];
		struct inode *inode = page->mapping->host;
		int page_stat;

		if (inode != pcol->inode)
			continue; /* osd might add more pages to a bio */

		if (likely(length < good_bytes))
			page_stat = 0;
		else
			page_stat = ret;

		update_write_page(page, page_stat);
		unlock_page(page);
		EXOFS_DBGMSG2("	writepages_done(0x%lx, 0x%lx) status=%d\n",
			      inode->i_ino, page->index, page_stat);

		length += PAGE_SIZE;
	}

	pcol_free(pcol);
	kfree(pcol);
	EXOFS_DBGMSG2("writepages_done END\n");
}
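/*
 * NOTE (editor's note): the two callbacks below implement the ORE's
 * read-4-write (r4w) hooks. When a write is not perfectly aligned, the ORE
 * may need surrounding data and asks for the missing pages through
 * ->get_page(), returning them through ->put_page(). We serve them from the
 * page cache (or ZERO_PAGE past i_size), and hand back that_locked_page -
 * the page the current collect already holds locked - directly, without
 * taking an extra page-cache reference.
 */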
static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
{
	struct page_collect *pcol = priv;
	pgoff_t index = offset / PAGE_SIZE;

	if (!pcol->that_locked_page ||
	    (pcol->that_locked_page->index != index)) {
		struct page *page;
		loff_t i_size = i_size_read(pcol->inode);

		if (offset >= i_size) {
			*uptodate = true;
			EXOFS_DBGMSG2("offset >= i_size index=0x%lx\n", index);
			return ZERO_PAGE(0);
		}

		page = find_get_page(pcol->inode->i_mapping, index);
		if (!page) {
			page = find_or_create_page(pcol->inode->i_mapping,
						   index, GFP_NOFS);
			if (unlikely(!page)) {
				EXOFS_DBGMSG("grab_cache_page Failed "
					     "index=0x%llx\n", _LLU(index));
				return NULL;
			}
			unlock_page(page);
		}
		if (PageDirty(page) || PageWriteback(page))
			*uptodate = true;
		else
			*uptodate = PageUptodate(page);
		EXOFS_DBGMSG2("index=0x%lx uptodate=%d\n", index, *uptodate);
		return page;
	} else {
		EXOFS_DBGMSG2("YES that_locked_page index=0x%lx\n",
			      pcol->that_locked_page->index);
		*uptodate = true;
		return pcol->that_locked_page;
	}
}

static void __r4w_put_page(void *priv, struct page *page)
{
	struct page_collect *pcol = priv;

	if ((pcol->that_locked_page != page) && (ZERO_PAGE(0) != page)) {
		EXOFS_DBGMSG2("index=0x%lx\n", page->index);
		page_cache_release(page);
		return;
	}
	EXOFS_DBGMSG2("that_locked_page index=0x%lx\n",
		      ZERO_PAGE(0) == page ? -1 : page->index);
}

static const struct _ore_r4w_op _r4w_op = {
	.get_page = &__r4w_get_page,
	.put_page = &__r4w_put_page,
};

static int write_exec(struct page_collect *pcol)
{
	struct exofs_i_info *oi = exofs_i(pcol->inode);
	struct ore_io_state *ios;
	struct page_collect *pcol_copy = NULL;
	int ret;

	if (!pcol->pages)
		return 0;

	BUG_ON(pcol->ios);
	ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, false,
			       pcol->pg_first << PAGE_CACHE_SHIFT,
			       pcol->length, &pcol->ios);
	if (unlikely(ret))
		goto err;

	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
	if (!pcol_copy) {
		EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n");
		ret = -ENOMEM;
		goto err;
	}

	*pcol_copy = *pcol;

	ios = pcol->ios;
	ios->pages = pcol_copy->pages;
	ios->done = writepages_done;
	ios->r4w = &_r4w_op;
	ios->private = pcol_copy;

	/* pages ownership was passed to pcol_copy */
	_pcol_reset(pcol);

	ret = _maybe_not_all_in_one_io(ios, pcol_copy, pcol);
	if (unlikely(ret))
		goto err;

	EXOFS_DBGMSG2("write_exec(0x%lx) offset=0x%llx length=0x%llx\n",
		pcol->inode->i_ino, _LLU(ios->offset), _LLU(ios->length));

	ret = ore_write(ios);
	if (unlikely(ret)) {
		EXOFS_ERR("write_exec: ore_write() Failed\n");
		goto err;
	}

	atomic_inc(&pcol->sbi->s_curr_pending);
	return 0;

err:
	if (!pcol_copy) /* Failed before ownership transfer */
		pcol_copy = pcol;
	_unlock_pcol_pages(pcol_copy, ret, WRITE);
	pcol_free(pcol_copy);
	if (pcol_copy != pcol) /* don't kfree the caller's on-stack pcol */
		kfree(pcol_copy);

	return ret;
}
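/*
 * NOTE (editor's sketch): writepage_strip() below is the write-side twin of
 * readpage_strip(). The driving loop lives in exofs_writepages() further
 * down and looks roughly like:
 *
 *	_pcol_init(&pcol, expected_pages, mapping->host);
 *	write_cache_pages(mapping, wbc, writepage_strip, &pcol);
 *	write_exec(&pcol);	// submit whatever is still collected
 *
 * with writepage_strip() calling write_exec() itself on a discontinuity or
 * when pcol->pages[] fills up.
 */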
/* writepage_strip is called either directly from writepage() or by the VFS
 * from within write_cache_pages(), to add one more page to be written to
 * storage. It will try to collect as many contiguous pages as possible. If a
 * discontinuity is encountered or it runs out of resources it will submit the
 * previous segment and will start a new collection.
 * Eventually the caller must submit the last segment if present.
 */
static int writepage_strip(struct page *page,
			   struct writeback_control *wbc_unused, void *data)
{
	struct page_collect *pcol = data;
	struct inode *inode = pcol->inode;
	struct exofs_i_info *oi = exofs_i(inode);
	loff_t i_size = i_size_read(inode);
	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	size_t len;
	int ret;

	BUG_ON(!PageLocked(page));

	ret = wait_obj_created(oi);
	if (unlikely(ret))
		goto fail;

	if (page->index < end_index)
		/* in this case, the page is within the limits of the file */
		len = PAGE_CACHE_SIZE;
	else {
		len = i_size & ~PAGE_CACHE_MASK;

		if (page->index > end_index || !len) {
			/* in this case, the page is outside the limits
			 * (truncate in progress)
			 */
			ret = write_exec(pcol);
			if (unlikely(ret))
				goto fail;
			if (PageError(page))
				ClearPageError(page);
			unlock_page(page);
			EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) "
				     "outside the limits\n",
				     inode->i_ino, page->index);
			return 0;
		}
	}

try_again:

	if (unlikely(pcol->pg_first == -1)) {
		pcol->pg_first = page->index;
	} else if (unlikely((pcol->pg_first + pcol->nr_pages) !=
		   page->index)) {
		/* Discontinuity detected, split the request */
		ret = write_exec(pcol);
		if (unlikely(ret))
			goto fail;

		EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) Discontinuity\n",
			     inode->i_ino, page->index);
		goto try_again;
	}

	if (!pcol->pages) {
		ret = pcol_try_alloc(pcol);
		if (unlikely(ret))
			goto fail;
	}

	EXOFS_DBGMSG2("	writepage_strip(0x%lx, 0x%lx) len=0x%zx\n",
		      inode->i_ino, page->index, len);

	ret = pcol_add_page(pcol, page, len);
	if (unlikely(ret)) {
		EXOFS_DBGMSG2("Failed pcol_add_page "
			      "nr_pages=%u total_length=0x%lx\n",
			      pcol->nr_pages, pcol->length);

		/* split the request, next loop will start again */
		ret = write_exec(pcol);
		if (unlikely(ret)) {
			EXOFS_DBGMSG("write_exec failed => %d", ret);
			goto fail;
		}

		goto try_again;
	}

	BUG_ON(PageWriteback(page));
	set_page_writeback(page);

	return 0;

fail:
	EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n",
		     inode->i_ino, page->index, ret);
	set_bit(AS_EIO, &page->mapping->flags);
	unlock_page(page);
	return ret;
}
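/*
 * NOTE (editor's note): expected_pages computed below is only a sizing hint
 * for pcol_try_alloc(); it is clamped to at least 32 so that small or
 * unbounded (LLONG_MAX) ranges still get a reasonable pages[] array. A wrong
 * guess is harmless - the collect loop just splits the request.
 */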
static int exofs_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct page_collect pcol;
	long start, end, expected_pages;
	int ret;

	start = wbc->range_start >> PAGE_CACHE_SHIFT;
	end = (wbc->range_end == LLONG_MAX) ?
			start + mapping->nrpages :
			wbc->range_end >> PAGE_CACHE_SHIFT;

	if (start || end)
		expected_pages = end - start + 1;
	else
		expected_pages = mapping->nrpages;

	if (expected_pages < 32L)
		expected_pages = 32L;

	EXOFS_DBGMSG2("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx "
		      "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n",
		      mapping->host->i_ino, wbc->range_start, wbc->range_end,
		      mapping->nrpages, start, end, expected_pages);

	_pcol_init(&pcol, expected_pages, mapping->host);

	ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol);
	if (unlikely(ret)) {
		EXOFS_ERR("write_cache_pages => %d\n", ret);
		return ret;
	}

	ret = write_exec(&pcol);
	if (unlikely(ret))
		return ret;

	if (wbc->sync_mode == WB_SYNC_ALL) {
		return write_exec(&pcol); /* pump the last remainder */
	} else if (pcol.nr_pages) {
		/* not SYNC let the remainder join the next writeout */
		unsigned i;

		for (i = 0; i < pcol.nr_pages; i++) {
			struct page *page = pcol.pages[i];

			end_page_writeback(page);
			set_page_dirty(page);
			unlock_page(page);
		}
	}
	return 0;
}

/*
static int exofs_writepage(struct page *page, struct writeback_control *wbc)
{
	struct page_collect pcol;
	int ret;

	_pcol_init(&pcol, 1, page->mapping->host);

	ret = writepage_strip(page, NULL, &pcol);
	if (ret) {
		EXOFS_ERR("exofs_writepage => %d\n", ret);
		return ret;
	}

	return write_exec(&pcol);
}
*/

/* i_mutex held using inode->i_size directly */
static void _write_failed(struct inode *inode, loff_t to)
{
	if (to > inode->i_size)
		truncate_pagecache(inode, inode->i_size);
}
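/*
 * NOTE (editor's note): for a partial-page write to a page that is not yet
 * uptodate, exofs_write_begin() must read the page in first (read-modify-
 * write); that is what the _readpage(page, true) call below does - the
 * "true" makes the read synchronous and leaves the page locked. Pages
 * entirely past the on-disk size are simply zeroed instead of read.
 */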
int exofs_write_begin(struct file *file, struct address_space *mapping,
		      loff_t pos, unsigned len, unsigned flags,
		      struct page **pagep, void **fsdata)
{
	int ret = 0;
	struct page *page;

	page = *pagep;
	if (page == NULL) {
		ret = simple_write_begin(file, mapping, pos, len, flags, pagep,
					 fsdata);
		if (ret) {
			EXOFS_DBGMSG("simple_write_begin failed\n");
			goto out;
		}

		page = *pagep;
	}

	/* read modify write */
	if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
		loff_t i_size = i_size_read(mapping->host);
		pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
		size_t rlen;

		if (page->index < end_index)
			rlen = PAGE_CACHE_SIZE;
		else if (page->index == end_index)
			rlen = i_size & ~PAGE_CACHE_MASK;
		else
			rlen = 0;

		if (!rlen) {
			clear_highpage(page);
			SetPageUptodate(page);
			goto out;
		}

		ret = _readpage(page, true);
		if (ret) {
			/* SetPageError was done by _readpage. Is it ok? */
			unlock_page(page);
			EXOFS_DBGMSG("_readpage failed\n");
		}
	}
out:
	if (unlikely(ret))
		_write_failed(mapping->host, pos + len);

	return ret;
}

static int exofs_write_begin_export(struct file *file,
				    struct address_space *mapping,
				    loff_t pos, unsigned len, unsigned flags,
				    struct page **pagep, void **fsdata)
{
	*pagep = NULL;

	return exofs_write_begin(file, mapping, pos, len, flags, pagep,
				 fsdata);
}

static int exofs_write_end(struct file *file, struct address_space *mapping,
			   loff_t pos, unsigned len, unsigned copied,
			   struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	/* According to comment in simple_write_end i_mutex is held */
	loff_t i_size = inode->i_size;
	int ret;

	ret = simple_write_end(file, mapping, pos, len, copied, page, fsdata);
	if (unlikely(ret))
		_write_failed(inode, pos + len);

	/* TODO: once simple_write_end marks inode dirty remove */
	if (i_size != inode->i_size)
		mark_inode_dirty(inode);
	return ret;
}

static int exofs_releasepage(struct page *page, gfp_t gfp)
{
	EXOFS_DBGMSG("page 0x%lx\n", page->index);
	WARN_ON(1);
	return 0;
}

static void exofs_invalidatepage(struct page *page, unsigned int offset,
				 unsigned int length)
{
	EXOFS_DBGMSG("page 0x%lx offset 0x%x length 0x%x\n",
		     page->index, offset, length);
	WARN_ON(1);
}

/* TODO: Should be easy enough to do properly */
static ssize_t exofs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
			       loff_t offset)
{
	return 0;
}

const struct address_space_operations exofs_aops = {
	.readpage	= exofs_readpage,
	.readpages	= exofs_readpages,
	.writepage	= NULL,
	.writepages	= exofs_writepages,
	.write_begin	= exofs_write_begin_export,
	.write_end	= exofs_write_end,
	.releasepage	= exofs_releasepage,
	.set_page_dirty	= __set_page_dirty_nobuffers,
	.invalidatepage = exofs_invalidatepage,

	/* Not implemented Yet */
	.bmap		= NULL, /* TODO: use osd's OSD_ACT_READ_MAP */
	.direct_IO	= exofs_direct_IO,

	/* With these NULL has special meaning or default is not exported */
	.migratepage	= NULL,
	.launder_page	= NULL,
	.is_partially_uptodate = NULL,
	.error_remove_page = NULL,
};

/******************************************************************************
 * INODE OPERATIONS
 *****************************************************************************/

/*
 * Test whether an inode is a fast symlink.
 */
static inline int exofs_inode_is_fast_symlink(struct inode *inode)
{
	struct exofs_i_info *oi = exofs_i(inode);

	return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0);
}

static int _do_truncate(struct inode *inode, loff_t newsize)
{
	struct exofs_i_info *oi = exofs_i(inode);
	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
	int ret;

	inode->i_mtime = inode->i_ctime = CURRENT_TIME;

	ret = ore_truncate(&sbi->layout, &oi->oc, (u64)newsize);
	if (likely(!ret))
		truncate_setsize(inode, newsize);

	EXOFS_DBGMSG2("(0x%lx) size=0x%llx ret=>%d\n",
		      inode->i_ino, newsize, ret);
	return ret;
}
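/*
 * NOTE (editor's note): truncation order matters here - _do_truncate() above
 * first truncates the object on the OSD via ore_truncate() and only on
 * success shrinks the in-core size with truncate_setsize(), so i_size is
 * never reduced unless the on-OSD truncate actually happened.
 */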
/*
 * Set inode attributes - update size attribute on OSD if needed,
 * otherwise just call generic functions.
 */
int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
{
	struct inode *inode = d_inode(dentry);
	int error;

	/* if we are about to modify an object, and it hasn't been
	 * created yet, wait
	 */
	error = wait_obj_created(exofs_i(inode));
	if (unlikely(error))
		return error;

	error = inode_change_ok(inode, iattr);
	if (unlikely(error))
		return error;

	if ((iattr->ia_valid & ATTR_SIZE) &&
	    iattr->ia_size != i_size_read(inode)) {
		error = _do_truncate(inode, iattr->ia_size);
		if (unlikely(error))
			return error;
	}

	setattr_copy(inode, iattr);
	mark_inode_dirty(inode);
	return 0;
}

static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF(
	EXOFS_APAGE_FS_DATA,
	EXOFS_ATTR_INODE_FILE_LAYOUT,
	0);
static const struct osd_attr g_attr_inode_dir_layout = ATTR_DEF(
	EXOFS_APAGE_FS_DATA,
	EXOFS_ATTR_INODE_DIR_LAYOUT,
	0);
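/*
 * NOTE (editor's sketch): an exofs inode lives in attributes of its
 * osd-object, fetched below in a single ore_read() with three in-attrs:
 *
 *	attrs[0]  g_attr_inode_data	   - the exofs_fcb (on-disk inode)
 *	attrs[1]  g_attr_inode_file_layout - optional file-data layout
 *	attrs[2]  g_attr_inode_dir_layout  - optional meta-data layout
 *
 * The two layout attributes are sized per sbi->oc.numdevs and only
 * validated here; anything but LAYOUT_MOVING_WINDOW is rejected.
 */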
/*
 * Read the Linux inode info from the OSD, and return it as is. In exofs the
 * inode info is in an application specific page/attribute of the osd-object.
 */
static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
			   struct exofs_fcb *inode)
{
	struct exofs_sb_info *sbi = sb->s_fs_info;
	struct osd_attr attrs[] = {
		[0] = g_attr_inode_data,
		[1] = g_attr_inode_file_layout,
		[2] = g_attr_inode_dir_layout,
	};
	struct ore_io_state *ios;
	struct exofs_on_disk_inode_layout *layout;
	int ret;

	ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios);
	if (unlikely(ret)) {
		EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
		return ret;
	}

	attrs[1].len = exofs_on_disk_inode_layout_size(sbi->oc.numdevs);
	attrs[2].len = exofs_on_disk_inode_layout_size(sbi->oc.numdevs);

	ios->in_attr = attrs;
	ios->in_attr_len = ARRAY_SIZE(attrs);

	ret = ore_read(ios);
	if (unlikely(ret)) {
		EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n",
			  _LLU(oi->one_comp.obj.id), ret);
		memset(inode, 0, sizeof(*inode));
		inode->i_mode = 0040000 | (0777 & ~022);
		/* If object is lost on target we might as well enable its
		 * delete.
		 */
		ret = 0;
		goto out;
	}

	ret = extract_attr_from_ios(ios, &attrs[0]);
	if (ret) {
		EXOFS_ERR("%s: extract_attr 0 of inode failed\n", __func__);
		goto out;
	}
	WARN_ON(attrs[0].len != EXOFS_INO_ATTR_SIZE);
	memcpy(inode, attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE);

	ret = extract_attr_from_ios(ios, &attrs[1]);
	if (ret) {
		EXOFS_ERR("%s: extract_attr 1 of inode failed\n", __func__);
		goto out;
	}
	if (attrs[1].len) {
		layout = attrs[1].val_ptr;
		if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) {
			EXOFS_ERR("%s: unsupported files layout %d\n",
				  __func__, layout->gen_func);
			ret = -ENOTSUPP;
			goto out;
		}
	}

	ret = extract_attr_from_ios(ios, &attrs[2]);
	if (ret) {
		EXOFS_ERR("%s: extract_attr 2 of inode failed\n", __func__);
		goto out;
	}
	if (attrs[2].len) {
		layout = attrs[2].val_ptr;
		if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) {
			EXOFS_ERR("%s: unsupported meta-data layout %d\n",
				  __func__, layout->gen_func);
			ret = -ENOTSUPP;
			goto out;
		}
	}

out:
	ore_put_io_state(ios);
	return ret;
}

static void __oi_init(struct exofs_i_info *oi)
{
	init_waitqueue_head(&oi->i_wq);
	oi->i_flags = 0;
}
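/*
 * NOTE (editor's note): exofs_iget() below follows the standard
 * iget_locked() pattern - return a cached inode unless it is I_NEW,
 * otherwise fill it from the exofs_fcb read off the OSD. Device numbers use
 * the ext2 convention: fcb.i_data[0] holds an old-style dev_t, and when it
 * is zero the new-style encoding lives in fcb.i_data[1].
 */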
/*
 * Fill in an inode read from the OSD and set it up for use
 */
struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
{
	struct exofs_i_info *oi;
	struct exofs_fcb fcb;
	struct inode *inode;
	int ret;

	inode = iget_locked(sb, ino);
	if (!inode)
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;
	oi = exofs_i(inode);
	__oi_init(oi);
	exofs_init_comps(&oi->oc, &oi->one_comp, sb->s_fs_info,
			 exofs_oi_objno(oi));

	/* read the inode from the osd */
	ret = exofs_get_inode(sb, oi, &fcb);
	if (ret)
		goto bad_inode;

	set_obj_created(oi);

	/* copy stuff from on-disk struct to in-memory struct */
	inode->i_mode = le16_to_cpu(fcb.i_mode);
	i_uid_write(inode, le32_to_cpu(fcb.i_uid));
	i_gid_write(inode, le32_to_cpu(fcb.i_gid));
	set_nlink(inode, le16_to_cpu(fcb.i_links_count));
	inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime);
	inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime);
	inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime);
	inode->i_ctime.tv_nsec =
		inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0;
	oi->i_commit_size = le64_to_cpu(fcb.i_size);
	i_size_write(inode, oi->i_commit_size);
	inode->i_blkbits = EXOFS_BLKSHIFT;
	inode->i_generation = le32_to_cpu(fcb.i_generation);

	oi->i_dir_start_lookup = 0;

	if ((inode->i_nlink == 0) && (inode->i_mode == 0)) {
		ret = -ESTALE;
		goto bad_inode;
	}

	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
		if (fcb.i_data[0])
			inode->i_rdev =
				old_decode_dev(le32_to_cpu(fcb.i_data[0]));
		else
			inode->i_rdev =
				new_decode_dev(le32_to_cpu(fcb.i_data[1]));
	} else {
		memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
	}

	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &exofs_file_inode_operations;
		inode->i_fop = &exofs_file_operations;
		inode->i_mapping->a_ops = &exofs_aops;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &exofs_dir_inode_operations;
		inode->i_fop = &exofs_dir_operations;
		inode->i_mapping->a_ops = &exofs_aops;
	} else if (S_ISLNK(inode->i_mode)) {
		if (exofs_inode_is_fast_symlink(inode))
			inode->i_op = &exofs_fast_symlink_inode_operations;
		else {
			inode->i_op = &exofs_symlink_inode_operations;
			inode->i_mapping->a_ops = &exofs_aops;
		}
	} else {
		inode->i_op = &exofs_special_inode_operations;
		if (fcb.i_data[0])
			init_special_inode(inode, inode->i_mode,
				old_decode_dev(le32_to_cpu(fcb.i_data[0])));
		else
			init_special_inode(inode, inode->i_mode,
				new_decode_dev(le32_to_cpu(fcb.i_data[1])));
	}

	unlock_new_inode(inode);
	return inode;

bad_inode:
	iget_failed(inode);
	return ERR_PTR(ret);
}

int __exofs_wait_obj_created(struct exofs_i_info *oi)
{
	if (!obj_created(oi)) {
		EXOFS_DBGMSG("!obj_created\n");
		BUG_ON(!obj_2bcreated(oi));
		wait_event(oi->i_wq, obj_created(oi));
		EXOFS_DBGMSG("wait_event done\n");
	}
	return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0;
}
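/*
 * NOTE (editor's note): object creation is asynchronous. Writers that need
 * the object park in __exofs_wait_obj_created() above, on oi->i_wq, until
 * create_done() below sets the obj_created flag and wakes them. The flag is
 * set even when creation failed, so waiters always wake up;
 * __exofs_wait_obj_created() then returns -EIO only if the inode went bad.
 */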
/*
 * Callback function from exofs_new_inode(). The important thing is that we
 * set the obj_created flag so that other methods know that the object exists
 * on the OSD.
 */
static void create_done(struct ore_io_state *ios, void *p)
{
	struct inode *inode = p;
	struct exofs_i_info *oi = exofs_i(inode);
	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
	int ret;

	ret = ore_check_io(ios, NULL);
	ore_put_io_state(ios);

	atomic_dec(&sbi->s_curr_pending);

	if (unlikely(ret)) {
		EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx",
			  _LLU(exofs_oi_objno(oi)),
			  _LLU(oi->one_comp.obj.partition));
		/* TODO: When the FS is corrupted, creation can fail because
		 * the object already exists. Get rid of this asynchronous
		 * creation; if the object exists, increment the obj counter
		 * and try the next object, until we succeed. All these
		 * dangling objects will be made into lost files by
		 * chkfs.exofs
		 */
	}

	set_obj_created(oi);

	wake_up(&oi->i_wq);
}

/*
 * Set up a new inode and create an object for it on the OSD
 */
struct inode *exofs_new_inode(struct inode *dir, umode_t mode)
{
	struct super_block *sb = dir->i_sb;
	struct exofs_sb_info *sbi = sb->s_fs_info;
	struct inode *inode;
	struct exofs_i_info *oi;
	struct ore_io_state *ios;
	int ret;

	inode = new_inode(sb);
	if (!inode)
		return ERR_PTR(-ENOMEM);

	oi = exofs_i(inode);
	__oi_init(oi);

	set_obj_2bcreated(oi);

	inode_init_owner(inode, dir, mode);
	inode->i_ino = sbi->s_nextid++;
	inode->i_blkbits = EXOFS_BLKSHIFT;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
	oi->i_commit_size = inode->i_size = 0;
	spin_lock(&sbi->s_next_gen_lock);
	inode->i_generation = sbi->s_next_generation++;
	spin_unlock(&sbi->s_next_gen_lock);
	insert_inode_hash(inode);

	exofs_init_comps(&oi->oc, &oi->one_comp, sb->s_fs_info,
			 exofs_oi_objno(oi));
	exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */

	mark_inode_dirty(inode);

	ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios);
	if (unlikely(ret)) {
		EXOFS_ERR("exofs_new_inode: ore_get_io_state failed\n");
		return ERR_PTR(ret);
	}

	ios->done = create_done;
	ios->private = inode;

	ret = ore_create(ios);
	if (ret) {
		ore_put_io_state(ios);
		return ERR_PTR(ret);
	}
	atomic_inc(&sbi->s_curr_pending);

	return inode;
}

/*
 * struct to pass two arguments to update_inode's callback
 */
struct updatei_args {
	struct exofs_sb_info *sbi;
	struct exofs_fcb fcb;
};

/*
 * Callback function from exofs_update_inode().
 */
static void updatei_done(struct ore_io_state *ios, void *p)
{
	struct updatei_args *args = p;

	ore_put_io_state(ios);

	atomic_dec(&args->sbi->s_curr_pending);

	kfree(args);
}
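/*
 * NOTE (editor's note): the exofs_fcb is embedded in the kzalloc'ed
 * updatei_args so that, for an async update, the attribute buffer outlives
 * exofs_update_inode() itself; updatei_done() above frees it only after the
 * OSD write completes (see the "deallocation in updatei_done" comment
 * below). In the do_sync case the same buffer is freed on the way out.
 */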
/*
 * Write the inode to the OSD. Just fill up the struct, and set the attribute
 * synchronously or asynchronously depending on the do_sync flag.
 */
static int exofs_update_inode(struct inode *inode, int do_sync)
{
	struct exofs_i_info *oi = exofs_i(inode);
	struct super_block *sb = inode->i_sb;
	struct exofs_sb_info *sbi = sb->s_fs_info;
	struct ore_io_state *ios;
	struct osd_attr attr;
	struct exofs_fcb *fcb;
	struct updatei_args *args;
	int ret;

	args = kzalloc(sizeof(*args), GFP_KERNEL);
	if (!args) {
		EXOFS_DBGMSG("Failed kzalloc of args\n");
		return -ENOMEM;
	}

	fcb = &args->fcb;

	fcb->i_mode = cpu_to_le16(inode->i_mode);
	fcb->i_uid = cpu_to_le32(i_uid_read(inode));
	fcb->i_gid = cpu_to_le32(i_gid_read(inode));
	fcb->i_links_count = cpu_to_le16(inode->i_nlink);
	fcb->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
	fcb->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
	fcb->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
	oi->i_commit_size = i_size_read(inode);
	fcb->i_size = cpu_to_le64(oi->i_commit_size);
	fcb->i_generation = cpu_to_le32(inode->i_generation);

	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
		if (old_valid_dev(inode->i_rdev)) {
			fcb->i_data[0] =
				cpu_to_le32(old_encode_dev(inode->i_rdev));
			fcb->i_data[1] = 0;
		} else {
			fcb->i_data[0] = 0;
			fcb->i_data[1] =
				cpu_to_le32(new_encode_dev(inode->i_rdev));
			fcb->i_data[2] = 0;
		}
	} else
		memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));

	ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios);
	if (unlikely(ret)) {
		EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
		goto free_args;
	}

	attr = g_attr_inode_data;
	attr.val_ptr = fcb;
	ios->out_attr_len = 1;
	ios->out_attr = &attr;

	wait_obj_created(oi);

	if (!do_sync) {
		args->sbi = sbi;
		ios->done = updatei_done;
		ios->private = args;
	}

	ret = ore_write(ios);
	if (!do_sync && !ret) {
		atomic_inc(&sbi->s_curr_pending);
		goto out; /* deallocation in updatei_done */
	}

	ore_put_io_state(ios);
free_args:
	kfree(args);
out:
	EXOFS_DBGMSG("(0x%lx) do_sync=%d ret=>%d\n",
		     inode->i_ino, do_sync, ret);
	return ret;
}

int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	/* FIXME: fix fsync and use wbc->sync_mode == WB_SYNC_ALL */
	return exofs_update_inode(inode, 1);
}

/*
 * Callback function from exofs_delete_inode() - don't have much cleaning up
 * to do.
 */
static void delete_done(struct ore_io_state *ios, void *p)
{
	struct exofs_sb_info *sbi = p;

	ore_put_io_state(ios);

	atomic_dec(&sbi->s_curr_pending);
}
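/*
 * NOTE (editor's note): eviction must serialize against the asynchronous
 * creation above, hence the wait_obj_created() in exofs_evict_inode() before
 * the OSD remove is issued; this also guarantees create_done() is never
 * called against an already-evicted inode.
 */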
/*
 * Called when the refcount of an inode reaches zero. We remove the object
 * from the OSD here. We make sure the object was created before we try and
 * delete it.
 */
void exofs_evict_inode(struct inode *inode)
{
	struct exofs_i_info *oi = exofs_i(inode);
	struct super_block *sb = inode->i_sb;
	struct exofs_sb_info *sbi = sb->s_fs_info;
	struct ore_io_state *ios;
	int ret;

	truncate_inode_pages_final(&inode->i_data);

	/* TODO: should do better here */
	if (inode->i_nlink || is_bad_inode(inode))
		goto no_delete;

	inode->i_size = 0;
	clear_inode(inode);

	/* if we are deleting an obj that hasn't been created yet, wait.
	 * This also makes sure that create_done cannot be called with an
	 * already evicted inode.
	 */
	wait_obj_created(oi);
	/* ignore the error, attempt a remove anyway */

	/* Now Remove the OSD objects */
	ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios);
	if (unlikely(ret)) {
		EXOFS_ERR("%s: ore_get_io_state failed\n", __func__);
		return;
	}

	ios->done = delete_done;
	ios->private = sbi;

	ret = ore_remove(ios);
	if (ret) {
		EXOFS_ERR("%s: ore_remove failed\n", __func__);
		ore_put_io_state(ios);
		return;
	}
	atomic_inc(&sbi->s_curr_pending);

	return;

no_delete:
	clear_inode(inode);
}