/*
 * Module for pnfs flexfile layout driver.
 *
 * Copyright (c) 2014, Primary Data, Inc. All rights reserved.
 *
 * Tao Peng <bergwolf@primarydata.com>
 */

#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>

#include <linux/sunrpc/metrics.h>

#include "flexfilelayout.h"
#include "../nfs4session.h"
#include "../nfs4idmap.h"
#include "../internal.h"
#include "../delegation.h"
#include "../nfs4trace.h"
#include "../iostat.h"
#include "../nfs.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS_LD

#define FF_LAYOUT_POLL_RETRY_MAX	(15*HZ)

static struct pnfs_layout_hdr *
ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
{
	struct nfs4_flexfile_layout *ffl;

	ffl = kzalloc(sizeof(*ffl), gfp_flags);
	if (ffl) {
		INIT_LIST_HEAD(&ffl->error_list);
		return &ffl->generic_hdr;
	} else
		return NULL;
}

static void
ff_layout_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct nfs4_ff_layout_ds_err *err, *n;

	list_for_each_entry_safe(err, n, &FF_LAYOUT_FROM_HDR(lo)->error_list,
				 list) {
		list_del(&err->list);
		kfree(err);
	}
	kfree(FF_LAYOUT_FROM_HDR(lo));
}

static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, NFS4_STATEID_SIZE);
	if (unlikely(p == NULL))
		return -ENOBUFS;
	memcpy(stateid, p, NFS4_STATEID_SIZE);
	dprintk("%s: stateid id= [%x%x%x%x]\n", __func__,
		p[0], p[1], p[2], p[3]);
	return 0;
}

static int decode_deviceid(struct xdr_stream *xdr, struct nfs4_deviceid *devid)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, NFS4_DEVICEID4_SIZE);
	if (unlikely(!p))
		return -ENOBUFS;
	memcpy(devid, p, NFS4_DEVICEID4_SIZE);
	nfs4_print_deviceid(devid);
	return 0;
}

static int decode_nfs_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, 4);
	if (unlikely(!p))
		return -ENOBUFS;
	fh->size = be32_to_cpup(p++);
	if (fh->size > sizeof(struct nfs_fh)) {
		printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n",
		       fh->size);
		return -EOVERFLOW;
	}
	/* fh.data */
	p = xdr_inline_decode(xdr, fh->size);
	if (unlikely(!p))
		return -ENOBUFS;
	memcpy(&fh->data, p, fh->size);
	dprintk("%s: fh len %d\n", __func__, fh->size);

	return 0;
}

/*
 * Currently only stringified uids and gids are accepted.
 * I.e., kerberos is not supported to the DSes, so no principals.
 *
 * That means that one common function will suffice, but when
 * principals are added, this should be split to accommodate
 * calls to both nfs_map_name_to_uid() and nfs_map_group_to_gid().
 */
static int
decode_name(struct xdr_stream *xdr, u32 *id)
{
	__be32 *p;
	int len;

	/* opaque_length(4) */
	p = xdr_inline_decode(xdr, 4);
	if (unlikely(!p))
		return -ENOBUFS;
	len = be32_to_cpup(p++);
	if (len < 0)
		return -EINVAL;

	dprintk("%s: len %u\n", __func__, len);

	/* opaque body */
	p = xdr_inline_decode(xdr, len);
	if (unlikely(!p))
		return -ENOBUFS;

	if (!nfs_map_string_to_numeric((char *)p, len, id))
		return -EINVAL;

	return 0;
}

static void ff_layout_free_mirror_array(struct nfs4_ff_layout_segment *fls)
{
	int i;

	if (fls->mirror_array) {
		for (i = 0; i < fls->mirror_array_cnt; i++) {
			/* normally mirror_ds is freed in
			 * .free_deviceid_node but we still do it here
			 * for the .alloc_lseg error path */
			if (fls->mirror_array[i]) {
				kfree(fls->mirror_array[i]->fh_versions);
				nfs4_ff_layout_put_deviceid(fls->mirror_array[i]->mirror_ds);
				kfree(fls->mirror_array[i]);
			}
		}
		kfree(fls->mirror_array);
		fls->mirror_array = NULL;
	}
}

static int ff_layout_check_layout(struct nfs4_layoutget_res *lgr)
{
	int ret = 0;

	dprintk("--> %s\n", __func__);

	/* FIXME: remove this check when layout segment support is added */
	if (lgr->range.offset != 0 ||
	    lgr->range.length != NFS4_MAX_UINT64) {
		dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
			__func__);
		ret = -EINVAL;
	}

	dprintk("--> %s returns %d\n", __func__, ret);
	return ret;
}

static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls)
{
	if (fls) {
		ff_layout_free_mirror_array(fls);
		kfree(fls);
	}
}

static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)
{
	struct nfs4_ff_layout_mirror *tmp;
	int i, j;

	for (i = 0; i < fls->mirror_array_cnt - 1; i++) {
		for (j = i + 1; j < fls->mirror_array_cnt; j++)
			if (fls->mirror_array[i]->efficiency <
			    fls->mirror_array[j]->efficiency) {
				tmp = fls->mirror_array[i];
				fls->mirror_array[i] = fls->mirror_array[j];
				fls->mirror_array[j] = tmp;
			}
	}
}

static struct pnfs_layout_segment *
ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
		     struct nfs4_layoutget_res *lgr,
		     gfp_t gfp_flags)
{
	struct pnfs_layout_segment *ret;
	struct nfs4_ff_layout_segment *fls = NULL;
	struct xdr_stream stream;
	struct xdr_buf buf;
	struct page *scratch;
	u64 stripe_unit;
	u32 mirror_array_cnt;
	__be32 *p;
	int i, rc;

	dprintk("--> %s\n", __func__);
	scratch = alloc_page(gfp_flags);
	if (!scratch)
		return ERR_PTR(-ENOMEM);

	xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages,
			      lgr->layoutp->len);
	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);

	/* stripe unit and mirror_array_cnt */
	rc = -EIO;
	p = xdr_inline_decode(&stream, 8 + 4);
	if (!p)
		goto out_err_free;

	p = xdr_decode_hyper(p, &stripe_unit);
	mirror_array_cnt = be32_to_cpup(p++);
	dprintk("%s: stripe_unit=%llu mirror_array_cnt=%u\n", __func__,
		stripe_unit, mirror_array_cnt);

	if (mirror_array_cnt > NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT ||
	    mirror_array_cnt == 0)
		goto out_err_free;

	rc = -ENOMEM;
	fls = kzalloc(sizeof(*fls), gfp_flags);
	if (!fls)
		goto out_err_free;

	fls->mirror_array_cnt = mirror_array_cnt;
	fls->stripe_unit = stripe_unit;
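	/*
	 * Decode the per-mirror data that follows: each mirror carries a
	 * DS count, a deviceid, an efficiency hint, a stateid, an array
	 * of file handles, and the synthetic uid/gid to use on the DS.
	 */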
	fls->mirror_array = kcalloc(fls->mirror_array_cnt,
				    sizeof(fls->mirror_array[0]), gfp_flags);
	if (fls->mirror_array == NULL)
		goto out_err_free;

	for (i = 0; i < fls->mirror_array_cnt; i++) {
		struct nfs4_deviceid devid;
		struct nfs4_deviceid_node *idnode;
		u32 ds_count;
		u32 fh_count;
		int j;

		rc = -EIO;
		p = xdr_inline_decode(&stream, 4);
		if (!p)
			goto out_err_free;
		ds_count = be32_to_cpup(p);

		/* FIXME: allow for striping? */
		if (ds_count != 1)
			goto out_err_free;

		fls->mirror_array[i] =
			kzalloc(sizeof(struct nfs4_ff_layout_mirror),
				gfp_flags);
		if (fls->mirror_array[i] == NULL) {
			rc = -ENOMEM;
			goto out_err_free;
		}

		spin_lock_init(&fls->mirror_array[i]->lock);
		fls->mirror_array[i]->ds_count = ds_count;

		/* deviceid */
		rc = decode_deviceid(&stream, &devid);
		if (rc)
			goto out_err_free;

		idnode = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode),
						&devid, lh->plh_lc_cred,
						gfp_flags);
		/*
		 * Upon success, mirror_ds was allocated by a previous
		 * getdeviceinfo, or newly by .alloc_deviceid_node; a
		 * nfs4_find_get_deviceid() failure is therefore a
		 * getdeviceinfo failure.
		 */
		if (idnode)
			fls->mirror_array[i]->mirror_ds =
				FF_LAYOUT_MIRROR_DS(idnode);
		else
			goto out_err_free;

		/* efficiency */
		rc = -EIO;
		p = xdr_inline_decode(&stream, 4);
		if (!p)
			goto out_err_free;
		fls->mirror_array[i]->efficiency = be32_to_cpup(p);

		/* stateid */
		rc = decode_stateid(&stream, &fls->mirror_array[i]->stateid);
		if (rc)
			goto out_err_free;

		/* fh */
		rc = -EIO;
		p = xdr_inline_decode(&stream, 4);
		if (!p)
			goto out_err_free;
		fh_count = be32_to_cpup(p);

		fls->mirror_array[i]->fh_versions =
			kcalloc(fh_count, sizeof(struct nfs_fh),
				gfp_flags);
		if (fls->mirror_array[i]->fh_versions == NULL) {
			rc = -ENOMEM;
			goto out_err_free;
		}

		for (j = 0; j < fh_count; j++) {
			rc = decode_nfs_fh(&stream,
					   &fls->mirror_array[i]->fh_versions[j]);
			if (rc)
				goto out_err_free;
		}

		fls->mirror_array[i]->fh_versions_cnt = fh_count;

		/* user */
		rc = decode_name(&stream, &fls->mirror_array[i]->uid);
		if (rc)
			goto out_err_free;

		/* group */
		rc = decode_name(&stream, &fls->mirror_array[i]->gid);
		if (rc)
			goto out_err_free;

		dprintk("%s: uid %d gid %d\n", __func__,
			fls->mirror_array[i]->uid,
			fls->mirror_array[i]->gid);
	}

	ff_layout_sort_mirrors(fls);
	rc = ff_layout_check_layout(lgr);
	if (rc)
		goto out_err_free;

	ret = &fls->generic_hdr;
	dprintk("<-- %s (success)\n", __func__);
out_free_page:
	__free_page(scratch);
	return ret;
out_err_free:
	_ff_layout_free_lseg(fls);
	ret = ERR_PTR(rc);
	dprintk("<-- %s (%d)\n", __func__, rc);
	goto out_free_page;
}

static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout)
{
	struct pnfs_layout_segment *lseg;

	list_for_each_entry(lseg, &layout->plh_segs, pls_list)
		if (lseg->pls_range.iomode == IOMODE_RW)
			return true;

	return false;
}

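/*
 * Drop the per-mirror deviceid and credential references, and free the
 * layout-wide commit buckets once the last RW lseg for the file is gone.
 */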
static void
ff_layout_free_lseg(struct pnfs_layout_segment *lseg)
{
	struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
	int i;

	dprintk("--> %s\n", __func__);

	for (i = 0; i < fls->mirror_array_cnt; i++) {
		if (fls->mirror_array[i]) {
			nfs4_ff_layout_put_deviceid(fls->mirror_array[i]->mirror_ds);
			fls->mirror_array[i]->mirror_ds = NULL;
			if (fls->mirror_array[i]->cred) {
				put_rpccred(fls->mirror_array[i]->cred);
				fls->mirror_array[i]->cred = NULL;
			}
		}
	}

	if (lseg->pls_range.iomode == IOMODE_RW) {
		struct nfs4_flexfile_layout *ffl;
		struct inode *inode;

		ffl = FF_LAYOUT_FROM_HDR(lseg->pls_layout);
		inode = ffl->generic_hdr.plh_inode;
		spin_lock(&inode->i_lock);
		if (!ff_layout_has_rw_segments(lseg->pls_layout)) {
			ffl->commit_info.nbuckets = 0;
			kfree(ffl->commit_info.buckets);
			ffl->commit_info.buckets = NULL;
		}
		spin_unlock(&inode->i_lock);
	}
	_ff_layout_free_lseg(fls);
}

/* Return 1 until we have multiple lsegs support */
static int
ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls)
{
	return 1;
}

static int
ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,
			    struct nfs_commit_info *cinfo,
			    gfp_t gfp_flags)
{
	struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
	struct pnfs_commit_bucket *buckets;
	int size;

	if (cinfo->ds->nbuckets != 0) {
		/* This assumes there is only one RW lseg per file.
		 * To support multiple lsegs per file, we need to
		 * change struct pnfs_commit_bucket to allow dynamic
		 * increasing of nbuckets.
		 */
		return 0;
	}

	size = ff_layout_get_lseg_count(fls) * FF_LAYOUT_MIRROR_COUNT(lseg);

	buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
			  gfp_flags);
	if (!buckets)
		return -ENOMEM;
	else {
		int i;

		spin_lock(cinfo->lock);
		if (cinfo->ds->nbuckets != 0)
			kfree(buckets);
		else {
			cinfo->ds->buckets = buckets;
			cinfo->ds->nbuckets = size;
			for (i = 0; i < size; i++) {
				INIT_LIST_HEAD(&buckets[i].written);
				INIT_LIST_HEAD(&buckets[i].committing);
				/* mark direct verifier as unset */
				buckets[i].direct_verf.committed =
					NFS_INVALID_STABLE_HOW;
			}
		}
		spin_unlock(cinfo->lock);
		return 0;
	}
}

static struct nfs4_pnfs_ds *
ff_layout_choose_best_ds_for_read(struct nfs_pageio_descriptor *pgio,
				  int *best_idx)
{
	struct nfs4_ff_layout_segment *fls;
	struct nfs4_pnfs_ds *ds;
	int idx;

	fls = FF_LAYOUT_LSEG(pgio->pg_lseg);
	/* mirrors are sorted by efficiency */
	for (idx = 0; idx < fls->mirror_array_cnt; idx++) {
		ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, idx, false);
		if (ds) {
			*best_idx = idx;
			return ds;
		}
	}

	return NULL;
}

static void
ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
		       struct nfs_page *req)
{
	struct nfs_pgio_mirror *pgm;
	struct nfs4_ff_layout_mirror *mirror;
	struct nfs4_pnfs_ds *ds;
	int ds_idx;

	/* Use full layout for now */
	if (!pgio->pg_lseg)
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   0,
						   NFS4_MAX_UINT64,
						   IOMODE_READ,
						   GFP_KERNEL);
	/* If no lseg, fall back to read through mds */
	if (pgio->pg_lseg == NULL)
		goto out_mds;

	ds = ff_layout_choose_best_ds_for_read(pgio, &ds_idx);
	if (!ds)
		goto out_mds;
	mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);

	pgio->pg_mirror_idx = ds_idx;

	/* read always uses only one mirror - idx 0 for pgio layer */
	pgm = &pgio->pg_mirrors[0];
	pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;

	return;
out_mds:
	pnfs_put_lseg(pgio->pg_lseg);
	pgio->pg_lseg = NULL;
	nfs_pageio_reset_read_mds(pgio);
}

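/*
 * Writes go to every mirror in the layout, so set up one pgio mirror
 * per layout mirror, each sized to its data server's write size.
 */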
static void
ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
			struct nfs_page *req)
{
	struct nfs4_ff_layout_mirror *mirror;
	struct nfs_pgio_mirror *pgm;
	struct nfs_commit_info cinfo;
	struct nfs4_pnfs_ds *ds;
	int i;
	int status;

	if (!pgio->pg_lseg)
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   0,
						   NFS4_MAX_UINT64,
						   IOMODE_RW,
						   GFP_NOFS);
	/* If no lseg, fall back to write through mds */
	if (pgio->pg_lseg == NULL)
		goto out_mds;

	nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
	status = ff_layout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
	if (status < 0)
		goto out_mds;

	/* Use a direct mapping of ds_idx to pgio mirror_idx */
	if (WARN_ON_ONCE(pgio->pg_mirror_count !=
	    FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg)))
		goto out_mds;

	for (i = 0; i < pgio->pg_mirror_count; i++) {
		ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true);
		if (!ds)
			goto out_mds;
		pgm = &pgio->pg_mirrors[i];
		mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
		pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize;
	}

	return;

out_mds:
	pnfs_put_lseg(pgio->pg_lseg);
	pgio->pg_lseg = NULL;
	nfs_pageio_reset_write_mds(pgio);
}

static unsigned int
ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
				    struct nfs_page *req)
{
	if (!pgio->pg_lseg)
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   0,
						   NFS4_MAX_UINT64,
						   IOMODE_RW,
						   GFP_NOFS);
	if (pgio->pg_lseg)
		return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg);

	/* no lseg means that pnfs is not in use, so no mirroring here */
	pnfs_put_lseg(pgio->pg_lseg);
	pgio->pg_lseg = NULL;
	nfs_pageio_reset_write_mds(pgio);
	return 1;
}

static const struct nfs_pageio_ops ff_layout_pg_read_ops = {
	.pg_init = ff_layout_pg_init_read,
	.pg_test = pnfs_generic_pg_test,
	.pg_doio = pnfs_generic_pg_readpages,
	.pg_cleanup = pnfs_generic_pg_cleanup,
};

static const struct nfs_pageio_ops ff_layout_pg_write_ops = {
	.pg_init = ff_layout_pg_init_write,
	.pg_test = pnfs_generic_pg_test,
	.pg_doio = pnfs_generic_pg_writepages,
	.pg_get_mirror_count = ff_layout_pg_get_mirror_count_write,
	.pg_cleanup = pnfs_generic_pg_cleanup,
};

static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
{
	struct rpc_task *task = &hdr->task;

	pnfs_layoutcommit_inode(hdr->inode, false);

	if (retry_pnfs) {
		dprintk("%s Reset task %5u for i/o through pNFS "
			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
			hdr->task.tk_pid,
			hdr->inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(hdr->inode),
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		if (!hdr->dreq) {
			struct nfs_open_context *ctx;

			ctx = nfs_list_entry(hdr->pages.next)->wb_context;
			set_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
			hdr->completion_ops->error_cleanup(&hdr->pages);
		} else {
			nfs_direct_set_resched_writes(hdr->dreq);
			/* fake unstable write to let common nfs resend pages */
			hdr->verf.committed = NFS_UNSTABLE;
			hdr->good_bytes = hdr->args.count;
		}
		return;
	}

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		dprintk("%s Reset task %5u for i/o through MDS "
			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
			hdr->task.tk_pid,
			hdr->inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(hdr->inode),
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		task->tk_status = pnfs_write_done_resend_to_mds(hdr);
	}
}

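/*
 * Unlike the write case above, a read reset has no pNFS retry flag:
 * the failed read is always redirected through the MDS.
 */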
static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
{
	struct rpc_task *task = &hdr->task;

	pnfs_layoutcommit_inode(hdr->inode, false);

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		dprintk("%s Reset task %5u for i/o through MDS "
			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
			hdr->task.tk_pid,
			hdr->inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(hdr->inode),
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		task->tk_status = pnfs_read_done_resend_to_mds(hdr);
	}
}

static int ff_layout_async_handle_error_v4(struct rpc_task *task,
					   struct nfs4_state *state,
					   struct nfs_client *clp,
					   struct pnfs_layout_segment *lseg,
					   int idx)
{
	struct pnfs_layout_hdr *lo = lseg->pls_layout;
	struct inode *inode = lo->plh_inode;
	struct nfs_server *mds_server = NFS_SERVER(inode);

	struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
	struct nfs_client *mds_client = mds_server->nfs_client;
	struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;

	if (task->tk_status >= 0)
		return 0;

	switch (task->tk_status) {
	/* MDS state errors */
	case -NFS4ERR_DELEG_REVOKED:
	case -NFS4ERR_ADMIN_REVOKED:
	case -NFS4ERR_BAD_STATEID:
		if (state == NULL)
			break;
		nfs_remove_bad_delegation(state->inode);
	case -NFS4ERR_OPENMODE:
		if (state == NULL)
			break;
		if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
			goto out_bad_stateid;
		goto wait_on_recovery;
	case -NFS4ERR_EXPIRED:
		if (state != NULL) {
			if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
				goto out_bad_stateid;
		}
		nfs4_schedule_lease_recovery(mds_client);
		goto wait_on_recovery;
	/* DS session errors */
	case -NFS4ERR_BADSESSION:
	case -NFS4ERR_BADSLOT:
	case -NFS4ERR_BAD_HIGH_SLOT:
	case -NFS4ERR_DEADSESSION:
	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
	case -NFS4ERR_SEQ_FALSE_RETRY:
	case -NFS4ERR_SEQ_MISORDERED:
		dprintk("%s ERROR %d, Reset session. Exchangeid "
			"flags 0x%x\n", __func__, task->tk_status,
			clp->cl_exchange_flags);
		nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
		break;
	case -NFS4ERR_DELAY:
	case -NFS4ERR_GRACE:
		rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
		break;
	case -NFS4ERR_RETRY_UNCACHED_REP:
		break;
	/* Invalidate Layout errors */
	case -NFS4ERR_PNFS_NO_LAYOUT:
	case -ESTALE:           /* mapped NFS4ERR_STALE */
	case -EBADHANDLE:       /* mapped NFS4ERR_BADHANDLE */
	case -EISDIR:           /* mapped NFS4ERR_ISDIR */
	case -NFS4ERR_FHEXPIRED:
	case -NFS4ERR_WRONG_TYPE:
		dprintk("%s Invalid layout error %d\n", __func__,
			task->tk_status);
		/*
		 * Destroy layout so new i/o will get a new layout.
		 * Layout will not be destroyed until all current lseg
		 * references are put. Mark layout as invalid to resend failed
		 * i/o and all i/o waiting on the slot table to the MDS until
		 * layout is destroyed and a new valid layout is obtained.
		 */
		pnfs_destroy_layout(NFS_I(inode));
		rpc_wake_up(&tbl->slot_tbl_waitq);
		goto reset;
	/* RPC connection errors */
	case -ECONNREFUSED:
	case -EHOSTDOWN:
	case -EHOSTUNREACH:
	case -ENETUNREACH:
	case -EIO:
	case -ETIMEDOUT:
	case -EPIPE:
		dprintk("%s DS connection error %d\n", __func__,
			task->tk_status);
		nfs4_mark_deviceid_unavailable(devid);
		rpc_wake_up(&tbl->slot_tbl_waitq);
		/* fall through */
	default:
		if (ff_layout_has_available_ds(lseg))
			return -NFS4ERR_RESET_TO_PNFS;
reset:
		dprintk("%s Retry through MDS. Error %d\n", __func__,
			task->tk_status);
		return -NFS4ERR_RESET_TO_MDS;
	}
out:
	task->tk_status = 0;
	return -EAGAIN;
out_bad_stateid:
	task->tk_status = -EIO;
	return 0;
wait_on_recovery:
	rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
	if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
		rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
	goto out;
}

/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
					   struct pnfs_layout_segment *lseg,
					   int idx)
{
	struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);

	if (task->tk_status >= 0)
		return 0;

	if (task->tk_status != -EJUKEBOX) {
		dprintk("%s DS connection error %d\n", __func__,
			task->tk_status);
		nfs4_mark_deviceid_unavailable(devid);
		if (ff_layout_has_available_ds(lseg))
			return -NFS4ERR_RESET_TO_PNFS;
		else
			return -NFS4ERR_RESET_TO_MDS;
	}

	if (task->tk_status == -EJUKEBOX)
		nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
	task->tk_status = 0;
	rpc_restart_call(task);
	rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
	return -EAGAIN;
}

static int ff_layout_async_handle_error(struct rpc_task *task,
					struct nfs4_state *state,
					struct nfs_client *clp,
					struct pnfs_layout_segment *lseg,
					int idx)
{
	int vers = clp->cl_nfs_mod->rpc_vers->number;

	switch (vers) {
	case 3:
		return ff_layout_async_handle_error_v3(task, lseg, idx);
	case 4:
		return ff_layout_async_handle_error_v4(task, state, clp,
						       lseg, idx);
	default:
		/* should never happen */
		WARN_ON_ONCE(1);
		return 0;
	}
}

static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
					int idx, u64 offset, u64 length,
					u32 status, int opnum)
{
	struct nfs4_ff_layout_mirror *mirror;
	int err;

	mirror = FF_LAYOUT_COMP(lseg, idx);
	err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
				       mirror, offset, length, status, opnum,
				       GFP_NOIO);
	dprintk("%s: err %d op %d status %u\n", __func__, err, opnum, status);
}

/* NFS_PROTO call done callback routines */

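/*
 * Handle a completed DS READ: record any DS error against the mirror,
 * then resend through another mirror, fall back to the MDS, or restart
 * the RPC, as directed by the async error handler.
 */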
static int ff_layout_read_done_cb(struct rpc_task *task,
				  struct nfs_pgio_header *hdr)
{
	struct inode *inode;
	int err;

	trace_nfs4_pnfs_read(hdr, task->tk_status);
	if (task->tk_status == -ETIMEDOUT && !hdr->res.op_status)
		hdr->res.op_status = NFS4ERR_NXIO;
	if (task->tk_status < 0 && hdr->res.op_status)
		ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
					    hdr->args.offset, hdr->args.count,
					    hdr->res.op_status, OP_READ);
	err = ff_layout_async_handle_error(task, hdr->args.context->state,
					   hdr->ds_clp, hdr->lseg,
					   hdr->pgio_mirror_idx);

	switch (err) {
	case -NFS4ERR_RESET_TO_PNFS:
		set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
			&hdr->lseg->pls_layout->plh_flags);
		pnfs_read_resend_pnfs(hdr);
		return task->tk_status;
	case -NFS4ERR_RESET_TO_MDS:
		inode = hdr->lseg->pls_layout->plh_inode;
		pnfs_error_mark_layout_for_return(inode, hdr->lseg);
		ff_layout_reset_read(hdr);
		return task->tk_status;
	case -EAGAIN:
		rpc_restart_call_prepare(task);
		return -EAGAIN;
	}

	return 0;
}

/*
 * We reference the rpc_cred of the first WRITE that triggers the need for
 * a LAYOUTCOMMIT, and use it to send the layoutcommit compound.
 * rfc5661 is not clear about which credential should be used.
 *
 * The flexfile client should treat a FILE_SYNC reply from a DS as
 * DATA_SYNC, so to follow
 * http://www.rfc-editor.org/errata_search.php?rfc=5661&eid=2751
 * we always send a layoutcommit after DS writes.
 */
static void
ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr)
{
	pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
			      hdr->mds_offset + hdr->res.count);
	dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
		(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
}

static bool
ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx)
{
	/* No mirroring for now */
	struct nfs4_deviceid_node *node = FF_LAYOUT_DEVID_NODE(lseg, idx);

	return ff_layout_test_devid_unavailable(node);
}

static int ff_layout_read_prepare_common(struct rpc_task *task,
					 struct nfs_pgio_header *hdr)
{
	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
		rpc_exit(task, -EIO);
		return -EIO;
	}
	if (ff_layout_reset_to_mds(hdr->lseg, hdr->pgio_mirror_idx)) {
		dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
		if (ff_layout_has_available_ds(hdr->lseg))
			pnfs_read_resend_pnfs(hdr);
		else
			ff_layout_reset_read(hdr);
		rpc_exit(task, 0);
		return -EAGAIN;
	}
	hdr->pgio_done_cb = ff_layout_read_done_cb;

	return 0;
}

/*
 * Call ops for the async read/write cases.
 * In the case of dense layouts, the offset needs to be reset to its
 * original value.
 */
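/*
 * v3 DSes carry no session state, so the v3 prepare routine can start
 * the call immediately; the v4 variant below must first claim a slot
 * and refresh the open/lock stateid.
 */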
static void ff_layout_read_prepare_v3(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_read_prepare_common(task, hdr))
		return;

	rpc_call_start(task);
}

static int ff_layout_setup_sequence(struct nfs_client *ds_clp,
				    struct nfs4_sequence_args *args,
				    struct nfs4_sequence_res *res,
				    struct rpc_task *task)
{
	if (ds_clp->cl_session)
		return nfs41_setup_sequence(ds_clp->cl_session,
					    args,
					    res,
					    task);
	return nfs40_setup_sequence(ds_clp->cl_slot_tbl,
				    args,
				    res,
				    task);
}

static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_read_prepare_common(task, hdr))
		return;

	if (ff_layout_setup_sequence(hdr->ds_clp,
				     &hdr->args.seq_args,
				     &hdr->res.seq_res,
				     task))
		return;

	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
				hdr->args.lock_context, FMODE_READ) == -EIO)
		rpc_exit(task, -EIO);	/* lost lock, terminate I/O */
}

static void ff_layout_read_call_done(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
	    task->tk_status == 0) {
		nfs4_sequence_done(task, &hdr->res.seq_res);
		return;
	}

	/* Note this may cause RPC to be resent */
	hdr->mds_ops->rpc_call_done(task, hdr);
}

static void ff_layout_read_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	rpc_count_iostats_metrics(task,
	    &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]);
}

static int ff_layout_write_done_cb(struct rpc_task *task,
				   struct nfs_pgio_header *hdr)
{
	struct inode *inode;
	int err;

	trace_nfs4_pnfs_write(hdr, task->tk_status);
	if (task->tk_status == -ETIMEDOUT && !hdr->res.op_status)
		hdr->res.op_status = NFS4ERR_NXIO;
	if (task->tk_status < 0 && hdr->res.op_status)
		ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
					    hdr->args.offset, hdr->args.count,
					    hdr->res.op_status, OP_WRITE);
	err = ff_layout_async_handle_error(task, hdr->args.context->state,
					   hdr->ds_clp, hdr->lseg,
					   hdr->pgio_mirror_idx);

	switch (err) {
	case -NFS4ERR_RESET_TO_PNFS:
	case -NFS4ERR_RESET_TO_MDS:
		inode = hdr->lseg->pls_layout->plh_inode;
		pnfs_error_mark_layout_for_return(inode, hdr->lseg);
		if (err == -NFS4ERR_RESET_TO_PNFS) {
			pnfs_set_retry_layoutget(hdr->lseg->pls_layout);
			ff_layout_reset_write(hdr, true);
		} else {
			pnfs_clear_retry_layoutget(hdr->lseg->pls_layout);
			ff_layout_reset_write(hdr, false);
		}
		return task->tk_status;
	case -EAGAIN:
		rpc_restart_call_prepare(task);
		return -EAGAIN;
	}

	if (hdr->res.verf->committed == NFS_FILE_SYNC ||
	    hdr->res.verf->committed == NFS_DATA_SYNC)
		ff_layout_set_layoutcommit(hdr);

	/* zero out fattr since we don't care about DS attrs at all */
	hdr->fattr.valid = 0;
	if (task->tk_status >= 0)
		nfs_writeback_update_inode(hdr);

	return 0;
}

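/*
 * Handle a completed DS COMMIT: track DS errors, arrange for writes to
 * be resent on layout errors, and schedule a LAYOUTCOMMIT once
 * unstable data has been committed on the DS.
 */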
static int ff_layout_commit_done_cb(struct rpc_task *task,
				    struct nfs_commit_data *data)
{
	struct inode *inode;
	int err;

	trace_nfs4_pnfs_commit_ds(data, task->tk_status);
	if (task->tk_status == -ETIMEDOUT && !data->res.op_status)
		data->res.op_status = NFS4ERR_NXIO;
	if (task->tk_status < 0 && data->res.op_status)
		ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index,
					    data->args.offset, data->args.count,
					    data->res.op_status, OP_COMMIT);
	err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
					   data->lseg, data->ds_commit_index);

	switch (err) {
	case -NFS4ERR_RESET_TO_PNFS:
	case -NFS4ERR_RESET_TO_MDS:
		inode = data->lseg->pls_layout->plh_inode;
		pnfs_error_mark_layout_for_return(inode, data->lseg);
		if (err == -NFS4ERR_RESET_TO_PNFS)
			pnfs_set_retry_layoutget(data->lseg->pls_layout);
		else
			pnfs_clear_retry_layoutget(data->lseg->pls_layout);
		pnfs_generic_prepare_to_resend_writes(data);
		return -EAGAIN;
	case -EAGAIN:
		rpc_restart_call_prepare(task);
		return -EAGAIN;
	}

	if (data->verf.committed == NFS_UNSTABLE)
		pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);

	return 0;
}

static int ff_layout_write_prepare_common(struct rpc_task *task,
					  struct nfs_pgio_header *hdr)
{
	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
		rpc_exit(task, -EIO);
		return -EIO;
	}

	if (ff_layout_reset_to_mds(hdr->lseg, hdr->pgio_mirror_idx)) {
		bool retry_pnfs;

		retry_pnfs = ff_layout_has_available_ds(hdr->lseg);
		dprintk("%s task %u reset io to %s\n", __func__,
			task->tk_pid, retry_pnfs ? "pNFS" : "MDS");
		ff_layout_reset_write(hdr, retry_pnfs);
		rpc_exit(task, 0);
		return -EAGAIN;
	}

	return 0;
}

static void ff_layout_write_prepare_v3(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_write_prepare_common(task, hdr))
		return;

	rpc_call_start(task);
}

static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_write_prepare_common(task, hdr))
		return;

	if (ff_layout_setup_sequence(hdr->ds_clp,
				     &hdr->args.seq_args,
				     &hdr->res.seq_res,
				     task))
		return;

	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
				hdr->args.lock_context, FMODE_WRITE) == -EIO)
		rpc_exit(task, -EIO);	/* lost lock, terminate I/O */
}

static void ff_layout_write_call_done(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
	    task->tk_status == 0) {
		nfs4_sequence_done(task, &hdr->res.seq_res);
		return;
	}

	/* Note this may cause RPC to be resent */
	hdr->mds_ops->rpc_call_done(task, hdr);
}

static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	rpc_count_iostats_metrics(task,
	    &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
}

static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
{
	rpc_call_start(task);
}

static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data)
{
	struct nfs_commit_data *wdata = data;

	ff_layout_setup_sequence(wdata->ds_clp,
				 &wdata->args.seq_args,
				 &wdata->res.seq_res,
				 task);
}

static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_commit_data *cdata = data;

	rpc_count_iostats_metrics(task,
	    &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]);
}

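/* RPC call_ops tables: one READ, WRITE and COMMIT set per DS NFS version. */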
static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
	.rpc_call_prepare = ff_layout_read_prepare_v3,
	.rpc_call_done = ff_layout_read_call_done,
	.rpc_count_stats = ff_layout_read_count_stats,
	.rpc_release = pnfs_generic_rw_release,
};

static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
	.rpc_call_prepare = ff_layout_read_prepare_v4,
	.rpc_call_done = ff_layout_read_call_done,
	.rpc_count_stats = ff_layout_read_count_stats,
	.rpc_release = pnfs_generic_rw_release,
};

static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
	.rpc_call_prepare = ff_layout_write_prepare_v3,
	.rpc_call_done = ff_layout_write_call_done,
	.rpc_count_stats = ff_layout_write_count_stats,
	.rpc_release = pnfs_generic_rw_release,
};

static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
	.rpc_call_prepare = ff_layout_write_prepare_v4,
	.rpc_call_done = ff_layout_write_call_done,
	.rpc_count_stats = ff_layout_write_count_stats,
	.rpc_release = pnfs_generic_rw_release,
};

static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = {
	.rpc_call_prepare = ff_layout_commit_prepare_v3,
	.rpc_call_done = pnfs_generic_write_commit_done,
	.rpc_count_stats = ff_layout_commit_count_stats,
	.rpc_release = pnfs_generic_commit_release,
};

static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = {
	.rpc_call_prepare = ff_layout_commit_prepare_v4,
	.rpc_call_done = pnfs_generic_write_commit_done,
	.rpc_count_stats = ff_layout_commit_count_stats,
	.rpc_release = pnfs_generic_commit_release,
};

static enum pnfs_try_status
ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
{
	struct pnfs_layout_segment *lseg = hdr->lseg;
	struct nfs4_pnfs_ds *ds;
	struct rpc_clnt *ds_clnt;
	struct rpc_cred *ds_cred;
	loff_t offset = hdr->args.offset;
	u32 idx = hdr->pgio_mirror_idx;
	int vers;
	struct nfs_fh *fh;

	dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
		__func__, hdr->inode->i_ino,
		hdr->args.pgbase, (size_t)hdr->args.count, offset);

	ds = nfs4_ff_layout_prepare_ds(lseg, idx, false);
	if (!ds)
		goto out_failed;

	ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
						   hdr->inode);
	if (IS_ERR(ds_clnt))
		goto out_failed;

	ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
	if (IS_ERR(ds_cred))
		goto out_failed;

	vers = nfs4_ff_layout_ds_version(lseg, idx);

	dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__,
		ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), vers);

	atomic_inc(&ds->ds_clp->cl_count);
	hdr->ds_clp = ds->ds_clp;
	fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
	if (fh)
		hdr->args.fh = fh;

	/*
	 * Note that if we ever decide to split across DSes,
	 * then we may need to handle dense-like offsets.
	 */
	hdr->args.offset = offset;
	hdr->mds_offset = offset;

	/* Perform an asynchronous read to ds */
	nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
			  vers == 3 ? &ff_layout_read_call_ops_v3 :
				      &ff_layout_read_call_ops_v4,
			  0, RPC_TASK_SOFTCONN);

	return PNFS_ATTEMPTED;

out_failed:
	if (ff_layout_has_available_ds(lseg))
		return PNFS_TRY_AGAIN;
	return PNFS_NOT_ATTEMPTED;
}

/* Perform async writes. */
static enum pnfs_try_status
ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
{
	struct pnfs_layout_segment *lseg = hdr->lseg;
	struct nfs4_pnfs_ds *ds;
	struct rpc_clnt *ds_clnt;
	struct rpc_cred *ds_cred;
	loff_t offset = hdr->args.offset;
	int vers;
	struct nfs_fh *fh;
	int idx = hdr->pgio_mirror_idx;

	ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
	if (!ds)
		return PNFS_NOT_ATTEMPTED;

	ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
						   hdr->inode);
	if (IS_ERR(ds_clnt))
		return PNFS_NOT_ATTEMPTED;

	ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
	if (IS_ERR(ds_cred))
		return PNFS_NOT_ATTEMPTED;

	vers = nfs4_ff_layout_ds_version(lseg, idx);

	dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d vers %d\n",
		__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
		offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count),
		vers);

	hdr->pgio_done_cb = ff_layout_write_done_cb;
	atomic_inc(&ds->ds_clp->cl_count);
	hdr->ds_clp = ds->ds_clp;
	hdr->ds_commit_idx = idx;
	fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
	if (fh)
		hdr->args.fh = fh;

	/*
	 * Note that if we ever decide to split across DSes,
	 * then we may need to handle dense-like offsets.
	 */
	hdr->args.offset = offset;

	/* Perform an asynchronous write */
	nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
			  vers == 3 ? &ff_layout_write_call_ops_v3 :
				      &ff_layout_write_call_ops_v4,
			  sync, RPC_TASK_SOFTCONN);
	return PNFS_ATTEMPTED;
}

static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
	return i;
}

static struct nfs_fh *
select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
	struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg);

	/* FIXME: Assume that there is only one NFS version available
	 * for the DS.
	 */
	return &flseg->mirror_array[i]->fh_versions[0];
}

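/*
 * Send a COMMIT to the DS holding this bucket's writes; on failure to
 * reach the DS, arrange for the writes to be resent instead.
 */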
static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
{
	struct pnfs_layout_segment *lseg = data->lseg;
	struct nfs4_pnfs_ds *ds;
	struct rpc_clnt *ds_clnt;
	struct rpc_cred *ds_cred;
	u32 idx;
	int vers;
	struct nfs_fh *fh;

	idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
	ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
	if (!ds)
		goto out_err;

	ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
						   data->inode);
	if (IS_ERR(ds_clnt))
		goto out_err;

	ds_cred = ff_layout_get_ds_cred(lseg, idx, data->cred);
	if (IS_ERR(ds_cred))
		goto out_err;

	vers = nfs4_ff_layout_ds_version(lseg, idx);

	dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__,
		data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count),
		vers);
	data->commit_done_cb = ff_layout_commit_done_cb;
	data->cred = ds_cred;
	atomic_inc(&ds->ds_clp->cl_count);
	data->ds_clp = ds->ds_clp;
	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
	if (fh)
		data->args.fh = fh;

	return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
				   vers == 3 ? &ff_layout_commit_call_ops_v3 :
					       &ff_layout_commit_call_ops_v4,
				   how, RPC_TASK_SOFTCONN);
out_err:
	pnfs_generic_prepare_to_resend_writes(data);
	pnfs_generic_commit_release(data);
	return -EAGAIN;
}

static int
ff_layout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
			  int how, struct nfs_commit_info *cinfo)
{
	return pnfs_generic_commit_pagelist(inode, mds_pages, how, cinfo,
					    ff_layout_initiate_commit);
}

static struct pnfs_ds_commit_info *
ff_layout_get_ds_info(struct inode *inode)
{
	struct pnfs_layout_hdr *layout = NFS_I(inode)->layout;

	if (layout == NULL)
		return NULL;

	return &FF_LAYOUT_FROM_HDR(layout)->commit_info;
}

static void
ff_layout_free_deviceid_node(struct nfs4_deviceid_node *d)
{
	nfs4_ff_layout_free_deviceid(container_of(d, struct nfs4_ff_layout_ds,
						  id_node));
}

static int ff_layout_encode_ioerr(struct nfs4_flexfile_layout *flo,
				  struct xdr_stream *xdr,
				  const struct nfs4_layoutreturn_args *args)
{
	struct pnfs_layout_hdr *hdr = &flo->generic_hdr;
	__be32 *start;
	int count = 0, ret = 0;

	start = xdr_reserve_space(xdr, 4);
	if (unlikely(!start))
		return -E2BIG;

	/* This assumes we always return _ALL_ layouts */
	spin_lock(&hdr->plh_inode->i_lock);
	ret = ff_layout_encode_ds_ioerr(flo, xdr, &count, &args->range);
	spin_unlock(&hdr->plh_inode->i_lock);

	*start = cpu_to_be32(count);

	return ret;
}

/* report nothing for now */
static void ff_layout_encode_iostats(struct nfs4_flexfile_layout *flo,
				     struct xdr_stream *xdr,
				     const struct nfs4_layoutreturn_args *args)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 4);
	if (likely(p))
		*p = cpu_to_be32(0);
}

static struct nfs4_deviceid_node *
ff_layout_alloc_deviceid_node(struct nfs_server *server,
			      struct pnfs_device *pdev, gfp_t gfp_flags)
{
	struct nfs4_ff_layout_ds *dsaddr;

	dsaddr = nfs4_ff_alloc_deviceid_node(server, pdev, gfp_flags);
	if (!dsaddr)
		return NULL;
	return &dsaddr->id_node;
}

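/*
 * The layoutreturn body is the ioerr list followed by the (currently
 * empty) iostats list, prefixed with its own length in bytes.
 */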
static void
ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo,
			      struct xdr_stream *xdr,
			      const struct nfs4_layoutreturn_args *args)
{
	struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo);
	__be32 *start;

	dprintk("%s: Begin\n", __func__);
	start = xdr_reserve_space(xdr, 4);
	BUG_ON(!start);

	ff_layout_encode_ioerr(flo, xdr, args);
	ff_layout_encode_iostats(flo, xdr, args);

	*start = cpu_to_be32((xdr->p - start - 1) * 4);
	dprintk("%s: Return\n", __func__);
}

static struct pnfs_layoutdriver_type flexfilelayout_type = {
	.id			= LAYOUT_FLEX_FILES,
	.name			= "LAYOUT_FLEX_FILES",
	.owner			= THIS_MODULE,
	.alloc_layout_hdr	= ff_layout_alloc_layout_hdr,
	.free_layout_hdr	= ff_layout_free_layout_hdr,
	.alloc_lseg		= ff_layout_alloc_lseg,
	.free_lseg		= ff_layout_free_lseg,
	.pg_read_ops		= &ff_layout_pg_read_ops,
	.pg_write_ops		= &ff_layout_pg_write_ops,
	.get_ds_info		= ff_layout_get_ds_info,
	.free_deviceid_node	= ff_layout_free_deviceid_node,
	.mark_request_commit	= pnfs_layout_mark_request_commit,
	.clear_request_commit	= pnfs_generic_clear_request_commit,
	.scan_commit_lists	= pnfs_generic_scan_commit_lists,
	.recover_commit_reqs	= pnfs_generic_recover_commit_reqs,
	.commit_pagelist	= ff_layout_commit_pagelist,
	.read_pagelist		= ff_layout_read_pagelist,
	.write_pagelist		= ff_layout_write_pagelist,
	.alloc_deviceid_node	= ff_layout_alloc_deviceid_node,
	.encode_layoutreturn	= ff_layout_encode_layoutreturn,
	.sync			= pnfs_nfs_generic_sync,
};

static int __init nfs4flexfilelayout_init(void)
{
	printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Registering...\n",
	       __func__);
	return pnfs_register_layoutdriver(&flexfilelayout_type);
}

static void __exit nfs4flexfilelayout_exit(void)
{
	printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Unregistering...\n",
	       __func__);
	pnfs_unregister_layoutdriver(&flexfilelayout_type);
}

MODULE_ALIAS("nfs-layouttype4-4");

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("The NFSv4 flexfile layout driver");

module_init(nfs4flexfilelayout_init);
module_exit(nfs4flexfilelayout_exit);