root/fs/ocfs2/extent_map.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ocfs2_extent_map_init
  2. __ocfs2_extent_map_lookup
  3. ocfs2_extent_map_lookup
  4. ocfs2_extent_map_trunc
  5. ocfs2_ei_is_contained
  6. ocfs2_copy_emi_fields
  7. ocfs2_try_to_merge_extent_map
  8. ocfs2_extent_map_insert_rec
  9. ocfs2_last_eb_is_empty
  10. ocfs2_search_for_hole_index
  11. ocfs2_figure_hole_clusters
  12. ocfs2_get_clusters_nocache
  13. ocfs2_relative_extent_offsets
  14. ocfs2_xattr_get_clusters
  15. ocfs2_get_clusters
  16. ocfs2_extent_map_get_blocks
  17. ocfs2_fiemap_inline
  18. ocfs2_fiemap
  19. ocfs2_overwrite_io
  20. ocfs2_seek_data_hole_offset
  21. ocfs2_read_virt_blocks

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /* -*- mode: c; c-basic-offset: 8; -*-
   3  * vim: noexpandtab sw=8 ts=8 sts=0:
   4  *
   5  * extent_map.c
   6  *
   7  * Block/Cluster mapping functions
   8  *
   9  * Copyright (C) 2004 Oracle.  All rights reserved.
  10  */
  11 
  12 #include <linux/fs.h>
  13 #include <linux/init.h>
  14 #include <linux/slab.h>
  15 #include <linux/types.h>
  16 #include <linux/fiemap.h>
  17 
  18 #include <cluster/masklog.h>
  19 
  20 #include "ocfs2.h"
  21 
  22 #include "alloc.h"
  23 #include "dlmglue.h"
  24 #include "extent_map.h"
  25 #include "inode.h"
  26 #include "super.h"
  27 #include "symlink.h"
  28 #include "aops.h"
  29 #include "ocfs2_trace.h"
  30 
  31 #include "buffer_head_io.h"
  32 
  33 /*
  34  * The extent caching implementation is intentionally trivial.
  35  *
  36  * We only cache a small number of extents stored directly on the
  37  * inode, so linear order operations are acceptable. If we ever want
  38  * to increase the size of the extent map, then these algorithms must
  39  * get smarter.
  40  */
  41 
  42 void ocfs2_extent_map_init(struct inode *inode)
  43 {
  44         struct ocfs2_inode_info *oi = OCFS2_I(inode);
  45 
  46         oi->ip_extent_map.em_num_items = 0;
  47         INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
  48 }
  49 
  50 static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
  51                                       unsigned int cpos,
  52                                       struct ocfs2_extent_map_item **ret_emi)
  53 {
  54         unsigned int range;
  55         struct ocfs2_extent_map_item *emi;
  56 
  57         *ret_emi = NULL;
  58 
  59         list_for_each_entry(emi, &em->em_list, ei_list) {
  60                 range = emi->ei_cpos + emi->ei_clusters;
  61 
  62                 if (cpos >= emi->ei_cpos && cpos < range) {
  63                         list_move(&emi->ei_list, &em->em_list);
  64 
  65                         *ret_emi = emi;
  66                         break;
  67                 }
  68         }
  69 }
  70 
  71 static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
  72                                    unsigned int *phys, unsigned int *len,
  73                                    unsigned int *flags)
  74 {
  75         unsigned int coff;
  76         struct ocfs2_inode_info *oi = OCFS2_I(inode);
  77         struct ocfs2_extent_map_item *emi;
  78 
  79         spin_lock(&oi->ip_lock);
  80 
  81         __ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
  82         if (emi) {
  83                 coff = cpos - emi->ei_cpos;
  84                 *phys = emi->ei_phys + coff;
  85                 if (len)
  86                         *len = emi->ei_clusters - coff;
  87                 if (flags)
  88                         *flags = emi->ei_flags;
  89         }
  90 
  91         spin_unlock(&oi->ip_lock);
  92 
  93         if (emi == NULL)
  94                 return -ENOENT;
  95 
  96         return 0;
  97 }
  98 
  99 /*
 100  * Forget about all clusters equal to or greater than cpos.
 101  */
 102 void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
 103 {
 104         struct ocfs2_extent_map_item *emi, *n;
 105         struct ocfs2_inode_info *oi = OCFS2_I(inode);
 106         struct ocfs2_extent_map *em = &oi->ip_extent_map;
 107         LIST_HEAD(tmp_list);
 108         unsigned int range;
 109 
 110         spin_lock(&oi->ip_lock);
 111         list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
 112                 if (emi->ei_cpos >= cpos) {
 113                         /* Full truncate of this record. */
 114                         list_move(&emi->ei_list, &tmp_list);
 115                         BUG_ON(em->em_num_items == 0);
 116                         em->em_num_items--;
 117                         continue;
 118                 }
 119 
 120                 range = emi->ei_cpos + emi->ei_clusters;
 121                 if (range > cpos) {
 122                         /* Partial truncate */
 123                         emi->ei_clusters = cpos - emi->ei_cpos;
 124                 }
 125         }
 126         spin_unlock(&oi->ip_lock);
 127 
 128         list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
 129                 list_del(&emi->ei_list);
 130                 kfree(emi);
 131         }
 132 }
 133 
 134 /*
 135  * Is any part of emi2 contained within emi1
 136  */
 137 static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
 138                                  struct ocfs2_extent_map_item *emi2)
 139 {
 140         unsigned int range1, range2;
 141 
 142         /*
 143          * Check if logical start of emi2 is inside emi1
 144          */
 145         range1 = emi1->ei_cpos + emi1->ei_clusters;
 146         if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
 147                 return 1;
 148 
 149         /*
 150          * Check if logical end of emi2 is inside emi1
 151          */
 152         range2 = emi2->ei_cpos + emi2->ei_clusters;
 153         if (range2 > emi1->ei_cpos && range2 <= range1)
 154                 return 1;
 155 
 156         return 0;
 157 }
 158 
 159 static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
 160                                   struct ocfs2_extent_map_item *src)
 161 {
 162         dest->ei_cpos = src->ei_cpos;
 163         dest->ei_phys = src->ei_phys;
 164         dest->ei_clusters = src->ei_clusters;
 165         dest->ei_flags = src->ei_flags;
 166 }
 167 
 168 /*
 169  * Try to merge emi with ins. Returns 1 if merge succeeds, zero
 170  * otherwise.
 171  */
 172 static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
 173                                          struct ocfs2_extent_map_item *ins)
 174 {
 175         /*
 176          * Handle contiguousness
 177          */
 178         if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
 179             ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
 180             ins->ei_flags == emi->ei_flags) {
 181                 emi->ei_clusters += ins->ei_clusters;
 182                 return 1;
 183         } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
 184                    (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
 185                    ins->ei_flags == emi->ei_flags) {
 186                 emi->ei_phys = ins->ei_phys;
 187                 emi->ei_cpos = ins->ei_cpos;
 188                 emi->ei_clusters += ins->ei_clusters;
 189                 return 1;
 190         }
 191 
 192         /*
 193          * Overlapping extents - this shouldn't happen unless we've
 194          * split an extent to change it's flags. That is exceedingly
 195          * rare, so there's no sense in trying to optimize it yet.
 196          */
 197         if (ocfs2_ei_is_contained(emi, ins) ||
 198             ocfs2_ei_is_contained(ins, emi)) {
 199                 ocfs2_copy_emi_fields(emi, ins);
 200                 return 1;
 201         }
 202 
 203         /* No merge was possible. */
 204         return 0;
 205 }
 206 
 207 /*
 208  * In order to reduce complexity on the caller, this insert function
 209  * is intentionally liberal in what it will accept.
 210  *
 211  * The only rule is that the truncate call *must* be used whenever
 212  * records have been deleted. This avoids inserting overlapping
 213  * records with different physical mappings.
 214  */
 215 void ocfs2_extent_map_insert_rec(struct inode *inode,
 216                                  struct ocfs2_extent_rec *rec)
 217 {
 218         struct ocfs2_inode_info *oi = OCFS2_I(inode);
 219         struct ocfs2_extent_map *em = &oi->ip_extent_map;
 220         struct ocfs2_extent_map_item *emi, *new_emi = NULL;
 221         struct ocfs2_extent_map_item ins;
 222 
 223         ins.ei_cpos = le32_to_cpu(rec->e_cpos);
 224         ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
 225                                                le64_to_cpu(rec->e_blkno));
 226         ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
 227         ins.ei_flags = rec->e_flags;
 228 
 229 search:
 230         spin_lock(&oi->ip_lock);
 231 
 232         list_for_each_entry(emi, &em->em_list, ei_list) {
 233                 if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
 234                         list_move(&emi->ei_list, &em->em_list);
 235                         spin_unlock(&oi->ip_lock);
 236                         goto out;
 237                 }
 238         }
 239 
 240         /*
 241          * No item could be merged.
 242          *
 243          * Either allocate and add a new item, or overwrite the last recently
 244          * inserted.
 245          */
 246 
 247         if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
 248                 if (new_emi == NULL) {
 249                         spin_unlock(&oi->ip_lock);
 250 
 251                         new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
 252                         if (new_emi == NULL)
 253                                 goto out;
 254 
 255                         goto search;
 256                 }
 257 
 258                 ocfs2_copy_emi_fields(new_emi, &ins);
 259                 list_add(&new_emi->ei_list, &em->em_list);
 260                 em->em_num_items++;
 261                 new_emi = NULL;
 262         } else {
 263                 BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
 264                 emi = list_entry(em->em_list.prev,
 265                                  struct ocfs2_extent_map_item, ei_list);
 266                 list_move(&emi->ei_list, &em->em_list);
 267                 ocfs2_copy_emi_fields(emi, &ins);
 268         }
 269 
 270         spin_unlock(&oi->ip_lock);
 271 
 272 out:
 273         kfree(new_emi);
 274 }
 275 
 276 static int ocfs2_last_eb_is_empty(struct inode *inode,
 277                                   struct ocfs2_dinode *di)
 278 {
 279         int ret, next_free;
 280         u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
 281         struct buffer_head *eb_bh = NULL;
 282         struct ocfs2_extent_block *eb;
 283         struct ocfs2_extent_list *el;
 284 
 285         ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
 286         if (ret) {
 287                 mlog_errno(ret);
 288                 goto out;
 289         }
 290 
 291         eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 292         el = &eb->h_list;
 293 
 294         if (el->l_tree_depth) {
 295                 ocfs2_error(inode->i_sb,
 296                             "Inode %lu has non zero tree depth in leaf block %llu\n",
 297                             inode->i_ino,
 298                             (unsigned long long)eb_bh->b_blocknr);
 299                 ret = -EROFS;
 300                 goto out;
 301         }
 302 
 303         next_free = le16_to_cpu(el->l_next_free_rec);
 304 
 305         if (next_free == 0 ||
 306             (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
 307                 ret = 1;
 308 
 309 out:
 310         brelse(eb_bh);
 311         return ret;
 312 }
 313 
 314 /*
 315  * Return the 1st index within el which contains an extent start
 316  * larger than v_cluster.
 317  */
 318 static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
 319                                        u32 v_cluster)
 320 {
 321         int i;
 322         struct ocfs2_extent_rec *rec;
 323 
 324         for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
 325                 rec = &el->l_recs[i];
 326 
 327                 if (v_cluster < le32_to_cpu(rec->e_cpos))
 328                         break;
 329         }
 330 
 331         return i;
 332 }
 333 
 334 /*
 335  * Figure out the size of a hole which starts at v_cluster within the given
 336  * extent list.
 337  *
 338  * If there is no more allocation past v_cluster, we return the maximum
 339  * cluster size minus v_cluster.
 340  *
 341  * If we have in-inode extents, then el points to the dinode list and
 342  * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
 343  * containing el.
 344  */
 345 int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
 346                                struct ocfs2_extent_list *el,
 347                                struct buffer_head *eb_bh,
 348                                u32 v_cluster,
 349                                u32 *num_clusters)
 350 {
 351         int ret, i;
 352         struct buffer_head *next_eb_bh = NULL;
 353         struct ocfs2_extent_block *eb, *next_eb;
 354 
 355         i = ocfs2_search_for_hole_index(el, v_cluster);
 356 
 357         if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
 358                 eb = (struct ocfs2_extent_block *)eb_bh->b_data;
 359 
 360                 /*
 361                  * Check the next leaf for any extents.
 362                  */
 363 
 364                 if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
 365                         goto no_more_extents;
 366 
 367                 ret = ocfs2_read_extent_block(ci,
 368                                               le64_to_cpu(eb->h_next_leaf_blk),
 369                                               &next_eb_bh);
 370                 if (ret) {
 371                         mlog_errno(ret);
 372                         goto out;
 373                 }
 374 
 375                 next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
 376                 el = &next_eb->h_list;
 377                 i = ocfs2_search_for_hole_index(el, v_cluster);
 378         }
 379 
 380 no_more_extents:
 381         if (i == le16_to_cpu(el->l_next_free_rec)) {
 382                 /*
 383                  * We're at the end of our existing allocation. Just
 384                  * return the maximum number of clusters we could
 385                  * possibly allocate.
 386                  */
 387                 *num_clusters = UINT_MAX - v_cluster;
 388         } else {
 389                 *num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
 390         }
 391 
 392         ret = 0;
 393 out:
 394         brelse(next_eb_bh);
 395         return ret;
 396 }
 397 
 398 static int ocfs2_get_clusters_nocache(struct inode *inode,
 399                                       struct buffer_head *di_bh,
 400                                       u32 v_cluster, unsigned int *hole_len,
 401                                       struct ocfs2_extent_rec *ret_rec,
 402                                       unsigned int *is_last)
 403 {
 404         int i, ret, tree_height, len;
 405         struct ocfs2_dinode *di;
 406         struct ocfs2_extent_block *uninitialized_var(eb);
 407         struct ocfs2_extent_list *el;
 408         struct ocfs2_extent_rec *rec;
 409         struct buffer_head *eb_bh = NULL;
 410 
 411         memset(ret_rec, 0, sizeof(*ret_rec));
 412         if (is_last)
 413                 *is_last = 0;
 414 
 415         di = (struct ocfs2_dinode *) di_bh->b_data;
 416         el = &di->id2.i_list;
 417         tree_height = le16_to_cpu(el->l_tree_depth);
 418 
 419         if (tree_height > 0) {
 420                 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
 421                                       &eb_bh);
 422                 if (ret) {
 423                         mlog_errno(ret);
 424                         goto out;
 425                 }
 426 
 427                 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 428                 el = &eb->h_list;
 429 
 430                 if (el->l_tree_depth) {
 431                         ocfs2_error(inode->i_sb,
 432                                     "Inode %lu has non zero tree depth in leaf block %llu\n",
 433                                     inode->i_ino,
 434                                     (unsigned long long)eb_bh->b_blocknr);
 435                         ret = -EROFS;
 436                         goto out;
 437                 }
 438         }
 439 
 440         i = ocfs2_search_extent_list(el, v_cluster);
 441         if (i == -1) {
 442                 /*
 443                  * Holes can be larger than the maximum size of an
 444                  * extent, so we return their lengths in a separate
 445                  * field.
 446                  */
 447                 if (hole_len) {
 448                         ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
 449                                                          el, eb_bh,
 450                                                          v_cluster, &len);
 451                         if (ret) {
 452                                 mlog_errno(ret);
 453                                 goto out;
 454                         }
 455 
 456                         *hole_len = len;
 457                 }
 458                 goto out_hole;
 459         }
 460 
 461         rec = &el->l_recs[i];
 462 
 463         BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
 464 
 465         if (!rec->e_blkno) {
 466                 ocfs2_error(inode->i_sb,
 467                             "Inode %lu has bad extent record (%u, %u, 0)\n",
 468                             inode->i_ino,
 469                             le32_to_cpu(rec->e_cpos),
 470                             ocfs2_rec_clusters(el, rec));
 471                 ret = -EROFS;
 472                 goto out;
 473         }
 474 
 475         *ret_rec = *rec;
 476 
 477         /*
 478          * Checking for last extent is potentially expensive - we
 479          * might have to look at the next leaf over to see if it's
 480          * empty.
 481          *
 482          * The first two checks are to see whether the caller even
 483          * cares for this information, and if the extent is at least
 484          * the last in it's list.
 485          *
 486          * If those hold true, then the extent is last if any of the
 487          * additional conditions hold true:
 488          *  - Extent list is in-inode
 489          *  - Extent list is right-most
 490          *  - Extent list is 2nd to rightmost, with empty right-most
 491          */
 492         if (is_last) {
 493                 if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
 494                         if (tree_height == 0)
 495                                 *is_last = 1;
 496                         else if (eb->h_blkno == di->i_last_eb_blk)
 497                                 *is_last = 1;
 498                         else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
 499                                 ret = ocfs2_last_eb_is_empty(inode, di);
 500                                 if (ret < 0) {
 501                                         mlog_errno(ret);
 502                                         goto out;
 503                                 }
 504                                 if (ret == 1)
 505                                         *is_last = 1;
 506                         }
 507                 }
 508         }
 509 
 510 out_hole:
 511         ret = 0;
 512 out:
 513         brelse(eb_bh);
 514         return ret;
 515 }
 516 
 517 static void ocfs2_relative_extent_offsets(struct super_block *sb,
 518                                           u32 v_cluster,
 519                                           struct ocfs2_extent_rec *rec,
 520                                           u32 *p_cluster, u32 *num_clusters)
 521 
 522 {
 523         u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);
 524 
 525         *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
 526         *p_cluster = *p_cluster + coff;
 527 
 528         if (num_clusters)
 529                 *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
 530 }
 531 
 532 int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 533                              u32 *p_cluster, u32 *num_clusters,
 534                              struct ocfs2_extent_list *el,
 535                              unsigned int *extent_flags)
 536 {
 537         int ret = 0, i;
 538         struct buffer_head *eb_bh = NULL;
 539         struct ocfs2_extent_block *eb;
 540         struct ocfs2_extent_rec *rec;
 541         u32 coff;
 542 
 543         if (el->l_tree_depth) {
 544                 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
 545                                       &eb_bh);
 546                 if (ret) {
 547                         mlog_errno(ret);
 548                         goto out;
 549                 }
 550 
 551                 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 552                 el = &eb->h_list;
 553 
 554                 if (el->l_tree_depth) {
 555                         ocfs2_error(inode->i_sb,
 556                                     "Inode %lu has non zero tree depth in xattr leaf block %llu\n",
 557                                     inode->i_ino,
 558                                     (unsigned long long)eb_bh->b_blocknr);
 559                         ret = -EROFS;
 560                         goto out;
 561                 }
 562         }
 563 
 564         i = ocfs2_search_extent_list(el, v_cluster);
 565         if (i == -1) {
 566                 ret = -EROFS;
 567                 mlog_errno(ret);
 568                 goto out;
 569         } else {
 570                 rec = &el->l_recs[i];
 571                 BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
 572 
 573                 if (!rec->e_blkno) {
 574                         ocfs2_error(inode->i_sb,
 575                                     "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
 576                                     inode->i_ino,
 577                                     le32_to_cpu(rec->e_cpos),
 578                                     ocfs2_rec_clusters(el, rec));
 579                         ret = -EROFS;
 580                         goto out;
 581                 }
 582                 coff = v_cluster - le32_to_cpu(rec->e_cpos);
 583                 *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
 584                                                     le64_to_cpu(rec->e_blkno));
 585                 *p_cluster = *p_cluster + coff;
 586                 if (num_clusters)
 587                         *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
 588 
 589                 if (extent_flags)
 590                         *extent_flags = rec->e_flags;
 591         }
 592 out:
 593         brelse(eb_bh);
 594         return ret;
 595 }
 596 
 597 int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
 598                        u32 *p_cluster, u32 *num_clusters,
 599                        unsigned int *extent_flags)
 600 {
 601         int ret;
 602         unsigned int uninitialized_var(hole_len), flags = 0;
 603         struct buffer_head *di_bh = NULL;
 604         struct ocfs2_extent_rec rec;
 605 
 606         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
 607                 ret = -ERANGE;
 608                 mlog_errno(ret);
 609                 goto out;
 610         }
 611 
 612         ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
 613                                       num_clusters, extent_flags);
 614         if (ret == 0)
 615                 goto out;
 616 
 617         ret = ocfs2_read_inode_block(inode, &di_bh);
 618         if (ret) {
 619                 mlog_errno(ret);
 620                 goto out;
 621         }
 622 
 623         ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
 624                                          &rec, NULL);
 625         if (ret) {
 626                 mlog_errno(ret);
 627                 goto out;
 628         }
 629 
 630         if (rec.e_blkno == 0ULL) {
 631                 /*
 632                  * A hole was found. Return some canned values that
 633                  * callers can key on. If asked for, num_clusters will
 634                  * be populated with the size of the hole.
 635                  */
 636                 *p_cluster = 0;
 637                 if (num_clusters) {
 638                         *num_clusters = hole_len;
 639                 }
 640         } else {
 641                 ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
 642                                               p_cluster, num_clusters);
 643                 flags = rec.e_flags;
 644 
 645                 ocfs2_extent_map_insert_rec(inode, &rec);
 646         }
 647 
 648         if (extent_flags)
 649                 *extent_flags = flags;
 650 
 651 out:
 652         brelse(di_bh);
 653         return ret;
 654 }
 655 
 656 /*
 657  * This expects alloc_sem to be held. The allocation cannot change at
 658  * all while the map is in the process of being updated.
 659  */
 660 int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
 661                                 u64 *ret_count, unsigned int *extent_flags)
 662 {
 663         int ret;
 664         int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
 665         u32 cpos, num_clusters, p_cluster;
 666         u64 boff = 0;
 667 
 668         cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);
 669 
 670         ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
 671                                  extent_flags);
 672         if (ret) {
 673                 mlog_errno(ret);
 674                 goto out;
 675         }
 676 
 677         /*
 678          * p_cluster == 0 indicates a hole.
 679          */
 680         if (p_cluster) {
 681                 boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
 682                 boff += (v_blkno & (u64)(bpc - 1));
 683         }
 684 
 685         *p_blkno = boff;
 686 
 687         if (ret_count) {
 688                 *ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
 689                 *ret_count -= v_blkno & (u64)(bpc - 1);
 690         }
 691 
 692 out:
 693         return ret;
 694 }
 695 
 696 /*
 697  * The ocfs2_fiemap_inline() may be a little bit misleading, since
 698  * it not only handles the fiemap for inlined files, but also deals
 699  * with the fast symlink, cause they have no difference for extent
 700  * mapping per se.
 701  */
 702 static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
 703                                struct fiemap_extent_info *fieinfo,
 704                                u64 map_start)
 705 {
 706         int ret;
 707         unsigned int id_count;
 708         struct ocfs2_dinode *di;
 709         u64 phys;
 710         u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
 711         struct ocfs2_inode_info *oi = OCFS2_I(inode);
 712 
 713         di = (struct ocfs2_dinode *)di_bh->b_data;
 714         if (ocfs2_inode_is_fast_symlink(inode))
 715                 id_count = ocfs2_fast_symlink_chars(inode->i_sb);
 716         else
 717                 id_count = le16_to_cpu(di->id2.i_data.id_count);
 718 
 719         if (map_start < id_count) {
 720                 phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
 721                 if (ocfs2_inode_is_fast_symlink(inode))
 722                         phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
 723                 else
 724                         phys += offsetof(struct ocfs2_dinode,
 725                                          id2.i_data.id_data);
 726 
 727                 ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
 728                                               flags);
 729                 if (ret < 0)
 730                         return ret;
 731         }
 732 
 733         return 0;
 734 }
 735 
 736 #define OCFS2_FIEMAP_FLAGS      (FIEMAP_FLAG_SYNC)
 737 
 738 int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 739                  u64 map_start, u64 map_len)
 740 {
 741         int ret, is_last;
 742         u32 mapping_end, cpos;
 743         unsigned int hole_size;
 744         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 745         u64 len_bytes, phys_bytes, virt_bytes;
 746         struct buffer_head *di_bh = NULL;
 747         struct ocfs2_extent_rec rec;
 748 
 749         ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS);
 750         if (ret)
 751                 return ret;
 752 
 753         ret = ocfs2_inode_lock(inode, &di_bh, 0);
 754         if (ret) {
 755                 mlog_errno(ret);
 756                 goto out;
 757         }
 758 
 759         down_read(&OCFS2_I(inode)->ip_alloc_sem);
 760 
 761         /*
 762          * Handle inline-data and fast symlink separately.
 763          */
 764         if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
 765             ocfs2_inode_is_fast_symlink(inode)) {
 766                 ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
 767                 goto out_unlock;
 768         }
 769 
 770         cpos = map_start >> osb->s_clustersize_bits;
 771         mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
 772                                                map_start + map_len);
 773         is_last = 0;
 774         while (cpos < mapping_end && !is_last) {
 775                 u32 fe_flags;
 776 
 777                 ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
 778                                                  &hole_size, &rec, &is_last);
 779                 if (ret) {
 780                         mlog_errno(ret);
 781                         goto out_unlock;
 782                 }
 783 
 784                 if (rec.e_blkno == 0ULL) {
 785                         cpos += hole_size;
 786                         continue;
 787                 }
 788 
 789                 fe_flags = 0;
 790                 if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
 791                         fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
 792                 if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
 793                         fe_flags |= FIEMAP_EXTENT_SHARED;
 794                 if (is_last)
 795                         fe_flags |= FIEMAP_EXTENT_LAST;
 796                 len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
 797                 phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
 798                 virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;
 799 
 800                 ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
 801                                               len_bytes, fe_flags);
 802                 if (ret)
 803                         break;
 804 
 805                 cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters);
 806         }
 807 
 808         if (ret > 0)
 809                 ret = 0;
 810 
 811 out_unlock:
 812         brelse(di_bh);
 813 
 814         up_read(&OCFS2_I(inode)->ip_alloc_sem);
 815 
 816         ocfs2_inode_unlock(inode, 0);
 817 out:
 818 
 819         return ret;
 820 }
 821 
 822 /* Is IO overwriting allocated blocks? */
 823 int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
 824                        u64 map_start, u64 map_len)
 825 {
 826         int ret = 0, is_last;
 827         u32 mapping_end, cpos;
 828         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 829         struct ocfs2_extent_rec rec;
 830 
 831         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
 832                 if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
 833                         return ret;
 834                 else
 835                         return -EAGAIN;
 836         }
 837 
 838         cpos = map_start >> osb->s_clustersize_bits;
 839         mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
 840                                                map_start + map_len);
 841         is_last = 0;
 842         while (cpos < mapping_end && !is_last) {
 843                 ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
 844                                                  NULL, &rec, &is_last);
 845                 if (ret) {
 846                         mlog_errno(ret);
 847                         goto out;
 848                 }
 849 
 850                 if (rec.e_blkno == 0ULL)
 851                         break;
 852 
 853                 if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
 854                         break;
 855 
 856                 cpos = le32_to_cpu(rec.e_cpos) +
 857                         le16_to_cpu(rec.e_leaf_clusters);
 858         }
 859 
 860         if (cpos < mapping_end)
 861                 ret = -EAGAIN;
 862 out:
 863         return ret;
 864 }
 865 
 866 int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
 867 {
 868         struct inode *inode = file->f_mapping->host;
 869         int ret;
 870         unsigned int is_last = 0, is_data = 0;
 871         u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
 872         u32 cpos, cend, clen, hole_size;
 873         u64 extoff, extlen;
 874         struct buffer_head *di_bh = NULL;
 875         struct ocfs2_extent_rec rec;
 876 
 877         BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);
 878 
 879         ret = ocfs2_inode_lock(inode, &di_bh, 0);
 880         if (ret) {
 881                 mlog_errno(ret);
 882                 goto out;
 883         }
 884 
 885         down_read(&OCFS2_I(inode)->ip_alloc_sem);
 886 
 887         if (*offset >= i_size_read(inode)) {
 888                 ret = -ENXIO;
 889                 goto out_unlock;
 890         }
 891 
 892         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
 893                 if (whence == SEEK_HOLE)
 894                         *offset = i_size_read(inode);
 895                 goto out_unlock;
 896         }
 897 
 898         clen = 0;
 899         cpos = *offset >> cs_bits;
 900         cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
 901 
 902         while (cpos < cend && !is_last) {
 903                 ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
 904                                                  &rec, &is_last);
 905                 if (ret) {
 906                         mlog_errno(ret);
 907                         goto out_unlock;
 908                 }
 909 
 910                 extoff = cpos;
 911                 extoff <<= cs_bits;
 912 
 913                 if (rec.e_blkno == 0ULL) {
 914                         clen = hole_size;
 915                         is_data = 0;
 916                 } else {
 917                         clen = le16_to_cpu(rec.e_leaf_clusters) -
 918                                 (cpos - le32_to_cpu(rec.e_cpos));
 919                         is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ?  0 : 1;
 920                 }
 921 
 922                 if ((!is_data && whence == SEEK_HOLE) ||
 923                     (is_data && whence == SEEK_DATA)) {
 924                         if (extoff > *offset)
 925                                 *offset = extoff;
 926                         goto out_unlock;
 927                 }
 928 
 929                 if (!is_last)
 930                         cpos += clen;
 931         }
 932 
 933         if (whence == SEEK_HOLE) {
 934                 extoff = cpos;
 935                 extoff <<= cs_bits;
 936                 extlen = clen;
 937                 extlen <<=  cs_bits;
 938 
 939                 if ((extoff + extlen) > i_size_read(inode))
 940                         extlen = i_size_read(inode) - extoff;
 941                 extoff += extlen;
 942                 if (extoff > *offset)
 943                         *offset = extoff;
 944                 goto out_unlock;
 945         }
 946 
 947         ret = -ENXIO;
 948 
 949 out_unlock:
 950 
 951         brelse(di_bh);
 952 
 953         up_read(&OCFS2_I(inode)->ip_alloc_sem);
 954 
 955         ocfs2_inode_unlock(inode, 0);
 956 out:
 957         return ret;
 958 }
 959 
 960 int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
 961                            struct buffer_head *bhs[], int flags,
 962                            int (*validate)(struct super_block *sb,
 963                                            struct buffer_head *bh))
 964 {
 965         int rc = 0;
 966         u64 p_block, p_count;
 967         int i, count, done = 0;
 968 
 969         trace_ocfs2_read_virt_blocks(
 970              inode, (unsigned long long)v_block, nr, bhs, flags,
 971              validate);
 972 
 973         if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
 974             i_size_read(inode)) {
 975                 BUG_ON(!(flags & OCFS2_BH_READAHEAD));
 976                 goto out;
 977         }
 978 
 979         while (done < nr) {
 980                 down_read(&OCFS2_I(inode)->ip_alloc_sem);
 981                 rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
 982                                                  &p_block, &p_count, NULL);
 983                 up_read(&OCFS2_I(inode)->ip_alloc_sem);
 984                 if (rc) {
 985                         mlog_errno(rc);
 986                         break;
 987                 }
 988 
 989                 if (!p_block) {
 990                         rc = -EIO;
 991                         mlog(ML_ERROR,
 992                              "Inode #%llu contains a hole at offset %llu\n",
 993                              (unsigned long long)OCFS2_I(inode)->ip_blkno,
 994                              (unsigned long long)(v_block + done) <<
 995                              inode->i_sb->s_blocksize_bits);
 996                         break;
 997                 }
 998 
 999                 count = nr - done;
1000                 if (p_count < count)
1001                         count = p_count;
1002 
1003                 /*
1004                  * If the caller passed us bhs, they should have come
1005                  * from a previous readahead call to this function.  Thus,
1006                  * they should have the right b_blocknr.
1007                  */
1008                 for (i = 0; i < count; i++) {
1009                         if (!bhs[done + i])
1010                                 continue;
1011                         BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
1012                 }
1013 
1014                 rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
1015                                        bhs + done, flags, validate);
1016                 if (rc) {
1017                         mlog_errno(rc);
1018                         break;
1019                 }
1020                 done += count;
1021         }
1022 
1023 out:
1024         return rc;
1025 }
1026 
1027 

/* [<][>][^][v][top][bottom][index][help] */