fs/btrfs/qgroup.h

DEFINITIONS

This source file includes the following definitions:
  1. btrfs_qgroup_reserve_meta_pertrans
  2. btrfs_qgroup_reserve_meta_prealloc
  3. btrfs_qgroup_free_meta_pertrans
  4. btrfs_qgroup_free_meta_prealloc

/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2014 Facebook.  All rights reserved.
 */

#ifndef BTRFS_QGROUP_H
#define BTRFS_QGROUP_H

#include <linux/spinlock.h>
#include <linux/rbtree.h>
#include "ulist.h"
#include "delayed-ref.h"

/*
 * Btrfs qgroup overview
 *
 * Btrfs qgroups split into 3 main parts:
 * 1) Reserve
 *    Reserve metadata/data space for incoming operations.
 *    This affects how the qgroup limit works.
 *
 * 2) Trace
 *    Tell btrfs qgroups to trace dirty extents.
 *
 *    Dirty extents include:
 *    - Newly allocated extents
 *    - Extents that are going to be deleted (in this transaction)
 *    - Extents whose owner is going to be modified
 *
 *    This is the main part that affects whether qgroup numbers stay
 *    consistent.
 *    Btrfs qgroups can trace clean extents without causing any problem,
 *    but that consumes extra CPU time, so it should be avoided if possible.
 *
 * 3) Account
 *    Btrfs qgroups update their numbers based on the dirty extents traced
 *    in the previous step.
 *
 *    This normally happens at qgroup rescan and transaction commit time.
 */
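
/*
 * A rough mapping of the three phases above to the APIs declared in this
 * header:
 *
 *   1) Reserve:  btrfs_qgroup_reserve_data() and the
 *                btrfs_qgroup_reserve_meta_*() helpers
 *   2) Trace:    btrfs_qgroup_trace_extent() and the other trace helpers
 *   3) Account:  btrfs_qgroup_account_extents() at transaction commit,
 *                btrfs_qgroup_rescan() for full rescans
 */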

/*
 * Special performance optimization for balance.
 *
 * For balance, we need to swap the subtrees of the subvolume and reloc
 * trees. In theory, we would need to trace all subtree blocks of both the
 * subvolume and reloc trees, since their owner changes during such a swap.
 *
 * However, since balance has ensured that both subtrees contain the same
 * contents and have the same tree structure, such a swap won't cause a
 * qgroup number change.
 *
 * But there is a race window between the subtree swap and the transaction
 * commit; during that window, if we increase/decrease the tree level or
 * merge/split tree blocks, we still need to trace the original subtrees.
 *
 * So for balance we use delayed subtree tracing, whose workflow is:
 *
 * 1) Record the subtree root blocks that get swapped.
 *
 *    During subtree swap:
 *    O = Old tree blocks
 *    N = New tree blocks
 *          reloc tree                     subvolume tree X
 *             Root                               Root
 *            /    \                             /    \
 *          NA     OB                          OA      OB
 *        /  |     |  \                      /  |      |  \
 *      NC  ND     OE  OF                   OC  OD     OE  OF
 *
 *   In this case, NA and OA are going to be swapped, so record (NA, OA)
 *   into subvolume tree X.
 *
 * 2) After subtree swap.
 *          reloc tree                     subvolume tree X
 *             Root                               Root
 *            /    \                             /    \
 *          OA     OB                          NA      OB
 *        /  |     |  \                      /  |      |  \
 *      OC  OD     OE  OF                   NC  ND     OE  OF
 *
 * 3a) COW happens for OB
 *     If we are going to COW tree block OB, we check OB's bytenr against
 *     tree X's swapped_blocks structure.
 *     If it doesn't match any record, nothing happens.
 *
 * 3b) COW happens for NA
 *     Check NA's bytenr against tree X's swapped_blocks and get a hit.
 *     Then we do a subtree scan on both subtrees OA and NA.
 *     This results in 6 tree blocks to be scanned (OA, OC, OD, NA, NC, ND).
 *
 *     Then no matter what we do to subvolume tree X, the qgroup numbers
 *     will still be correct.
 *     Then NA's record gets removed from X's swapped_blocks.
 *
 * 4)  Transaction commit
 *     Any record left in X's swapped_blocks gets removed; since there was
 *     no modification to the swapped subtrees, there is no need to trigger
 *     a heavy qgroup subtree rescan for them.
 */
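
/*
 * A hedged sketch of how the workflow above maps onto the swapped-blocks
 * API declared at the bottom of this header. The variable names and call
 * sites are illustrative assumptions; only the function calls are real.
 */
#if 0	/* illustration only, not built */
	/* Step 1: at subtree swap time, record the swapped root blocks. */
	ret = btrfs_qgroup_add_swapped_blocks(trans, subvol_root, bg,
					      subvol_parent, subvol_slot,
					      reloc_parent, reloc_slot,
					      last_snapshot);

	/*
	 * Steps 3a/3b: from the tree block COW path, check the block being
	 * COWed against swapped_blocks and, on a hit, trace both subtrees.
	 */
	ret = btrfs_qgroup_trace_subtree_after_cow(trans, root, cow_buf);

	/* Step 4: at transaction commit, drop any records never hit. */
	btrfs_qgroup_clean_swapped_blocks(root);
#endif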

/*
 * Record a dirty extent, and inform qgroups to update quota on it.
 * TODO: Use a kmem cache to allocate it.
 */
struct btrfs_qgroup_extent_record {
	struct rb_node node;
	u64 bytenr;
	u64 num_bytes;

	/*
	 * For freeing qgroup reserved data space.
	 *
	 * @data_rsv_refroot and @data_rsv are recorded after a
	 * BTRFS_ADD_DELAYED_EXTENT operation, and are used to free the
	 * reserved qgroup space at transaction commit time.
	 */
	u32 data_rsv;		/* reserved data space that needs to be freed */
	u64 data_rsv_refroot;	/* which root the reserved data belongs to */
	struct ulist *old_roots;
};

struct btrfs_qgroup_swapped_block {
	struct rb_node node;

	int level;
	bool trace_leaf;

	/* bytenr/generation of the tree block in the subvolume tree after swap */
	u64 subvol_bytenr;
	u64 subvol_generation;

	/* bytenr/generation of the tree block in the reloc tree after swap */
	u64 reloc_bytenr;
	u64 reloc_generation;

	u64 last_snapshot;
	struct btrfs_key first_key;
};

/*
 * Qgroup reservation types:
 *
 * DATA:
 *	Space reserved for data.
 *
 * META_PERTRANS:
 *	Space reserved for metadata (per-transaction).
 *	Since qgroup data is only updated at transaction commit time,
 *	reserved metadata space must be kept until the transaction commits.
 *	Any metadata reservation used via btrfs_start_transaction() should
 *	be of this type.
 *
 * META_PREALLOC:
 *	There are cases where metadata space is reserved before starting a
 *	transaction, with btrfs_join_transaction() used later to get a
 *	trans handle.
 *	Any metadata reserved for such usage should be of this type.
 *	After join_transaction(), part (or all) of such a reservation should
 *	be converted into META_PERTRANS.
 */
enum btrfs_qgroup_rsv_type {
	BTRFS_QGROUP_RSV_DATA,
	BTRFS_QGROUP_RSV_META_PERTRANS,
	BTRFS_QGROUP_RSV_META_PREALLOC,
	BTRFS_QGROUP_RSV_LAST,
};

/*
 * Represents how many bytes we have reserved for this qgroup.
 *
 * Each type has different reservation behavior.
 * E.g. data follows its io_tree flag modifications, while *currently*
 * metadata is just reserve-and-clear during the transaction.
 *
 * TODO: Add a new type for reservations which can survive a transaction
 * commit. The current metadata reservation behavior is not suitable for
 * such a case.
 */
struct btrfs_qgroup_rsv {
	u64 values[BTRFS_QGROUP_RSV_LAST];
};

/*
 * One struct for each qgroup, organized in fs_info->qgroup_tree.
 */
struct btrfs_qgroup {
	u64 qgroupid;

	/*
	 * state
	 */
	u64 rfer;	/* referenced */
	u64 rfer_cmpr;	/* referenced compressed */
	u64 excl;	/* exclusive */
	u64 excl_cmpr;	/* exclusive compressed */

	/*
	 * limits
	 */
	u64 lim_flags;	/* which limits are set */
	u64 max_rfer;
	u64 max_excl;
	u64 rsv_rfer;
	u64 rsv_excl;

	/*
	 * reservation tracking
	 */
	struct btrfs_qgroup_rsv rsv;

	/*
	 * lists
	 */
	struct list_head groups;  /* groups this group is member of */
	struct list_head members; /* groups that are members of this group */
	struct list_head dirty;   /* dirty groups */
	struct rb_node node;	  /* tree of qgroups */

	/*
	 * Temp variables for accounting operations.
	 * Refer to qgroup_shared_accounting() for details.
	 */
	u64 old_refcnt;
	u64 new_refcnt;
};

/*
 * For qgroup event trace points only
 */
#define QGROUP_RESERVE		(1<<0)
#define QGROUP_RELEASE		(1<<1)
#define QGROUP_FREE		(1<<2)

int btrfs_quota_enable(struct btrfs_fs_info *fs_info);
int btrfs_quota_disable(struct btrfs_fs_info *fs_info);
int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
				     bool interruptible);
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
			      u64 dst);
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
			      u64 dst);
int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
		       struct btrfs_qgroup_limit *limit);
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
struct btrfs_delayed_extent_op;

/*
 * Inform qgroups to trace one dirty extent; its info is recorded in
 * @record, so that qgroups can account it at transaction commit time.
 *
 * Lockless version: the caller must hold the delayed ref lock and have
 * allocated the memory, then call btrfs_qgroup_trace_extent_post() after
 * exiting the lock context.
 *
 * Return 0 for a successful insert.
 * Return >0 for an existing record; the caller can free @record safely.
 * Error is not possible.
 */
int btrfs_qgroup_trace_extent_nolock(
		struct btrfs_fs_info *fs_info,
		struct btrfs_delayed_ref_root *delayed_refs,
		struct btrfs_qgroup_extent_record *record);

/*
 * Post handler after qgroup_trace_extent_nolock().
 *
 * NOTE: Current qgroup code does the expensive backref walk at transaction
 * commit time with TRANS_STATE_COMMIT_DOING, which blocks incoming new
 * transactions.
 * This is designed to allow btrfs_find_all_roots() to get a correct
 * new_roots result.
 *
 * However, for old_roots there is no need to do the backref walk at that
 * time, since we search commit roots for the backref walk, so the result
 * will always be correct.
 *
 * Due to the lockless nature of the _nolock version, we can't do the
 * backref walk there, so we must call btrfs_qgroup_trace_extent_post()
 * after exiting the spinlock context.
 *
 * TODO: If we can fix and prove that btrfs_find_all_roots() can get a
 * correct result using the current root, then we could move all the
 * expensive backref walks out of transaction commit; but not now, as
 * qgroup accounting would be wrong again.
 */
int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
				   struct btrfs_qgroup_extent_record *qrecord);
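
/*
 * A hedged sketch of the intended nolock + post call pattern. The
 * surrounding function, the record allocation, and the field accesses are
 * illustrative assumptions; see btrfs_qgroup_trace_extent() for the real
 * in-tree user of this pattern.
 */
#if 0	/* illustration only, not built */
static int example_trace_one_extent(struct btrfs_trans_handle *trans,
				    u64 bytenr, u64 num_bytes)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_ref_root *delayed_refs =
		&trans->transaction->delayed_refs;
	struct btrfs_qgroup_extent_record *record;
	int ret;

	record = kzalloc(sizeof(*record), GFP_NOFS);
	if (!record)
		return -ENOMEM;
	record->bytenr = bytenr;
	record->num_bytes = num_bytes;

	spin_lock(&delayed_refs->lock);
	ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
	spin_unlock(&delayed_refs->lock);
	if (ret > 0) {
		/* A record for this extent already existed; free ours. */
		kfree(record);
		return 0;
	}
	/* Expensive commit-root backref walk, done outside the spinlock. */
	return btrfs_qgroup_trace_extent_post(fs_info, record);
}
#endif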

/*
 * Inform qgroups to trace one dirty extent, specified by @bytenr and
 * @num_bytes, so that qgroups can account it at transaction commit time.
 *
 * Better encapsulated version, with memory allocation and a backref walk
 * for commit roots, so this can sleep.
 *
 * Return 0 if the operation is done.
 * Return <0 for error, like memory allocation failure or invalid parameter
 * (NULL trans).
 */
int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
			      u64 num_bytes, gfp_t gfp_flag);
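
/*
 * A hedged usage sketch (a fragment; the surrounding context and variables
 * are illustrative assumptions):
 */
#if 0	/* illustration only, not built */
	/* A new extent was just allocated; make qgroups trace it. */
	ret = btrfs_qgroup_trace_extent(trans, bytenr, num_bytes, GFP_NOFS);
	if (ret < 0)
		return ret;	/* e.g. -ENOMEM from record allocation */
#endif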

/*
 * Inform qgroups to trace all leaf items of data.
 *
 * Return 0 for success.
 * Return <0 for error (ENOMEM).
 */
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
				  struct extent_buffer *eb);
/*
 * Inform qgroups to trace a whole subtree, including all its child tree
 * blocks and data.
 * The root tree block is specified by @root_eb.
 *
 * Normally used by relocation (tree block swap) and subvolume deletion.
 *
 * Return 0 for success.
 * Return <0 for error (ENOMEM or tree search error).
 */
int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
			       struct extent_buffer *root_eb,
			       u64 root_gen, int root_level);
int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
				u64 num_bytes, struct ulist *old_roots,
				struct ulist *new_roots);
int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
			 u64 objectid, struct btrfs_qgroup_inherit *inherit);
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
			       u64 ref_root, u64 num_bytes,
			       enum btrfs_qgroup_rsv_type type);

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
			       u64 rfer, u64 excl);
#endif

/* New io_tree based accurate qgroup reserve API */
int btrfs_qgroup_reserve_data(struct inode *inode,
			struct extent_changeset **reserved, u64 start, u64 len);
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
int btrfs_qgroup_free_data(struct inode *inode,
			struct extent_changeset *reserved, u64 start, u64 len);
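
/*
 * A hedged sketch of the data reservation lifecycle (a fragment; the
 * surrounding write path, the write_succeeded flag, and the
 * extent_changeset_free() helper are assumptions for illustration):
 */
#if 0	/* illustration only, not built */
	struct extent_changeset *reserved = NULL;

	/* Before dirtying pages; fails if it would exceed the qgroup limit. */
	ret = btrfs_qgroup_reserve_data(inode, &reserved, start, len);
	if (ret < 0)
		return ret;

	if (write_succeeded)
		/* Keep the reservation; it is freed at transaction commit. */
		ret = btrfs_qgroup_release_data(inode, start, len);
	else
		/* Error path: hand the reserved ranges back immediately. */
		ret = btrfs_qgroup_free_data(inode, reserved, start, len);
	extent_changeset_free(reserved);
#endif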

int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
				enum btrfs_qgroup_rsv_type type, bool enforce);
/* Reserve metadata space for the pertrans and prealloc types */
static inline int btrfs_qgroup_reserve_meta_pertrans(struct btrfs_root *root,
				int num_bytes, bool enforce)
{
	return __btrfs_qgroup_reserve_meta(root, num_bytes,
			BTRFS_QGROUP_RSV_META_PERTRANS, enforce);
}
static inline int btrfs_qgroup_reserve_meta_prealloc(struct btrfs_root *root,
				int num_bytes, bool enforce)
{
	return __btrfs_qgroup_reserve_meta(root, num_bytes,
			BTRFS_QGROUP_RSV_META_PREALLOC, enforce);
}

void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
			     enum btrfs_qgroup_rsv_type type);

/* Free a per-transaction meta reservation, for error handling */
static inline void btrfs_qgroup_free_meta_pertrans(struct btrfs_root *root,
						   int num_bytes)
{
	__btrfs_qgroup_free_meta(root, num_bytes,
			BTRFS_QGROUP_RSV_META_PERTRANS);
}

/* A pre-allocated meta reservation can be freed as needed */
static inline void btrfs_qgroup_free_meta_prealloc(struct btrfs_root *root,
						   int num_bytes)
{
	__btrfs_qgroup_free_meta(root, num_bytes,
			BTRFS_QGROUP_RSV_META_PREALLOC);
}

/*
 * Per-transaction meta reservations should all be freed at transaction
 * commit time.
 */
void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root);

/*
 * Convert @num_bytes of META_PREALLOC reservation to META_PERTRANS.
 *
 * This is called when a preallocated meta reservation needs to be used,
 * normally after a btrfs_join_transaction() call.
 */
void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes);
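
/*
 * A hedged sketch of the prealloc -> pertrans flow described above (the
 * surrounding function and the enforce/byte-count choices are illustrative
 * assumptions):
 */
#if 0	/* illustration only, not built */
static int example_meta_prealloc_flow(struct btrfs_root *root, int nbytes)
{
	struct btrfs_trans_handle *trans;
	int ret;

	/* Reserve while we do not yet hold a transaction handle. */
	ret = btrfs_qgroup_reserve_meta_prealloc(root, nbytes, true);
	if (ret < 0)
		return ret;

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		/* No transaction joined: return the bytes right away. */
		btrfs_qgroup_free_meta_prealloc(root, nbytes);
		return PTR_ERR(trans);
	}

	/* The reservation must now live until commit; retag it. */
	btrfs_qgroup_convert_reserved_meta(root, nbytes);
	return btrfs_end_transaction(trans);
}
#endif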

void btrfs_qgroup_check_reserved_leak(struct inode *inode);

/* btrfs_qgroup_swapped_blocks related functions */
void btrfs_qgroup_init_swapped_blocks(
	struct btrfs_qgroup_swapped_blocks *swapped_blocks);

void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root);
int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
		struct btrfs_root *subvol_root,
		struct btrfs_block_group_cache *bg,
		struct extent_buffer *subvol_parent, int subvol_slot,
		struct extent_buffer *reloc_parent, int reloc_slot,
		u64 last_snapshot);
int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
		struct btrfs_root *root, struct extent_buffer *eb);
void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans);

#endif
