root/fs/ubifs/shrinker.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. shrink_tnc
  2. shrink_tnc_trees
  3. kick_a_thread
  4. ubifs_shrink_count
  5. ubifs_shrink_scan

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * This file is part of UBIFS.
   4  *
   5  * Copyright (C) 2006-2008 Nokia Corporation.
   6  *
   7  * Authors: Artem Bityutskiy (Битюцкий Артём)
   8  *          Adrian Hunter
   9  */
  10 
  11 /*
  12  * This file implements UBIFS shrinker which evicts clean znodes from the TNC
  13  * tree when Linux VM needs more RAM.
  14  *
  15  * We do not implement any LRU lists to find oldest znodes to free because it
  16  * would add additional overhead to the file system fast paths. So the shrinker
  17  * just walks the TNC tree when searching for znodes to free.
  18  *
  19  * If the root of a TNC sub-tree is clean and old enough, then the children are
  20  * also clean and old enough. So the shrinker walks the TNC in level order and
  21  * dumps entire sub-trees.
  22  *
  23  * The age of znodes is just the time-stamp when they were last looked at.
  24  * The current shrinker first tries to evict old znodes, then young ones.
  25  *
  26  * Since the shrinker is global, it has to protect against races with FS
  27  * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
  28  */
  29 
  30 #include "ubifs.h"
  31 
  32 /* List of all UBIFS file-system instances */
  33 LIST_HEAD(ubifs_infos);
  34 
  35 /*
  36  * We number each shrinker run and record the number on the ubifs_info structure
  37  * so that we can easily work out which ubifs_info structures have already been
  38  * done by the current run.
  39  */
  40 static unsigned int shrinker_run_no;
  41 
  42 /* Protects 'ubifs_infos' list */
  43 DEFINE_SPINLOCK(ubifs_infos_lock);
  44 
  45 /* Global clean znode counter (for all mounted UBIFS instances) */
  46 atomic_long_t ubifs_clean_zn_cnt;
  47 
  48 /**
  49  * shrink_tnc - shrink TNC tree.
  50  * @c: UBIFS file-system description object
  51  * @nr: number of znodes to free
  52  * @age: the age of znodes to free
  53  * @contention: if any contention, this is set to %1
  54  *
  55  * This function traverses TNC tree and frees clean znodes. It does not free
  56  * clean znodes which younger then @age. Returns number of freed znodes.
  57  */
  58 static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
  59 {
  60         int total_freed = 0;
  61         struct ubifs_znode *znode, *zprev;
  62         time64_t time = ktime_get_seconds();
  63 
  64         ubifs_assert(c, mutex_is_locked(&c->umount_mutex));
  65         ubifs_assert(c, mutex_is_locked(&c->tnc_mutex));
  66 
  67         if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
  68                 return 0;
  69 
  70         /*
  71          * Traverse the TNC tree in levelorder manner, so that it is possible
  72          * to destroy large sub-trees. Indeed, if a znode is old, then all its
  73          * children are older or of the same age.
  74          *
  75          * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
  76          * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
  77          * changed only when the 'c->tnc_mutex' is held.
  78          */
  79         zprev = NULL;
  80         znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL);
  81         while (znode && total_freed < nr &&
  82                atomic_long_read(&c->clean_zn_cnt) > 0) {
  83                 int freed;
  84 
  85                 /*
  86                  * If the znode is clean, but it is in the 'c->cnext' list, this
  87                  * means that this znode has just been written to flash as a
  88                  * part of commit and was marked clean. They will be removed
  89                  * from the list at end commit. We cannot change the list,
  90                  * because it is not protected by any mutex (design decision to
  91                  * make commit really independent and parallel to main I/O). So
  92                  * we just skip these znodes.
  93                  *
  94                  * Note, the 'clean_zn_cnt' counters are not updated until
  95                  * after the commit, so the UBIFS shrinker does not report
  96                  * the znodes which are in the 'c->cnext' list as freeable.
  97                  *
  98                  * Also note, if the root of a sub-tree is not in 'c->cnext',
  99                  * then the whole sub-tree is not in 'c->cnext' as well, so it
 100                  * is safe to dump whole sub-tree.
 101                  */
 102 
 103                 if (znode->cnext) {
 104                         /*
 105                          * Very soon these znodes will be removed from the list
 106                          * and become freeable.
 107                          */
 108                         *contention = 1;
 109                 } else if (!ubifs_zn_dirty(znode) &&
 110                            abs(time - znode->time) >= age) {
 111                         if (znode->parent)
 112                                 znode->parent->zbranch[znode->iip].znode = NULL;
 113                         else
 114                                 c->zroot.znode = NULL;
 115 
 116                         freed = ubifs_destroy_tnc_subtree(c, znode);
 117                         atomic_long_sub(freed, &ubifs_clean_zn_cnt);
 118                         atomic_long_sub(freed, &c->clean_zn_cnt);
 119                         total_freed += freed;
 120                         znode = zprev;
 121                 }
 122 
 123                 if (unlikely(!c->zroot.znode))
 124                         break;
 125 
 126                 zprev = znode;
 127                 znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode);
 128                 cond_resched();
 129         }
 130 
 131         return total_freed;
 132 }
 133 
 134 /**
 135  * shrink_tnc_trees - shrink UBIFS TNC trees.
 136  * @nr: number of znodes to free
 137  * @age: the age of znodes to free
 138  * @contention: if any contention, this is set to %1
 139  *
 140  * This function walks the list of mounted UBIFS file-systems and frees clean
 141  * znodes which are older than @age, until at least @nr znodes are freed.
 142  * Returns the number of freed znodes.
 143  */
 144 static int shrink_tnc_trees(int nr, int age, int *contention)
 145 {
 146         struct ubifs_info *c;
 147         struct list_head *p;
 148         unsigned int run_no;
 149         int freed = 0;
 150 
 151         spin_lock(&ubifs_infos_lock);
 152         do {
 153                 run_no = ++shrinker_run_no;
 154         } while (run_no == 0);
 155         /* Iterate over all mounted UBIFS file-systems and try to shrink them */
 156         p = ubifs_infos.next;
 157         while (p != &ubifs_infos) {
 158                 c = list_entry(p, struct ubifs_info, infos_list);
 159                 /*
 160                  * We move the ones we do to the end of the list, so we stop
 161                  * when we see one we have already done.
 162                  */
 163                 if (c->shrinker_run_no == run_no)
 164                         break;
 165                 if (!mutex_trylock(&c->umount_mutex)) {
 166                         /* Some un-mount is in progress, try next FS */
 167                         *contention = 1;
 168                         p = p->next;
 169                         continue;
 170                 }
 171                 /*
 172                  * We're holding 'c->umount_mutex', so the file-system won't go
 173                  * away.
 174                  */
 175                 if (!mutex_trylock(&c->tnc_mutex)) {
 176                         mutex_unlock(&c->umount_mutex);
 177                         *contention = 1;
 178                         p = p->next;
 179                         continue;
 180                 }
 181                 spin_unlock(&ubifs_infos_lock);
 182                 /*
 183                  * OK, now we have TNC locked, the file-system cannot go away -
 184                  * it is safe to reap the cache.
 185                  */
 186                 c->shrinker_run_no = run_no;
 187                 freed += shrink_tnc(c, nr, age, contention);
 188                 mutex_unlock(&c->tnc_mutex);
 189                 spin_lock(&ubifs_infos_lock);
 190                 /* Get the next list element before we move this one */
 191                 p = p->next;
 192                 /*
 193                  * Move this one to the end of the list to provide some
 194                  * fairness.
 195                  */
 196                 list_move_tail(&c->infos_list, &ubifs_infos);
 197                 mutex_unlock(&c->umount_mutex);
 198                 if (freed >= nr)
 199                         break;
 200         }
 201         spin_unlock(&ubifs_infos_lock);
 202         return freed;
 203 }
 204 
 205 /**
 206  * kick_a_thread - kick a background thread to start commit.
 207  *
 208  * This function kicks a background thread to start background commit. Returns
 209  * %-1 if a thread was kicked or there is another reason to assume the memory
 210  * will soon be freed or become freeable. If there are no dirty znodes, returns
 211  * %0.
 212  */
 213 static int kick_a_thread(void)
 214 {
 215         int i;
 216         struct ubifs_info *c;
 217 
 218         /*
 219          * Iterate over all mounted UBIFS file-systems and find out if there is
 220          * already an ongoing commit operation there. If no, then iterate for
 221          * the second time and initiate background commit.
 222          */
 223         spin_lock(&ubifs_infos_lock);
 224         for (i = 0; i < 2; i++) {
 225                 list_for_each_entry(c, &ubifs_infos, infos_list) {
 226                         long dirty_zn_cnt;
 227 
 228                         if (!mutex_trylock(&c->umount_mutex)) {
 229                                 /*
 230                                  * Some un-mount is in progress, it will
 231                                  * certainly free memory, so just return.
 232                                  */
 233                                 spin_unlock(&ubifs_infos_lock);
 234                                 return -1;
 235                         }
 236 
 237                         dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
 238 
 239                         if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
 240                             c->ro_mount || c->ro_error) {
 241                                 mutex_unlock(&c->umount_mutex);
 242                                 continue;
 243                         }
 244 
 245                         if (c->cmt_state != COMMIT_RESTING) {
 246                                 spin_unlock(&ubifs_infos_lock);
 247                                 mutex_unlock(&c->umount_mutex);
 248                                 return -1;
 249                         }
 250 
 251                         if (i == 1) {
 252                                 list_move_tail(&c->infos_list, &ubifs_infos);
 253                                 spin_unlock(&ubifs_infos_lock);
 254 
 255                                 ubifs_request_bg_commit(c);
 256                                 mutex_unlock(&c->umount_mutex);
 257                                 return -1;
 258                         }
 259                         mutex_unlock(&c->umount_mutex);
 260                 }
 261         }
 262         spin_unlock(&ubifs_infos_lock);
 263 
 264         return 0;
 265 }
 266 
 267 unsigned long ubifs_shrink_count(struct shrinker *shrink,
 268                                  struct shrink_control *sc)
 269 {
 270         long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
 271 
 272         /*
 273          * Due to the way UBIFS updates the clean znode counter it may
 274          * temporarily be negative.
 275          */
 276         return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
 277 }
 278 
 279 unsigned long ubifs_shrink_scan(struct shrinker *shrink,
 280                                 struct shrink_control *sc)
 281 {
 282         unsigned long nr = sc->nr_to_scan;
 283         int contention = 0;
 284         unsigned long freed;
 285         long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
 286 
 287         if (!clean_zn_cnt) {
 288                 /*
 289                  * No clean znodes, nothing to reap. All we can do in this case
 290                  * is to kick background threads to start commit, which will
 291                  * probably make clean znodes which, in turn, will be freeable.
 292                  * And we return -1 which means will make VM call us again
 293                  * later.
 294                  */
 295                 dbg_tnc("no clean znodes, kick a thread");
 296                 return kick_a_thread();
 297         }
 298 
 299         freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
 300         if (freed >= nr)
 301                 goto out;
 302 
 303         dbg_tnc("not enough old znodes, try to free young ones");
 304         freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
 305         if (freed >= nr)
 306                 goto out;
 307 
 308         dbg_tnc("not enough young znodes, free all");
 309         freed += shrink_tnc_trees(nr - freed, 0, &contention);
 310 
 311         if (!freed && contention) {
 312                 dbg_tnc("freed nothing, but contention");
 313                 return SHRINK_STOP;
 314         }
 315 
 316 out:
 317         dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
 318         return freed;
 319 }

/* [<][>][^][v][top][bottom][index][help] */