root/fs/erofs/utils.c

DEFINITIONS

This source file includes the following definitions.
  1. erofs_allocpage
  2. erofs_get_pcpubuf
  3. erofs_workgroup_get
  4. erofs_find_workgroup
  5. erofs_register_workgroup
  6. __erofs_workgroup_free
  7. erofs_workgroup_put
  8. erofs_workgroup_unfreeze_final
  9. erofs_try_to_release_workgroup
  10. erofs_shrink_workstation
  11. erofs_shrinker_register
  12. erofs_shrinker_unregister
  13. erofs_shrink_count
  14. erofs_shrink_scan
  15. erofs_init_shrinker
  16. erofs_exit_shrinker
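
The core of this file is a freeze-aware reference count: erofs_workgroup_get() bumps a positive count with cmpxchg and retries on races, erofs_workgroup_put() hands the last reference over to the shrinker, and erofs_try_to_release_workgroup() freezes the count before tearing a group down. The sketch below is a minimal user-space model of that protocol built on C11 atomics; wg_try_get()/wg_put()/wg_try_freeze() and the WG_FROZEN sentinel are illustrative assumptions (the real helpers come via internal.h, and the real getter waits on a frozen group rather than failing fast).

/*
 * Minimal user-space model of the freeze-aware refcount used by erofs
 * workgroups.  Names and the WG_FROZEN sentinel are illustrative only.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define WG_FROZEN (-1)          /* hypothetical sentinel: group is frozen */

struct workgroup {
        atomic_int refcount;    /* > 0: live, WG_FROZEN: frozen by a releaser */
};

/* Take a reference only while the group is live (cf. erofs_workgroup_get). */
static bool wg_try_get(struct workgroup *grp)
{
        int o = atomic_load(&grp->refcount);

        while (o > 0) {
                /* cmpxchg o -> o + 1; on failure 'o' is reloaded, then retry */
                if (atomic_compare_exchange_weak(&grp->refcount, &o, o + 1))
                        return true;
        }
        return false;           /* frozen or being torn down */
}

/* Freeze the group only when it holds exactly 'expect' references. */
static bool wg_try_freeze(struct workgroup *grp, int expect)
{
        return atomic_compare_exchange_strong(&grp->refcount, &expect, WG_FROZEN);
}

/* Drop a reference and return the new count (cf. erofs_workgroup_put). */
static int wg_put(struct workgroup *grp)
{
        return atomic_fetch_sub(&grp->refcount, 1) - 1;
}

int main(void)
{
        struct workgroup grp;

        atomic_init(&grp.refcount, 1);
        printf("get:    %d\n", wg_try_get(&grp));       /* 1: count 1 -> 2   */
        printf("freeze: %d\n", wg_try_freeze(&grp, 1)); /* 0: count is 2     */
        printf("put:    %d\n", wg_put(&grp));           /* 1: back to 1      */
        printf("freeze: %d\n", wg_try_freeze(&grp, 1)); /* 1: now frozen     */
        printf("get:    %d\n", wg_try_get(&grp));       /* 0: getters fail   */
        return 0;
}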

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Copyright (C) 2018 HUAWEI, Inc.
   4  *             http://www.huawei.com/
   5  * Created by Gao Xiang <gaoxiang25@huawei.com>
   6  */
   7 #include "internal.h"
   8 #include <linux/pagevec.h>
   9 
  10 struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp, bool nofail)
  11 {
  12         struct page *page;
  13 
  14         if (!list_empty(pool)) {
  15                 page = lru_to_page(pool);
  16                 DBG_BUGON(page_ref_count(page) != 1);
  17                 list_del(&page->lru);
  18         } else {
  19                 page = alloc_pages(gfp | (nofail ? __GFP_NOFAIL : 0), 0);
  20         }
  21         return page;
  22 }
  23 
  24 #if (EROFS_PCPUBUF_NR_PAGES > 0)
  25 static struct {
  26         u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
  27 } ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];
  28 
  29 void *erofs_get_pcpubuf(unsigned int pagenr)
  30 {
  31         preempt_disable();
  32         return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
  33 }
  34 #endif
  35 
  36 #ifdef CONFIG_EROFS_FS_ZIP
  37 /* global shrink count (for all mounted EROFS instances) */
  38 static atomic_long_t erofs_global_shrink_cnt;
  39 
  40 #define __erofs_workgroup_get(grp)      atomic_inc(&(grp)->refcount)
  41 #define __erofs_workgroup_put(grp)      atomic_dec(&(grp)->refcount)
  42 
  43 static int erofs_workgroup_get(struct erofs_workgroup *grp)
  44 {
  45         int o;
  46 
  47 repeat:
  48         o = erofs_wait_on_workgroup_freezed(grp);
  49         if (o <= 0)
  50                 return -1;
  51 
  52         if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
  53                 goto repeat;
  54 
  55         /* decrease the refcount paired with erofs_workgroup_put */
  56         if (o == 1)
  57                 atomic_long_dec(&erofs_global_shrink_cnt);
  58         return 0;
  59 }
  60 
  61 struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
  62                                              pgoff_t index, bool *tag)
  63 {
  64         struct erofs_sb_info *sbi = EROFS_SB(sb);
  65         struct erofs_workgroup *grp;
  66 
  67 repeat:
  68         rcu_read_lock();
  69         grp = radix_tree_lookup(&sbi->workstn_tree, index);
  70         if (grp) {
  71                 *tag = xa_pointer_tag(grp);
  72                 grp = xa_untag_pointer(grp);
  73 
  74                 if (erofs_workgroup_get(grp)) {
  75                         /* prefer to relax rcu read side */
  76                         rcu_read_unlock();
  77                         goto repeat;
  78                 }
  79 
  80                 DBG_BUGON(index != grp->index);
  81         }
  82         rcu_read_unlock();
  83         return grp;
  84 }
  85 
  86 int erofs_register_workgroup(struct super_block *sb,
  87                              struct erofs_workgroup *grp,
  88                              bool tag)
  89 {
  90         struct erofs_sb_info *sbi;
  91         int err;
  92 
  93         /* grp shouldn't be broken or used before */
  94         if (atomic_read(&grp->refcount) != 1) {
  95                 DBG_BUGON(1);
  96                 return -EINVAL;
  97         }
  98 
  99         err = radix_tree_preload(GFP_NOFS);
 100         if (err)
 101                 return err;
 102 
 103         sbi = EROFS_SB(sb);
 104         xa_lock(&sbi->workstn_tree);
 105 
 106         grp = xa_tag_pointer(grp, tag);
 107 
 108         /*
 109          * Bump up the reference count before making this workgroup
 110          * visible to other users in order to avoid a potential UAF
 111          * when accesses are not serialized by workstn_lock.
 112          */
 113         __erofs_workgroup_get(grp);
 114 
 115         err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp);
 116         if (err)
 117                 /*
 118                  * It's safe to decrease since the workgroup isn't visible
 119                  * and refcount >= 2 (cannot be frozen).
 120                  */
 121                 __erofs_workgroup_put(grp);
 122 
 123         xa_unlock(&sbi->workstn_tree);
 124         radix_tree_preload_end();
 125         return err;
 126 }
 127 
 128 static void __erofs_workgroup_free(struct erofs_workgroup *grp)
 129 {
 130         atomic_long_dec(&erofs_global_shrink_cnt);
 131         erofs_workgroup_free_rcu(grp);
 132 }
 133 
 134 int erofs_workgroup_put(struct erofs_workgroup *grp)
 135 {
 136         int count = atomic_dec_return(&grp->refcount);
 137 
 138         if (count == 1)
 139                 atomic_long_inc(&erofs_global_shrink_cnt);
 140         else if (!count)
 141                 __erofs_workgroup_free(grp);
 142         return count;
 143 }
 144 
 145 static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
 146 {
 147         erofs_workgroup_unfreeze(grp, 0);
 148         __erofs_workgroup_free(grp);
 149 }
 150 
 151 static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
 152                                            struct erofs_workgroup *grp,
 153                                            bool cleanup)
 154 {
 155         /*
 156          * If managed cache is on, the refcount of workgroups
 157          * themselves could be < 0 (frozen). In other words,
 158          * there is no guarantee that all refcounts are > 0.
 159          */
 160         if (!erofs_workgroup_try_to_freeze(grp, 1))
 161                 return false;
 162 
 163         /*
 164          * Note that all cached pages should be detached before
 165          * the workgroup is deleted from the radix tree. Otherwise,
 166          * some cached pages could still be attached to the orphaned
 167          * old workgroup when the new one becomes available in the tree.
 168          */
 169         if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
 170                 erofs_workgroup_unfreeze(grp, 1);
 171                 return false;
 172         }
 173 
 174         /*
 175          * It's impossible to fail after the workgroup is frozen;
 176          * however, in order to catch unexpected race conditions,
 177          * add a DBG_BUGON to observe this in advance.
 178          */
 179         DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
 180                                                      grp->index)) != grp);
 181 
 182         /*
 183          * If managed cache is on, the last refcount should indicate
 184          * the related workstation.
 185          */
 186         erofs_workgroup_unfreeze_final(grp);
 187         return true;
 188 }
 189 
 190 static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
 191                                               unsigned long nr_shrink,
 192                                               bool cleanup)
 193 {
 194         pgoff_t first_index = 0;
 195         void *batch[PAGEVEC_SIZE];
 196         unsigned int freed = 0;
 197 
 198         int i, found;
 199 repeat:
 200         xa_lock(&sbi->workstn_tree);
 201 
 202         found = radix_tree_gang_lookup(&sbi->workstn_tree,
 203                                        batch, first_index, PAGEVEC_SIZE);
 204 
 205         for (i = 0; i < found; ++i) {
 206                 struct erofs_workgroup *grp = xa_untag_pointer(batch[i]);
 207 
 208                 first_index = grp->index + 1;
 209 
 210                 /* try to shrink each valid workgroup */
 211                 if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
 212                         continue;
 213 
 214                 ++freed;
 215                 if (!--nr_shrink)
 216                         break;
 217         }
 218         xa_unlock(&sbi->workstn_tree);
 219 
 220         if (i && nr_shrink)
 221                 goto repeat;
 222         return freed;
 223 }
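
erofs_shrink_workstation() above scans the workstation radix tree in PAGEVEC_SIZE batches, resumes each pass from the index just past the last workgroup it saw, and stops once a batch comes back empty or the shrink quota is exhausted. The following user-space sketch shows the same resumable batch-scan loop over a plain sorted index set; gang_lookup() and the other names are hypothetical stand-ins, not radix-tree APIs.

/* Resumable batch scan: fetch at most BATCH indices at or after
 * 'first_index', process them, then continue from last index + 1.
 * gang_lookup() is a toy stand-in for radix_tree_gang_lookup().
 */
#include <stddef.h>
#include <stdio.h>

#define BATCH 4

static const unsigned long populated[] = { 3, 7, 8, 15, 21, 22, 40 };

static int gang_lookup(unsigned long first, unsigned long *out, int max)
{
        int n = 0;

        for (size_t i = 0; i < sizeof(populated) / sizeof(populated[0]) && n < max; i++)
                if (populated[i] >= first)
                        out[n++] = populated[i];
        return n;
}

int main(void)
{
        unsigned long batch[BATCH];
        unsigned long first_index = 0;
        unsigned long nr_shrink = 5;    /* shrink quota, like nr_shrink above */
        unsigned long freed = 0;
        int i, found;

repeat:
        found = gang_lookup(first_index, batch, BATCH);
        for (i = 0; i < found; i++) {
                first_index = batch[i] + 1;     /* resume point for the next pass */
                printf("releasing index %lu\n", batch[i]);
                freed++;
                if (!--nr_shrink)
                        break;
        }
        if (i && nr_shrink)                     /* progress made and quota left */
                goto repeat;
        printf("freed %lu\n", freed);
        return 0;
}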
 224 
 225 /* protected by 'erofs_sb_list_lock' */
 226 static unsigned int shrinker_run_no;
 227 
 228 /* protects the mounted 'erofs_sb_list' */
 229 static DEFINE_SPINLOCK(erofs_sb_list_lock);
 230 static LIST_HEAD(erofs_sb_list);
 231 
 232 void erofs_shrinker_register(struct super_block *sb)
 233 {
 234         struct erofs_sb_info *sbi = EROFS_SB(sb);
 235 
 236         mutex_init(&sbi->umount_mutex);
 237 
 238         spin_lock(&erofs_sb_list_lock);
 239         list_add(&sbi->list, &erofs_sb_list);
 240         spin_unlock(&erofs_sb_list_lock);
 241 }
 242 
 243 void erofs_shrinker_unregister(struct super_block *sb)
 244 {
 245         struct erofs_sb_info *const sbi = EROFS_SB(sb);
 246 
 247         mutex_lock(&sbi->umount_mutex);
 248         erofs_shrink_workstation(sbi, ~0UL, true);
 249 
 250         spin_lock(&erofs_sb_list_lock);
 251         list_del(&sbi->list);
 252         spin_unlock(&erofs_sb_list_lock);
 253         mutex_unlock(&sbi->umount_mutex);
 254 }
 255 
 256 static unsigned long erofs_shrink_count(struct shrinker *shrink,
 257                                         struct shrink_control *sc)
 258 {
 259         return atomic_long_read(&erofs_global_shrink_cnt);
 260 }
 261 
 262 static unsigned long erofs_shrink_scan(struct shrinker *shrink,
 263                                        struct shrink_control *sc)
 264 {
 265         struct erofs_sb_info *sbi;
 266         struct list_head *p;
 267 
 268         unsigned long nr = sc->nr_to_scan;
 269         unsigned int run_no;
 270         unsigned long freed = 0;
 271 
 272         spin_lock(&erofs_sb_list_lock);
 273         do {
 274                 run_no = ++shrinker_run_no;
 275         } while (run_no == 0);
 276 
 277         /* Iterate over all mounted superblocks and try to shrink them */
 278         p = erofs_sb_list.next;
 279         while (p != &erofs_sb_list) {
 280                 sbi = list_entry(p, struct erofs_sb_info, list);
 281 
 282                 /*
 283                  * We move the ones we have processed to the end of the
 284                  * list, so we stop when we see one we have already done.
 285                  */
 286                 if (sbi->shrinker_run_no == run_no)
 287                         break;
 288 
 289                 if (!mutex_trylock(&sbi->umount_mutex)) {
 290                         p = p->next;
 291                         continue;
 292                 }
 293 
 294                 spin_unlock(&erofs_sb_list_lock);
 295                 sbi->shrinker_run_no = run_no;
 296 
 297                 freed += erofs_shrink_workstation(sbi, nr - freed, false);
 298 
 299                 spin_lock(&erofs_sb_list_lock);
 300                 /* Get the next list element before we move this one */
 301                 p = p->next;
 302 
 303                 /*
 304                  * Move this one to the end of the list to provide some
 305                  * fairness.
 306                  */
 307                 list_move_tail(&sbi->list, &erofs_sb_list);
 308                 mutex_unlock(&sbi->umount_mutex);
 309 
 310                 if (freed >= nr)
 311                         break;
 312         }
 313         spin_unlock(&erofs_sb_list_lock);
 314         return freed;
 315 }
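
erofs_shrink_scan() above walks the global superblock list under erofs_sb_list_lock, stamps each superblock with the current non-zero run number so a pass terminates when it meets one it has already handled, and rotates handled entries to the tail so later passes start elsewhere. The sketch below models that run-number round-robin in user space; struct sb_entry, shrink_pass() and the list helpers are hypothetical names, not kernel APIs.

/* User-space model of the run-number round-robin used by the shrinker
 * scan: every pass gets a fresh non-zero run number, stops at the first
 * entry already stamped with it, and moves handled entries to the tail.
 */
#include <stdio.h>

struct sb_entry {
        const char *name;
        unsigned int run_no;            /* last pass that handled this entry */
        struct sb_entry *prev, *next;
};

static struct sb_entry head = { .name = "(head)", .prev = &head, .next = &head };
static unsigned int shrinker_run_no;

static void add_tail(struct sb_entry *e)
{
        e->prev = head.prev;
        e->next = &head;
        head.prev->next = e;
        head.prev = e;
}

static void move_to_tail(struct sb_entry *e)
{
        e->prev->next = e->next;        /* unlink ... */
        e->next->prev = e->prev;
        add_tail(e);                    /* ... and re-insert before the sentinel */
}

static void shrink_pass(void)
{
        struct sb_entry *e = head.next;
        unsigned int run_no;

        do {
                run_no = ++shrinker_run_no;     /* 0 is reserved as "never run" */
        } while (run_no == 0);

        while (e != &head) {
                struct sb_entry *next = e->next;

                if (e->run_no == run_no)        /* already handled in this pass */
                        break;
                e->run_no = run_no;
                printf("pass %u: shrinking %s\n", run_no, e->name);
                move_to_tail(e);                /* fairness: rotate to the end */
                e = next;
        }
}

int main(void)
{
        struct sb_entry a = { .name = "sda1" };
        struct sb_entry b = { .name = "sdb1" };
        struct sb_entry c = { .name = "sdc1" };

        add_tail(&a);
        add_tail(&b);
        add_tail(&c);
        shrink_pass();          /* handles sda1, sdb1, sdc1, then stops */
        shrink_pass();          /* a fresh run number lets them be handled again */
        return 0;
}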
 316 
 317 static struct shrinker erofs_shrinker_info = {
 318         .scan_objects = erofs_shrink_scan,
 319         .count_objects = erofs_shrink_count,
 320         .seeks = DEFAULT_SEEKS,
 321 };
 322 
 323 int __init erofs_init_shrinker(void)
 324 {
 325         return register_shrinker(&erofs_shrinker_info);
 326 }
 327 
 328 void erofs_exit_shrinker(void)
 329 {
 330         unregister_shrinker(&erofs_shrinker_info);
 331 }
 332 #endif  /* !CONFIG_EROFS_FS_ZIP */
 333 
