/*
 * linux/fs/mbcache.c
 * (C) 2001-2002 Andreas Gruenbacher, <a.gruenbacher@computer.org>
 */

/*
 * Filesystem Meta Information Block Cache (mbcache)
 *
 * The mbcache caches blocks of block devices that need to be located
 * by their device/block number, as well as by other criteria (such
 * as the block's contents).
 *
 * There can only be one cache entry in a cache per device and block number.
 * Additional indexes need not be unique in this sense. The number of
 * additional indexes (=other criteria) can be hardwired at compile time
 * or specified at cache create time.
 *
 * Each cache entry is of fixed size. An entry may be `valid' or `invalid'
 * in the cache. A valid entry is in the main hash tables of the cache,
 * and may also be in the lru list. An invalid entry is not in any hashes
 * or lists.
 *
 * A valid cache entry is only in the lru list if no handles refer to it.
 * Invalid cache entries will be freed when the last handle to the cache
 * entry is released. Entries that cannot be freed immediately are put
 * back on the lru list.
 */
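
/*
 * The lifecycle described above, as a minimal usage sketch.  The cache
 * name, bucket count and key below are made up for illustration and are
 * not taken from any particular filesystem:
 *
 *	struct mb_cache *cache = mb_cache_create("example-cache", 6);
 *	struct mb_cache_entry *ce = mb_cache_entry_alloc(cache, GFP_NOFS);
 *
 *	if (ce) {
 *		if (mb_cache_entry_insert(ce, bdev, block, key))
 *			mb_cache_entry_free(ce);
 *		else
 *			mb_cache_entry_release(ce);
 *	}
 *
 * After the release the entry is valid, holds no handles, and therefore
 * sits on the lru list until it is looked up again or reclaimed.
 */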

/*
 * Lock descriptions and usage:
 *
 * Each hash chain of both the block and index hash tables now contains
 * a built-in lock used to serialize accesses to the hash chain.
 *
 * Accesses to global data structures mb_cache_list and mb_cache_lru_list
 * are serialized via the global spinlock mb_cache_spinlock.
 *
 * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize
 * accesses to its local data, such as e_used and e_queued.
 *
 * Lock ordering:
 *
 * Each block hash chain's lock has the highest lock order, followed by an
 * index hash chain's lock, mb_cache_bg_lock (used to implement
 * mb_cache_entry's lock), and mb_cache_spinlock, with the lowest order.
 * While holding either a block or index hash chain lock, a thread can
 * acquire mb_cache_bg_lock, which in turn can also acquire
 * mb_cache_spinlock.
 *
 * Synchronization:
 *
 * Since both mb_cache_entry_get and mb_cache_entry_find scan the block and
 * index hash chains, they need to lock the corresponding hash chain.  For
 * each mb_cache_entry within the chain, they also need to lock the entry
 * to prevent a simultaneous release or free of the entry, and to serialize
 * accesses to its e_used and e_queued members.
 *
 * To avoid a dangling reference to an already freed mb_cache_entry, an
 * mb_cache_entry is only freed when it is not on a block hash chain and is
 * no longer referenced, i.e. both e_used and e_queued are zero.  When an
 * mb_cache_entry is explicitly freed, it is first removed from its block
 * hash chain.
 */
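
/*
 * The resulting nesting, sketched for illustration (this is not an actual
 * code path in this file, only the order a thread is allowed to use):
 *
 *	hlist_bl_lock(ce->e_block_hash_p);	highest order
 *	hlist_bl_lock(ce->e_index_hash_p);
 *	__spin_lock_mb_cache_entry(ce);		backed by mb_cache_bg_lock
 *	spin_lock(&mb_cache_spinlock);		lowest order
 *	spin_unlock(&mb_cache_spinlock);
 *	__spin_unlock_mb_cache_entry(ce);
 *	hlist_bl_unlock(ce->e_index_hash_p);
 *	hlist_bl_unlock(ce->e_block_hash_p);
 *
 * No single function below takes all four at once; the rule is only that
 * a lower-order lock must never be held while acquiring a higher-order one.
 */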

#include <linux/kernel.h>
#include <linux/module.h>

#include <linux/hash.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/list_bl.h>
#include <linux/mbcache.h>
#include <linux/init.h>
#include <linux/blockgroup_lock.h>
#include <linux/log2.h>

#ifdef MB_CACHE_DEBUG
# define mb_debug(f...) do { \
		printk(KERN_DEBUG f); \
		printk("\n"); \
	} while (0)
# define mb_assert(c) do { if (!(c)) \
		printk(KERN_ERR "assertion " #c " failed\n"); \
	} while (0)
#else
# define mb_debug(f...) do { } while (0)
# define mb_assert(c) do { } while (0)
#endif
#define mb_error(f...) do { \
		printk(KERN_ERR f); \
		printk("\n"); \
	} while (0)

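/*
 * e_used is a reader/writer count: each reader handle adds 1, while a
 * writer handle (mb_cache_entry_alloc() and mb_cache_entry_get()) adds
 * 1 + MB_CACHE_WRITER, so e_used >= MB_CACHE_WRITER means a writer holds
 * the entry.  For example, one writer and no readers gives
 * e_used == MB_CACHE_WRITER + 1, and releasing that handle brings it
 * back to 0.
 */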
#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)

#define MB_CACHE_ENTRY_LOCK_BITS	ilog2(NR_BG_LOCKS)
#define	MB_CACHE_ENTRY_LOCK_INDEX(ce)			\
	(hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS))

static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
static struct blockgroup_lock *mb_cache_bg_lock;
static struct kmem_cache *mb_cache_kmem_cache;

MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
MODULE_LICENSE("GPL");

EXPORT_SYMBOL(mb_cache_create);
EXPORT_SYMBOL(mb_cache_shrink);
EXPORT_SYMBOL(mb_cache_destroy);
EXPORT_SYMBOL(mb_cache_entry_alloc);
EXPORT_SYMBOL(mb_cache_entry_insert);
EXPORT_SYMBOL(mb_cache_entry_release);
EXPORT_SYMBOL(mb_cache_entry_free);
EXPORT_SYMBOL(mb_cache_entry_get);
#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
EXPORT_SYMBOL(mb_cache_entry_find_first);
EXPORT_SYMBOL(mb_cache_entry_find_next);
#endif

/*
 * Global data: list of all mbcache's, lru list, and a spinlock for
 * accessing cache data structures on SMP machines. The lru list is
 * global across all mbcaches.
 */

static LIST_HEAD(mb_cache_list);
static LIST_HEAD(mb_cache_lru_list);
static DEFINE_SPINLOCK(mb_cache_spinlock);

static inline void
__spin_lock_mb_cache_entry(struct mb_cache_entry *ce)
{
	spin_lock(bgl_lock_ptr(mb_cache_bg_lock,
		MB_CACHE_ENTRY_LOCK_INDEX(ce)));
}

static inline void
__spin_unlock_mb_cache_entry(struct mb_cache_entry *ce)
{
	spin_unlock(bgl_lock_ptr(mb_cache_bg_lock,
		MB_CACHE_ENTRY_LOCK_INDEX(ce)));
}

static inline int
__mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce)
{
	return !hlist_bl_unhashed(&ce->e_block_list);
}


static inline void
__mb_cache_entry_unhash_block(struct mb_cache_entry *ce)
{
	if (__mb_cache_entry_is_block_hashed(ce))
		hlist_bl_del_init(&ce->e_block_list);
}

static inline int
__mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce)
{
	return !hlist_bl_unhashed(&ce->e_index.o_list);
}

static inline void
__mb_cache_entry_unhash_index(struct mb_cache_entry *ce)
{
	if (__mb_cache_entry_is_index_hashed(ce))
		hlist_bl_del_init(&ce->e_index.o_list);
}

/*
 * __mb_cache_entry_unhash_unlock()
 *
 * This function is called to unhash the entry from both the block and
 * index hash chains.
 * It assumes that both hash chains are locked upon entry, and unlocks
 * both of them upon exit.
 */
static inline void
__mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce)
{
	__mb_cache_entry_unhash_index(ce);
	hlist_bl_unlock(ce->e_index_hash_p);
	__mb_cache_entry_unhash_block(ce);
	hlist_bl_unlock(ce->e_block_hash_p);
}

static void
__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
{
	struct mb_cache *cache = ce->e_cache;

	mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)));
	kmem_cache_free(cache->c_entry_cache, ce);
	atomic_dec(&cache->c_entry_count);
}

static void
__mb_cache_entry_release(struct mb_cache_entry *ce)
{
	/* First lock the entry to serialize access to its local data. */
	__spin_lock_mb_cache_entry(ce);
	/* Wake up all processes queuing for this cache entry. */
	if (ce->e_queued)
		wake_up_all(&mb_cache_queue);
	if (ce->e_used >= MB_CACHE_WRITER)
		ce->e_used -= MB_CACHE_WRITER;
	/*
	 * Make sure that all cache entries on the lru list have
	 * both e_used and e_queued equal to zero.
	 */
	ce->e_used--;
	if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) {
		if (!__mb_cache_entry_is_block_hashed(ce)) {
			__spin_unlock_mb_cache_entry(ce);
			goto forget;
		}
		/*
		 * Need access to the lru list; mb_cache_spinlock nests
		 * inside the entry lock (see the lock ordering above), so
		 * it can be taken here directly.
		 */
		spin_lock(&mb_cache_spinlock);
		if (list_empty(&ce->e_lru_list))
			list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
		spin_unlock(&mb_cache_spinlock);
	}
	__spin_unlock_mb_cache_entry(ce);
	return;
forget:
	mb_assert(list_empty(&ce->e_lru_list));
	__mb_cache_entry_forget(ce, GFP_KERNEL);
}

/*
 * mb_cache_shrink_scan()  memory pressure callback
 *
 * This function is called by the kernel memory management when memory
 * gets low.
 *
 * @shrink: (ignored)
 * @sc: shrink_control passed from reclaim
 *
 * Returns the number of objects freed.
 */
static unsigned long
mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	LIST_HEAD(free_list);
	struct mb_cache_entry *entry, *tmp;
	int nr_to_scan = sc->nr_to_scan;
	gfp_t gfp_mask = sc->gfp_mask;
	unsigned long freed = 0;

	mb_debug("trying to free %d entries", nr_to_scan);
	spin_lock(&mb_cache_spinlock);
	while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) {
		struct mb_cache_entry *ce =
			list_entry(mb_cache_lru_list.next,
				struct mb_cache_entry, e_lru_list);
		list_del_init(&ce->e_lru_list);
		if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))
			continue;
		spin_unlock(&mb_cache_spinlock);
		/* Prevent any find or get operation on the entry */
		hlist_bl_lock(ce->e_block_hash_p);
		hlist_bl_lock(ce->e_index_hash_p);
		/* Ignore if it is touched by a find/get */
		if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) ||
			!list_empty(&ce->e_lru_list)) {
			hlist_bl_unlock(ce->e_index_hash_p);
			hlist_bl_unlock(ce->e_block_hash_p);
			spin_lock(&mb_cache_spinlock);
			continue;
		}
		__mb_cache_entry_unhash_unlock(ce);
		list_add_tail(&ce->e_lru_list, &free_list);
		spin_lock(&mb_cache_spinlock);
	}
	spin_unlock(&mb_cache_spinlock);

	list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
		__mb_cache_entry_forget(entry, gfp_mask);
		freed++;
	}
	return freed;
}

static unsigned long
mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
	struct mb_cache *cache;
	unsigned long count = 0;

	spin_lock(&mb_cache_spinlock);
	list_for_each_entry(cache, &mb_cache_list, c_cache_list) {
		mb_debug("cache %s (%d)", cache->c_name,
			  atomic_read(&cache->c_entry_count));
		count += atomic_read(&cache->c_entry_count);
	}
	spin_unlock(&mb_cache_spinlock);

	return vfs_pressure_ratio(count);
}

static struct shrinker mb_cache_shrinker = {
	.count_objects = mb_cache_shrink_count,
	.scan_objects = mb_cache_shrink_scan,
	.seeks = DEFAULT_SEEKS,
};

/*
 * mb_cache_create()  create a new cache
 *
 * All entries in one cache are equal size. Cache entries may be from
 * multiple devices. The new cache is added to the global list of caches
 * visible to the memory shrinker. Returns NULL if no more memory was
 * available.
 *
 * @name: name of the cache (informal)
 * @bucket_bits: log2(number of hash buckets)
 */
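/*
 * Illustrative call only (the name and bucket_bits value are made up and
 * not required by this API); a filesystem would typically do this once
 * and keep the returned pointer for the entry operations below:
 *
 *	struct mb_cache *cache;
 *
 *	cache = mb_cache_create("demo_xattr", 6);
 *	if (!cache)
 *		return -ENOMEM;
 */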
struct mb_cache *
mb_cache_create(const char *name, int bucket_bits)
{
	int n, bucket_count = 1 << bucket_bits;
	struct mb_cache *cache = NULL;

	if (!mb_cache_bg_lock) {
		mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock),
			GFP_KERNEL);
		if (!mb_cache_bg_lock)
			return NULL;
		bgl_lock_init(mb_cache_bg_lock);
	}

	cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL);
	if (!cache)
		return NULL;
	cache->c_name = name;
	atomic_set(&cache->c_entry_count, 0);
	cache->c_bucket_bits = bucket_bits;
	cache->c_block_hash = kmalloc(bucket_count *
		sizeof(struct hlist_bl_head), GFP_KERNEL);
	if (!cache->c_block_hash)
		goto fail;
	for (n = 0; n < bucket_count; n++)
		INIT_HLIST_BL_HEAD(&cache->c_block_hash[n]);
	cache->c_index_hash = kmalloc(bucket_count *
		sizeof(struct hlist_bl_head), GFP_KERNEL);
	if (!cache->c_index_hash)
		goto fail;
	for (n = 0; n < bucket_count; n++)
		INIT_HLIST_BL_HEAD(&cache->c_index_hash[n]);
	if (!mb_cache_kmem_cache) {
		mb_cache_kmem_cache = kmem_cache_create(name,
			sizeof(struct mb_cache_entry), 0,
			SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
		if (!mb_cache_kmem_cache)
			goto fail2;
	}
	cache->c_entry_cache = mb_cache_kmem_cache;

	/*
	 * Set an upper limit on the number of cache entries so that the hash
	 * chains won't grow too long.
	 */
	cache->c_max_entries = bucket_count << 4;

	spin_lock(&mb_cache_spinlock);
	list_add(&cache->c_cache_list, &mb_cache_list);
	spin_unlock(&mb_cache_spinlock);
	return cache;

fail2:
	kfree(cache->c_index_hash);

fail:
	kfree(cache->c_block_hash);
	kfree(cache);
	return NULL;
}


/*
 * mb_cache_shrink()
 *
 * Removes all cache entries of a device from the cache. Cache entries
 * that are currently in use cannot be freed and thus remain in the cache;
 * all others are freed.
 *
 * @bdev: which device's cache entries to shrink
 */
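/*
 * Usage note (hedged): callers are expected to invoke this when all of a
 * device's cached blocks become stale as a group, e.g. around unmount or
 * similar teardown; sb below stands for a hypothetical superblock pointer:
 *
 *	mb_cache_shrink(sb->s_bdev);
 */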
void
mb_cache_shrink(struct block_device *bdev)
{
	LIST_HEAD(free_list);
	struct list_head *l;
	struct mb_cache_entry *ce, *tmp;

	l = &mb_cache_lru_list;
	spin_lock(&mb_cache_spinlock);
	while (!list_is_last(l, &mb_cache_lru_list)) {
		l = l->next;
		ce = list_entry(l, struct mb_cache_entry, e_lru_list);
		if (ce->e_bdev == bdev) {
			list_del_init(&ce->e_lru_list);
			if (ce->e_used || ce->e_queued ||
				atomic_read(&ce->e_refcnt))
				continue;
			spin_unlock(&mb_cache_spinlock);
			/*
			 * Prevent any find or get operation on the entry.
			 */
			hlist_bl_lock(ce->e_block_hash_p);
			hlist_bl_lock(ce->e_index_hash_p);
			/* Ignore if it is touched by a find/get */
			if (ce->e_used || ce->e_queued ||
				atomic_read(&ce->e_refcnt) ||
				!list_empty(&ce->e_lru_list)) {
				hlist_bl_unlock(ce->e_index_hash_p);
				hlist_bl_unlock(ce->e_block_hash_p);
				l = &mb_cache_lru_list;
				spin_lock(&mb_cache_spinlock);
				continue;
			}
			__mb_cache_entry_unhash_unlock(ce);
			mb_assert(!(ce->e_used || ce->e_queued ||
				atomic_read(&ce->e_refcnt)));
			list_add_tail(&ce->e_lru_list, &free_list);
			l = &mb_cache_lru_list;
			spin_lock(&mb_cache_spinlock);
		}
	}
	spin_unlock(&mb_cache_spinlock);

	list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
		__mb_cache_entry_forget(ce, GFP_KERNEL);
	}
}


/*
 * mb_cache_destroy()
 *
 * Shrinks the cache to its minimum possible size (hopefully 0 entries),
 * and then destroys it. If this was the last mbcache, the shared entry
 * kmem cache is destroyed as well.
 */
void
mb_cache_destroy(struct mb_cache *cache)
{
	LIST_HEAD(free_list);
	struct mb_cache_entry *ce, *tmp;

	spin_lock(&mb_cache_spinlock);
	list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) {
		if (ce->e_cache == cache)
			list_move_tail(&ce->e_lru_list, &free_list);
	}
	list_del(&cache->c_cache_list);
	spin_unlock(&mb_cache_spinlock);

	list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
		list_del_init(&ce->e_lru_list);
		/*
		 * Prevent any find or get operation on the entry.
		 */
		hlist_bl_lock(ce->e_block_hash_p);
		hlist_bl_lock(ce->e_index_hash_p);
		mb_assert(!(ce->e_used || ce->e_queued ||
			atomic_read(&ce->e_refcnt)));
		__mb_cache_entry_unhash_unlock(ce);
		__mb_cache_entry_forget(ce, GFP_KERNEL);
	}

	if (atomic_read(&cache->c_entry_count) > 0) {
		mb_error("cache %s: %d orphaned entries",
			  cache->c_name,
			  atomic_read(&cache->c_entry_count));
	}

	if (list_empty(&mb_cache_list)) {
		kmem_cache_destroy(mb_cache_kmem_cache);
		mb_cache_kmem_cache = NULL;
	}
	kfree(cache->c_index_hash);
	kfree(cache->c_block_hash);
	kfree(cache);
}

/*
 * mb_cache_entry_alloc()
 *
 * Allocates a new cache entry. The new entry will not be valid initially,
 * and thus cannot be looked up yet. It should be filled with data, and
 * then inserted into the cache using mb_cache_entry_insert(). Returns NULL
 * if no more memory was available.
 */
struct mb_cache_entry *
mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
{
	struct mb_cache_entry *ce;

	if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
		struct list_head *l;

		l = &mb_cache_lru_list;
		spin_lock(&mb_cache_spinlock);
		while (!list_is_last(l, &mb_cache_lru_list)) {
			l = l->next;
			ce = list_entry(l, struct mb_cache_entry, e_lru_list);
			if (ce->e_cache == cache) {
				list_del_init(&ce->e_lru_list);
				if (ce->e_used || ce->e_queued ||
					atomic_read(&ce->e_refcnt))
					continue;
				spin_unlock(&mb_cache_spinlock);
				/*
				 * Prevent any find or get operation on the
				 * entry.
				 */
				hlist_bl_lock(ce->e_block_hash_p);
				hlist_bl_lock(ce->e_index_hash_p);
				/* Ignore if it is touched by a find/get */
				if (ce->e_used || ce->e_queued ||
					atomic_read(&ce->e_refcnt) ||
					!list_empty(&ce->e_lru_list)) {
					hlist_bl_unlock(ce->e_index_hash_p);
					hlist_bl_unlock(ce->e_block_hash_p);
					l = &mb_cache_lru_list;
					spin_lock(&mb_cache_spinlock);
					continue;
				}
				mb_assert(list_empty(&ce->e_lru_list));
				mb_assert(!(ce->e_used || ce->e_queued ||
					atomic_read(&ce->e_refcnt)));
				__mb_cache_entry_unhash_unlock(ce);
				goto found;
			}
		}
		spin_unlock(&mb_cache_spinlock);
	}

	ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
	if (!ce)
		return NULL;
	atomic_inc(&cache->c_entry_count);
	INIT_LIST_HEAD(&ce->e_lru_list);
	INIT_HLIST_BL_NODE(&ce->e_block_list);
	INIT_HLIST_BL_NODE(&ce->e_index.o_list);
	ce->e_cache = cache;
	ce->e_queued = 0;
	atomic_set(&ce->e_refcnt, 0);
found:
	ce->e_block_hash_p = &cache->c_block_hash[0];
	ce->e_index_hash_p = &cache->c_index_hash[0];
	ce->e_used = 1 + MB_CACHE_WRITER;
	return ce;
}


/*
 * mb_cache_entry_insert()
 *
 * Inserts an entry that was allocated using mb_cache_entry_alloc() into
 * the cache. After this, the cache entry can be looked up, but is not yet
 * in the lru list as the caller still holds a handle to it. Returns 0 on
 * success, or -EBUSY if a cache entry for that device + block number
 * exists already (this may happen after a failed lookup, if another
 * process has inserted the same cache entry in the meantime).
 *
 * @bdev: device the cache entry belongs to
 * @block: block number
 * @key: lookup key
 */
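/*
 * Typical caller-side pattern, sketched under the assumption that @ce was
 * just allocated with mb_cache_entry_alloc() and @key computed by the
 * caller (swallowing -EBUSY as shown is just one reasonable choice):
 *
 *	error = mb_cache_entry_insert(ce, bdev, block, key);
 *	if (error) {
 *		mb_cache_entry_free(ce);
 *		if (error == -EBUSY)
 *			error = 0;
 *	} else
 *		mb_cache_entry_release(ce);
 */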
int
mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
		      sector_t block, unsigned int key)
{
	struct mb_cache *cache = ce->e_cache;
	unsigned int bucket;
	struct hlist_bl_node *l;
	struct hlist_bl_head *block_hash_p;
	struct hlist_bl_head *index_hash_p;
	struct mb_cache_entry *lce;

	mb_assert(ce);
	bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
			   cache->c_bucket_bits);
	block_hash_p = &cache->c_block_hash[bucket];
	hlist_bl_lock(block_hash_p);
	hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) {
		if (lce->e_bdev == bdev && lce->e_block == block) {
			hlist_bl_unlock(block_hash_p);
			return -EBUSY;
		}
	}
	mb_assert(!__mb_cache_entry_is_block_hashed(ce));
	__mb_cache_entry_unhash_block(ce);
	__mb_cache_entry_unhash_index(ce);
	ce->e_bdev = bdev;
	ce->e_block = block;
	ce->e_block_hash_p = block_hash_p;
	ce->e_index.o_key = key;
	hlist_bl_add_head(&ce->e_block_list, block_hash_p);
	hlist_bl_unlock(block_hash_p);
	bucket = hash_long(key, cache->c_bucket_bits);
	index_hash_p = &cache->c_index_hash[bucket];
	hlist_bl_lock(index_hash_p);
	ce->e_index_hash_p = index_hash_p;
	hlist_bl_add_head(&ce->e_index.o_list, index_hash_p);
	hlist_bl_unlock(index_hash_p);
	return 0;
}


/*
 * mb_cache_entry_release()
 *
 * Release a handle to a cache entry. When the last handle to a cache entry
 * is released it is either freed (if it is invalid) or otherwise inserted
 * into the lru list.
 */
void
mb_cache_entry_release(struct mb_cache_entry *ce)
{
	__mb_cache_entry_release(ce);
}


/*
 * mb_cache_entry_free()
 *
 * Removes a cache entry from both hash chains (making it invalid) and then
 * releases the handle to it, so that the entry is freed once the last
 * handle has been dropped.
 */
void
mb_cache_entry_free(struct mb_cache_entry *ce)
{
	mb_assert(ce);
	mb_assert(list_empty(&ce->e_lru_list));
	hlist_bl_lock(ce->e_index_hash_p);
	__mb_cache_entry_unhash_index(ce);
	hlist_bl_unlock(ce->e_index_hash_p);
	hlist_bl_lock(ce->e_block_hash_p);
	__mb_cache_entry_unhash_block(ce);
	hlist_bl_unlock(ce->e_block_hash_p);
	__mb_cache_entry_release(ce);
}


/*
 * mb_cache_entry_get()
 *
 * Get a cache entry by device / block number. (There can only be one entry
 * in the cache per device and block.) Returns NULL if no such cache entry
 * exists. The returned cache entry is locked for exclusive access ("single
 * writer").
 */
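/*
 * Caller-side sketch, for illustration only (what is done with the entry
 * while the exclusive handle is held depends entirely on the filesystem):
 *
 *	ce = mb_cache_entry_get(cache, bdev, block);
 *	if (ce) {
 *		... use or update the cached block data ...
 *		mb_cache_entry_release(ce);
 *	}
 */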
struct mb_cache_entry *
mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
		   sector_t block)
{
	unsigned int bucket;
	struct hlist_bl_node *l;
	struct mb_cache_entry *ce;
	struct hlist_bl_head *block_hash_p;

	bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
			   cache->c_bucket_bits);
	block_hash_p = &cache->c_block_hash[bucket];
	/* First serialize access to the hash chain corresponding to the block. */
	hlist_bl_lock(block_hash_p);
	hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) {
		mb_assert(ce->e_block_hash_p == block_hash_p);
		if (ce->e_bdev == bdev && ce->e_block == block) {
			/*
			 * Prevent a free from removing the entry.
			 */
			atomic_inc(&ce->e_refcnt);
			hlist_bl_unlock(block_hash_p);
			__spin_lock_mb_cache_entry(ce);
			atomic_dec(&ce->e_refcnt);
			if (ce->e_used > 0) {
				DEFINE_WAIT(wait);
				while (ce->e_used > 0) {
					ce->e_queued++;
					prepare_to_wait(&mb_cache_queue, &wait,
							TASK_UNINTERRUPTIBLE);
					__spin_unlock_mb_cache_entry(ce);
					schedule();
					__spin_lock_mb_cache_entry(ce);
					ce->e_queued--;
				}
				finish_wait(&mb_cache_queue, &wait);
			}
			ce->e_used += 1 + MB_CACHE_WRITER;
			__spin_unlock_mb_cache_entry(ce);

			if (!list_empty(&ce->e_lru_list)) {
				spin_lock(&mb_cache_spinlock);
				list_del_init(&ce->e_lru_list);
				spin_unlock(&mb_cache_spinlock);
			}
			if (!__mb_cache_entry_is_block_hashed(ce)) {
				__mb_cache_entry_release(ce);
				return NULL;
			}
			return ce;
		}
	}
	hlist_bl_unlock(block_hash_p);
	return NULL;
}

#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)

static struct mb_cache_entry *
__mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head,
		      struct block_device *bdev, unsigned int key)
{
	/* The index hash chain is already locked by the caller. */
	while (l != NULL) {
		struct mb_cache_entry *ce =
			hlist_bl_entry(l, struct mb_cache_entry,
				e_index.o_list);
		mb_assert(ce->e_index_hash_p == head);
		if (ce->e_bdev == bdev && ce->e_index.o_key == key) {
			/*
			 * Prevent a free from removing the entry.
			 */
			atomic_inc(&ce->e_refcnt);
			hlist_bl_unlock(head);
			__spin_lock_mb_cache_entry(ce);
			atomic_dec(&ce->e_refcnt);
			ce->e_used++;
			/* Incrementing before holding the lock gives readers
			   priority over writers. */
			if (ce->e_used >= MB_CACHE_WRITER) {
				DEFINE_WAIT(wait);

				while (ce->e_used >= MB_CACHE_WRITER) {
					ce->e_queued++;
					prepare_to_wait(&mb_cache_queue, &wait,
							TASK_UNINTERRUPTIBLE);
					__spin_unlock_mb_cache_entry(ce);
					schedule();
					__spin_lock_mb_cache_entry(ce);
					ce->e_queued--;
				}
				finish_wait(&mb_cache_queue, &wait);
			}
			__spin_unlock_mb_cache_entry(ce);
			if (!list_empty(&ce->e_lru_list)) {
				spin_lock(&mb_cache_spinlock);
				list_del_init(&ce->e_lru_list);
				spin_unlock(&mb_cache_spinlock);
			}
			if (!__mb_cache_entry_is_block_hashed(ce)) {
				__mb_cache_entry_release(ce);
				return ERR_PTR(-EAGAIN);
			}
			return ce;
		}
		l = l->next;
	}
	hlist_bl_unlock(head);
	return NULL;
}


/*
 * mb_cache_entry_find_first()
 *
 * Find the first cache entry on a given device with a certain key in
 * an additional index. Additional matches can be found with
 * mb_cache_entry_find_next(). Returns NULL if no match was found. The
 * returned cache entry is locked for shared access ("multiple readers").
 *
 * @cache: the cache to search
 * @bdev: the device the cache entry should belong to
 * @key: the key in the index
 */
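/*
 * Entries found through __mb_cache_entry_find() may also come back as
 * ERR_PTR(-EAGAIN) when the entry got unhashed while we waited for it, so
 * a defensive iteration looks roughly like this (sketch only; callers
 * commonly just retry the search on -EAGAIN):
 *
 *	ce = mb_cache_entry_find_first(cache, bdev, key);
 *	while (!IS_ERR_OR_NULL(ce)) {
 *		...
 *		ce = mb_cache_entry_find_next(ce, bdev, key);
 *	}
 */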
struct mb_cache_entry *
mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev,
			  unsigned int key)
{
	unsigned int bucket = hash_long(key, cache->c_bucket_bits);
	struct hlist_bl_node *l;
	struct mb_cache_entry *ce = NULL;
	struct hlist_bl_head *index_hash_p;

	index_hash_p = &cache->c_index_hash[bucket];
	hlist_bl_lock(index_hash_p);
	if (!hlist_bl_empty(index_hash_p)) {
		l = hlist_bl_first(index_hash_p);
		ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
	} else
		hlist_bl_unlock(index_hash_p);
	return ce;
}


/*
 * mb_cache_entry_find_next()
 *
 * Find the next cache entry on a given device with a certain key in an
 * additional index. Returns NULL if no match could be found. The previous
 * entry is automatically released, so that mb_cache_entry_find_next() can
 * be called like this:
 *
 * entry = mb_cache_entry_find_first();
 * while (entry) {
 *	...
 *	entry = mb_cache_entry_find_next(entry, ...);
 * }
 *
 * @prev: The previous match
 * @bdev: the device the cache entry should belong to
 * @key: the key in the index
 */
struct mb_cache_entry *
mb_cache_entry_find_next(struct mb_cache_entry *prev,
			 struct block_device *bdev, unsigned int key)
{
	struct mb_cache *cache = prev->e_cache;
	unsigned int bucket = hash_long(key, cache->c_bucket_bits);
	struct hlist_bl_node *l;
	struct mb_cache_entry *ce;
	struct hlist_bl_head *index_hash_p;

	index_hash_p = &cache->c_index_hash[bucket];
	mb_assert(prev->e_index_hash_p == index_hash_p);
	hlist_bl_lock(index_hash_p);
	mb_assert(!hlist_bl_empty(index_hash_p));
	l = prev->e_index.o_list.next;
	ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
	__mb_cache_entry_release(prev);
	return ce;
}

#endif  /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */

static int __init init_mbcache(void)
{
	register_shrinker(&mb_cache_shrinker);
	return 0;
}

static void __exit exit_mbcache(void)
{
	unregister_shrinker(&mb_cache_shrinker);
}

module_init(init_mbcache)
module_exit(exit_mbcache)