/*
 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
 *
 * bitmap_create  - sets up the bitmap structure
 * bitmap_destroy - destroys the bitmap structure
 *
 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
 * - added disk storage for bitmap
 * - changes to allow various bitmap chunk sizes
 */

/*
 * Still to do:
 *
 * flush after a percentage of bits is set, rather than just time based (maybe both).
 */

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/timer.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/buffer_head.h>
#include <linux/seq_file.h>
#include "md.h"
#include "bitmap.h"

static inline char *bmname(struct bitmap *bitmap)
{
	return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
}

/*
 * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
 *
 * 1) check to see if this page is allocated, if it's not then try to alloc
 * 2) if the alloc fails, set the page's hijacked flag so we'll use the
 *    page pointer directly as a counter
 *
 * if we find our page, we increment the page's refcount so that it stays
 * allocated while we're using it
 */
static int bitmap_checkpage(struct bitmap_counts *bitmap,
			    unsigned long page, int create)
__releases(bitmap->lock)
__acquires(bitmap->lock)
{
	unsigned char *mappage;

	if (page >= bitmap->pages) {
		/* This can happen if bitmap_start_sync goes beyond
		 * End-of-device while looking for a whole page.
		 * It is harmless.
		 */
		return -EINVAL;
	}

	if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
		return 0;

	if (bitmap->bp[page].map) /* page is already allocated, just return */
		return 0;

	if (!create)
		return -ENOENT;

	/* this page has not been allocated yet */

	spin_unlock_irq(&bitmap->lock);
	/* It is possible that this is being called inside a
	 * prepare_to_wait/finish_wait loop from raid5.c:make_request().
	 * In general it is not permitted to sleep in that context as it
	 * can cause the loop to spin freely.
	 * That doesn't apply here as we can only reach this point
	 * once with any loop.
	 * When this function completes, either bp[page].map or
	 * bp[page].hijacked will be set.  In either case, this function will
	 * abort before getting to this point again.  So there is
	 * no risk of a free-spin, and so it is safe to assert
	 * that sleeping here is allowed.
	 */
	sched_annotate_sleep();
	mappage = kzalloc(PAGE_SIZE, GFP_NOIO);
	spin_lock_irq(&bitmap->lock);

	if (mappage == NULL) {
		pr_debug("md/bitmap: map page allocation failed, hijacking\n");
		/* failed - set the hijacked flag so that we can use the
		 * pointer as a counter */
		if (!bitmap->bp[page].map)
			bitmap->bp[page].hijacked = 1;
	} else if (bitmap->bp[page].map ||
		   bitmap->bp[page].hijacked) {
		/* somebody beat us to getting the page */
		kfree(mappage);
		return 0;
	} else {

		/* no page was in place and we have one, so install it */

		bitmap->bp[page].map = mappage;
		bitmap->missing_pages--;
	}
	return 0;
}
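
/*
 * Note on the "hijack" trick used above: bitmap_counter_t is a small
 * integer type (16 bits in this code; see COUNTER_BITS), so when a
 * counter page cannot be allocated, the pointer field itself is reused
 * as storage.  Conceptually the slot then behaves like
 *
 *	union {
 *		unsigned char *map;		(normal case)
 *		bitmap_counter_t cnt[2];	(hijacked case)
 *	};
 *
 * and each of the two hijacked counters covers half of the chunk range
 * that the missing page would have covered -- see the csize calculation
 * in bitmap_get_counter() below.
 */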

/* if page is completely empty, put it back on the free list, or dealloc it */
/* if page was hijacked, unmark the flag so it might get alloced next time */
/* Note: lock should be held when calling this */
static void bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page)
{
	char *ptr;

	if (bitmap->bp[page].count) /* page is still busy */
		return;

	/* page is no longer in use, it can be released */

	if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
		bitmap->bp[page].hijacked = 0;
		bitmap->bp[page].map = NULL;
	} else {
		/* normal case, free the page */
		ptr = bitmap->bp[page].map;
		bitmap->bp[page].map = NULL;
		bitmap->missing_pages++;
		kfree(ptr);
	}
}

/*
 * bitmap file handling - read and write the bitmap file and its superblock
 */

/*
 * basic page I/O operations
 */

/* IO operations when bitmap is stored near all superblocks */
static int read_sb_page(struct mddev *mddev, loff_t offset,
			struct page *page,
			unsigned long index, int size)
{
	/* choose a good rdev and read the page from there */

	struct md_rdev *rdev;
	sector_t target;

	rdev_for_each(rdev, mddev) {
		if (!test_bit(In_sync, &rdev->flags)
		    || test_bit(Faulty, &rdev->flags))
			continue;

		target = offset + index * (PAGE_SIZE/512);

		if (sync_page_io(rdev, target,
				 roundup(size, bdev_logical_block_size(rdev->bdev)),
				 page, READ, true)) {
			page->index = index;
			return 0;
		}
	}
	return -EIO;
}

static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
	/* Iterate the disks of an mddev, using rcu to protect access to the
	 * linked list, and raising the refcount of devices we return to ensure
	 * they don't disappear while in use.
	 * As devices are only added or removed when raid_disk is < 0 and
	 * nr_pending is 0 and In_sync is clear, the entries we return will
	 * still be in the same position on the list when we re-enter
	 * list_for_each_entry_continue_rcu.
	 *
	 * Note that if entered with 'rdev == NULL' to start at the
	 * beginning, we temporarily assign 'rdev' to an address which
	 * isn't really an rdev, but which can be used by
	 * list_for_each_entry_continue_rcu() to find the first entry.
	 */
	rcu_read_lock();
	if (rdev == NULL)
		/* start at the beginning */
		rdev = list_entry(&mddev->disks, struct md_rdev, same_set);
	else {
		/* release the previous rdev and start from there. */
		rdev_dec_pending(rdev, mddev);
	}
	list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) {
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* this is a usable device */
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			return rdev;
		}
	}
	rcu_read_unlock();
	return NULL;
}

static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
{
	struct md_rdev *rdev = NULL;
	struct block_device *bdev;
	struct mddev *mddev = bitmap->mddev;
	struct bitmap_storage *store = &bitmap->storage;
	int node_offset = 0;

	if (mddev_is_clustered(bitmap->mddev))
		node_offset = bitmap->cluster_slot * store->file_pages;

	while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
		int size = PAGE_SIZE;
		loff_t offset = mddev->bitmap_info.offset;

		bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;

		if (page->index == store->file_pages-1) {
			int last_page_size = store->bytes & (PAGE_SIZE-1);
			if (last_page_size == 0)
				last_page_size = PAGE_SIZE;
			size = roundup(last_page_size,
				       bdev_logical_block_size(bdev));
		}
		/* Just make sure we aren't corrupting data or
		 * metadata
		 */
		if (mddev->external) {
			/* Bitmap could be anywhere. */
			if (rdev->sb_start + offset + (page->index
						       * (PAGE_SIZE/512))
			    > rdev->data_offset
			    &&
			    rdev->sb_start + offset
			    < (rdev->data_offset + mddev->dev_sectors
			     + (PAGE_SIZE/512)))
				goto bad_alignment;
		} else if (offset < 0) {
			/* DATA  BITMAP METADATA  */
			if (offset
			    + (long)(page->index * (PAGE_SIZE/512))
			    + size/512 > 0)
				/* bitmap runs in to metadata */
				goto bad_alignment;
			if (rdev->data_offset + mddev->dev_sectors
			    > rdev->sb_start + offset)
				/* data runs in to bitmap */
				goto bad_alignment;
		} else if (rdev->sb_start < rdev->data_offset) {
			/* METADATA BITMAP DATA */
			if (rdev->sb_start
			    + offset
			    + page->index*(PAGE_SIZE/512) + size/512
			    > rdev->data_offset)
				/* bitmap runs in to data */
				goto bad_alignment;
		} else {
			/* DATA METADATA BITMAP - no problems */
		}
		md_super_write(mddev, rdev,
			       rdev->sb_start + offset
			       + page->index * (PAGE_SIZE/512),
			       size,
			       page);
	}

	if (wait)
		md_super_wait(mddev);
	return 0;

 bad_alignment:
	return -EINVAL;
}
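
/*
 * The alignment checks above distinguish the three possible on-disk
 * layouts of a member device (offset is relative to sb_start):
 *
 *	DATA  BITMAP  METADATA		(bitmap_info.offset < 0)
 *	METADATA  BITMAP  DATA		(sb_start < data_offset)
 *	DATA  METADATA  BITMAP		(bitmap after everything: safe)
 *
 * plus the "external metadata" case, where the bitmap may live anywhere
 * and must merely avoid overlapping the data region.
 */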

static void bitmap_file_kick(struct bitmap *bitmap);
/*
 * write out a page to a file
 */
static void write_page(struct bitmap *bitmap, struct page *page, int wait)
{
	struct buffer_head *bh;

	if (bitmap->storage.file == NULL) {
		switch (write_sb_page(bitmap, page, wait)) {
		case -EINVAL:
			set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
		}
	} else {

		bh = page_buffers(page);

		while (bh && bh->b_blocknr) {
			atomic_inc(&bitmap->pending_writes);
			set_buffer_locked(bh);
			set_buffer_mapped(bh);
			submit_bh(WRITE | REQ_SYNC, bh);
			bh = bh->b_this_page;
		}

		if (wait)
			wait_event(bitmap->write_wait,
				   atomic_read(&bitmap->pending_writes)==0);
	}
	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
		bitmap_file_kick(bitmap);
}

static void end_bitmap_write(struct buffer_head *bh, int uptodate)
{
	struct bitmap *bitmap = bh->b_private;

	if (!uptodate)
		set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
	if (atomic_dec_and_test(&bitmap->pending_writes))
		wake_up(&bitmap->write_wait);
}

/* copied from buffer.c */
static void
__clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);
	page_cache_release(page);
}
static void free_buffers(struct page *page)
{
	struct buffer_head *bh;

	if (!PagePrivate(page))
		return;

	bh = page_buffers(page);
	while (bh) {
		struct buffer_head *next = bh->b_this_page;
		free_buffer_head(bh);
		bh = next;
	}
	__clear_page_buffers(page);
	put_page(page);
}

/* read a page from a file.
 * We both read the page, and attach buffers to the page to record the
 * address of each block (using bmap).  These addresses will be used
 * to write the block later, completely bypassing the filesystem.
 * This usage is similar to how swap files are handled, and allows us
 * to write to a file with no concerns of memory allocation failing.
 */
static int read_page(struct file *file, unsigned long index,
		     struct bitmap *bitmap,
		     unsigned long count,
		     struct page *page)
{
	int ret = 0;
	struct inode *inode = file_inode(file);
	struct buffer_head *bh;
	sector_t block;

	pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
		 (unsigned long long)index << PAGE_SHIFT);

	bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0);
	if (!bh) {
		ret = -ENOMEM;
		goto out;
	}
	attach_page_buffers(page, bh);
	block = index << (PAGE_SHIFT - inode->i_blkbits);
	while (bh) {
		if (count == 0)
			bh->b_blocknr = 0;
		else {
			bh->b_blocknr = bmap(inode, block);
			if (bh->b_blocknr == 0) {
				/* Cannot use this file! */
				ret = -EINVAL;
				goto out;
			}
			bh->b_bdev = inode->i_sb->s_bdev;
			if (count < (1<<inode->i_blkbits))
				count = 0;
			else
				count -= (1<<inode->i_blkbits);

			bh->b_end_io = end_bitmap_write;
			bh->b_private = bitmap;
			atomic_inc(&bitmap->pending_writes);
			set_buffer_locked(bh);
			set_buffer_mapped(bh);
			submit_bh(READ, bh);
		}
		block++;
		bh = bh->b_this_page;
	}
	page->index = index;

	wait_event(bitmap->write_wait,
		   atomic_read(&bitmap->pending_writes)==0);
	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
		ret = -EIO;
out:
	if (ret)
		printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %d\n",
			(int)PAGE_SIZE,
			(unsigned long long)index << PAGE_SHIFT,
			ret);
	return ret;
}

/*
 * bitmap file superblock operations
 */

/* update the event counter and sync the superblock to disk */
void bitmap_update_sb(struct bitmap *bitmap)
{
	bitmap_super_t *sb;

	if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
		return;
	if (bitmap->mddev->bitmap_info.external)
		return;
	if (!bitmap->storage.sb_page) /* no superblock */
		return;
	sb = kmap_atomic(bitmap->storage.sb_page);
	sb->events = cpu_to_le64(bitmap->mddev->events);
	if (bitmap->mddev->events < bitmap->events_cleared)
		/* rocking back to read-only */
		bitmap->events_cleared = bitmap->mddev->events;
	sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
	sb->state = cpu_to_le32(bitmap->flags);
	/* Just in case these have been changed via sysfs: */
	sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
	sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
	/* This might have been changed by a reshape */
	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
	sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize);
	sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes);
	sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
					   bitmap_info.space);
	kunmap_atomic(sb);
	write_page(bitmap, bitmap->storage.sb_page, 1);
}

/* print out the bitmap file superblock */
void bitmap_print_sb(struct bitmap *bitmap)
{
	bitmap_super_t *sb;

	if (!bitmap || !bitmap->storage.sb_page)
		return;
	sb = kmap_atomic(bitmap->storage.sb_page);
	printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
	printk(KERN_DEBUG "         magic: %08x\n", le32_to_cpu(sb->magic));
	printk(KERN_DEBUG "       version: %d\n", le32_to_cpu(sb->version));
	printk(KERN_DEBUG "          uuid: %08x.%08x.%08x.%08x\n",
					*(__u32 *)(sb->uuid+0),
					*(__u32 *)(sb->uuid+4),
					*(__u32 *)(sb->uuid+8),
					*(__u32 *)(sb->uuid+12));
	printk(KERN_DEBUG "        events: %llu\n",
			(unsigned long long) le64_to_cpu(sb->events));
	printk(KERN_DEBUG "events cleared: %llu\n",
			(unsigned long long) le64_to_cpu(sb->events_cleared));
	printk(KERN_DEBUG "         state: %08x\n", le32_to_cpu(sb->state));
	printk(KERN_DEBUG "     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
	printk(KERN_DEBUG "  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
	printk(KERN_DEBUG "     sync size: %llu KB\n",
			(unsigned long long)le64_to_cpu(sb->sync_size)/2);
	printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind));
	kunmap_atomic(sb);
}

/*
 * bitmap_new_disk_sb
 * @bitmap
 *
 * This function is somewhat the reverse of bitmap_read_sb.  bitmap_read_sb
 * reads and verifies the on-disk bitmap superblock and populates bitmap_info.
 * This function verifies 'bitmap_info' and populates the on-disk bitmap
 * structure, which is to be written to disk.
 *
 * Returns: 0 on success, -Exxx on error
 */
static int bitmap_new_disk_sb(struct bitmap *bitmap)
{
	bitmap_super_t *sb;
	unsigned long chunksize, daemon_sleep, write_behind;

	bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (bitmap->storage.sb_page == NULL)
		return -ENOMEM;
	bitmap->storage.sb_page->index = 0;

	sb = kmap_atomic(bitmap->storage.sb_page);

	sb->magic = cpu_to_le32(BITMAP_MAGIC);
	sb->version = cpu_to_le32(BITMAP_MAJOR_HI);

	chunksize = bitmap->mddev->bitmap_info.chunksize;
	BUG_ON(!chunksize);
	if (!is_power_of_2(chunksize)) {
		kunmap_atomic(sb);
		printk(KERN_ERR "bitmap chunksize not a power of 2\n");
		return -EINVAL;
	}
	sb->chunksize = cpu_to_le32(chunksize);

	daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
	if (!daemon_sleep ||
	    (daemon_sleep < 1) || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
		printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
		daemon_sleep = 5 * HZ;
	}
	sb->daemon_sleep = cpu_to_le32(daemon_sleep);
	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;

	/*
	 * FIXME: write_behind for RAID1.  If not specified, what
	 * is a good choice?  We choose COUNTER_MAX / 2 arbitrarily.
	 */
	write_behind = bitmap->mddev->bitmap_info.max_write_behind;
	if (write_behind > COUNTER_MAX)
		write_behind = COUNTER_MAX / 2;
	sb->write_behind = cpu_to_le32(write_behind);
	bitmap->mddev->bitmap_info.max_write_behind = write_behind;

	/* keep the array size field of the bitmap superblock up to date */
	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);

	memcpy(sb->uuid, bitmap->mddev->uuid, 16);

	set_bit(BITMAP_STALE, &bitmap->flags);
	sb->state = cpu_to_le32(bitmap->flags);
	bitmap->events_cleared = bitmap->mddev->events;
	sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
	bitmap->mddev->bitmap_info.nodes = 0;

	kunmap_atomic(sb);

	return 0;
}

/* read the superblock from the bitmap file and initialize some bitmap fields */
static int bitmap_read_sb(struct bitmap *bitmap)
{
	char *reason = NULL;
	bitmap_super_t *sb;
	unsigned long chunksize, daemon_sleep, write_behind;
	unsigned long long events;
	int nodes = 0;
	unsigned long sectors_reserved = 0;
	int err = -EINVAL;
	struct page *sb_page;

	if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
		chunksize = 128 * 1024 * 1024;
		daemon_sleep = 5 * HZ;
		write_behind = 0;
		set_bit(BITMAP_STALE, &bitmap->flags);
		err = 0;
		goto out_no_sb;
	}
	/* page 0 is the superblock, read it... */
	sb_page = alloc_page(GFP_KERNEL);
	if (!sb_page)
		return -ENOMEM;
	bitmap->storage.sb_page = sb_page;

re_read:
	/* If cluster_slot is set, the cluster is setup */
	if (bitmap->cluster_slot >= 0) {
		sector_t bm_blocks = bitmap->mddev->resync_max_sectors;

		sector_div(bm_blocks,
			   bitmap->mddev->bitmap_info.chunksize >> 9);
		/* bits to bytes */
		bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
		/* to 4k blocks */
		bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
		bitmap->mddev->bitmap_info.offset += bitmap->cluster_slot * (bm_blocks << 3);
		pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
			bitmap->cluster_slot, (unsigned long long)bitmap->mddev->bitmap_info.offset);
	}

	if (bitmap->storage.file) {
		loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
		int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;

		err = read_page(bitmap->storage.file, 0,
				bitmap, bytes, sb_page);
	} else {
		err = read_sb_page(bitmap->mddev,
				   bitmap->mddev->bitmap_info.offset,
				   sb_page,
				   0, sizeof(bitmap_super_t));
	}
	if (err)
		return err;

	err = -EINVAL;
	sb = kmap_atomic(sb_page);

	chunksize = le32_to_cpu(sb->chunksize);
	daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
	write_behind = le32_to_cpu(sb->write_behind);
	sectors_reserved = le32_to_cpu(sb->sectors_reserved);
	/* XXX: This is a hack to ensure that we don't use clustering
	 *  in case:
	 *	- dm-raid is in use and
	 *	- the nodes written in bitmap_sb are erroneous.
	 */
	if (!bitmap->mddev->sync_super) {
		nodes = le32_to_cpu(sb->nodes);
		strlcpy(bitmap->mddev->bitmap_info.cluster_name,
				sb->cluster_name, 64);
	}

	/* verify that the bitmap-specific fields are valid */
	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
		reason = "bad magic";
	else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
		 le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
		reason = "unrecognized superblock version";
	else if (chunksize < 512)
		reason = "bitmap chunksize too small";
	else if (!is_power_of_2(chunksize))
		reason = "bitmap chunksize not a power of 2";
	else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
		reason = "daemon sleep period out of range";
	else if (write_behind > COUNTER_MAX)
		reason = "write-behind limit out of range (0 - 16383)";
	if (reason) {
		printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
			bmname(bitmap), reason);
		goto out;
	}

	/* keep the array size field of the bitmap superblock up to date */
	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);

	if (bitmap->mddev->persistent) {
		/*
		 * We have a persistent array superblock, so compare the
		 * bitmap's UUID and event counter to the mddev's
		 */
		if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
			printk(KERN_INFO
			       "%s: bitmap superblock UUID mismatch\n",
			       bmname(bitmap));
			goto out;
		}
		events = le64_to_cpu(sb->events);
		if (!nodes && (events < bitmap->mddev->events)) {
			printk(KERN_INFO
			       "%s: bitmap file is out of date (%llu < %llu) "
			       "-- forcing full recovery\n",
			       bmname(bitmap), events,
			       (unsigned long long) bitmap->mddev->events);
			set_bit(BITMAP_STALE, &bitmap->flags);
		}
	}

	/* assign fields using values from superblock */
	bitmap->flags |= le32_to_cpu(sb->state);
	if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
		set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
	bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
	strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
	err = 0;

out:
	kunmap_atomic(sb);
	/* Assigning chunksize is required for "re_read" */
	bitmap->mddev->bitmap_info.chunksize = chunksize;
	if (nodes && (bitmap->cluster_slot < 0)) {
		err = md_setup_cluster(bitmap->mddev, nodes);
		if (err) {
			pr_err("%s: Could not setup cluster service (%d)\n",
					bmname(bitmap), err);
			goto out_no_sb;
		}
		bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev);
		goto re_read;
	}

out_no_sb:
	if (test_bit(BITMAP_STALE, &bitmap->flags))
		bitmap->events_cleared = bitmap->mddev->events;
	bitmap->mddev->bitmap_info.chunksize = chunksize;
	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
	bitmap->mddev->bitmap_info.max_write_behind = write_behind;
	bitmap->mddev->bitmap_info.nodes = nodes;
	if (bitmap->mddev->bitmap_info.space == 0 ||
	    bitmap->mddev->bitmap_info.space > sectors_reserved)
		bitmap->mddev->bitmap_info.space = sectors_reserved;
	if (err) {
		bitmap_print_sb(bitmap);
		if (bitmap->cluster_slot < 0)
			md_cluster_stop(bitmap->mddev);
	}
	return err;
}

/*
 * general bitmap file operations
 */

/*
 * on-disk bitmap:
 *
 * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
 * file a page at a time. There's a superblock at the start of the file.
 */
/* calculate the index of the page that contains this bit */
static inline unsigned long file_page_index(struct bitmap_storage *store,
					    unsigned long chunk)
{
	if (store->sb_page)
		chunk += sizeof(bitmap_super_t) << 3;
	return chunk >> PAGE_BIT_SHIFT;
}

/* calculate the (bit) offset of this bit within a page */
static inline unsigned long file_page_offset(struct bitmap_storage *store,
					     unsigned long chunk)
{
	if (store->sb_page)
		chunk += sizeof(bitmap_super_t) << 3;
	return chunk & (PAGE_BITS - 1);
}
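
/*
 * A worked example of the arithmetic above, assuming 4K pages (so
 * PAGE_BITS == 32768 and PAGE_BIT_SHIFT == 15) and an internal
 * superblock: sizeof(bitmap_super_t) << 3 == 2048, so chunk 0 maps to
 * bit 2048 of page 0, the first 30720 chunks fit in page 0, and chunk
 * 30720 becomes bit 0 of page 1:
 *
 *	file_page_index(store, 30720) == (30720 + 2048) >> 15 == 1
 *	file_page_offset(store, 30720) == (30720 + 2048) & 32767 == 0
 */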

/*
 * return a pointer to the page in the filemap that contains the given bit
 */
static inline struct page *filemap_get_page(struct bitmap_storage *store,
					    unsigned long chunk)
{
	if (file_page_index(store, chunk) >= store->file_pages)
		return NULL;
	return store->filemap[file_page_index(store, chunk)];
}

static int bitmap_storage_alloc(struct bitmap_storage *store,
				unsigned long chunks, int with_super,
				int slot_number)
{
	int pnum, offset = 0;
	unsigned long num_pages;
	unsigned long bytes;

	bytes = DIV_ROUND_UP(chunks, 8);
	if (with_super)
		bytes += sizeof(bitmap_super_t);

	num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
	offset = slot_number * (num_pages - 1);

	store->filemap = kmalloc(sizeof(struct page *)
				 * num_pages, GFP_KERNEL);
	if (!store->filemap)
		return -ENOMEM;

	if (with_super && !store->sb_page) {
		store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
		if (store->sb_page == NULL)
			return -ENOMEM;
	}

	pnum = 0;
	if (store->sb_page) {
		store->filemap[0] = store->sb_page;
		pnum = 1;
		store->sb_page->index = offset;
	}

	for ( ; pnum < num_pages; pnum++) {
		store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO);
		if (!store->filemap[pnum]) {
			store->file_pages = pnum;
			return -ENOMEM;
		}
		store->filemap[pnum]->index = pnum + offset;
	}
	store->file_pages = pnum;

	/* We need 4 bits per page, rounded up to a multiple
	 * of sizeof(unsigned long) */
	store->filemap_attr = kzalloc(
		roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
		GFP_KERNEL);
	if (!store->filemap_attr)
		return -ENOMEM;

	store->bytes = bytes;

	return 0;
}

static void bitmap_file_unmap(struct bitmap_storage *store)
{
	struct page **map, *sb_page;
	int pages;
	struct file *file;

	file = store->file;
	map = store->filemap;
	pages = store->file_pages;
	sb_page = store->sb_page;

	while (pages--)
		if (map[pages] != sb_page) /* 0 is sb_page, release it below */
			free_buffers(map[pages]);
	kfree(map);
	kfree(store->filemap_attr);

	if (sb_page)
		free_buffers(sb_page);

	if (file) {
		struct inode *inode = file_inode(file);
		invalidate_mapping_pages(inode->i_mapping, 0, -1);
		fput(file);
	}
}

/*
 * bitmap_file_kick - if an error occurs while manipulating the bitmap file
 * then it is no longer reliable, so we stop using it and we mark the file
 * as failed in the superblock
 */
static void bitmap_file_kick(struct bitmap *bitmap)
{
	char *path, *ptr = NULL;

	if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
		bitmap_update_sb(bitmap);

		if (bitmap->storage.file) {
			path = kmalloc(PAGE_SIZE, GFP_KERNEL);
			if (path)
				ptr = d_path(&bitmap->storage.file->f_path,
					     path, PAGE_SIZE);

			printk(KERN_ALERT
			      "%s: kicking failed bitmap file %s from array!\n",
			      bmname(bitmap), IS_ERR(ptr) ? "" : ptr);

			kfree(path);
		} else
			printk(KERN_ALERT
			       "%s: disabling internal bitmap due to errors\n",
			       bmname(bitmap));
	}
}

enum bitmap_page_attr {
	BITMAP_PAGE_DIRTY = 0,     /* there are set bits that need to be synced */
	BITMAP_PAGE_PENDING = 1,   /* there are bits that are being cleaned.
				    * i.e. counter is 1 or 2. */
	BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */
};
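
/*
 * These attributes live in the packed filemap_attr array allocated in
 * bitmap_storage_alloc(), four bit positions per filemap page (three
 * used, one spare), hence the (pnum << 2) + attr indexing below; e.g.
 * BITMAP_PAGE_NEEDWRITE for page 3 is bit (3 << 2) + 2 == 14.
 */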

static inline void set_page_attr(struct bitmap *bitmap, int pnum,
				 enum bitmap_page_attr attr)
{
	set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
}

static inline void clear_page_attr(struct bitmap *bitmap, int pnum,
				   enum bitmap_page_attr attr)
{
	clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
}

static inline int test_page_attr(struct bitmap *bitmap, int pnum,
				 enum bitmap_page_attr attr)
{
	return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
}

static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum,
					   enum bitmap_page_attr attr)
{
	return test_and_clear_bit((pnum<<2) + attr,
				  bitmap->storage.filemap_attr);
}

/*
 * bitmap_file_set_bit -- called before performing a write to the md device
 * to set (and eventually sync) a particular bit in the bitmap file
 *
 * we set the bit immediately, then we record the page number so that
 * when an unplug occurs, we can flush the dirty pages out to disk
 */
static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
{
	unsigned long bit;
	struct page *page;
	void *kaddr;
	unsigned long chunk = block >> bitmap->counts.chunkshift;

	page = filemap_get_page(&bitmap->storage, chunk);
	if (!page)
		return;
	bit = file_page_offset(&bitmap->storage, chunk);

	/* set the bit */
	kaddr = kmap_atomic(page);
	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
		set_bit(bit, kaddr);
	else
		set_bit_le(bit, kaddr);
	kunmap_atomic(kaddr);
	pr_debug("set file bit %lu page %lu\n", bit, page->index);
	/* record page number so it gets flushed to disk when unplug occurs */
	set_page_attr(bitmap, page->index, BITMAP_PAGE_DIRTY);
}

static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
{
	unsigned long bit;
	struct page *page;
	void *paddr;
	unsigned long chunk = block >> bitmap->counts.chunkshift;

	page = filemap_get_page(&bitmap->storage, chunk);
	if (!page)
		return;
	bit = file_page_offset(&bitmap->storage, chunk);
	paddr = kmap_atomic(page);
	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
		clear_bit(bit, paddr);
	else
		clear_bit_le(bit, paddr);
	kunmap_atomic(paddr);
	if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) {
		set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING);
		bitmap->allclean = 0;
	}
}

static int bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
{
	unsigned long bit;
	struct page *page;
	void *paddr;
	unsigned long chunk = block >> bitmap->counts.chunkshift;
	int set = 0;

	page = filemap_get_page(&bitmap->storage, chunk);
	if (!page)
		return -EINVAL;
	bit = file_page_offset(&bitmap->storage, chunk);
	paddr = kmap_atomic(page);
	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
		set = test_bit(bit, paddr);
	else
		set = test_bit_le(bit, paddr);
	kunmap_atomic(paddr);
	return set;
}

/* this gets called when the md device is ready to unplug its underlying
 * (slave) device queues -- before we let any writes go down, we need to
 * sync the dirty pages of the bitmap file to disk */
void bitmap_unplug(struct bitmap *bitmap)
{
	unsigned long i;
	int dirty, need_write;

	if (!bitmap || !bitmap->storage.filemap ||
	    test_bit(BITMAP_STALE, &bitmap->flags))
		return;

	/* look at each page to see if there are any set bits that need to be
	 * flushed out to disk */
	for (i = 0; i < bitmap->storage.file_pages; i++) {
		if (!bitmap->storage.filemap)
			return;
		dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
		need_write = test_and_clear_page_attr(bitmap, i,
						      BITMAP_PAGE_NEEDWRITE);
		if (dirty || need_write) {
			clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
			write_page(bitmap, bitmap->storage.filemap[i], 0);
		}
	}
	if (bitmap->storage.file)
		wait_event(bitmap->write_wait,
			   atomic_read(&bitmap->pending_writes)==0);
	else
		md_super_wait(bitmap->mddev);

	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
		bitmap_file_kick(bitmap);
}
EXPORT_SYMBOL(bitmap_unplug);

static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
/*
 * bitmap_init_from_disk -- called at bitmap_create time to initialize
 * the in-memory bitmap from the on-disk bitmap -- also, sets up the
 * memory mapping of the bitmap file
 * Special cases:
 *   if there's no bitmap file, or if the bitmap file had been
 *   previously kicked from the array, we mark all the bits as
 *   1's in order to cause a full resync.
 *
 * We ignore all bits for sectors that end earlier than 'start'.
 * This is used when reading an out-of-date bitmap...
 */
static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
{
	unsigned long i, chunks, index, oldindex, bit, node_offset = 0;
	struct page *page = NULL;
	unsigned long bit_cnt = 0;
	struct file *file;
	unsigned long offset;
	int outofdate;
	int ret = -ENOSPC;
	void *paddr;
	struct bitmap_storage *store = &bitmap->storage;

	chunks = bitmap->counts.chunks;
	file = store->file;

	if (!file && !bitmap->mddev->bitmap_info.offset) {
		/* No permanent bitmap - fill with '1s'. */
		store->filemap = NULL;
		store->file_pages = 0;
		for (i = 0; i < chunks ; i++) {
			/* if the disk bit is set, set the memory bit */
			int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift)
				      >= start);
			bitmap_set_memory_bits(bitmap,
					       (sector_t)i << bitmap->counts.chunkshift,
					       needed);
		}
		return 0;
	}

	outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
	if (outofdate)
		printk(KERN_INFO "%s: bitmap file is out of date, doing full "
			"recovery\n", bmname(bitmap));

	if (file && i_size_read(file->f_mapping->host) < store->bytes) {
		printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
		       bmname(bitmap),
		       (unsigned long) i_size_read(file->f_mapping->host),
		       store->bytes);
		goto err;
	}

	oldindex = ~0L;
	offset = 0;
	if (!bitmap->mddev->bitmap_info.external)
		offset = sizeof(bitmap_super_t);

	if (mddev_is_clustered(bitmap->mddev))
		node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE));

	for (i = 0; i < chunks; i++) {
		int b;
		index = file_page_index(&bitmap->storage, i);
		bit = file_page_offset(&bitmap->storage, i);
		if (index != oldindex) { /* this is a new page, read it in */
			int count;
			/* unmap the old page, we're done with it */
			if (index == store->file_pages-1)
				count = store->bytes - index * PAGE_SIZE;
			else
				count = PAGE_SIZE;
			page = store->filemap[index];
			if (file)
				ret = read_page(file, index, bitmap,
						count, page);
			else
				ret = read_sb_page(
					bitmap->mddev,
					bitmap->mddev->bitmap_info.offset,
					page,
					index + node_offset, count);

			if (ret)
				goto err;

			oldindex = index;

			if (outofdate) {
				/*
				 * if bitmap is out of date, dirty the
				 * whole page and write it out
				 */
				paddr = kmap_atomic(page);
				memset(paddr + offset, 0xff,
				       PAGE_SIZE - offset);
				kunmap_atomic(paddr);
				write_page(bitmap, page, 1);

				ret = -EIO;
				if (test_bit(BITMAP_WRITE_ERROR,
					     &bitmap->flags))
					goto err;
			}
		}
		paddr = kmap_atomic(page);
		if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
			b = test_bit(bit, paddr);
		else
			b = test_bit_le(bit, paddr);
		kunmap_atomic(paddr);
		if (b) {
			/* if the disk bit is set, set the memory bit */
			int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift
				      >= start);
			bitmap_set_memory_bits(bitmap,
					       (sector_t)i << bitmap->counts.chunkshift,
					       needed);
			bit_cnt++;
		}
		offset = 0;
	}

	printk(KERN_INFO "%s: bitmap initialized from disk: "
	       "read %lu pages, set %lu of %lu bits\n",
	       bmname(bitmap), store->file_pages,
	       bit_cnt, chunks);

	return 0;

 err:
	printk(KERN_INFO "%s: bitmap initialisation failed: %d\n",
	       bmname(bitmap), ret);
	return ret;
}

void bitmap_write_all(struct bitmap *bitmap)
{
	/* We don't actually write all bitmap blocks here,
	 * just flag them as needing to be written
	 */
	int i;

	if (!bitmap || !bitmap->storage.filemap)
		return;
	if (bitmap->storage.file)
		/* Only one copy, so nothing needed */
		return;

	for (i = 0; i < bitmap->storage.file_pages; i++)
		set_page_attr(bitmap, i,
			      BITMAP_PAGE_NEEDWRITE);
	bitmap->allclean = 0;
}

static void bitmap_count_page(struct bitmap_counts *bitmap,
			      sector_t offset, int inc)
{
	sector_t chunk = offset >> bitmap->chunkshift;
	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
	bitmap->bp[page].count += inc;
	bitmap_checkfree(bitmap, page);
}

static void bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset)
{
	sector_t chunk = offset >> bitmap->chunkshift;
	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
	struct bitmap_page *bp = &bitmap->bp[page];

	if (!bp->pending)
		bp->pending = 1;
}

static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap,
					    sector_t offset, sector_t *blocks,
					    int create);

/*
 * bitmap daemon -- periodically wakes up to clean bits and flush pages
 *			out to disk
 */

void bitmap_daemon_work(struct mddev *mddev)
{
	struct bitmap *bitmap;
	unsigned long j;
	unsigned long nextpage;
	sector_t blocks;
	struct bitmap_counts *counts;

	/* Use a mutex to guard daemon_work against
	 * bitmap_destroy.
	 */
	mutex_lock(&mddev->bitmap_info.mutex);
	bitmap = mddev->bitmap;
	if (bitmap == NULL) {
		mutex_unlock(&mddev->bitmap_info.mutex);
		return;
	}
	if (time_before(jiffies, bitmap->daemon_lastrun
			+ mddev->bitmap_info.daemon_sleep))
		goto done;

	bitmap->daemon_lastrun = jiffies;
	if (bitmap->allclean) {
		mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
		goto done;
	}
	bitmap->allclean = 1;

	/* Any file-page which is PENDING now needs to be written.
	 * So set NEEDWRITE now, then after we make any last-minute changes
	 * we will write it.
	 */
	for (j = 0; j < bitmap->storage.file_pages; j++)
		if (test_and_clear_page_attr(bitmap, j,
					     BITMAP_PAGE_PENDING))
			set_page_attr(bitmap, j,
				      BITMAP_PAGE_NEEDWRITE);

	if (bitmap->need_sync &&
	    mddev->bitmap_info.external == 0) {
		/* Arrange for superblock update as well as
		 * other changes */
		bitmap_super_t *sb;
		bitmap->need_sync = 0;
		if (bitmap->storage.filemap) {
			sb = kmap_atomic(bitmap->storage.sb_page);
			sb->events_cleared =
				cpu_to_le64(bitmap->events_cleared);
			kunmap_atomic(sb);
			set_page_attr(bitmap, 0,
				      BITMAP_PAGE_NEEDWRITE);
		}
	}
	/* Now look at the bitmap counters and if any are '2' or '1',
	 * decrement and handle accordingly.
	 */
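	/*
	 * The counter values form a small state machine: 2 means
	 * "written recently, keep the file bit set", 1 means "quiet for
	 * one pass, file bit still set on disk", and on reaching 0 the
	 * file bit can be cleared.  Each pass moves a quiet chunk
	 * 2 -> 1 -> 0, so a chunk stays dirty for roughly two
	 * daemon_sleep intervals after its last write.
	 */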
	counts = &bitmap->counts;
	spin_lock_irq(&counts->lock);
	nextpage = 0;
	for (j = 0; j < counts->chunks; j++) {
		bitmap_counter_t *bmc;
		sector_t  block = (sector_t)j << counts->chunkshift;

		if (j == nextpage) {
			nextpage += PAGE_COUNTER_RATIO;
			if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) {
				j |= PAGE_COUNTER_MASK;
				continue;
			}
			counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0;
		}
		bmc = bitmap_get_counter(counts,
					 block,
					 &blocks, 0);

		if (!bmc) {
			j |= PAGE_COUNTER_MASK;
			continue;
		}
		if (*bmc == 1 && !bitmap->need_sync) {
			/* We can clear the bit */
			*bmc = 0;
			bitmap_count_page(counts, block, -1);
			bitmap_file_clear_bit(bitmap, block);
		} else if (*bmc && *bmc <= 2) {
			*bmc = 1;
			bitmap_set_pending(counts, block);
			bitmap->allclean = 0;
		}
	}
	spin_unlock_irq(&counts->lock);

	/* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
	 * DIRTY pages need to be written by bitmap_unplug so it can wait
	 * for them.
	 * If we find any DIRTY page we stop there and let bitmap_unplug
	 * handle all the rest.  This is important in the case where
	 * the first block holds the superblock and it has been updated.
	 * We mustn't write any other blocks before the superblock.
	 */
	for (j = 0;
	     j < bitmap->storage.file_pages
		     && !test_bit(BITMAP_STALE, &bitmap->flags);
	     j++) {
		if (test_page_attr(bitmap, j,
				   BITMAP_PAGE_DIRTY))
			/* bitmap_unplug will handle the rest */
			break;
		if (test_and_clear_page_attr(bitmap, j,
					     BITMAP_PAGE_NEEDWRITE)) {
			write_page(bitmap, bitmap->storage.filemap[j], 0);
		}
	}

 done:
	if (bitmap->allclean == 0)
		mddev->thread->timeout =
			mddev->bitmap_info.daemon_sleep;
	mutex_unlock(&mddev->bitmap_info.mutex);
}

static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap,
					    sector_t offset, sector_t *blocks,
					    int create)
__releases(bitmap->lock)
__acquires(bitmap->lock)
{
	/* If 'create', we might release the lock and reclaim it.
	 * The lock must have been taken with interrupts enabled.
	 * If !create, we don't release the lock.
	 */
	sector_t chunk = offset >> bitmap->chunkshift;
	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
	unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
	sector_t csize;
	int err;

	err = bitmap_checkpage(bitmap, page, create);

	if (bitmap->bp[page].hijacked ||
	    bitmap->bp[page].map == NULL)
		csize = ((sector_t)1) << (bitmap->chunkshift +
					  PAGE_COUNTER_SHIFT - 1);
	else
		csize = ((sector_t)1) << bitmap->chunkshift;
	*blocks = csize - (offset & (csize - 1));

	if (err < 0)
		return NULL;

	/* now locked ... */

	if (bitmap->bp[page].hijacked) { /* hijacked pointer */
		/* should we use the first or second counter field
		 * of the hijacked pointer? */
		int hi = (pageoff > PAGE_COUNTER_MASK);
		return  &((bitmap_counter_t *)
			  &bitmap->bp[page].map)[hi];
	} else /* page is allocated */
		return (bitmap_counter_t *)
			&(bitmap->bp[page].map[pageoff]);
}
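
/*
 * On return from bitmap_get_counter(), *blocks holds the number of
 * sectors from 'offset' to the end of the range covered by the counter
 * that was found, so callers can walk a region one counter at a time,
 * for example (sketch):
 *
 *	offset += blocks;
 *	sectors = (sectors > blocks) ? sectors - blocks : 0;
 *
 * which is exactly the loop shape used by bitmap_startwrite() and
 * bitmap_endwrite() below.
 */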

int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
{
	if (!bitmap)
		return 0;

	if (behind) {
		int bw;
		atomic_inc(&bitmap->behind_writes);
		bw = atomic_read(&bitmap->behind_writes);
		if (bw > bitmap->behind_writes_used)
			bitmap->behind_writes_used = bw;

		pr_debug("inc write-behind count %d/%lu\n",
			 bw, bitmap->mddev->bitmap_info.max_write_behind);
	}

	while (sectors) {
		sector_t blocks;
		bitmap_counter_t *bmc;

		spin_lock_irq(&bitmap->counts.lock);
		bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 1);
		if (!bmc) {
			spin_unlock_irq(&bitmap->counts.lock);
			return 0;
		}

		if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
			DEFINE_WAIT(__wait);
			/* note that it is safe to do the prepare_to_wait
			 * after the test as long as we do it before dropping
			 * the spinlock.
			 */
			prepare_to_wait(&bitmap->overflow_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			spin_unlock_irq(&bitmap->counts.lock);
			schedule();
			finish_wait(&bitmap->overflow_wait, &__wait);
			continue;
		}

		switch (*bmc) {
		case 0:
			bitmap_file_set_bit(bitmap, offset);
			bitmap_count_page(&bitmap->counts, offset, 1);
			/* fall through */
		case 1:
			*bmc = 2;
		}

		(*bmc)++;

		spin_unlock_irq(&bitmap->counts.lock);

		offset += blocks;
		if (sectors > blocks)
			sectors -= blocks;
		else
			sectors = 0;
	}
	return 0;
}
EXPORT_SYMBOL(bitmap_startwrite);
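
/*
 * Every successful bitmap_startwrite() must eventually be paired with a
 * bitmap_endwrite() over the same range, e.g. (sketch):
 *
 *	bitmap_startwrite(bitmap, sector, nr_sectors, behind);
 *	... submit the data write and wait for completion ...
 *	bitmap_endwrite(bitmap, sector, nr_sectors, success, behind);
 *
 * otherwise the per-chunk counter never drops back down and the daemon
 * can never clear the corresponding dirty bits.
 */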

void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
		     int success, int behind)
{
	if (!bitmap)
		return;
	if (behind) {
		if (atomic_dec_and_test(&bitmap->behind_writes))
			wake_up(&bitmap->behind_wait);
		pr_debug("dec write-behind count %d/%lu\n",
			 atomic_read(&bitmap->behind_writes),
			 bitmap->mddev->bitmap_info.max_write_behind);
	}

	while (sectors) {
		sector_t blocks;
		unsigned long flags;
		bitmap_counter_t *bmc;

		spin_lock_irqsave(&bitmap->counts.lock, flags);
		bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 0);
		if (!bmc) {
			spin_unlock_irqrestore(&bitmap->counts.lock, flags);
			return;
		}

		if (success && !bitmap->mddev->degraded &&
		    bitmap->events_cleared < bitmap->mddev->events) {
			bitmap->events_cleared = bitmap->mddev->events;
			bitmap->need_sync = 1;
			sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
		}

		if (!success && !NEEDED(*bmc))
			*bmc |= NEEDED_MASK;

		if (COUNTER(*bmc) == COUNTER_MAX)
			wake_up(&bitmap->overflow_wait);

		(*bmc)--;
		if (*bmc <= 2) {
			bitmap_set_pending(&bitmap->counts, offset);
			bitmap->allclean = 0;
		}
		spin_unlock_irqrestore(&bitmap->counts.lock, flags);
		offset += blocks;
		if (sectors > blocks)
			sectors -= blocks;
		else
			sectors = 0;
	}
}
EXPORT_SYMBOL(bitmap_endwrite);

static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
			       int degraded)
{
	bitmap_counter_t *bmc;
	int rv;
	if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
		*blocks = 1024;
		return 1; /* always resync if no bitmap */
	}
	spin_lock_irq(&bitmap->counts.lock);
	bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
	rv = 0;
	if (bmc) {
		/* locked */
		if (RESYNC(*bmc))
			rv = 1;
		else if (NEEDED(*bmc)) {
			rv = 1;
			if (!degraded) { /* don't set/clear bits if degraded */
				*bmc |= RESYNC_MASK;
				*bmc &= ~NEEDED_MASK;
			}
		}
	}
	spin_unlock_irq(&bitmap->counts.lock);
	return rv;
}

int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
		      int degraded)
{
	/* bitmap_start_sync must always report on multiples of whole
	 * pages, otherwise resync (which is very PAGE_SIZE based) will
	 * get confused.
	 * So call __bitmap_start_sync repeatedly (if needed) until
	 * at least PAGE_SIZE>>9 blocks are covered.
	 * Return the 'or' of the result.
	 */
	int rv = 0;
	sector_t blocks1;

	*blocks = 0;
	while (*blocks < (PAGE_SIZE>>9)) {
		rv |= __bitmap_start_sync(bitmap, offset,
					  &blocks1, degraded);
		offset += blocks1;
		*blocks += blocks1;
	}
	return rv;
}
EXPORT_SYMBOL(bitmap_start_sync);

void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted)
{
	bitmap_counter_t *bmc;
	unsigned long flags;

	if (bitmap == NULL) {
		*blocks = 1024;
		return;
	}
	spin_lock_irqsave(&bitmap->counts.lock, flags);
	bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
	if (bmc == NULL)
		goto unlock;
	/* locked */
	if (RESYNC(*bmc)) {
		*bmc &= ~RESYNC_MASK;

		if (!NEEDED(*bmc) && aborted)
			*bmc |= NEEDED_MASK;
		else {
			if (*bmc <= 2) {
				bitmap_set_pending(&bitmap->counts, offset);
				bitmap->allclean = 0;
			}
		}
	}
 unlock:
	spin_unlock_irqrestore(&bitmap->counts.lock, flags);
}
EXPORT_SYMBOL(bitmap_end_sync);

void bitmap_close_sync(struct bitmap *bitmap)
{
	/* Sync has finished, and any bitmap chunks that weren't synced
	 * properly have been aborted.  It remains to us to clear the
	 * RESYNC bit wherever it is still on
	 */
	sector_t sector = 0;
	sector_t blocks;
	if (!bitmap)
		return;
	while (sector < bitmap->mddev->resync_max_sectors) {
		bitmap_end_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
	}
}
EXPORT_SYMBOL(bitmap_close_sync);

void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
{
	sector_t s = 0;
	sector_t blocks;

	if (!bitmap)
		return;
	if (sector == 0) {
		bitmap->last_end_sync = jiffies;
		return;
	}
	if (time_before(jiffies, (bitmap->last_end_sync
				  + bitmap->mddev->bitmap_info.daemon_sleep)))
		return;
	wait_event(bitmap->mddev->recovery_wait,
		   atomic_read(&bitmap->mddev->recovery_active) == 0);

	bitmap->mddev->curr_resync_completed = sector;
	set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
	sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
	s = 0;
	while (s < sector && s < bitmap->mddev->resync_max_sectors) {
		bitmap_end_sync(bitmap, s, &blocks, 0);
		s += blocks;
	}
	bitmap->last_end_sync = jiffies;
	sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed");
}
EXPORT_SYMBOL(bitmap_cond_end_sync);

static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
{
	/* For each chunk covered by any of these sectors, set the
	 * counter to 2 and possibly set resync_needed.  They should all
	 * be 0 at this point
	 */

	sector_t secs;
	bitmap_counter_t *bmc;
	spin_lock_irq(&bitmap->counts.lock);
	bmc = bitmap_get_counter(&bitmap->counts, offset, &secs, 1);
	if (!bmc) {
		spin_unlock_irq(&bitmap->counts.lock);
		return;
	}
	if (!*bmc) {
		*bmc = 2;
		bitmap_count_page(&bitmap->counts, offset, 1);
		bitmap_set_pending(&bitmap->counts, offset);
		bitmap->allclean = 0;
	}
	if (needed)
		*bmc |= NEEDED_MASK;
	spin_unlock_irq(&bitmap->counts.lock);
}

/* dirty the memory and file bits for bitmap chunks "s" to "e" */
void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
{
	unsigned long chunk;

	for (chunk = s; chunk <= e; chunk++) {
		sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift;
		bitmap_set_memory_bits(bitmap, sec, 1);
		bitmap_file_set_bit(bitmap, sec);
		if (sec < bitmap->mddev->recovery_cp)
			/* We are asserting that the array is dirty,
			 * so move the recovery_cp address back so
			 * that it is obvious that it is dirty
			 */
			bitmap->mddev->recovery_cp = sec;
	}
}

/*
 * flush out any pending updates
 */
void bitmap_flush(struct mddev *mddev)
{
	struct bitmap *bitmap = mddev->bitmap;
	long sleep;

	if (!bitmap) /* there was no bitmap */
		return;

	/* run the daemon_work three times to ensure that everything
	 * which can be flushed is flushed
	 */
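	/*
	 * Three passes appear to be needed because the state drains one
	 * step per pass: the first moves counters 2 -> 1 and PENDING
	 * pages to NEEDWRITE, the second moves 1 -> 0 and clears file
	 * bits, and the third writes out the newly cleared pages.
	 */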
	sleep = mddev->bitmap_info.daemon_sleep * 2;
	bitmap->daemon_lastrun -= sleep;
	bitmap_daemon_work(mddev);
	bitmap->daemon_lastrun -= sleep;
	bitmap_daemon_work(mddev);
	bitmap->daemon_lastrun -= sleep;
	bitmap_daemon_work(mddev);
	bitmap_update_sb(bitmap);
}

/*
 * free memory that was allocated
 */
static void bitmap_free(struct bitmap *bitmap)
{
	unsigned long k, pages;
	struct bitmap_page *bp;

	if (!bitmap) /* there was no bitmap */
		return;

	if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info &&
		bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev))
		md_cluster_stop(bitmap->mddev);

	/* Shouldn't be needed - but just in case.... */
	wait_event(bitmap->write_wait,
		   atomic_read(&bitmap->pending_writes) == 0);

	/* release the bitmap file  */
	bitmap_file_unmap(&bitmap->storage);

	bp = bitmap->counts.bp;
	pages = bitmap->counts.pages;

	/* free all allocated memory */

	if (bp) /* deallocate the page memory */
		for (k = 0; k < pages; k++)
			if (bp[k].map && !bp[k].hijacked)
				kfree(bp[k].map);
	kfree(bp);
	kfree(bitmap);
}

void bitmap_destroy(struct mddev *mddev)
{
	struct bitmap *bitmap = mddev->bitmap;

	if (!bitmap) /* there was no bitmap */
		return;

	mutex_lock(&mddev->bitmap_info.mutex);
	spin_lock(&mddev->lock);
	mddev->bitmap = NULL; /* disconnect from the md device */
	spin_unlock(&mddev->lock);
	mutex_unlock(&mddev->bitmap_info.mutex);
	if (mddev->thread)
		mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;

	if (bitmap->sysfs_can_clear)
		sysfs_put(bitmap->sysfs_can_clear);

	bitmap_free(bitmap);
}

/*
 * initialize the bitmap structure
 * if this returns an error, bitmap_destroy must be called to do clean up
 */
struct bitmap *bitmap_create(struct mddev *mddev, int slot)
{
	struct bitmap *bitmap;
	sector_t blocks = mddev->resync_max_sectors;
	struct file *file = mddev->bitmap_info.file;
	int err;
	struct kernfs_node *bm = NULL;

	BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);

	BUG_ON(file && mddev->bitmap_info.offset);

	bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
	if (!bitmap)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&bitmap->counts.lock);
	atomic_set(&bitmap->pending_writes, 0);
	init_waitqueue_head(&bitmap->write_wait);
	init_waitqueue_head(&bitmap->overflow_wait);
	init_waitqueue_head(&bitmap->behind_wait);

	bitmap->mddev = mddev;
	bitmap->cluster_slot = slot;

	if (mddev->kobj.sd)
		bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
	if (bm) {
		bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
		sysfs_put(bm);
	} else
		bitmap->sysfs_can_clear = NULL;

	bitmap->storage.file = file;
	if (file) {
		get_file(file);
		/* As future accesses to this file will use bmap,
		 * and bypass the page cache, we must sync the file
		 * first.
		 */
		vfs_fsync(file, 1);
	}
	/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
	if (!mddev->bitmap_info.external) {
		/*
		 * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is
		 * instructing us to create a new on-disk bitmap instance.
		 */
		if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags))
			err = bitmap_new_disk_sb(bitmap);
		else
			err = bitmap_read_sb(bitmap);
	} else {
		err = 0;
		if (mddev->bitmap_info.chunksize == 0 ||
		    mddev->bitmap_info.daemon_sleep == 0)
			/* chunksize and time_base need to be
			 * set first. */
			err = -EINVAL;
	}
	if (err)
		goto error;

	bitmap->daemon_lastrun = jiffies;
	err = bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1);
	if (err)
		goto error;

	printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
	       bitmap->counts.pages, bmname(bitmap));

	err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
	if (err)
		goto error;

	return bitmap;
 error:
	bitmap_free(bitmap);
	return ERR_PTR(err);
}
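
/*
 * A typical life cycle of a bitmap, as driven from md (sketch; the
 * second argument of bitmap_create() is the cluster slot, -1 when
 * clustering is not in use):
 *
 *	mddev->bitmap = bitmap_create(mddev, -1);
 *	err = bitmap_load(mddev);
 *	...
 *	bitmap_flush(mddev);	(on clean shutdown)
 *	bitmap_destroy(mddev);
 *
 * bitmap_create() only sets up the structures and superblock; no bits
 * from disk are folded into the in-memory counters until bitmap_load().
 */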
1812
1813int bitmap_load(struct mddev *mddev)
1814{
1815	int err = 0;
1816	sector_t start = 0;
1817	sector_t sector = 0;
1818	struct bitmap *bitmap = mddev->bitmap;
1819
1820	if (!bitmap)
1821		goto out;
1822
1823	/* Clear out old bitmap info first:  Either there is none, or we
1824	 * are resuming after someone else has possibly changed things,
1825	 * so we should forget old cached info.
1826	 * All chunks should be clean, but some might need_sync.
1827	 */
1828	while (sector < mddev->resync_max_sectors) {
1829		sector_t blocks;
1830		bitmap_start_sync(bitmap, sector, &blocks, 0);
1831		sector += blocks;
1832	}
1833	bitmap_close_sync(bitmap);
1834
1835	if (mddev->degraded == 0
1836	    || bitmap->events_cleared == mddev->events)
1837		/* no need to keep dirty bits to optimise a
1838		 * re-add of a missing device */
1839		start = mddev->recovery_cp;
1840
1841	mutex_lock(&mddev->bitmap_info.mutex);
1842	err = bitmap_init_from_disk(bitmap, start);
1843	mutex_unlock(&mddev->bitmap_info.mutex);
1844
1845	if (err)
1846		goto out;
1847	clear_bit(BITMAP_STALE, &bitmap->flags);
1848
1849	/* Kick recovery in case any bits were set */
1850	set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
1851
1852	mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
1853	md_wakeup_thread(mddev->thread);
1854
1855	bitmap_update_sb(bitmap);
1856
1857	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
1858		err = -EIO;
1859out:
1860	return err;
1861}
1862EXPORT_SYMBOL_GPL(bitmap_load);
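/*
 * Example (illustrative only, not compiled): the caller pattern expected
 * for bitmap_create() + bitmap_load(), modelled on location_store()
 * further down.  Note that bitmap_create() returns an ERR_PTR, never
 * NULL, and the array must be quiesced around the attach.
 */
#if 0
	mddev->pers->quiesce(mddev, 1);
	bitmap = bitmap_create(mddev, -1);	/* -1: no cluster slot */
	if (IS_ERR(bitmap))
		rv = PTR_ERR(bitmap);
	else {
		mddev->bitmap = bitmap;
		rv = bitmap_load(mddev);
		if (rv)
			bitmap_destroy(mddev);
	}
	mddev->pers->quiesce(mddev, 0);
#endif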

/* Load the bitmap associated with slot and copy its resync information
 * into this node's bitmap
 */
int bitmap_copy_from_slot(struct mddev *mddev, int slot,
		sector_t *low, sector_t *high, bool clear_bits)
{
	int rv = 0, i, j;
	sector_t block, lo = 0, hi = 0;
	struct bitmap_counts *counts;
	struct bitmap *bitmap = bitmap_create(mddev, slot);

	if (IS_ERR(bitmap))
		return PTR_ERR(bitmap);

	rv = bitmap_read_sb(bitmap);
	if (rv)
		goto err;

	rv = bitmap_init_from_disk(bitmap, 0);
	if (rv)
		goto err;

	counts = &bitmap->counts;
	for (j = 0; j < counts->chunks; j++) {
		block = (sector_t)j << counts->chunkshift;
		if (bitmap_file_test_bit(bitmap, block)) {
			if (!lo)
				lo = block;
			hi = block;
			bitmap_file_clear_bit(bitmap, block);
			bitmap_set_memory_bits(mddev->bitmap, block, 1);
			bitmap_file_set_bit(mddev->bitmap, block);
		}
	}

	if (clear_bits) {
		bitmap_update_sb(bitmap);
		/* Setting BITMAP_PAGE_DIRTY on the superblock (events) page
		 * should be enough, and we should not need both
		 * bitmap_write_all() and per-page BITMAP_PAGE_DIRTY;
		 * we do both to be safe.
		 */
		for (i = 0; i < bitmap->storage.file_pages; i++)
			set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
		bitmap_write_all(bitmap);
		bitmap_unplug(bitmap);
	}
	*low = lo;
	*high = hi;
err:
	bitmap_free(bitmap);
	return rv;
}
EXPORT_SYMBOL_GPL(bitmap_copy_from_slot);
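/*
 * Example (illustrative sketch): how a cluster node might merge the
 * dirty range recorded in another node's bitmap slot.  'kick_resync'
 * is a hypothetical stand-in for whatever recovery trigger the caller
 * uses; mddev, slot and err are assumed to be in scope.
 */
#if 0
	sector_t lo = 0, hi = 0;

	err = bitmap_copy_from_slot(mddev, slot, &lo, &hi, true);
	if (!err && hi > 0)
		kick_resync(mddev, lo, hi);
#endif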

void bitmap_status(struct seq_file *seq, struct bitmap *bitmap)
{
	unsigned long chunk_kb;
	struct bitmap_counts *counts;

	if (!bitmap)
		return;

	counts = &bitmap->counts;

	chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10;
	seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
		   "%lu%s chunk",
		   counts->pages - counts->missing_pages,
		   counts->pages,
		   (counts->pages - counts->missing_pages)
		   << (PAGE_SHIFT - 10),
		   chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize,
		   chunk_kb ? "KB" : "B");
	if (bitmap->storage.file) {
		seq_printf(seq, ", file: ");
		seq_path(seq, &bitmap->storage.file->f_path, " \t\n");
	}

	seq_printf(seq, "\n");
}
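/*
 * Illustrative output of the above for a file-backed bitmap, assuming
 * 4K pages and a 64MB chunk (numbers are made up):
 *
 *   bitmap: 5/7 pages [20KB], 65536KB chunk, file: /bm
 *
 * i.e. 5 of 7 counter pages are currently allocated, costing 20KB.
 */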

int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
		  int chunksize, int init)
{
	/* If chunksize is 0, choose an appropriate chunk size.
	 * Then possibly allocate new storage space.
	 * Then quiesce, copy bits, replace bitmap, and re-start.
	 *
	 * This function is called both to set up the initial bitmap
	 * and to resize the bitmap while the array is active.
	 * When called because the array has been resized, chunksize
	 * will be zero and we must choose a suitable chunk size;
	 * otherwise we use the chunksize we are given.
	 */
	struct bitmap_storage store;
	struct bitmap_counts old_counts;
	unsigned long chunks;
	sector_t block;
	sector_t old_blocks, new_blocks;
	int chunkshift;
	int ret = 0;
	long pages;
	struct bitmap_page *new_bp;

	if (chunksize == 0) {
		/* If there is enough space, leave the chunk size unchanged,
		 * else increase by factor of two until there is enough space.
		 */
		long bytes;
		long space = bitmap->mddev->bitmap_info.space;

		if (space == 0) {
			/* We don't know how much space there is, so limit
			 * to current size - in sectors.
			 */
			bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8);
			if (!bitmap->mddev->bitmap_info.external)
				bytes += sizeof(bitmap_super_t);
			space = DIV_ROUND_UP(bytes, 512);
			bitmap->mddev->bitmap_info.space = space;
		}
		chunkshift = bitmap->counts.chunkshift;
		chunkshift--;
		do {
			/* 'chunkshift' is shift from block size to chunk size */
			chunkshift++;
			chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
			bytes = DIV_ROUND_UP(chunks, 8);
			if (!bitmap->mddev->bitmap_info.external)
				bytes += sizeof(bitmap_super_t);
		} while (bytes > (space << 9));
	} else
		chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;

	chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
	memset(&store, 0, sizeof(store));
	if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
		ret = bitmap_storage_alloc(&store, chunks,
					   !bitmap->mddev->bitmap_info.external,
					   mddev_is_clustered(bitmap->mddev)
					   ? bitmap->cluster_slot : 0);
	if (ret)
		goto err;

	pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);

	/* kcalloc checks the pages * sizeof() multiplication for overflow */
	new_bp = kcalloc(pages, sizeof(*new_bp), GFP_KERNEL);
	ret = -ENOMEM;
	if (!new_bp) {
		bitmap_file_unmap(&store);
		goto err;
	}

	if (!init)
		bitmap->mddev->pers->quiesce(bitmap->mddev, 1);

	store.file = bitmap->storage.file;
	bitmap->storage.file = NULL;

	if (store.sb_page && bitmap->storage.sb_page)
		memcpy(page_address(store.sb_page),
		       page_address(bitmap->storage.sb_page),
		       sizeof(bitmap_super_t));
	bitmap_file_unmap(&bitmap->storage);
	bitmap->storage = store;

	old_counts = bitmap->counts;
	bitmap->counts.bp = new_bp;
	bitmap->counts.pages = pages;
	bitmap->counts.missing_pages = pages;
	bitmap->counts.chunkshift = chunkshift;
	bitmap->counts.chunks = chunks;
	bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift +
						     BITMAP_BLOCK_SHIFT);

	blocks = min(old_counts.chunks << old_counts.chunkshift,
		     chunks << chunkshift);

	spin_lock_irq(&bitmap->counts.lock);
	for (block = 0; block < blocks; ) {
		bitmap_counter_t *bmc_old, *bmc_new;
		int set;

		bmc_old = bitmap_get_counter(&old_counts, block,
					     &old_blocks, 0);
		set = bmc_old && NEEDED(*bmc_old);

		if (set) {
			bmc_new = bitmap_get_counter(&bitmap->counts, block,
						     &new_blocks, 1);
			/* bitmap_get_counter() can fail to allocate a
			 * counter page; new_blocks is still set, so we
			 * can simply skip the copy for this range.
			 */
			if (bmc_new) {
				if (*bmc_new == 0) {
					/* need to set on-disk bits too. */
					sector_t end = block + new_blocks;
					sector_t start = block >> chunkshift;
					start <<= chunkshift;
					while (start < end) {
						bitmap_file_set_bit(bitmap, block);
						start += 1 << chunkshift;
					}
					*bmc_new = 2;
					bitmap_count_page(&bitmap->counts,
							  block, 1);
					bitmap_set_pending(&bitmap->counts,
							   block);
				}
				*bmc_new |= NEEDED_MASK;
			}
			if (new_blocks < old_blocks)
				old_blocks = new_blocks;
		}
		block += old_blocks;
	}

	if (!init) {
		int i;
		while (block < (chunks << chunkshift)) {
			bitmap_counter_t *bmc;
			bmc = bitmap_get_counter(&bitmap->counts, block,
						 &new_blocks, 1);
			if (bmc) {
				/* new space.  It needs to be resynced, so
				 * we set NEEDED_MASK.
				 */
				if (*bmc == 0) {
					*bmc = NEEDED_MASK | 2;
					bitmap_count_page(&bitmap->counts,
							  block, 1);
					bitmap_set_pending(&bitmap->counts,
							   block);
				}
			}
			block += new_blocks;
		}
		for (i = 0; i < bitmap->storage.file_pages; i++)
			set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
	}
	spin_unlock_irq(&bitmap->counts.lock);

	if (!init) {
		bitmap_unplug(bitmap);
		bitmap->mddev->pers->quiesce(bitmap->mddev, 0);
	}
	ret = 0;
err:
	return ret;
}
EXPORT_SYMBOL_GPL(bitmap_resize);
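/*
 * Worked example (illustrative) of the sizing above, assuming 4K pages
 * and 16-bit counters (PAGE_COUNTER_RATIO == 2048):
 *
 *   chunksize = 64MB   => chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT
 *                                    = 26 - 9 = 17
 *   blocks    = 2TB    => 2^32 512-byte sectors
 *   chunks    = DIV_ROUND_UP_SECTOR_T(2^32, 2^17) = 32768
 *   bytes     = DIV_ROUND_UP(32768, 8) = 4096 (+256 for an internal sb)
 *   pages     = DIV_ROUND_UP(32768, 2048) = 16 counter pages
 */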

static ssize_t
location_show(struct mddev *mddev, char *page)
{
	ssize_t len;
	if (mddev->bitmap_info.file)
		len = sprintf(page, "file");
	else if (mddev->bitmap_info.offset)
		len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
	else
		len = sprintf(page, "none");
	len += sprintf(page+len, "\n");
	return len;
}

static ssize_t
location_store(struct mddev *mddev, const char *buf, size_t len)
{
	if (mddev->pers) {
		if (!mddev->pers->quiesce)
			return -EBUSY;
		if (mddev->recovery || mddev->sync_thread)
			return -EBUSY;
	}

	if (mddev->bitmap || mddev->bitmap_info.file ||
	    mddev->bitmap_info.offset) {
		/* bitmap already configured.  Only option is to clear it */
		if (strncmp(buf, "none", 4) != 0)
			return -EBUSY;
		if (mddev->pers) {
			mddev->pers->quiesce(mddev, 1);
			bitmap_destroy(mddev);
			mddev->pers->quiesce(mddev, 0);
		}
		mddev->bitmap_info.offset = 0;
		if (mddev->bitmap_info.file) {
			struct file *f = mddev->bitmap_info.file;
			mddev->bitmap_info.file = NULL;
			fput(f);
		}
	} else {
		/* No bitmap, OK to set a location */
		long long offset;
		if (strncmp(buf, "none", 4) == 0)
			/* nothing to be done */;
		else if (strncmp(buf, "file:", 5) == 0) {
			/* Not supported yet */
			return -EINVAL;
		} else {
			int rv;
			if (buf[0] == '+')
				rv = kstrtoll(buf+1, 10, &offset);
			else
				rv = kstrtoll(buf, 10, &offset);
			if (rv)
				return rv;
			if (offset == 0)
				return -EINVAL;
			if (mddev->bitmap_info.external == 0 &&
			    mddev->major_version == 0 &&
			    offset != mddev->bitmap_info.default_offset)
				return -EINVAL;
			mddev->bitmap_info.offset = offset;
			if (mddev->pers) {
				struct bitmap *bitmap;
				mddev->pers->quiesce(mddev, 1);
				bitmap = bitmap_create(mddev, -1);
				if (IS_ERR(bitmap))
					rv = PTR_ERR(bitmap);
				else {
					mddev->bitmap = bitmap;
					rv = bitmap_load(mddev);
					if (rv) {
						bitmap_destroy(mddev);
						mddev->bitmap_info.offset = 0;
					}
				}
				mddev->pers->quiesce(mddev, 0);
				if (rv)
					return rv;
			}
		}
	}
	if (!mddev->external) {
		/* Ensure new bitmap info is stored in
		 * metadata promptly.
		 */
		set_bit(MD_CHANGE_DEVS, &mddev->flags);
		md_wakeup_thread(mddev->thread);
	}
	return len;
}

static struct md_sysfs_entry bitmap_location =
__ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);
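/*
 * Values accepted by 'bitmap/location' (illustrative):
 *
 *   "none"    clear any existing bitmap configuration
 *   "+8"      internal bitmap, 8 sectors after the array metadata
 *   "-16"     internal bitmap, 16 sectors before the array metadata
 *   "file:x"  rejected with -EINVAL - setting a file via sysfs is not
 *             supported yet
 */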

/* 'bitmap/space' is the space available at 'location' for the
 * bitmap.  This allows the kernel to know when it is safe to
 * resize the bitmap to match a resized array.
 */
static ssize_t
space_show(struct mddev *mddev, char *page)
{
	return sprintf(page, "%lu\n", mddev->bitmap_info.space);
}

static ssize_t
space_store(struct mddev *mddev, const char *buf, size_t len)
{
	unsigned long sectors;
	int rv;

	rv = kstrtoul(buf, 10, &sectors);
	if (rv)
		return rv;

	if (sectors == 0)
		return -EINVAL;

	if (mddev->bitmap &&
	    sectors < (mddev->bitmap->storage.bytes + 511) >> 9)
		return -EFBIG; /* Bitmap is too big for this small space */

	/* could make sure it isn't too big, but that isn't really
	 * needed - user-space should be careful.
	 */
	mddev->bitmap_info.space = sectors;
	return len;
}

static struct md_sysfs_entry bitmap_space =
__ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store);
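/*
 * Example (illustrative): with an existing bitmap occupying
 * storage.bytes == 3100 bytes, the rounded size is
 * (3100 + 511) >> 9 == 7 sectors, so writing any value below 7 to
 * 'bitmap/space' fails with -EFBIG while larger values are stored.
 */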

static ssize_t
timeout_show(struct mddev *mddev, char *page)
{
	ssize_t len;
	unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
	unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;

	len = sprintf(page, "%lu", secs);
	if (jifs)
		len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
	len += sprintf(page+len, "\n");
	return len;
}

static ssize_t
timeout_store(struct mddev *mddev, const char *buf, size_t len)
{
	/* timeout can be set at any time */
	unsigned long timeout;
	int rv = strict_strtoul_scaled(buf, &timeout, 4);
	if (rv)
		return rv;

	/* just to make sure we don't overflow... */
	if (timeout >= LONG_MAX / HZ)
		return -EINVAL;

	timeout = timeout * HZ / 10000;

	if (timeout >= MAX_SCHEDULE_TIMEOUT)
		timeout = MAX_SCHEDULE_TIMEOUT-1;
	if (timeout < 1)
		timeout = 1;
	mddev->bitmap_info.daemon_sleep = timeout;
	if (mddev->thread) {
		/* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then
		 * the bitmap is all clean and we don't need to
		 * adjust the timeout right now
		 */
		if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) {
			mddev->thread->timeout = timeout;
			md_wakeup_thread(mddev->thread);
		}
	}
	return len;
}

static struct md_sysfs_entry bitmap_timeout =
__ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store);
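/*
 * Worked example (illustrative) of the conversion above: writing "5.5"
 * to 'bitmap/time_base' gives timeout == 55000 from
 * strict_strtoul_scaled(buf, &timeout, 4) (fixed point, four implied
 * decimal places), and 55000 * HZ / 10000 == 5.5 * HZ jiffies of
 * daemon sleep.
 */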

static ssize_t
backlog_show(struct mddev *mddev, char *page)
{
	return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind);
}

static ssize_t
backlog_store(struct mddev *mddev, const char *buf, size_t len)
{
	unsigned long backlog;
	int rv = kstrtoul(buf, 10, &backlog);
	if (rv)
		return rv;
	if (backlog > COUNTER_MAX)
		return -EINVAL;
	mddev->bitmap_info.max_write_behind = backlog;
	return len;
}

static struct md_sysfs_entry bitmap_backlog =
__ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);

static ssize_t
chunksize_show(struct mddev *mddev, char *page)
{
	return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize);
}

static ssize_t
chunksize_store(struct mddev *mddev, const char *buf, size_t len)
{
	/* Can only be changed when no bitmap is active */
	int rv;
	unsigned long csize;
	if (mddev->bitmap)
		return -EBUSY;
	rv = kstrtoul(buf, 10, &csize);
	if (rv)
		return rv;
	if (csize < 512 ||
	    !is_power_of_2(csize))
		return -EINVAL;
	mddev->bitmap_info.chunksize = csize;
	return len;
}

static struct md_sysfs_entry bitmap_chunksize =
__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
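/*
 * Example (illustrative): valid writes to 'bitmap/chunksize' are powers
 * of two of at least 512 bytes, e.g. "524288" for 512KB chunks.
 * "1000" (not a power of two) and "256" (below 512) fail with -EINVAL,
 * and any write fails with -EBUSY while a bitmap is active.
 */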

static ssize_t metadata_show(struct mddev *mddev, char *page)
{
	if (mddev_is_clustered(mddev))
		return sprintf(page, "clustered\n");
	return sprintf(page, "%s\n", (mddev->bitmap_info.external
				      ? "external" : "internal"));
}

static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len)
{
	if (mddev->bitmap ||
	    mddev->bitmap_info.file ||
	    mddev->bitmap_info.offset)
		return -EBUSY;
	if (strncmp(buf, "external", 8) == 0)
		mddev->bitmap_info.external = 1;
	else if ((strncmp(buf, "internal", 8) == 0) ||
			(strncmp(buf, "clustered", 9) == 0))
		mddev->bitmap_info.external = 0;
	else
		return -EINVAL;
	return len;
}

static struct md_sysfs_entry bitmap_metadata =
__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);

static ssize_t can_clear_show(struct mddev *mddev, char *page)
{
	int len;
	spin_lock(&mddev->lock);
	if (mddev->bitmap)
		len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
					     "false" : "true"));
	else
		len = sprintf(page, "\n");
	spin_unlock(&mddev->lock);
	return len;
}

static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len)
{
	if (mddev->bitmap == NULL)
		return -ENOENT;
	if (strncmp(buf, "false", 5) == 0)
		mddev->bitmap->need_sync = 1;
	else if (strncmp(buf, "true", 4) == 0) {
		if (mddev->degraded)
			return -EBUSY;
		mddev->bitmap->need_sync = 0;
	} else
		return -EINVAL;
	return len;
}

static struct md_sysfs_entry bitmap_can_clear =
__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
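/*
 * Example (illustrative): an external metadata manager writes "false"
 * to 'bitmap/can_clear' so that bits are not cleared while it still
 * needs them for recovery, and writes "true" once recovery completes;
 * the "true" write is refused with -EBUSY while the array is degraded.
 */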

static ssize_t
behind_writes_used_show(struct mddev *mddev, char *page)
{
	ssize_t ret;
	spin_lock(&mddev->lock);
	if (mddev->bitmap == NULL)
		ret = sprintf(page, "0\n");
	else
		ret = sprintf(page, "%lu\n",
			      mddev->bitmap->behind_writes_used);
	spin_unlock(&mddev->lock);
	return ret;
}

static ssize_t
behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len)
{
	if (mddev->bitmap)
		mddev->bitmap->behind_writes_used = 0;
	return len;
}

static struct md_sysfs_entry max_backlog_used =
__ATTR(max_backlog_used, S_IRUGO | S_IWUSR,
       behind_writes_used_show, behind_writes_used_reset);

static struct attribute *md_bitmap_attrs[] = {
	&bitmap_location.attr,
	&bitmap_space.attr,
	&bitmap_timeout.attr,
	&bitmap_backlog.attr,
	&bitmap_chunksize.attr,
	&bitmap_metadata.attr,
	&bitmap_can_clear.attr,
	&max_backlog_used.attr,
	NULL
};
struct attribute_group md_bitmap_group = {
	.name = "bitmap",
	.attrs = md_bitmap_attrs,
};
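/*
 * The group above appears under the per-array md directory, e.g.
 * (illustrative paths for md0):
 *
 *   /sys/block/md0/md/bitmap/location
 *   /sys/block/md0/md/bitmap/space
 *   /sys/block/md0/md/bitmap/time_base
 *   /sys/block/md0/md/bitmap/backlog
 *   /sys/block/md0/md/bitmap/chunksize
 *   /sys/block/md0/md/bitmap/metadata
 *   /sys/block/md0/md/bitmap/can_clear
 *   /sys/block/md0/md/bitmap/max_backlog_used
 */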