1/*
2 * linux/kernel/power/swap.c
3 *
4 * This file provides functions for reading the suspend image from
5 * and writing it to a swap partition.
6 *
7 * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
8 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
9 * Copyright (C) 2010-2012 Bojan Smojver <bojan@rexursive.com>
10 *
11 * This file is released under the GPLv2.
12 *
13 */
14
15#include <linux/module.h>
16#include <linux/file.h>
17#include <linux/delay.h>
18#include <linux/bitops.h>
19#include <linux/genhd.h>
20#include <linux/device.h>
21#include <linux/bio.h>
22#include <linux/blkdev.h>
23#include <linux/swap.h>
24#include <linux/swapops.h>
25#include <linux/pm.h>
26#include <linux/slab.h>
27#include <linux/lzo.h>
28#include <linux/vmalloc.h>
29#include <linux/cpumask.h>
30#include <linux/atomic.h>
31#include <linux/kthread.h>
32#include <linux/crc32.h>
33#include <linux/ktime.h>
34
35#include "power.h"
36
/* Signature placed in the swap header's sig field once an image is saved. */
#define HIBERNATE_SIG	"S1SUSPEND"
38
39/*
40 *	The swap map is a data structure used for keeping track of each page
 *	written to a swap partition.  It consists of many swap_map_page
 *	structures, each of which contains an array of MAP_PAGE_ENTRIES swap
 *	entries.
43 *	These structures are stored on the swap and linked together with the
44 *	help of the .next_swap member.
45 *
46 *	The swap map is created during suspend.  The swap map pages are
47 *	allocated and populated one at a time, so we only need one memory
48 *	page to set up the entire structure.
49 *
50 *	During resume we pick up all swap_map_page structures into a list.
51 */
52
/*
 * Number of sector entries in one swap_map_page: a page worth of sector_t
 * slots, minus the one slot used for the .next_swap link.
 */
#define MAP_PAGE_ENTRIES	(PAGE_SIZE / sizeof(sector_t) - 1)
54
/*
 * low_free_pages - free pages that are not in high memory.
 *
 * Returns the total number of free pages minus the number of free
 * highmem pages.
 */
static inline unsigned long low_free_pages(void)
{
	unsigned long all_free = nr_free_pages();
	unsigned long high_free = nr_free_highpages();

	return all_free - high_free;
}
62
/*
 * reqd_free_pages - pages that must stay free while the image is written.
 *
 * This is always half of the low (non-highmem) pages that are free at the
 * moment of the call.
 */
static inline unsigned long reqd_free_pages(void)
{
	unsigned long low = low_free_pages();

	return low >> 1;
}
71
72struct swap_map_page {
73	sector_t entries[MAP_PAGE_ENTRIES];
74	sector_t next_swap;
75};
76
/*
 * In-memory singly linked list of swap map pages, built while reading the
 * image back (see get_swap_reader()).
 */
struct swap_map_page_list {
	/* Page-sized buffer holding one swap map page read from swap. */
	struct swap_map_page *map;
	/* Next list element, or NULL for the last one. */
	struct swap_map_page_list *next;
};
81
82/**
83 *	The swap_map_handle structure is used for handling swap in
84 *	a file-alike way
85 */
86
87struct swap_map_handle {
88	struct swap_map_page *cur;
89	struct swap_map_page_list *maps;
90	sector_t cur_swap;
91	sector_t first_sector;
92	unsigned int k;
93	unsigned long reqd_free_pages;
94	u32 crc32;
95};
96
97struct swsusp_header {
98	char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int) -
99	              sizeof(u32)];
100	u32	crc32;
101	sector_t image;
102	unsigned int flags;	/* Flags to pass to the "boot" kernel */
103	char	orig_sig[10];
104	char	sig[10];
105} __packed;
106
107static struct swsusp_header *swsusp_header;
108
109/**
110 *	The following functions are used for tracing the allocated
111 *	swap pages, so that they can be freed in case of an error.
112 */
113
114struct swsusp_extent {
115	struct rb_node node;
116	unsigned long start;
117	unsigned long end;
118};
119
120static struct rb_root swsusp_extents = RB_ROOT;
121
122static int swsusp_extents_insert(unsigned long swap_offset)
123{
124	struct rb_node **new = &(swsusp_extents.rb_node);
125	struct rb_node *parent = NULL;
126	struct swsusp_extent *ext;
127
128	/* Figure out where to put the new node */
129	while (*new) {
130		ext = rb_entry(*new, struct swsusp_extent, node);
131		parent = *new;
132		if (swap_offset < ext->start) {
133			/* Try to merge */
134			if (swap_offset == ext->start - 1) {
135				ext->start--;
136				return 0;
137			}
138			new = &((*new)->rb_left);
139		} else if (swap_offset > ext->end) {
140			/* Try to merge */
141			if (swap_offset == ext->end + 1) {
142				ext->end++;
143				return 0;
144			}
145			new = &((*new)->rb_right);
146		} else {
147			/* It already is in the tree */
148			return -EINVAL;
149		}
150	}
151	/* Add the new node and rebalance the tree. */
152	ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL);
153	if (!ext)
154		return -ENOMEM;
155
156	ext->start = swap_offset;
157	ext->end = swap_offset;
158	rb_link_node(&ext->node, parent, new);
159	rb_insert_color(&ext->node, &swsusp_extents);
160	return 0;
161}
162
163/**
164 *	alloc_swapdev_block - allocate a swap page and register that it has
165 *	been allocated, so that it can be freed in case of an error.
166 */
167
168sector_t alloc_swapdev_block(int swap)
169{
170	unsigned long offset;
171
172	offset = swp_offset(get_swap_page_of_type(swap));
173	if (offset) {
174		if (swsusp_extents_insert(offset))
175			swap_free(swp_entry(swap, offset));
176		else
177			return swapdev_block(swap, offset);
178	}
179	return 0;
180}
181
182/**
183 *	free_all_swap_pages - free swap pages allocated for saving image data.
184 *	It also frees the extents used to register which swap entries had been
185 *	allocated.
186 */
187
188void free_all_swap_pages(int swap)
189{
190	struct rb_node *node;
191
192	while ((node = swsusp_extents.rb_node)) {
193		struct swsusp_extent *ext;
194		unsigned long offset;
195
196		ext = container_of(node, struct swsusp_extent, node);
197		rb_erase(node, &swsusp_extents);
198		for (offset = ext->start; offset <= ext->end; offset++)
199			swap_free(swp_entry(swap, offset));
200
201		kfree(ext);
202	}
203}
204
205int swsusp_swap_in_use(void)
206{
207	return (swsusp_extents.rb_node != NULL);
208}
209
210/*
211 * General things
212 */
213
/* Swap type used for the image; assigned by swsusp_swap_check(). */
static unsigned short root_swap = 0xffff;
/* Block device of the resume partition; filled in via swap_type_of(). */
struct block_device *hib_resume_bdev;
216
217/*
218 * Saving part
219 */
220
221static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
222{
223	int error;
224
225	hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);
226	if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
227	    !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
228		memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
229		memcpy(swsusp_header->sig, HIBERNATE_SIG, 10);
230		swsusp_header->image = handle->first_sector;
231		swsusp_header->flags = flags;
232		if (flags & SF_CRC32_MODE)
233			swsusp_header->crc32 = handle->crc32;
234		error = hib_bio_write_page(swsusp_resume_block,
235					swsusp_header, NULL);
236	} else {
237		printk(KERN_ERR "PM: Swap header not found!\n");
238		error = -ENODEV;
239	}
240	return error;
241}
242
243/**
244 *	swsusp_swap_check - check if the resume device is a swap device
245 *	and get its index (if so)
246 *
247 *	This is called before saving image
248 */
249static int swsusp_swap_check(void)
250{
251	int res;
252
253	res = swap_type_of(swsusp_resume_device, swsusp_resume_block,
254			&hib_resume_bdev);
255	if (res < 0)
256		return res;
257
258	root_swap = res;
259	res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL);
260	if (res)
261		return res;
262
263	res = set_blocksize(hib_resume_bdev, PAGE_SIZE);
264	if (res < 0)
265		blkdev_put(hib_resume_bdev, FMODE_WRITE);
266
267	return res;
268}
269
270/**
271 *	write_page - Write one page to given swap location.
272 *	@buf:		Address we're writing.
273 *	@offset:	Offset of the swap page we're writing to.
274 *	@bio_chain:	Link the next write BIO here
275 */
276
277static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
278{
279	void *src;
280	int ret;
281
282	if (!offset)
283		return -ENOSPC;
284
285	if (bio_chain) {
286		src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN |
287		                              __GFP_NORETRY);
288		if (src) {
289			copy_page(src, buf);
290		} else {
291			ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */
292			if (ret)
293				return ret;
294			src = (void *)__get_free_page(__GFP_WAIT |
295			                              __GFP_NOWARN |
296			                              __GFP_NORETRY);
297			if (src) {
298				copy_page(src, buf);
299			} else {
300				WARN_ON_ONCE(1);
301				bio_chain = NULL;	/* Go synchronous */
302				src = buf;
303			}
304		}
305	} else {
306		src = buf;
307	}
308	return hib_bio_write_page(offset, src, bio_chain);
309}
310
311static void release_swap_writer(struct swap_map_handle *handle)
312{
313	if (handle->cur)
314		free_page((unsigned long)handle->cur);
315	handle->cur = NULL;
316}
317
318static int get_swap_writer(struct swap_map_handle *handle)
319{
320	int ret;
321
322	ret = swsusp_swap_check();
323	if (ret) {
324		if (ret != -ENOSPC)
325			printk(KERN_ERR "PM: Cannot find swap device, try "
326					"swapon -a.\n");
327		return ret;
328	}
329	handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
330	if (!handle->cur) {
331		ret = -ENOMEM;
332		goto err_close;
333	}
334	handle->cur_swap = alloc_swapdev_block(root_swap);
335	if (!handle->cur_swap) {
336		ret = -ENOSPC;
337		goto err_rel;
338	}
339	handle->k = 0;
340	handle->reqd_free_pages = reqd_free_pages();
341	handle->first_sector = handle->cur_swap;
342	return 0;
343err_rel:
344	release_swap_writer(handle);
345err_close:
346	swsusp_close(FMODE_WRITE);
347	return ret;
348}
349
350static int swap_write_page(struct swap_map_handle *handle, void *buf,
351				struct bio **bio_chain)
352{
353	int error = 0;
354	sector_t offset;
355
356	if (!handle->cur)
357		return -EINVAL;
358	offset = alloc_swapdev_block(root_swap);
359	error = write_page(buf, offset, bio_chain);
360	if (error)
361		return error;
362	handle->cur->entries[handle->k++] = offset;
363	if (handle->k >= MAP_PAGE_ENTRIES) {
364		offset = alloc_swapdev_block(root_swap);
365		if (!offset)
366			return -ENOSPC;
367		handle->cur->next_swap = offset;
368		error = write_page(handle->cur, handle->cur_swap, bio_chain);
369		if (error)
370			goto out;
371		clear_page(handle->cur);
372		handle->cur_swap = offset;
373		handle->k = 0;
374
375		if (bio_chain && low_free_pages() <= handle->reqd_free_pages) {
376			error = hib_wait_on_bio_chain(bio_chain);
377			if (error)
378				goto out;
379			/*
380			 * Recalculate the number of required free pages, to
381			 * make sure we never take more than half.
382			 */
383			handle->reqd_free_pages = reqd_free_pages();
384		}
385	}
386 out:
387	return error;
388}
389
390static int flush_swap_writer(struct swap_map_handle *handle)
391{
392	if (handle->cur && handle->cur_swap)
393		return write_page(handle->cur, handle->cur_swap, NULL);
394	else
395		return -EINVAL;
396}
397
398static int swap_writer_finish(struct swap_map_handle *handle,
399		unsigned int flags, int error)
400{
401	if (!error) {
402		flush_swap_writer(handle);
403		printk(KERN_INFO "PM: S");
404		error = mark_swapfiles(handle, flags);
405		printk("|\n");
406	}
407
408	if (error)
409		free_all_swap_pages(root_swap);
410	release_swap_writer(handle);
411	swsusp_close(FMODE_WRITE);
412
413	return error;
414}
415
/*
 * Size of the length field stored in front of every compressed chunk; the
 * reader uses it to know how much compressed data belongs to the chunk.
 */
#define LZO_HEADER	sizeof(size_t)

/* Amount of uncompressed data, in pages and bytes, compressed in one go. */
#define LZO_UNC_PAGES	32
#define LZO_UNC_SIZE	(LZO_UNC_PAGES * PAGE_SIZE)

/*
 * Worst-case room, in pages and bytes, needed for one compressed chunk
 * including its length header.
 */
#define LZO_CMP_PAGES	DIV_ROUND_UP(lzo1x_worst_compress(LZO_UNC_SIZE) + \
			             LZO_HEADER, PAGE_SIZE)
#define LZO_CMP_SIZE	(LZO_CMP_PAGES * PAGE_SIZE)

/* Upper bound on the number of compression/decompression threads. */
#define LZO_THREADS	3

/* Lower and upper bound on the number of pages used for read buffering. */
#define LZO_MIN_RD_PAGES	1024
#define LZO_MAX_RD_PAGES	8192
434
435
436/**
437 *	save_image - save the suspend image data
438 */
439
440static int save_image(struct swap_map_handle *handle,
441                      struct snapshot_handle *snapshot,
442                      unsigned int nr_to_write)
443{
444	unsigned int m;
445	int ret;
446	int nr_pages;
447	int err2;
448	struct bio *bio;
449	ktime_t start;
450	ktime_t stop;
451
452	printk(KERN_INFO "PM: Saving image data pages (%u pages)...\n",
453		nr_to_write);
454	m = nr_to_write / 10;
455	if (!m)
456		m = 1;
457	nr_pages = 0;
458	bio = NULL;
459	start = ktime_get();
460	while (1) {
461		ret = snapshot_read_next(snapshot);
462		if (ret <= 0)
463			break;
464		ret = swap_write_page(handle, data_of(*snapshot), &bio);
465		if (ret)
466			break;
467		if (!(nr_pages % m))
468			printk(KERN_INFO "PM: Image saving progress: %3d%%\n",
469			       nr_pages / m * 10);
470		nr_pages++;
471	}
472	err2 = hib_wait_on_bio_chain(&bio);
473	stop = ktime_get();
474	if (!ret)
475		ret = err2;
476	if (!ret)
477		printk(KERN_INFO "PM: Image saving done.\n");
478	swsusp_show_speed(start, stop, nr_to_write, "Wrote");
479	return ret;
480}
481
482/**
483 * Structure used for CRC32.
484 */
485struct crc_data {
486	struct task_struct *thr;                  /* thread */
487	atomic_t ready;                           /* ready to start flag */
488	atomic_t stop;                            /* ready to stop flag */
489	unsigned run_threads;                     /* nr current threads */
490	wait_queue_head_t go;                     /* start crc update */
491	wait_queue_head_t done;                   /* crc update done */
492	u32 *crc32;                               /* points to handle's crc32 */
493	size_t *unc_len[LZO_THREADS];             /* uncompressed lengths */
494	unsigned char *unc[LZO_THREADS];          /* uncompressed data */
495};
496
497/**
498 * CRC32 update function that runs in its own thread.
499 */
500static int crc32_threadfn(void *data)
501{
502	struct crc_data *d = data;
503	unsigned i;
504
505	while (1) {
506		wait_event(d->go, atomic_read(&d->ready) ||
507		                  kthread_should_stop());
508		if (kthread_should_stop()) {
509			d->thr = NULL;
510			atomic_set(&d->stop, 1);
511			wake_up(&d->done);
512			break;
513		}
514		atomic_set(&d->ready, 0);
515
516		for (i = 0; i < d->run_threads; i++)
517			*d->crc32 = crc32_le(*d->crc32,
518			                     d->unc[i], *d->unc_len[i]);
519		atomic_set(&d->stop, 1);
520		wake_up(&d->done);
521	}
522	return 0;
523}
524/**
525 * Structure used for LZO data compression.
526 */
527struct cmp_data {
528	struct task_struct *thr;                  /* thread */
529	atomic_t ready;                           /* ready to start flag */
530	atomic_t stop;                            /* ready to stop flag */
531	int ret;                                  /* return code */
532	wait_queue_head_t go;                     /* start compression */
533	wait_queue_head_t done;                   /* compression done */
534	size_t unc_len;                           /* uncompressed length */
535	size_t cmp_len;                           /* compressed length */
536	unsigned char unc[LZO_UNC_SIZE];          /* uncompressed buffer */
537	unsigned char cmp[LZO_CMP_SIZE];          /* compressed buffer */
538	unsigned char wrk[LZO1X_1_MEM_COMPRESS];  /* compression workspace */
539};
540
541/**
542 * Compression function that runs in its own thread.
543 */
544static int lzo_compress_threadfn(void *data)
545{
546	struct cmp_data *d = data;
547
548	while (1) {
549		wait_event(d->go, atomic_read(&d->ready) ||
550		                  kthread_should_stop());
551		if (kthread_should_stop()) {
552			d->thr = NULL;
553			d->ret = -1;
554			atomic_set(&d->stop, 1);
555			wake_up(&d->done);
556			break;
557		}
558		atomic_set(&d->ready, 0);
559
560		d->ret = lzo1x_1_compress(d->unc, d->unc_len,
561		                          d->cmp + LZO_HEADER, &d->cmp_len,
562		                          d->wrk);
563		atomic_set(&d->stop, 1);
564		wake_up(&d->done);
565	}
566	return 0;
567}
568
569/**
570 * save_image_lzo - Save the suspend image data compressed with LZO.
571 * @handle: Swap map handle to use for saving the image.
572 * @snapshot: Image to read data from.
573 * @nr_to_write: Number of pages to save.
574 */
575static int save_image_lzo(struct swap_map_handle *handle,
576                          struct snapshot_handle *snapshot,
577                          unsigned int nr_to_write)
578{
579	unsigned int m;
580	int ret = 0;
581	int nr_pages;
582	int err2;
583	struct bio *bio;
584	ktime_t start;
585	ktime_t stop;
586	size_t off;
587	unsigned thr, run_threads, nr_threads;
588	unsigned char *page = NULL;
589	struct cmp_data *data = NULL;
590	struct crc_data *crc = NULL;
591
592	/*
593	 * We'll limit the number of threads for compression to limit memory
594	 * footprint.
595	 */
596	nr_threads = num_online_cpus() - 1;
597	nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
598
599	page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
600	if (!page) {
601		printk(KERN_ERR "PM: Failed to allocate LZO page\n");
602		ret = -ENOMEM;
603		goto out_clean;
604	}
605
606	data = vmalloc(sizeof(*data) * nr_threads);
607	if (!data) {
608		printk(KERN_ERR "PM: Failed to allocate LZO data\n");
609		ret = -ENOMEM;
610		goto out_clean;
611	}
612	for (thr = 0; thr < nr_threads; thr++)
613		memset(&data[thr], 0, offsetof(struct cmp_data, go));
614
615	crc = kmalloc(sizeof(*crc), GFP_KERNEL);
616	if (!crc) {
617		printk(KERN_ERR "PM: Failed to allocate crc\n");
618		ret = -ENOMEM;
619		goto out_clean;
620	}
621	memset(crc, 0, offsetof(struct crc_data, go));
622
623	/*
624	 * Start the compression threads.
625	 */
626	for (thr = 0; thr < nr_threads; thr++) {
627		init_waitqueue_head(&data[thr].go);
628		init_waitqueue_head(&data[thr].done);
629
630		data[thr].thr = kthread_run(lzo_compress_threadfn,
631		                            &data[thr],
632		                            "image_compress/%u", thr);
633		if (IS_ERR(data[thr].thr)) {
634			data[thr].thr = NULL;
635			printk(KERN_ERR
636			       "PM: Cannot start compression threads\n");
637			ret = -ENOMEM;
638			goto out_clean;
639		}
640	}
641
642	/*
643	 * Start the CRC32 thread.
644	 */
645	init_waitqueue_head(&crc->go);
646	init_waitqueue_head(&crc->done);
647
648	handle->crc32 = 0;
649	crc->crc32 = &handle->crc32;
650	for (thr = 0; thr < nr_threads; thr++) {
651		crc->unc[thr] = data[thr].unc;
652		crc->unc_len[thr] = &data[thr].unc_len;
653	}
654
655	crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32");
656	if (IS_ERR(crc->thr)) {
657		crc->thr = NULL;
658		printk(KERN_ERR "PM: Cannot start CRC32 thread\n");
659		ret = -ENOMEM;
660		goto out_clean;
661	}
662
663	/*
664	 * Adjust the number of required free pages after all allocations have
665	 * been done. We don't want to run out of pages when writing.
666	 */
667	handle->reqd_free_pages = reqd_free_pages();
668
669	printk(KERN_INFO
670		"PM: Using %u thread(s) for compression.\n"
671		"PM: Compressing and saving image data (%u pages)...\n",
672		nr_threads, nr_to_write);
673	m = nr_to_write / 10;
674	if (!m)
675		m = 1;
676	nr_pages = 0;
677	bio = NULL;
678	start = ktime_get();
679	for (;;) {
680		for (thr = 0; thr < nr_threads; thr++) {
681			for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
682				ret = snapshot_read_next(snapshot);
683				if (ret < 0)
684					goto out_finish;
685
686				if (!ret)
687					break;
688
689				memcpy(data[thr].unc + off,
690				       data_of(*snapshot), PAGE_SIZE);
691
692				if (!(nr_pages % m))
693					printk(KERN_INFO
694					       "PM: Image saving progress: "
695					       "%3d%%\n",
696				               nr_pages / m * 10);
697				nr_pages++;
698			}
699			if (!off)
700				break;
701
702			data[thr].unc_len = off;
703
704			atomic_set(&data[thr].ready, 1);
705			wake_up(&data[thr].go);
706		}
707
708		if (!thr)
709			break;
710
711		crc->run_threads = thr;
712		atomic_set(&crc->ready, 1);
713		wake_up(&crc->go);
714
715		for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
716			wait_event(data[thr].done,
717			           atomic_read(&data[thr].stop));
718			atomic_set(&data[thr].stop, 0);
719
720			ret = data[thr].ret;
721
722			if (ret < 0) {
723				printk(KERN_ERR "PM: LZO compression failed\n");
724				goto out_finish;
725			}
726
727			if (unlikely(!data[thr].cmp_len ||
728			             data[thr].cmp_len >
729			             lzo1x_worst_compress(data[thr].unc_len))) {
730				printk(KERN_ERR
731				       "PM: Invalid LZO compressed length\n");
732				ret = -1;
733				goto out_finish;
734			}
735
736			*(size_t *)data[thr].cmp = data[thr].cmp_len;
737
738			/*
739			 * Given we are writing one page at a time to disk, we
740			 * copy that much from the buffer, although the last
741			 * bit will likely be smaller than full page. This is
742			 * OK - we saved the length of the compressed data, so
743			 * any garbage at the end will be discarded when we
744			 * read it.
745			 */
746			for (off = 0;
747			     off < LZO_HEADER + data[thr].cmp_len;
748			     off += PAGE_SIZE) {
749				memcpy(page, data[thr].cmp + off, PAGE_SIZE);
750
751				ret = swap_write_page(handle, page, &bio);
752				if (ret)
753					goto out_finish;
754			}
755		}
756
757		wait_event(crc->done, atomic_read(&crc->stop));
758		atomic_set(&crc->stop, 0);
759	}
760
761out_finish:
762	err2 = hib_wait_on_bio_chain(&bio);
763	stop = ktime_get();
764	if (!ret)
765		ret = err2;
766	if (!ret)
767		printk(KERN_INFO "PM: Image saving done.\n");
768	swsusp_show_speed(start, stop, nr_to_write, "Wrote");
769out_clean:
770	if (crc) {
771		if (crc->thr)
772			kthread_stop(crc->thr);
773		kfree(crc);
774	}
775	if (data) {
776		for (thr = 0; thr < nr_threads; thr++)
777			if (data[thr].thr)
778				kthread_stop(data[thr].thr);
779		vfree(data);
780	}
781	if (page) free_page((unsigned long)page);
782
783	return ret;
784}
785
786/**
787 *	enough_swap - Make sure we have enough swap to save the image.
788 *
789 *	Returns TRUE or FALSE after checking the total amount of swap
790 *	space avaiable from the resume partition.
791 */
792
793static int enough_swap(unsigned int nr_pages, unsigned int flags)
794{
795	unsigned int free_swap = count_swap_pages(root_swap, 1);
796	unsigned int required;
797
798	pr_debug("PM: Free swap pages: %u\n", free_swap);
799
800	required = PAGES_FOR_IO + nr_pages;
801	return free_swap > required;
802}
803
804/**
805 *	swsusp_write - Write entire image and metadata.
806 *	@flags: flags to pass to the "boot" kernel in the image header
807 *
808 *	It is important _NOT_ to umount filesystems at this point. We want
809 *	them synced (in case something goes wrong) but we DO not want to mark
810 *	filesystem clean: it is not. (And it does not matter, if we resume
811 *	correctly, we'll mark system clean, anyway.)
812 */
813
814int swsusp_write(unsigned int flags)
815{
816	struct swap_map_handle handle;
817	struct snapshot_handle snapshot;
818	struct swsusp_info *header;
819	unsigned long pages;
820	int error;
821
822	pages = snapshot_get_image_size();
823	error = get_swap_writer(&handle);
824	if (error) {
825		printk(KERN_ERR "PM: Cannot get swap writer\n");
826		return error;
827	}
828	if (flags & SF_NOCOMPRESS_MODE) {
829		if (!enough_swap(pages, flags)) {
830			printk(KERN_ERR "PM: Not enough free swap\n");
831			error = -ENOSPC;
832			goto out_finish;
833		}
834	}
835	memset(&snapshot, 0, sizeof(struct snapshot_handle));
836	error = snapshot_read_next(&snapshot);
837	if (error < PAGE_SIZE) {
838		if (error >= 0)
839			error = -EFAULT;
840
841		goto out_finish;
842	}
843	header = (struct swsusp_info *)data_of(snapshot);
844	error = swap_write_page(&handle, header, NULL);
845	if (!error) {
846		error = (flags & SF_NOCOMPRESS_MODE) ?
847			save_image(&handle, &snapshot, pages - 1) :
848			save_image_lzo(&handle, &snapshot, pages - 1);
849	}
850out_finish:
851	error = swap_writer_finish(&handle, flags, error);
852	return error;
853}
854
855/**
 *	The following functions allow us to read data using a swap map
 *	in a file-like way
858 */
859
860static void release_swap_reader(struct swap_map_handle *handle)
861{
862	struct swap_map_page_list *tmp;
863
864	while (handle->maps) {
865		if (handle->maps->map)
866			free_page((unsigned long)handle->maps->map);
867		tmp = handle->maps;
868		handle->maps = handle->maps->next;
869		kfree(tmp);
870	}
871	handle->cur = NULL;
872}
873
874static int get_swap_reader(struct swap_map_handle *handle,
875		unsigned int *flags_p)
876{
877	int error;
878	struct swap_map_page_list *tmp, *last;
879	sector_t offset;
880
881	*flags_p = swsusp_header->flags;
882
883	if (!swsusp_header->image) /* how can this happen? */
884		return -EINVAL;
885
886	handle->cur = NULL;
887	last = handle->maps = NULL;
888	offset = swsusp_header->image;
889	while (offset) {
890		tmp = kmalloc(sizeof(*handle->maps), GFP_KERNEL);
891		if (!tmp) {
892			release_swap_reader(handle);
893			return -ENOMEM;
894		}
895		memset(tmp, 0, sizeof(*tmp));
896		if (!handle->maps)
897			handle->maps = tmp;
898		if (last)
899			last->next = tmp;
900		last = tmp;
901
902		tmp->map = (struct swap_map_page *)
903		           __get_free_page(__GFP_WAIT | __GFP_HIGH);
904		if (!tmp->map) {
905			release_swap_reader(handle);
906			return -ENOMEM;
907		}
908
909		error = hib_bio_read_page(offset, tmp->map, NULL);
910		if (error) {
911			release_swap_reader(handle);
912			return error;
913		}
914		offset = tmp->map->next_swap;
915	}
916	handle->k = 0;
917	handle->cur = handle->maps->map;
918	return 0;
919}
920
921static int swap_read_page(struct swap_map_handle *handle, void *buf,
922				struct bio **bio_chain)
923{
924	sector_t offset;
925	int error;
926	struct swap_map_page_list *tmp;
927
928	if (!handle->cur)
929		return -EINVAL;
930	offset = handle->cur->entries[handle->k];
931	if (!offset)
932		return -EFAULT;
933	error = hib_bio_read_page(offset, buf, bio_chain);
934	if (error)
935		return error;
936	if (++handle->k >= MAP_PAGE_ENTRIES) {
937		handle->k = 0;
938		free_page((unsigned long)handle->maps->map);
939		tmp = handle->maps;
940		handle->maps = handle->maps->next;
941		kfree(tmp);
942		if (!handle->maps)
943			release_swap_reader(handle);
944		else
945			handle->cur = handle->maps->map;
946	}
947	return error;
948}
949
/*
 * swap_reader_finish - release everything the reader still holds.
 *
 * Always returns 0.
 */
static int swap_reader_finish(struct swap_map_handle *handle)
{
	release_swap_reader(handle);
	return 0;
}
956
957/**
958 *	load_image - load the image using the swap map handle
959 *	@handle and the snapshot handle @snapshot
960 *	(assume there are @nr_pages pages to load)
961 */
962
963static int load_image(struct swap_map_handle *handle,
964                      struct snapshot_handle *snapshot,
965                      unsigned int nr_to_read)
966{
967	unsigned int m;
968	int ret = 0;
969	ktime_t start;
970	ktime_t stop;
971	struct bio *bio;
972	int err2;
973	unsigned nr_pages;
974
975	printk(KERN_INFO "PM: Loading image data pages (%u pages)...\n",
976		nr_to_read);
977	m = nr_to_read / 10;
978	if (!m)
979		m = 1;
980	nr_pages = 0;
981	bio = NULL;
982	start = ktime_get();
983	for ( ; ; ) {
984		ret = snapshot_write_next(snapshot);
985		if (ret <= 0)
986			break;
987		ret = swap_read_page(handle, data_of(*snapshot), &bio);
988		if (ret)
989			break;
990		if (snapshot->sync_read)
991			ret = hib_wait_on_bio_chain(&bio);
992		if (ret)
993			break;
994		if (!(nr_pages % m))
995			printk(KERN_INFO "PM: Image loading progress: %3d%%\n",
996			       nr_pages / m * 10);
997		nr_pages++;
998	}
999	err2 = hib_wait_on_bio_chain(&bio);
1000	stop = ktime_get();
1001	if (!ret)
1002		ret = err2;
1003	if (!ret) {
1004		printk(KERN_INFO "PM: Image loading done.\n");
1005		snapshot_write_finalize(snapshot);
1006		if (!snapshot_image_loaded(snapshot))
1007			ret = -ENODATA;
1008	}
1009	swsusp_show_speed(start, stop, nr_to_read, "Read");
1010	return ret;
1011}
1012
1013/**
1014 * Structure used for LZO data decompression.
1015 */
1016struct dec_data {
1017	struct task_struct *thr;                  /* thread */
1018	atomic_t ready;                           /* ready to start flag */
1019	atomic_t stop;                            /* ready to stop flag */
1020	int ret;                                  /* return code */
1021	wait_queue_head_t go;                     /* start decompression */
1022	wait_queue_head_t done;                   /* decompression done */
1023	size_t unc_len;                           /* uncompressed length */
1024	size_t cmp_len;                           /* compressed length */
1025	unsigned char unc[LZO_UNC_SIZE];          /* uncompressed buffer */
1026	unsigned char cmp[LZO_CMP_SIZE];          /* compressed buffer */
1027};
1028
1029/**
1030 * Deompression function that runs in its own thread.
1031 */
1032static int lzo_decompress_threadfn(void *data)
1033{
1034	struct dec_data *d = data;
1035
1036	while (1) {
1037		wait_event(d->go, atomic_read(&d->ready) ||
1038		                  kthread_should_stop());
1039		if (kthread_should_stop()) {
1040			d->thr = NULL;
1041			d->ret = -1;
1042			atomic_set(&d->stop, 1);
1043			wake_up(&d->done);
1044			break;
1045		}
1046		atomic_set(&d->ready, 0);
1047
1048		d->unc_len = LZO_UNC_SIZE;
1049		d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
1050		                               d->unc, &d->unc_len);
1051		atomic_set(&d->stop, 1);
1052		wake_up(&d->done);
1053	}
1054	return 0;
1055}
1056
1057/**
1058 * load_image_lzo - Load compressed image data and decompress them with LZO.
1059 * @handle: Swap map handle to use for loading data.
1060 * @snapshot: Image to copy uncompressed data into.
1061 * @nr_to_read: Number of pages to load.
1062 */
1063static int load_image_lzo(struct swap_map_handle *handle,
1064                          struct snapshot_handle *snapshot,
1065                          unsigned int nr_to_read)
1066{
1067	unsigned int m;
1068	int ret = 0;
1069	int eof = 0;
1070	struct bio *bio;
1071	ktime_t start;
1072	ktime_t stop;
1073	unsigned nr_pages;
1074	size_t off;
1075	unsigned i, thr, run_threads, nr_threads;
1076	unsigned ring = 0, pg = 0, ring_size = 0,
1077	         have = 0, want, need, asked = 0;
1078	unsigned long read_pages = 0;
1079	unsigned char **page = NULL;
1080	struct dec_data *data = NULL;
1081	struct crc_data *crc = NULL;
1082
1083	/*
1084	 * We'll limit the number of threads for decompression to limit memory
1085	 * footprint.
1086	 */
1087	nr_threads = num_online_cpus() - 1;
1088	nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
1089
1090	page = vmalloc(sizeof(*page) * LZO_MAX_RD_PAGES);
1091	if (!page) {
1092		printk(KERN_ERR "PM: Failed to allocate LZO page\n");
1093		ret = -ENOMEM;
1094		goto out_clean;
1095	}
1096
1097	data = vmalloc(sizeof(*data) * nr_threads);
1098	if (!data) {
1099		printk(KERN_ERR "PM: Failed to allocate LZO data\n");
1100		ret = -ENOMEM;
1101		goto out_clean;
1102	}
1103	for (thr = 0; thr < nr_threads; thr++)
1104		memset(&data[thr], 0, offsetof(struct dec_data, go));
1105
1106	crc = kmalloc(sizeof(*crc), GFP_KERNEL);
1107	if (!crc) {
1108		printk(KERN_ERR "PM: Failed to allocate crc\n");
1109		ret = -ENOMEM;
1110		goto out_clean;
1111	}
1112	memset(crc, 0, offsetof(struct crc_data, go));
1113
1114	/*
1115	 * Start the decompression threads.
1116	 */
1117	for (thr = 0; thr < nr_threads; thr++) {
1118		init_waitqueue_head(&data[thr].go);
1119		init_waitqueue_head(&data[thr].done);
1120
1121		data[thr].thr = kthread_run(lzo_decompress_threadfn,
1122		                            &data[thr],
1123		                            "image_decompress/%u", thr);
1124		if (IS_ERR(data[thr].thr)) {
1125			data[thr].thr = NULL;
1126			printk(KERN_ERR
1127			       "PM: Cannot start decompression threads\n");
1128			ret = -ENOMEM;
1129			goto out_clean;
1130		}
1131	}
1132
1133	/*
1134	 * Start the CRC32 thread.
1135	 */
1136	init_waitqueue_head(&crc->go);
1137	init_waitqueue_head(&crc->done);
1138
1139	handle->crc32 = 0;
1140	crc->crc32 = &handle->crc32;
1141	for (thr = 0; thr < nr_threads; thr++) {
1142		crc->unc[thr] = data[thr].unc;
1143		crc->unc_len[thr] = &data[thr].unc_len;
1144	}
1145
1146	crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32");
1147	if (IS_ERR(crc->thr)) {
1148		crc->thr = NULL;
1149		printk(KERN_ERR "PM: Cannot start CRC32 thread\n");
1150		ret = -ENOMEM;
1151		goto out_clean;
1152	}
1153
1154	/*
1155	 * Set the number of pages for read buffering.
1156	 * This is complete guesswork, because we'll only know the real
1157	 * picture once prepare_image() is called, which is much later on
1158	 * during the image load phase. We'll assume the worst case and
1159	 * say that none of the image pages are from high memory.
1160	 */
1161	if (low_free_pages() > snapshot_get_image_size())
1162		read_pages = (low_free_pages() - snapshot_get_image_size()) / 2;
1163	read_pages = clamp_val(read_pages, LZO_MIN_RD_PAGES, LZO_MAX_RD_PAGES);
1164
1165	for (i = 0; i < read_pages; i++) {
1166		page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ?
1167		                                  __GFP_WAIT | __GFP_HIGH :
1168		                                  __GFP_WAIT | __GFP_NOWARN |
1169		                                  __GFP_NORETRY);
1170
1171		if (!page[i]) {
1172			if (i < LZO_CMP_PAGES) {
1173				ring_size = i;
1174				printk(KERN_ERR
1175				       "PM: Failed to allocate LZO pages\n");
1176				ret = -ENOMEM;
1177				goto out_clean;
1178			} else {
1179				break;
1180			}
1181		}
1182	}
1183	want = ring_size = i;
1184
1185	printk(KERN_INFO
1186		"PM: Using %u thread(s) for decompression.\n"
1187		"PM: Loading and decompressing image data (%u pages)...\n",
1188		nr_threads, nr_to_read);
1189	m = nr_to_read / 10;
1190	if (!m)
1191		m = 1;
1192	nr_pages = 0;
1193	bio = NULL;
1194	start = ktime_get();
1195
1196	ret = snapshot_write_next(snapshot);
1197	if (ret <= 0)
1198		goto out_finish;
1199
1200	for(;;) {
1201		for (i = 0; !eof && i < want; i++) {
1202			ret = swap_read_page(handle, page[ring], &bio);
1203			if (ret) {
1204				/*
1205				 * On real read error, finish. On end of data,
1206				 * set EOF flag and just exit the read loop.
1207				 */
1208				if (handle->cur &&
1209				    handle->cur->entries[handle->k]) {
1210					goto out_finish;
1211				} else {
1212					eof = 1;
1213					break;
1214				}
1215			}
1216			if (++ring >= ring_size)
1217				ring = 0;
1218		}
1219		asked += i;
1220		want -= i;
1221
1222		/*
1223		 * We are out of data, wait for some more.
1224		 */
1225		if (!have) {
1226			if (!asked)
1227				break;
1228
1229			ret = hib_wait_on_bio_chain(&bio);
1230			if (ret)
1231				goto out_finish;
1232			have += asked;
1233			asked = 0;
1234			if (eof)
1235				eof = 2;
1236		}
1237
1238		if (crc->run_threads) {
1239			wait_event(crc->done, atomic_read(&crc->stop));
1240			atomic_set(&crc->stop, 0);
1241			crc->run_threads = 0;
1242		}
1243
1244		for (thr = 0; have && thr < nr_threads; thr++) {
1245			data[thr].cmp_len = *(size_t *)page[pg];
1246			if (unlikely(!data[thr].cmp_len ||
1247			             data[thr].cmp_len >
1248			             lzo1x_worst_compress(LZO_UNC_SIZE))) {
1249				printk(KERN_ERR
1250				       "PM: Invalid LZO compressed length\n");
1251				ret = -1;
1252				goto out_finish;
1253			}
1254
1255			need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER,
1256			                    PAGE_SIZE);
1257			if (need > have) {
1258				if (eof > 1) {
1259					ret = -1;
1260					goto out_finish;
1261				}
1262				break;
1263			}
1264
1265			for (off = 0;
1266			     off < LZO_HEADER + data[thr].cmp_len;
1267			     off += PAGE_SIZE) {
1268				memcpy(data[thr].cmp + off,
1269				       page[pg], PAGE_SIZE);
1270				have--;
1271				want++;
1272				if (++pg >= ring_size)
1273					pg = 0;
1274			}
1275
1276			atomic_set(&data[thr].ready, 1);
1277			wake_up(&data[thr].go);
1278		}
1279
1280		/*
1281		 * Wait for more data while we are decompressing.
1282		 */
1283		if (have < LZO_CMP_PAGES && asked) {
1284			ret = hib_wait_on_bio_chain(&bio);
1285			if (ret)
1286				goto out_finish;
1287			have += asked;
1288			asked = 0;
1289			if (eof)
1290				eof = 2;
1291		}
1292
1293		for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
1294			wait_event(data[thr].done,
1295			           atomic_read(&data[thr].stop));
1296			atomic_set(&data[thr].stop, 0);
1297
1298			ret = data[thr].ret;
1299
1300			if (ret < 0) {
1301				printk(KERN_ERR
1302				       "PM: LZO decompression failed\n");
1303				goto out_finish;
1304			}
1305
1306			if (unlikely(!data[thr].unc_len ||
1307			             data[thr].unc_len > LZO_UNC_SIZE ||
1308			             data[thr].unc_len & (PAGE_SIZE - 1))) {
1309				printk(KERN_ERR
1310				       "PM: Invalid LZO uncompressed length\n");
1311				ret = -1;
1312				goto out_finish;
1313			}
1314
1315			for (off = 0;
1316			     off < data[thr].unc_len; off += PAGE_SIZE) {
1317				memcpy(data_of(*snapshot),
1318				       data[thr].unc + off, PAGE_SIZE);
1319
1320				if (!(nr_pages % m))
1321					printk(KERN_INFO
1322					       "PM: Image loading progress: "
1323					       "%3d%%\n",
1324					       nr_pages / m * 10);
1325				nr_pages++;
1326
1327				ret = snapshot_write_next(snapshot);
1328				if (ret <= 0) {
1329					crc->run_threads = thr + 1;
1330					atomic_set(&crc->ready, 1);
1331					wake_up(&crc->go);
1332					goto out_finish;
1333				}
1334			}
1335		}
1336
1337		crc->run_threads = thr;
1338		atomic_set(&crc->ready, 1);
1339		wake_up(&crc->go);
1340	}
1341
1342out_finish:
1343	if (crc->run_threads) {
1344		wait_event(crc->done, atomic_read(&crc->stop));
1345		atomic_set(&crc->stop, 0);
1346	}
1347	stop = ktime_get();
1348	if (!ret) {
1349		printk(KERN_INFO "PM: Image loading done.\n");
1350		snapshot_write_finalize(snapshot);
1351		if (!snapshot_image_loaded(snapshot))
1352			ret = -ENODATA;
1353		if (!ret) {
1354			if (swsusp_header->flags & SF_CRC32_MODE) {
1355				if(handle->crc32 != swsusp_header->crc32) {
1356					printk(KERN_ERR
1357					       "PM: Invalid image CRC32!\n");
1358					ret = -ENODATA;
1359				}
1360			}
1361		}
1362	}
1363	swsusp_show_speed(start, stop, nr_to_read, "Read");
1364out_clean:
1365	for (i = 0; i < ring_size; i++)
1366		free_page((unsigned long)page[i]);
1367	if (crc) {
1368		if (crc->thr)
1369			kthread_stop(crc->thr);
1370		kfree(crc);
1371	}
1372	if (data) {
1373		for (thr = 0; thr < nr_threads; thr++)
1374			if (data[thr].thr)
1375				kthread_stop(data[thr].thr);
1376		vfree(data);
1377	}
1378	vfree(page);
1379
1380	return ret;
1381}
1382
1383/**
1384 *	swsusp_read - read the hibernation image.
1385 *	@flags_p: flags passed by the "frozen" kernel in the image header should
1386 *		  be written into this memory location
1387 */
1388
1389int swsusp_read(unsigned int *flags_p)
1390{
1391	int error;
1392	struct swap_map_handle handle;
1393	struct snapshot_handle snapshot;
1394	struct swsusp_info *header;
1395
1396	memset(&snapshot, 0, sizeof(struct snapshot_handle));
1397	error = snapshot_write_next(&snapshot);
1398	if (error < PAGE_SIZE)
1399		return error < 0 ? error : -EFAULT;
1400	header = (struct swsusp_info *)data_of(snapshot);
1401	error = get_swap_reader(&handle, flags_p);
1402	if (error)
1403		goto end;
1404	if (!error)
1405		error = swap_read_page(&handle, header, NULL);
1406	if (!error) {
1407		error = (*flags_p & SF_NOCOMPRESS_MODE) ?
1408			load_image(&handle, &snapshot, header->pages - 1) :
1409			load_image_lzo(&handle, &snapshot, header->pages - 1);
1410	}
1411	swap_reader_finish(&handle);
1412end:
1413	if (!error)
1414		pr_debug("PM: Image successfully loaded\n");
1415	else
1416		pr_debug("PM: Error %d resuming\n", error);
1417	return error;
1418}
1419
1420/**
1421 *      swsusp_check - Check for swsusp signature in the resume device
1422 */
1423
1424int swsusp_check(void)
1425{
1426	int error;
1427
1428	hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
1429					    FMODE_READ, NULL);
1430	if (!IS_ERR(hib_resume_bdev)) {
1431		set_blocksize(hib_resume_bdev, PAGE_SIZE);
1432		clear_page(swsusp_header);
1433		error = hib_bio_read_page(swsusp_resume_block,
1434					swsusp_header, NULL);
1435		if (error)
1436			goto put;
1437
1438		if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) {
1439			memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
1440			/* Reset swap signature now */
1441			error = hib_bio_write_page(swsusp_resume_block,
1442						swsusp_header, NULL);
1443		} else {
1444			error = -EINVAL;
1445		}
1446
1447put:
1448		if (error)
1449			blkdev_put(hib_resume_bdev, FMODE_READ);
1450		else
1451			pr_debug("PM: Image signature found, resuming\n");
1452	} else {
1453		error = PTR_ERR(hib_resume_bdev);
1454	}
1455
1456	if (error)
1457		pr_debug("PM: Image not found (code %d)\n", error);
1458
1459	return error;
1460}
1461
1462/**
1463 *	swsusp_close - close swap device.
1464 */
1465
1466void swsusp_close(fmode_t mode)
1467{
1468	if (IS_ERR(hib_resume_bdev)) {
1469		pr_debug("PM: Image device not initialised\n");
1470		return;
1471	}
1472
1473	blkdev_put(hib_resume_bdev, mode);
1474}
1475
/**
 *      swsusp_unmark - Unmark swsusp signature in the resume device
 *
 *      Re-reads the header page at swsusp_resume_block and, if it carries
 *      HIBERNATE_SIG, restores the original signature and writes the page
 *      back.  The swap pages of root_swap are freed in either case.
 *
 *      Returns the result of the header write, or -ENODEV if the
 *      signature was not found.
 */

#ifdef CONFIG_SUSPEND
int swsusp_unmark(void)
{
	int error = -ENODEV;

	/* The result of this read is not checked. */
	hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);

	if (memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) {
		printk(KERN_ERR "PM: Cannot find swsusp signature!\n");
	} else {
		memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
		error = hib_bio_write_page(swsusp_resume_block,
					   swsusp_header, NULL);
	}

	/*
	 * Back from suspend: the image is of no further use, so release
	 * the swap pages regardless of the outcome above.
	 */
	free_all_swap_pages(root_swap);

	return error;
}
#endif
1503
1504static int swsusp_header_init(void)
1505{
1506	swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL);
1507	if (!swsusp_header)
1508		panic("Could not allocate memory for swsusp_header\n");
1509	return 0;
1510}
1511
1512core_initcall(swsusp_header_init);
1513