2 * linux/kernel/power/swsusp.c
4 * This file provides code to write suspend image to swap and read it back.
6 * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
7 * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
9 * This file is released under the GPLv2.
11 * I'd like to thank the following people for their work:
13 * Pavel Machek <pavel@ucw.cz>:
14 * Modifications, defectiveness pointing, being with me at the very beginning,
15 * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
17 * Steve Doddi <dirk@loth.demon.co.uk>:
18 * Support the possibility of hardware state restoring.
20 * Raph <grey.havens@earthling.net>:
21 * Support for preserving states of network devices and virtual console
22 * (including X and svgatextmode)
24 * Kurt Garloff <garloff@suse.de>:
25 * Straightened the critical function in order to prevent compilers from
26 * playing tricks with local variables.
28 * Andreas Mohr <a.mohr@mailto.de>
30 * Alex Badea <vampire@go.ro>:
33 * Rafael J. Wysocki <rjw@sisk.pl>
34 * Added the swap map data structure and reworked the handling of swap
36 * More state savers are welcome. Especially for the scsi layer...
38 * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
41 #include <linux/module.h>
43 #include <linux/suspend.h>
44 #include <linux/smp_lock.h>
45 #include <linux/file.h>
46 #include <linux/utsname.h>
47 #include <linux/version.h>
48 #include <linux/delay.h>
49 #include <linux/bitops.h>
50 #include <linux/spinlock.h>
51 #include <linux/genhd.h>
52 #include <linux/kernel.h>
53 #include <linux/major.h>
54 #include <linux/swap.h>
56 #include <linux/device.h>
57 #include <linux/buffer_head.h>
58 #include <linux/swapops.h>
59 #include <linux/bootmem.h>
60 #include <linux/syscalls.h>
61 #include <linux/highmem.h>
62 #include <linux/bio.h>
64 #include <asm/uaccess.h>
65 #include <asm/mmu_context.h>
66 #include <asm/pgtable.h>
67 #include <asm/tlbflush.h>
73 * Preferred image size in bytes (tunable via /sys/power/image_size).
74 * When it is set to N, swsusp will do its best to ensure the image
75 * size will not exceed N bytes, but if that is impossible, it will
76 * try to create the smallest image possible.
/*
 * Default preferred image size: 500 * 1024 * 1024 bytes (500 MiB).
 * swsusp_shrink_memory() compares its computed size (in pages) against
 * image_size / PAGE_SIZE.
 */
78 unsigned long image_size = 500 * 1024 * 1024;
/*
 * Initialised to 0 and tagged __nosavedata.
 * NOTE(review): __nosavedata presumably keeps this variable out of the
 * saved image -- confirm against the macro's definition.
 */
80 int in_suspend __nosavedata = 0;
/*
 * Highmem helpers.  Two variants are visible here: external prototypes, and
 * static stubs that do nothing and return 0.
 * NOTE(review): the two sets cannot both be active in one build; they are
 * presumably selected by a preprocessor conditional whose lines are not
 * visible in this view -- confirm.
 */
83 unsigned int count_highmem_pages(void);
84 int save_highmem(void);
85 int restore_highmem(void);
87 static int save_highmem(void) { return 0; }
88 static int restore_highmem(void) { return 0; }
89 static unsigned int count_highmem_pages(void) { return 0; }
/* Defined elsewhere; not referenced by the lines visible in this view. */
92 extern char resume_file[];
/*
 * Signature placed in swsusp_header.sig while a suspend image is present.
 * mark_swapfiles() copies 10 bytes of it (the 9 characters plus the
 * terminating NUL) and swsusp_check() compares 10 bytes against it.
 */
94 #define SWSUSP_SIG "S1SUSPEND"
/*
 * In-memory copy of the first page of the swap device.  The structure is
 * packed and aligned to PAGE_SIZE, and a single static instance named
 * swsusp_header is defined together with the type.
 *
 * Other code in this file accesses the members image, orig_sig and sig, but
 * their declarations are not visible in this view.
 * NOTE(review): the "20" in the reserved size presumably accounts for two
 * 10-byte signature fields (orig_sig and sig), and sizeof(swp_entry_t) for
 * image, so that the whole structure spans one page -- confirm.
 */
96 static struct swsusp_header {
97 char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)];
101 } __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;
/*
 * Swap type index passed to swp_entry(), count_swap_pages() and the swap
 * page allocator throughout this file.
 * NOTE(review): the initial value 0xffff looks like a "not yet determined"
 * marker; the assignment of the real index is not visible here -- confirm.
 */
107 static unsigned short root_swap = 0xffff;
/*
 * mark_swapfiles - stamp the swap header with the suspend signature.
 * @start: swap entry stored in swsusp_header.image.
 *
 * Reads the page at offset 0 of swap type root_swap into swsusp_header.
 * If its signature is "SWAP-SPACE" or "SWAPSPACE2", the original signature
 * is saved in orig_sig, sig is overwritten with SWSUSP_SIG, @start is
 * recorded in image, and the page is written back, with the result of the
 * write stored in error.  A debug message for the "not swap space" case is
 * also present (presumably the else branch).
 *
 * NOTE(review): the result of the initial READ is not checked.  Several
 * lines of this function (declarations, braces, the tail of the WRITE call
 * and the return) are not visible in this view.
 */
109 static int mark_swapfiles(swp_entry_t start)
113 rw_swap_page_sync(READ,
114 swp_entry(root_swap, 0),
115 virt_to_page((unsigned long)&swsusp_header));
116 if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
117 !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
118 memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
119 memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
120 swsusp_header.image = start;
121 error = rw_swap_page_sync(WRITE,
122 swp_entry(root_swap, 0),
123 virt_to_page((unsigned long)
126 pr_debug("swsusp: Partition is not swap space.\n");
133 * swsusp_swap_check - check if the resume device is a swap device
134 * and get its index (if so)
/*
 * Looks up the swap type of swsusp_resume_device with swap_type_of() and
 * keeps the result in res.  How res is validated, stored and returned is
 * not visible in this view; swsusp_write() treats a non-zero return from
 * this function as an error.
 */
137 static int swsusp_swap_check(void) /* This is called before saving image */
139 int res = swap_type_of(swsusp_resume_device);
149 * The bitmap is used for tracing allocated swap pages
151 * The entire bitmap consists of a number of bitmap_page
152 * structures linked with the help of the .next member.
153 * Thus each page can be allocated individually, so we only
154 * need to make 0-order memory allocations to create
/*
 * Geometry of one bitmap page:
 *   BITMAP_PAGE_SIZE   - bytes available for bits: a page minus one pointer
 *                        (the room taken by the next member below);
 *   BITMAP_PAGE_CHUNKS - number of longs that fit in those bytes;
 *   BITS_PER_CHUNK     - bits in one long (sizeof(long) * 8);
 *   BITMAP_PAGE_BITS   - total bits tracked by one bitmap page.
 */
158 #define BITMAP_PAGE_SIZE (PAGE_SIZE - sizeof(void *))
159 #define BITMAP_PAGE_CHUNKS (BITMAP_PAGE_SIZE / sizeof(long))
160 #define BITS_PER_CHUNK (sizeof(long) * 8)
161 #define BITMAP_PAGE_BITS (BITMAP_PAGE_CHUNKS * BITS_PER_CHUNK)
/*
 * Members of struct bitmap_page (the opening and closing lines of the
 * structure are not visible in this view): the bit storage itself and the
 * link to the following bitmap page.
 */
164 unsigned long chunks[BITMAP_PAGE_CHUNKS];
165 struct bitmap_page *next;
169 * The following functions are used for tracing the allocated
170 * swap pages, so that they can be freed in case of an error.
172 * The functions operate on a linked bitmap structure defined
/*
 * free_bitmap - release the pages of a linked bitmap.
 * @bitmap: first page of the bitmap.
 *
 * Pages are returned with free_page().  A second cursor, bp, is declared;
 * the loop that walks the .next chain is not visible in this view.
 */
176 static void free_bitmap(struct bitmap_page *bitmap)
178 struct bitmap_page *bp;
182 free_page((unsigned long)bitmap);
/*
 * alloc_bitmap - allocate a linked bitmap able to hold @nr_bits bits.
 * @nr_bits: number of bits the bitmap must cover.
 *
 * The first page is always allocated; the loop then appends one more zeroed
 * page for every further BITMAP_PAGE_BITS bits until @nr_bits is covered.
 * All pages come from get_zeroed_page(GFP_KERNEL), so every bit starts
 * cleared.  Failure handling and the return statement are not visible in
 * this view; get_swap_writer() treats a NULL result as failure.
 */
187 static struct bitmap_page *alloc_bitmap(unsigned int nr_bits)
189 struct bitmap_page *bitmap, *bp;
195 bitmap = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL);
197 for (n = BITMAP_PAGE_BITS; n < nr_bits; n += BITMAP_PAGE_BITS) {
198 bp->next = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL);
/*
 * bitmap_set - set one bit in a linked bitmap.
 * @bitmap: first page of the bitmap.
 * @bit: index of the bit to set.
 *
 * n tracks the exclusive upper bound of the bit range covered by the current
 * page; the list is walked until that bound exceeds @bit or the list ends.
 * The bit index is then reduced chunk by chunk and the bit is OR-ed into the
 * selected chunk.
 *
 * alloc_swap_page() treats a non-zero return value as failure.  Some lines
 * (declarations, the end-of-list check, the chunk index update and the
 * return statements) are not visible in this view.
 */
208 static int bitmap_set(struct bitmap_page *bitmap, unsigned long bit)
212 n = BITMAP_PAGE_BITS;
213 while (bitmap && n <= bit) {
214 n += BITMAP_PAGE_BITS;
215 bitmap = bitmap->next;
219 n -= BITMAP_PAGE_BITS;
222 while (bit >= BITS_PER_CHUNK) {
223 bit -= BITS_PER_CHUNK;
226 bitmap->chunks[n] |= (1UL << bit);
/*
 * alloc_swap_page - allocate one swap page and record it in the bitmap.
 * @swap: swap type to allocate from.
 * @bitmap: bitmap in which the allocated offset is recorded.
 *
 * Takes a page with get_swap_page_of_type() and keeps only its offset.
 * If the offset cannot be recorded with bitmap_set(), the swap page is
 * released again with swap_free().
 *
 * The return statements are not visible in this view; callers in this file
 * treat a return value of 0 as failure.
 */
230 static unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap)
232 unsigned long offset;
234 offset = swp_offset(get_swap_page_of_type(swap));
236 if (bitmap_set(bitmap, offset)) {
237 swap_free(swp_entry(swap, offset));
/*
 * free_all_swap_pages - release every swap page recorded in a bitmap.
 * @swap: swap type the pages belong to.
 * @bitmap: first page of the bitmap to scan.
 *
 * For each bitmap page, every chunk is scanned with a one-bit mask that is
 * shifted left until it becomes zero; for each set bit the matching swap
 * entry is freed with swap_free().  The outer loop over bitmap pages and the
 * update of the running index bit are not visible in this view.
 */
244 static void free_all_swap_pages(int swap, struct bitmap_page *bitmap)
251 for (n = 0; n < BITMAP_PAGE_CHUNKS; n++)
252 for (test = 1UL; test; test <<= 1) {
253 if (bitmap->chunks[n] & test)
254 swap_free(swp_entry(swap, bit));
257 bitmap = bitmap->next;
262 * write_page - Write one page to given swap location.
263 * @buf: Address we're writing.
264 * @offset: Offset of the swap page we're writing to.
/*
 * Builds the swap entry (root_swap, @offset) and writes the page containing
 * @buf to it with rw_swap_page_sync(WRITE, ...); the result is kept in
 * error.  Guard conditions and the return statement are not visible in this
 * view.
 */
267 static int write_page(void *buf, unsigned long offset)
273 entry = swp_entry(root_swap, offset);
274 error = rw_swap_page_sync(WRITE, entry, virt_to_page(buf));
280 * The swap map is a data structure used for keeping track of each page
281 * written to a swap partition. It consists of many swap_map_page
282 * structures that each contain an array of MAP_PAGE_ENTRIES swap entries.
283 * These structures are stored on the swap and linked together with the
284 * help of the .next_swap member.
286 * The swap map is created during suspend. The swap map pages are
287 * allocated and populated one at a time, so we only need one memory
288 * page to set up the entire structure.
290 * During resume we also only need to use one swap_map_page structure
/*
 * Number of swap offsets held by one swap map page: the count of longs in a
 * page minus one, which leaves room for the next_swap link.
 */
294 #define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(long) - 1)
/*
 * One page of the swap map: MAP_PAGE_ENTRIES swap offsets followed by the
 * swap offset of the next map page.  The closing line of the structure is
 * not visible in this view.
 */
296 struct swap_map_page {
297 unsigned long entries[MAP_PAGE_ENTRIES];
298 unsigned long next_swap;
302 * The swap_map_handle structure is used for handling swap in
/*
 * State shared by the swap map writer and reader functions:
 *   cur      - in-memory copy of the swap map page currently in use;
 *   cur_swap - swap offset where cur will be written (writer side);
 *   bitmap   - bitmap recording the swap pages allocated by the writer.
 * The functions also use a member k (index of the next slot in
 * cur->entries); its declaration and the closing line of the structure are
 * not visible in this view.
 */
306 struct swap_map_handle {
307 struct swap_map_page *cur;
308 unsigned long cur_swap;
309 struct bitmap_page *bitmap;
/*
 * Releases the writer's memory: the current swap map page is freed with
 * free_page(), then the allocation bitmap is freed and its pointer cleared.
 * Swap pages themselves are not freed here.  Any guard around the first
 * free_page() call is not visible in this view.
 */
313 static void release_swap_writer(struct swap_map_handle *handle)
316 free_page((unsigned long)handle->cur);
319 free_bitmap(handle->bitmap);
320 handle->bitmap = NULL;
/*
 * get_swap_writer - prepare @handle for writing an image to swap.
 *
 * Allocates, in order: a zeroed page for the current swap map page, a
 * bitmap sized by count_swap_pages(root_swap, 0), and the swap page that
 * will hold the first swap map page (stored in cur_swap).  If the bitmap or
 * the swap page cannot be obtained, everything allocated so far is released
 * with release_swap_writer().  The check of the first allocation and the
 * return statements are not visible in this view.
 */
323 static int get_swap_writer(struct swap_map_handle *handle)
325 handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
328 handle->bitmap = alloc_bitmap(count_swap_pages(root_swap, 0));
329 if (!handle->bitmap) {
330 release_swap_writer(handle);
333 handle->cur_swap = alloc_swap_page(root_swap, handle->bitmap);
334 if (!handle->cur_swap) {
335 release_swap_writer(handle);
/*
 * swap_write_page - write one data page and record it in the swap map.
 * @handle: writer state.
 * @buf: page of data to write.
 *
 * A fresh swap page is allocated for @buf, the data is written there and the
 * offset is appended to the current map page.  When the map page becomes
 * full, another swap page is allocated for the next map page, its offset is
 * stored in next_swap, the full map page is written to cur_swap, and the
 * in-memory page is zeroed and reused with cur_swap pointing at the new
 * location.
 *
 * Error checks between the visible steps and the return statements are not
 * visible in this view.
 */
342 static int swap_write_page(struct swap_map_handle *handle, void *buf)
345 unsigned long offset;
349 offset = alloc_swap_page(root_swap, handle->bitmap);
350 error = write_page(buf, offset);
353 handle->cur->entries[handle->k++] = offset;
354 if (handle->k >= MAP_PAGE_ENTRIES) {
355 offset = alloc_swap_page(root_swap, handle->bitmap);
358 handle->cur->next_swap = offset;
359 error = write_page(handle->cur, handle->cur_swap);
362 memset(handle->cur, 0, PAGE_SIZE);
363 handle->cur_swap = offset;
/*
 * Writes the current (possibly partially filled) swap map page to cur_swap,
 * provided both the page and its swap location are set, and returns the
 * result of write_page().  The return value for the other case is not
 * visible in this view.
 */
369 static int flush_swap_writer(struct swap_map_handle *handle)
371 if (handle->cur && handle->cur_swap)
372 return write_page(handle->cur, handle->cur_swap);
378 * save_image - save the suspend image data
/*
 * save_image - write the image data pages to swap.
 * @handle: swap map writer state.
 * @snapshot: snapshot handle the pages are read from.
 * @nr_pages: number of pages announced in the progress message.
 *
 * Each page is obtained with snapshot_read_next() and written with
 * swap_write_page().  Progress is printed as a percentage (nr_pages / m),
 * using backspaces to overwrite the previous value, and "done" is printed at
 * the end.  The loop construct, the computation of m and the return
 * statement are not visible in this view.
 */
381 static int save_image(struct swap_map_handle *handle,
382 struct snapshot_handle *snapshot,
383 unsigned int nr_pages)
389 printk("Saving image data pages (%u pages) ... ", nr_pages);
395 ret = snapshot_read_next(snapshot, PAGE_SIZE);
397 error = swap_write_page(handle, data_of(*snapshot));
401 printk("\b\b\b\b%3d%%", nr_pages / m);
406 printk("\b\b\b\bdone\n");
411 * enough_swap - Make sure we have enough swap to save the image.
413 * Returns TRUE or FALSE after checking the total amount of swap
414 * space available from the resume partition.
/*
 * Returns non-zero when the free swap reported by
 * count_swap_pages(root_swap, 1) is strictly greater than
 * @nr_pages + PAGES_FOR_IO + ceil(@nr_pages / PBES_PER_PAGE).
 */
417 static int enough_swap(unsigned int nr_pages)
419 unsigned int free_swap = count_swap_pages(root_swap, 1);
421 pr_debug("swsusp: free swap pages: %u\n", free_swap);
422 return free_swap > (nr_pages + PAGES_FOR_IO +
423 (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
427 * swsusp_write - Write entire image and metadata.
429 * It is important _NOT_ to umount filesystems at this point. We want
430 * them synced (in case something goes wrong) but we DO not want to mark
431 * filesystem clean: it is not. (And it does not matter, if we resume
432 * correctly, we'll mark system clean, anyway.)
/*
 * Visible sequence of steps:
 *  1. swsusp_swap_check() must succeed, otherwise an error is logged.
 *  2. The first snapshot page is fetched with snapshot_read_next(); a short
 *     result is turned into the negative value itself or -EFAULT.  That
 *     page is interpreted as the struct swsusp_info header.
 *  3. enough_swap(header->pages) must hold, otherwise an error is logged.
 *  4. A swap writer is set up; the swap offset of the first map page is
 *     remembered in start.
 *  5. The header page is written, then header->pages - 1 data pages via
 *     save_image().
 *  6. The last map page is flushed and the swap header is stamped with
 *     mark_swapfiles().
 *  7. free_all_swap_pages() and release_swap_writer() are called; the
 *     conditions under which they run are not visible in this view.
 *
 * NOTE(review): the value returned by flush_swap_writer() is not used.
 * Error checks between the steps, labels and return statements are not
 * visible in this view.
 */
435 int swsusp_write(void)
437 struct swap_map_handle handle;
438 struct snapshot_handle snapshot;
439 struct swsusp_info *header;
443 if ((error = swsusp_swap_check())) {
444 printk(KERN_ERR "swsusp: Cannot find swap device, try swapon -a.\n");
447 memset(&snapshot, 0, sizeof(struct snapshot_handle));
448 error = snapshot_read_next(&snapshot, PAGE_SIZE);
449 if (error < PAGE_SIZE)
450 return error < 0 ? error : -EFAULT;
451 header = (struct swsusp_info *)data_of(snapshot);
452 if (!enough_swap(header->pages)) {
453 printk(KERN_ERR "swsusp: Not enough free swap\n");
456 error = get_swap_writer(&handle);
458 start = handle.cur_swap;
459 error = swap_write_page(&handle, header);
462 error = save_image(&handle, &snapshot, header->pages - 1);
464 flush_swap_writer(&handle);
466 error = mark_swapfiles(swp_entry(root_swap, start));
470 free_all_swap_pages(root_swap, handle.bitmap);
471 release_swap_writer(&handle);
476 * swsusp_shrink_memory - Try to free as much memory as needed
478 * ... but do not OOM-kill anyone
480 * Notice: all userland should be stopped before it is called, or
481 * livelock is possible.
/* Number of pages requested from shrink_all_memory() per call. */
484 #define SHRINK_BITE 10000
/*
 * Visible behaviour: a size estimate is built from twice the highmem page
 * count, plus 1/50 of that, plus count_data_pages(), plus the number of
 * pages needed to hold that many PBEs (the remainder of that expression is
 * not visible).  Free pages of non-highmem zones are subtracted from tmp.
 * shrink_all_memory(SHRINK_BITE) is called from two places, the second one
 * when size exceeds image_size / PAGE_SIZE.  A spinner character from p[] is
 * printed while working, and the total in pages is reported at the end.
 *
 * The loop structure, the conditions around the first shrink call, the
 * accumulation into pages and the return statement are not visible in this
 * view.
 */
486 int swsusp_shrink_memory(void)
490 unsigned long pages = 0;
494 printk("Shrinking memory... ");
496 size = 2 * count_highmem_pages();
497 size += size / 50 + count_data_pages();
498 size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE +
502 if (!is_highmem(zone))
503 tmp -= zone->free_pages;
505 tmp = shrink_all_memory(SHRINK_BITE);
509 } else if (size > image_size / PAGE_SIZE) {
510 tmp = shrink_all_memory(SHRINK_BITE);
513 printk("\b%c", p[i++%4]);
515 printk("\bdone (%lu pages freed)\n", pages);
/*
 * Visible sequence: arch_prepare_suspend(), device_power_down(PMSG_FREEZE)
 * with an error message on failure, save_highmem() with an error message
 * and a jump to the Restore_highmem label on failure, then
 * save_processor_state(), swsusp_arch_suspend() (its error is logged) and
 * restore_processor_state().
 *
 * The Restore_highmem label itself, the remaining cleanup and the return
 * statements are not visible in this view.
 */
520 int swsusp_suspend(void)
524 if ((error = arch_prepare_suspend()))
527 /* At this point, device_suspend() has been called, but *not*
528 * device_power_down(). We *must* device_power_down() now.
529 * Otherwise, drivers for some devices (e.g. interrupt controllers)
530 * become desynchronized with the actual state of the hardware
531 * at resume time, and evil weirdness ensues.
533 if ((error = device_power_down(PMSG_FREEZE))) {
534 printk(KERN_ERR "Some devices failed to power down, aborting suspend\n");
538 if ((error = save_highmem())) {
539 printk(KERN_ERR "swsusp: Not enough free pages for highmem\n");
540 goto Restore_highmem;
543 save_processor_state();
544 if ((error = swsusp_arch_suspend()))
545 printk(KERN_ERR "Error %d suspending\n", error);
546 /* Restore control flow magically appears here */
547 restore_processor_state();
/*
 * Visible sequence: device_power_down(PMSG_FREEZE), whose failure is only
 * logged, then save_processor_state() and swsusp_arch_resume().  The
 * remaining visible calls, restore_processor_state() and
 * touch_softlockup_watchdog(), follow the comment saying that the code after
 * swsusp_arch_resume() runs only on failure.
 *
 * The memory-freeing step mentioned in the comments and the return statement
 * are not visible in this view.
 */
556 int swsusp_resume(void)
560 if (device_power_down(PMSG_FREEZE))
561 printk(KERN_ERR "Some devices failed to power down, very bad\n");
562 /* We'll ignore saved state, but this gets preempt count (etc) right */
563 save_processor_state();
564 error = swsusp_arch_resume();
565 /* Code below is only ever reached in case of failure. Otherwise
566 * execution continues at place where swsusp_arch_suspend was called
569 /* The only reason why swsusp_arch_resume() can fail is memory being
570 * very tight, so we have to free it as soon as we can to avoid
571 * subsequent failures
574 restore_processor_state();
576 touch_softlockup_watchdog();
583 * Using bio to read from swap.
584 * This code requires a bit more work than just using buffer heads
585 * but, it is the recommended way for 2.5/2.6.
586 * The following are to signal the beginning and end of I/O. Bios
587 * finish asynchronously, while we want them to happen synchronously.
588 * A simple atomic_t, and a wait loop take care of this problem.
/*
 * Completion flag for the single in-flight bio: submit() sets it to 1 before
 * submitting and spins until end_io() sets it back to 0.
 */
591 static atomic_t io_done = ATOMIC_INIT(0);
/*
 * Bio completion callback installed by submit().  Panics if the bio is not
 * marked BIO_UPTODATE; otherwise clears io_done so that the waiter in
 * submit() can proceed.  The return statement is not visible in this view.
 */
593 static int end_io(struct bio *bio, unsigned int num, int err)
595 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
596 panic("I/O error reading memory image");
597 atomic_set(&io_done, 0);
/*
 * Block device the image is read from.  Opened by swsusp_check() with
 * open_by_devnum(), used as the target of every bio built in submit(), and
 * released with blkdev_put().  It may hold an ERR_PTR value, which the users
 * in this file test with IS_ERR().
 */
601 static struct block_device *resume_bdev;
604 * submit - submit BIO request.
605 * @rw: READ or WRITE.
606 * @page_off: physical offset of page.
607 * @page: page we're reading or writing.
609 * Straight from the textbook - allocate and initialize the bio.
610 * If we're writing, make sure the page is marked as dirty.
611 * Then submit it and wait.
/*
 * Performs one page-sized transfer on resume_bdev and waits for it.
 *
 * A one-vector bio is allocated with GFP_ATOMIC, aimed at sector
 * page_off * (PAGE_SIZE >> 9), i.e. @page_off expressed in 512-byte sectors,
 * and given end_io() as its completion callback.  If the page cannot be
 * added in full, an error is printed.  Otherwise io_done is raised, the bio
 * is submitted with the BIO_RW_SYNC bit set, and the function spins until
 * end_io() clears io_done.
 *
 * The allocation check, the body of the wait loop, the condition guarding
 * bio_set_pages_dirty(), the release of the bio and the return statements
 * are not visible in this view.
 */
614 static int submit(int rw, pgoff_t page_off, void *page)
619 bio = bio_alloc(GFP_ATOMIC, 1);
622 bio->bi_sector = page_off * (PAGE_SIZE >> 9);
623 bio->bi_bdev = resume_bdev;
624 bio->bi_end_io = end_io;
626 if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
627 printk("swsusp: ERROR: adding page to bio at %ld\n",page_off);
633 atomic_set(&io_done, 1);
634 submit_bio(rw | (1 << BIO_RW_SYNC), bio);
635 while (atomic_read(&io_done))
638 bio_set_pages_dirty(bio);
/* Reads the page at page offset @page_off of resume_bdev into @page via submit(). */
644 static int bio_read_page(pgoff_t page_off, void *page)
646 return submit(READ, page_off, page);
/* Writes @page to page offset @page_off of resume_bdev via submit(). */
649 static int bio_write_page(pgoff_t page_off, void *page)
651 return submit(WRITE, page_off, page);
655 * The following functions allow us to read data using a swap map
656 * in a file-like way
/*
 * Frees the reader's in-memory swap map page with free_page().  Any guard
 * around the call and any reset of handle->cur are not visible in this view.
 */
659 static void release_swap_reader(struct swap_map_handle *handle)
662 free_page((unsigned long)handle->cur);
/*
 * get_swap_reader - prepare @handle for reading an image from swap.
 *
 * Uses a swap entry named start (its parameter declaration is on a line not
 * visible in this view).  A zero offset in start is special-cased.
 * Otherwise a zeroed page is allocated with GFP_ATOMIC for the first swap
 * map page and filled by reading swap page swp_offset(start); the reader is
 * released again on the visible failure path.  Return statements are not
 * visible in this view.
 */
666 static int get_swap_reader(struct swap_map_handle *handle,
671 if (!swp_offset(start))
673 handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
676 error = bio_read_page(swp_offset(start), handle->cur);
678 release_swap_reader(handle);
/*
 * swap_read_page - read the next data page listed in the swap map.
 * @handle: reader state.
 * @buf: destination page.
 *
 * Takes the swap offset at entries[k] of the current map page and reads that
 * page into @buf.  After advancing k past the last entry, the offset of the
 * following map page is taken from next_swap.  Both a call to
 * release_swap_reader() and a read of the next map page into handle->cur
 * are present; the condition choosing between them is not visible in this
 * view, nor are the other checks and the return statements.
 */
685 static int swap_read_page(struct swap_map_handle *handle, void *buf)
687 unsigned long offset;
692 offset = handle->cur->entries[handle->k];
695 error = bio_read_page(offset, buf);
698 if (++handle->k >= MAP_PAGE_ENTRIES) {
700 offset = handle->cur->next_swap;
702 release_swap_reader(handle);
704 error = bio_read_page(offset, handle->cur);
710 * load_image - load the image using the swap map handle
711 * @handle and the snapshot handle @snapshot
712 * (assume there are @nr_pages pages to load)
/*
 * Reading counterpart of save_image(): each destination page is obtained
 * with snapshot_write_next() and filled by swap_read_page().  Progress is
 * printed as a percentage (nr_pages / m), and "done" is printed at the end.
 * Finally snapshot_image_loaded() is consulted; what happens when it reports
 * an incomplete image is not visible in this view, nor are the loop
 * construct, the computation of m and the return statement.
 */
715 static int load_image(struct swap_map_handle *handle,
716 struct snapshot_handle *snapshot,
717 unsigned int nr_pages)
723 printk("Loading image data pages (%u pages) ... ", nr_pages);
729 ret = snapshot_write_next(snapshot, PAGE_SIZE);
731 error = swap_read_page(handle, data_of(*snapshot));
735 printk("\b\b\b\b%3d%%", nr_pages / m);
740 printk("\b\b\b\bdone\n");
741 if (!snapshot_image_loaded(snapshot))
/*
 * Visible sequence of steps:
 *  1. If resume_bdev holds an error pointer, that error is returned.
 *  2. A zeroed snapshot handle is asked for its first page with
 *     snapshot_write_next(); a short result is turned into the negative
 *     value itself or -EFAULT.  That page receives the image header.
 *  3. A swap reader is opened at swsusp_header.image and the header page is
 *     read from swap.
 *  4. header->image_pages pages are loaded with load_image().
 *  5. The reader is released and resume_bdev is put.
 *  6. Success or the error code is reported with pr_debug().
 *
 * Error checks between the steps and the return statement are not visible
 * in this view.
 */
746 int swsusp_read(void)
749 struct swap_map_handle handle;
750 struct snapshot_handle snapshot;
751 struct swsusp_info *header;
752 unsigned int nr_pages;
754 if (IS_ERR(resume_bdev)) {
755 pr_debug("swsusp: block device not initialised\n");
756 return PTR_ERR(resume_bdev);
759 memset(&snapshot, 0, sizeof(struct snapshot_handle));
760 error = snapshot_write_next(&snapshot, PAGE_SIZE);
761 if (error < PAGE_SIZE)
762 return error < 0 ? error : -EFAULT;
763 header = (struct swsusp_info *)data_of(snapshot);
764 error = get_swap_reader(&handle, swsusp_header.image);
766 error = swap_read_page(&handle, header);
768 nr_pages = header->image_pages;
769 error = load_image(&handle, &snapshot, nr_pages);
771 release_swap_reader(&handle);
773 blkdev_put(resume_bdev);
776 pr_debug("swsusp: Reading resume file was successful\n");
778 pr_debug("swsusp: Error %d resuming\n", error);
783 * swsusp_check - Check for swsusp signature in the resume device
/*
 * Opens swsusp_resume_device read-only into resume_bdev.  On success the
 * block size is set to PAGE_SIZE, swsusp_header is cleared and page 0 of the
 * device is read into it.  If the signature equals SWSUSP_SIG, the original
 * swap signature saved in orig_sig is put back and the header page is
 * written to the device again.  A blkdev_put() call, a "signature found"
 * debug message and an error debug message are present; the conditions
 * selecting them are not visible in this view.  If the open failed, the
 * error is taken from the ERR_PTR value in resume_bdev.
 *
 * The return statement and several branch lines are not visible in this
 * view.
 */
786 int swsusp_check(void)
790 resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
791 if (!IS_ERR(resume_bdev)) {
792 set_blocksize(resume_bdev, PAGE_SIZE);
793 memset(&swsusp_header, 0, sizeof(swsusp_header));
794 if ((error = bio_read_page(0, &swsusp_header)))
796 if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
797 memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
798 /* Reset swap signature now */
799 error = bio_write_page(0, &swsusp_header);
804 blkdev_put(resume_bdev);
806 pr_debug("swsusp: Signature found, resuming\n");
808 error = PTR_ERR(resume_bdev);
812 pr_debug("swsusp: Error %d check for resume file\n", error);
818 * swsusp_close - close swap device.
/*
 * Releases resume_bdev with blkdev_put().  When resume_bdev holds an error
 * pointer only a debug message is printed; the early return for that case
 * is presumably on a line not visible in this view.
 */
821 void swsusp_close(void)
823 if (IS_ERR(resume_bdev)) {
824 pr_debug("swsusp: block device not initialised\n");
828 blkdev_put(resume_bdev);