From: Pavel Machek <pavel@ucw.cz>

This is patch from Rafael, it eliminates order-5 (or worse) allocations
during suspend.  I did few style/whitespace modifications.  It was tested
by me, Rafael, and Stefan from SuSE.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/include/linux/suspend.h |   17 +++
 25-akpm/kernel/power/swsusp.c   |  175 +++++++++++++++++++++-------------------
 2 files changed, 106 insertions(+), 86 deletions(-)

diff -puN include/linux/suspend.h~swsusp-do-not-use-higher-order-memory-allocations-on-suspend include/linux/suspend.h
--- 25/include/linux/suspend.h~swsusp-do-not-use-higher-order-memory-allocations-on-suspend	2005-02-23 02:07:56.000000000 -0800
+++ 25-akpm/include/linux/suspend.h	2005-02-23 02:07:56.000000000 -0800
@@ -15,11 +15,22 @@ typedef struct pbe {
 	unsigned long address;		/* address of the copy */
 	unsigned long orig_address;	/* original address of page */
 	swp_entry_t swap_address;	
-	swp_entry_t dummy;		/* we need scratch space at 
-					 * end of page (see link, diskpage)
-					 */
+
+	struct pbe *next;	/* also used as scratch space at
+				 * end of page (see link, diskpage)
+				 */
 } suspend_pagedir_t;
 
+#define for_each_pbe(pbe, pblist) \
+	for (pbe = pblist ; pbe ; pbe = pbe->next)
+
+#define PBES_PER_PAGE      (PAGE_SIZE/sizeof(struct pbe))
+#define PB_PAGE_SKIP       (PBES_PER_PAGE-1)
+
+#define for_each_pb_page(pbe, pblist) \
+	for (pbe = pblist ; pbe ; pbe = (pbe+PB_PAGE_SKIP)->next)
+
+
 #define SWAP_FILENAME_MAXLENGTH	32
 
 
diff -puN kernel/power/swsusp.c~swsusp-do-not-use-higher-order-memory-allocations-on-suspend kernel/power/swsusp.c
--- 25/kernel/power/swsusp.c~swsusp-do-not-use-higher-order-memory-allocations-on-suspend	2005-02-23 02:07:56.000000000 -0800
+++ 25-akpm/kernel/power/swsusp.c	2005-02-23 02:08:07.000000000 -0800
@@ -76,7 +76,6 @@
 extern const void __nosave_begin, __nosave_end;
 
 /* Variables to be preserved over suspend */
-static int pagedir_order_check;
 static int nr_copy_pages_check;
 
 extern char resume_file[];
@@ -225,8 +224,6 @@ static void lock_swapdevices(void)
 	swap_list_unlock();
 }
 
-
-
 /**
  *	write_swap_page - Write one page to a fresh swap location.
  *	@addr:	Address we're writing.
@@ -239,7 +236,6 @@ static void lock_swapdevices(void)
  *	This is a partial improvement, since we will at least return other
  *	errors, though we need to eventually fix the damn code.
  */
-
 static int write_page(unsigned long addr, swp_entry_t * loc)
 {
 	swp_entry_t entry;
@@ -259,14 +255,12 @@ static int write_page(unsigned long addr
 	return error;
 }
 
-
 /**
  *	data_free - Free the swap entries used by the saved image.
  *
  *	Walk the list of used swap entries and free each one. 
  *	This is only used for cleanup when suspend fails.
  */
-
 static void data_free(void)
 {
 	swp_entry_t entry;
@@ -282,28 +276,27 @@ static void data_free(void)
 	}
 }
 
-
 /**
  *	data_write - Write saved image to swap.
  *
  *	Walk the list of pages in the image and sync each one to swap.
  */
-
 static int data_write(void)
 {
-	int error = 0;
-	int i;
+	int error = 0, i = 0;
 	unsigned int mod = nr_copy_pages / 100;
+	struct pbe *p;
 
 	if (!mod)
 		mod = 1;
 
 	printk( "Writing data to swap (%d pages)...     ", nr_copy_pages );
-	for (i = 0; i < nr_copy_pages && !error; i++) {
+	for_each_pbe(p, pagedir_nosave) {
 		if (!(i%mod))
 			printk( "\b\b\b\b%3d%%", i / mod );
-		error = write_page((pagedir_nosave+i)->address,
-					  &((pagedir_nosave+i)->swap_address));
+		if ((error = write_page(p->address, &(p->swap_address))))
+			return error;
+		i++;
 	}
 	printk("\b\b\b\bdone\n");
 	return error;
@@ -326,15 +319,14 @@ static void dump_info(void)
 
 static void init_header(void)
 {
-	memset(&swsusp_info,0,sizeof(swsusp_info));
+	memset(&swsusp_info, 0, sizeof(swsusp_info));
 	swsusp_info.version_code = LINUX_VERSION_CODE;
 	swsusp_info.num_physpages = num_physpages;
-	memcpy(&swsusp_info.uts,&system_utsname,sizeof(system_utsname));
+	memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname));
 
 	swsusp_info.suspend_pagedir = pagedir_nosave;
 	swsusp_info.cpus = num_online_cpus();
 	swsusp_info.image_pages = nr_copy_pages;
-	dump_info();
 }
 
 static int close_swap(void)
@@ -342,7 +334,8 @@ static int close_swap(void)
 	swp_entry_t entry;
 	int error;
 
-	error = write_page((unsigned long)&swsusp_info,&entry);
+	dump_info();
+	error = write_page((unsigned long)&swsusp_info, &entry);
 	if (!error) { 
 		printk( "S" );
 		error = mark_swapfiles(entry);
@@ -373,15 +366,18 @@ static void free_pagedir_entries(void)
 
 static int write_pagedir(void)
 {
-	unsigned long addr = (unsigned long)pagedir_nosave;
 	int error = 0;
-	int n = SUSPEND_PD_PAGES(nr_copy_pages);
-	int i;
+	unsigned n = 0;
+	struct pbe * pbe;
+
+	printk( "Writing pagedir...");
+	for_each_pb_page(pbe, pagedir_nosave) {
+		if ((error = write_page((unsigned long)pbe, &swsusp_info.pagedir[n++])))
+			return error;
+	}
 
 	swsusp_info.pagedir_pages = n;
-	printk( "Writing pagedir (%d pages)\n", n);
-	for (i = 0; i < n && !error; i++, addr += PAGE_SIZE)
-		error = write_page(addr, &swsusp_info.pagedir[i]);
+	printk("done (%u pages)\n", n);
 	return error;
 }
 
@@ -567,8 +563,8 @@ static void copy_data_pages(void)
 	struct zone *zone;
 	unsigned long zone_pfn;
 	struct pbe * pbe = pagedir_nosave;
-	int to_copy = nr_copy_pages;
 	
+	pr_debug("copy_data_pages(): pages to copy: %d\n", nr_copy_pages);
 	for_each_zone(zone) {
 		if (is_highmem(zone))
 			continue;
@@ -577,78 +573,94 @@ static void copy_data_pages(void)
 			if (saveable(zone, &zone_pfn)) {
 				struct page * page;
 				page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
+				BUG_ON(!pbe);
 				pbe->orig_address = (long) page_address(page);
 				/* copy_page is not usable for copying task structs. */
 				memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE);
-				pbe++;
-				to_copy--;
+				pbe = pbe->next;
 			}
 		}
 	}
-	BUG_ON(to_copy);
+	BUG_ON(pbe);
 }
 
 
 /**
- *	calc_order - Determine the order of allocation needed for pagedir_save.
- *
- *	This looks tricky, but is just subtle. Please fix it some time.
- *	Since there are %nr_copy_pages worth of pages in the snapshot, we need
- *	to allocate enough contiguous space to hold 
- *		(%nr_copy_pages * sizeof(struct pbe)), 
- *	which has the saved/orig locations of the page.. 
- *
- *	SUSPEND_PD_PAGES() tells us how many pages we need to hold those 
- *	structures, then we call get_bitmask_order(), which will tell us the
- *	last bit set in the number, starting with 1. (If we need 30 pages, that
- *	is 0x0000001e in hex. The last bit is the 5th, which is the order we 
- *	would use to allocate 32 contiguous pages).
- *
- *	Since we also need to save those pages, we add the number of pages that
- *	we need to nr_copy_pages, and in case of an overflow, do the 
- *	calculation again to update the number of pages needed. 
- *
- *	With this model, we will tend to waste a lot of memory if we just cross
- *	an order boundary. Plus, the higher the order of allocation that we try
- *	to do, the more likely we are to fail in a low-memory situtation 
- *	(though	we're unlikely to get this far in such a case, since swsusp 
- *	requires half of memory to be free anyway).
+ *	calc_nr - Determine the number of pages needed for a pbe list.
  */
 
-
-static void calc_order(void)
+static int calc_nr(int nr_copy)
 {
-	int diff = 0;
-	int order = 0;
+	int extra = 0;
+	int mod = !!(nr_copy % PBES_PER_PAGE);
+	int diff = (nr_copy / PBES_PER_PAGE) + mod;
 
 	do {
-		diff = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages)) - order;
-		if (diff) {
-			order += diff;
-			nr_copy_pages += 1 << diff;
-		}
-	} while(diff);
-	pagedir_order = order;
+		extra += diff;
+		nr_copy += diff;
+		mod = !!(nr_copy % PBES_PER_PAGE);
+		diff = (nr_copy / PBES_PER_PAGE) + mod - extra;
+	} while (diff > 0);
+
+	return nr_copy;
 }
 
+/**
+ *	free_pagedir - free pages allocated with alloc_pagedir()
+ */
+
+static inline void free_pagedir(struct pbe *pblist)
+{
+	struct pbe *pbe;
+
+	while (pblist) {
+		pbe = (pblist + PB_PAGE_SKIP)->next;
+		free_page((unsigned long)pblist);
+		pblist = pbe;
+	}
+}
 
 /**
  *	alloc_pagedir - Allocate the page directory.
  *
- *	First, determine exactly how many contiguous pages we need and
+ *	First, determine exactly how many pages we need and
  *	allocate them.
+ *
+ *	We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
+ *	struct pbe elements (pbes) and the last element in the page points
+ *	to the next page.
+ *
+ *	On each page we set up a list of struct_pbe elements.
  */
 
-static int alloc_pagedir(void)
+static struct pbe * alloc_pagedir(unsigned nr_pages)
 {
-	calc_order();
-	pagedir_save = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD,
-							     pagedir_order);
-	if (!pagedir_save)
-		return -ENOMEM;
-	memset(pagedir_save, 0, (1 << pagedir_order) * PAGE_SIZE);
-	pagedir_nosave = pagedir_save;
-	return 0;
+	unsigned num;
+	struct pbe *pblist, *pbe, *p;
+
+	if (!nr_pages)
+		return NULL;
+
+	pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages);
+	pblist = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
+	for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages;
+        		pbe = pbe->next, num += PBES_PER_PAGE) {
+		p = pbe;
+		pbe += PB_PAGE_SKIP;
+		do
+			p->next = p + 1;
+		while (p++ < pbe);
+		pbe->next = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
+	}
+	if (pbe) {
+		for (num -= PBES_PER_PAGE - 1, p = pbe; num < nr_pages; p++, num++)
+			p->next = p + 1;
+	} else { /* get_zeroed_page() failed */
+		free_pagedir(pblist);
+		pblist = NULL;
+        }
+	pr_debug("alloc_pagedir(): allocated %d PBEs\n", num);
+	return pblist;
 }
 
 /**
@@ -658,10 +670,8 @@ static int alloc_pagedir(void)
 static void free_image_pages(void)
 {
 	struct pbe * p;
-	int i;
 
-	p = pagedir_save;
-	for (i = 0, p = pagedir_save; i < nr_copy_pages; i++, p++) {
+	for_each_pbe(p, pagedir_save) {
 		if (p->address) {
 			ClearPageNosave(virt_to_page(p->address));
 			free_page(p->address);
@@ -672,15 +682,13 @@ static void free_image_pages(void)
 
 /**
  *	alloc_image_pages - Allocate pages for the snapshot.
- *
  */
 
 static int alloc_image_pages(void)
 {
 	struct pbe * p;
-	int i;
 
-	for (i = 0, p = pagedir_save; i < nr_copy_pages; i++, p++) {
+	for_each_pbe(p, pagedir_save) {
 		p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
 		if (!p->address)
 			return -ENOMEM;
@@ -694,7 +702,7 @@ void swsusp_free(void)
 	BUG_ON(PageNosave(virt_to_page(pagedir_save)));
 	BUG_ON(PageNosaveFree(virt_to_page(pagedir_save)));
 	free_image_pages();
-	free_pages((unsigned long) pagedir_save, pagedir_order);
+	free_pagedir(pagedir_save);
 }
 
 
@@ -752,10 +760,13 @@ static int swsusp_alloc(void)
 	if (!enough_swap())
 		return -ENOSPC;
 
-	if ((error = alloc_pagedir())) {
+	nr_copy_pages = calc_nr(nr_copy_pages);
+
+	if (!(pagedir_save = alloc_pagedir(nr_copy_pages))) {
 		printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
-		return error;
+		return -ENOMEM;
 	}
+	pagedir_nosave = pagedir_save;
 	if ((error = alloc_image_pages())) {
 		printk(KERN_ERR "suspend: Allocating image pages failed.\n");
 		swsusp_free();
@@ -763,7 +774,6 @@ static int swsusp_alloc(void)
 	}
 
 	nr_copy_pages_check = nr_copy_pages;
-	pagedir_order_check = pagedir_order;
 	return 0;
 }
 
@@ -780,7 +790,7 @@ static int suspend_prepare_image(void)
 
 	drain_local_pages();
 	count_data_pages();
-	printk("swsusp: Need to copy %u pages\n",nr_copy_pages);
+	printk("swsusp: Need to copy %u pages\n", nr_copy_pages);
 
 	error = swsusp_alloc();
 	if (error)
@@ -867,7 +877,6 @@ int swsusp_suspend(void)
 asmlinkage int swsusp_restore(void)
 {
 	BUG_ON (nr_copy_pages_check != nr_copy_pages);
-	BUG_ON (pagedir_order_check != pagedir_order);
 	
 	/* Even mappings of "global" things (vmalloc) need to be fixed */
 	__flush_tlb_global();
_