From: Andrew Morton <akpm@osdl.org>

- Move the CONFIG_NUMA ifdef out of init/main.c and into include/linux/mm.h,
  which now provides a no-op stub for the non-NUMA case

- Coding style fixups

- Fit the code into an 80-col xterm

- Return -ENOMEM on failure, not -ENOBUFS

- Factor out the common batch-size calculation into zone_batchsize().  (This
  fixes a bug: the "clamp to 2^n-1" logic was missing for NUMA.)  See the
  worked example after this list.

- Fix a missing conversion in zoneinfo_show(): use the zone_pcp() accessor
  instead of taking the address of zone->pageset[i] directly.
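
As a quick illustration of what zone_batchsize() computes (for both the NUMA
and non-NUMA paths), here is a minimal userspace sketch of the same
arithmetic.  It is not part of the patch: batchsize(), the local fls() helper,
the 4KiB page size and the 1GiB example zone are all assumptions made purely
for illustration.

	#include <stdio.h>

	#define EXAMPLE_PAGE_SIZE	4096UL	/* assumed 4KiB pages */

	/* mimics the kernel's fls(): fls(0) == 0, fls(1) == 1, ... */
	static int fls(unsigned int x)
	{
		int r = 0;

		while (x) {
			x >>= 1;
			r++;
		}
		return r;
	}

	/* userspace copy of the zone_batchsize() arithmetic in this patch */
	static int batchsize(unsigned long present_pages)
	{
		int batch;

		batch = present_pages / 1024;	/* ~1/1000th of the zone */
		if (batch * EXAMPLE_PAGE_SIZE > 256 * 1024)	/* cap at 256KiB */
			batch = (256 * 1024) / EXAMPLE_PAGE_SIZE;
		batch /= 4;			/* we effectively *= 4 below */
		if (batch < 1)
			batch = 1;

		/* clamp to 2^n - 1 to avoid power-of-two cache aliasing */
		return (1 << fls(batch + batch / 2)) - 1;
	}

	int main(void)
	{
		/* hypothetical 1GiB zone: 262144 present pages */
		printf("batch = %d\n", batchsize(262144));
		return 0;
	}

For that example zone the first stage gives a batch of 16; the 2^n-1 clamp
bumps it to 31.  The non-NUMA path in free_area_init_core() already applied
the clamp, and with this patch the NUMA path in process_zones() does too.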

Cc: Christoph Lameter <christoph@lameter.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 include/linux/mm.h |    5 ++
 init/main.c        |    2 -
 mm/page_alloc.c    |   89 +++++++++++++++++++++++++++--------------------------
 base/node.c        |    0 
 linux/mmzone.h     |    0 
 mempolicy.c        |    0 
 6 files changed, 52 insertions(+), 44 deletions(-)

diff -puN drivers/base/node.c~node-local-per-cpu-pages-tidy drivers/base/node.c
diff -puN include/linux/mm.h~node-local-per-cpu-pages-tidy include/linux/mm.h
--- 25/include/linux/mm.h~node-local-per-cpu-pages-tidy	2005-06-06 00:13:53.000000000 -0700
+++ 25-akpm/include/linux/mm.h	2005-06-06 00:13:53.000000000 -0700
@@ -783,7 +783,12 @@ extern void mem_init(void);
 extern void show_mem(void);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
+
+#ifdef CONFIG_NUMA
 extern void setup_per_cpu_pageset(void);
+#else
+static inline void setup_per_cpu_pageset(void) {}
+#endif
 
 /* prio_tree.c */
 void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
diff -puN include/linux/mmzone.h~node-local-per-cpu-pages-tidy include/linux/mmzone.h
diff -puN init/main.c~node-local-per-cpu-pages-tidy init/main.c
--- 25/init/main.c~node-local-per-cpu-pages-tidy	2005-06-06 00:13:53.000000000 -0700
+++ 25-akpm/init/main.c	2005-06-06 00:13:53.000000000 -0700
@@ -490,9 +490,7 @@ asmlinkage void __init start_kernel(void
 	vfs_caches_init_early();
 	mem_init();
 	kmem_cache_init();
-#ifdef CONFIG_NUMA
 	setup_per_cpu_pageset();
-#endif
 	numa_policy_init();
 	if (late_time_init)
 		late_time_init();
diff -puN mm/mempolicy.c~node-local-per-cpu-pages-tidy mm/mempolicy.c
diff -puN mm/page_alloc.c~node-local-per-cpu-pages-tidy mm/page_alloc.c
--- 25/mm/page_alloc.c~node-local-per-cpu-pages-tidy	2005-06-06 00:13:53.000000000 -0700
+++ 25-akpm/mm/page_alloc.c	2005-06-06 00:13:53.000000000 -0700
@@ -1668,12 +1668,44 @@ void zonetable_add(struct zone *zone, in
 	memmap_init_zone((size), (nid), (zone), (start_pfn))
 #endif
 
+static int __devinit zone_batchsize(struct zone *zone)
+{
+	int batch;
+
+	/*
+	 * The per-cpu-pages pools are set to around 1000th of the
+	 * size of the zone.  But no more than 1/4 of a meg - there's
+	 * no point in going beyond the size of L2 cache.
+	 *
+	 * OK, so we don't know how big the cache is.  So guess.
+	 */
+	batch = zone->present_pages / 1024;
+	if (batch * PAGE_SIZE > 256 * 1024)
+		batch = (256 * 1024) / PAGE_SIZE;
+	batch /= 4;		/* We effectively *= 4 below */
+	if (batch < 1)
+		batch = 1;
+
+	/*
+	 * Clamp the batch to a 2^n - 1 value. Having a power
+	 * of 2 value was found to be more likely to have
+	 * suboptimal cache aliasing properties in some cases.
+	 *
+	 * For example if 2 tasks are alternately allocating
+	 * batches of pages, one task can end up with a lot
+	 * of pages of one half of the possible page colors
+	 * and the other with pages of the other colors.
+	 */
+	batch = (1 << fls(batch + batch/2)) - 1;
+	return batch;
+}
+
 #ifdef CONFIG_NUMA
 /*
  * Dynamicaly allocate memory for the
  * per cpu pageset array in struct zone.
  */
-static inline int __devinit process_zones(int cpu)
+static int __devinit process_zones(int cpu)
 {
 	struct zone *zone, *dzone;
 	int i;
@@ -1683,29 +1715,26 @@ static inline int __devinit process_zone
 
 		npageset = kmalloc_node(sizeof(struct per_cpu_pageset),
 					 GFP_KERNEL, cpu_to_node(cpu));
-		if(!npageset) {
+		if (!npageset) {
 			zone->pageset[cpu] = NULL;
 			goto bad;
 		}
 
-		if(zone->pageset[cpu]) {
-			memcpy(npageset, zone->pageset[cpu], sizeof(struct per_cpu_pageset));
+		if (zone->pageset[cpu]) {
+			memcpy(npageset, zone->pageset[cpu],
+					sizeof(struct per_cpu_pageset));
 
 			/* Relocate lists */
-			for(i = 0; i<2; i++) {
+			for (i = 0; i < 2; i++) {
 				INIT_LIST_HEAD(&npageset->pcp[i].list);
-				list_splice(&zone->pageset[cpu]->pcp[i].list, &npageset->pcp[i].list);
+				list_splice(&zone->pageset[cpu]->pcp[i].list,
+					&npageset->pcp[i].list);
 			}
  		} else {
 			struct per_cpu_pages *pcp;
 			unsigned long batch;
 
-			batch = zone->present_pages / 1024;
-			if (batch * PAGE_SIZE > 256 * 1024)
-				batch = (256 * 1024) / PAGE_SIZE;
-			batch /= 4;             /* We effectively *= 4 below */
-			if (batch < 1)
-				batch = 1;
+			batch = zone_batchsize(zone);
 
 			pcp = &npageset->pcp[0];		/* hot */
 			pcp->count = 0;
@@ -1727,12 +1756,12 @@ static inline int __devinit process_zone
 	return 0;
 bad:
 	for_each_zone(dzone) {
-		if(dzone == zone)
+		if (dzone == zone)
 			break;
 		kfree(dzone->pageset[cpu]);
 		dzone->pageset[cpu] = NULL;
 	}
-	return -ENOBUFS;
+	return -ENOMEM;
 }
 
 static int __devinit pageset_cpuup_callback(struct notifier_block *nfb,
@@ -1741,9 +1770,9 @@ static int __devinit pageset_cpuup_callb
 {
 	int cpu = (long)hcpu;
 
-	switch(action) {
+	switch (action) {
 		case CPU_UP_PREPARE:
-			if(process_zones(cpu))
+			if (process_zones(cpu))
 				goto bad;
 			break;
 #ifdef CONFIG_HOTPLUG_CPU
@@ -1826,31 +1855,7 @@ static void __init free_area_init_core(s
 
 		zone->temp_priority = zone->prev_priority = DEF_PRIORITY;
 
-		/*
-		 * The per-cpu-pages pools are set to around 1000th of the
-		 * size of the zone.  But no more than 1/4 of a meg - there's
-		 * no point in going beyond the size of L2 cache.
-		 *
-		 * OK, so we don't know how big the cache is.  So guess.
-		 */
-		batch = zone->present_pages / 1024;
-		if (batch * PAGE_SIZE > 256 * 1024)
-			batch = (256 * 1024) / PAGE_SIZE;
-		batch /= 4;		/* We effectively *= 4 below */
-		if (batch < 1)
-			batch = 1;
-
-		/*
-		 * Clamp the batch to a 2^n - 1 value. Having a power
-		 * of 2 value was found to be more likely to have
-		 * suboptimal cache aliasing properties in some cases.
-		 *
-		 * For example if 2 tasks are alternately allocating
-		 * batches of pages, one task can end up with a lot
-		 * of pages of one half of the possible page colors
-		 * and the other with pages of the other colors.
-		 */
-		batch = (1 << fls(batch + batch/2)) - 1;
+		batch = zone_batchsize(zone);
 
 		for (cpu = 0; cpu < NR_CPUS; cpu++) {
 			struct per_cpu_pages *pcp;
@@ -2083,7 +2088,7 @@ static int zoneinfo_show(struct seq_file
 			struct per_cpu_pageset *pageset;
 			int j;
 
-			pageset = &zone->pageset[i];
+			pageset = zone_pcp(zone, i);
 			for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
 				if (pageset->pcp[j].count)
 					break;
_