From: Alex Williamson <alex.williamson@hp.com>

I noticed the function __read_page_state() curiously high in a q-tools
profile of a write to a software raid0 device.  This appears to be because
we check page_states for all possible cpus, and all NR_CPUS cpus are
marked possible when CONFIG_HOTPLUG_CPU=y.  The default config for ia64 is
now NR_CPUS=512, so on a little 8-way box, this is a significant waste of
time.  The patch below updates __read_page_state() and __get_page_state()
to only count page_state info for online cpus.  To keep the stats
consistent, the page_alloc notifier is updated to move the page_states of
the cpu going offline onto the cpu running the notifier.  In my profile,
this dropped __read_page_state() back into the noise and boosted block
write performance by 5% (as measured by spew - http://spew.berlios.de).

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/mm/page_alloc.c |   34 ++++++++++++++++++++++------------
 1 files changed, 22 insertions(+), 12 deletions(-)

diff -puN mm/page_alloc.c~collect-page_states-only-from-online-cpus mm/page_alloc.c
--- 25/mm/page_alloc.c~collect-page_states-only-from-online-cpus	Fri Dec 17 14:42:34 2004
+++ 25-akpm/mm/page_alloc.c	Fri Dec 17 14:42:34 2004
@@ -938,18 +938,18 @@ void __get_page_state(struct page_state 
 	int cpu = 0;
 
 	memset(ret, 0, sizeof(*ret));
+
+	cpu = first_cpu(cpu_online_map);
 	while (cpu < NR_CPUS) {
 		unsigned long *in, *out, off;
 
-		if (!cpu_possible(cpu)) {
-			cpu++;
-			continue;
-		}
-
 		in = (unsigned long *)&per_cpu(page_states, cpu);
-		cpu++;
-		if (cpu < NR_CPUS && cpu_possible(cpu))
+
+		cpu = next_cpu(cpu, cpu_online_map);
+
+		if (cpu < NR_CPUS)
 			prefetch(&per_cpu(page_states, cpu));
+
 		out = (unsigned long *)ret;
 		for (off = 0; off < nr; off++)
 			*out++ += *in++;
@@ -976,12 +976,9 @@ unsigned long __read_page_state(unsigned
 	unsigned long ret = 0;
 	int cpu;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_online_cpu(cpu) {
 		unsigned long in;
 
-		if (!cpu_possible(cpu))
-			continue;
-
 		in = (unsigned long)&per_cpu(page_states, cpu) + offset;
 		ret += *((unsigned long *)in);
 	}
@@ -1811,8 +1808,9 @@ struct seq_operations vmstat_op = {
 static int page_alloc_cpu_notify(struct notifier_block *self,
 				 unsigned long action, void *hcpu)
 {
-	int cpu = (unsigned long)hcpu;
+	int i, cpu = (unsigned long)hcpu;
 	long *count;
+	unsigned long *src, *dest;
 
 	if (action == CPU_DEAD) {
 		/* Drain local pagecache count. */
@@ -1821,6 +1819,18 @@ static int page_alloc_cpu_notify(struct 
 		*count = 0;
 		local_irq_disable();
 		__drain_pages(cpu);
+
+		/* Add dead cpu's page_states to our own. */
+		dest = (unsigned long *)&__get_cpu_var(page_states);
+		src = (unsigned long *)&per_cpu(page_states, cpu);
+
+		i = sizeof(struct page_state) / sizeof(unsigned long);
+		do {
+			i--;
+			dest[i] += src[i];
+			src[i] = 0;
+		} while (i > 0);
+
 		local_irq_enable();
 	}
 	return NOTIFY_OK;
_