From: Zwane Mwaikambo <zwane@arm.linux.org.uk>

Introduce cpu_idle_wait() on the architectures whose modules modify
pm_idle; it ensures that all processors have updated their cached copies
of pm_idle before the module's exit path proceeds.  This addresses the
bug report at http://bugme.osdl.org/show_bug.cgi?id=1716 and replaces
the current fix, which violates normal RCU usage as pointed out by
Stephen, Dipankar and Paul.
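
In outline, the replacement is a per-cpu flag handshake rather than an
RCU read-side section.  The snippet below simply restates the i386 hunk
further down (the ia64 and x86_64 versions are the same), so every name
in it comes from this patch:

	/* Unload path -- cpu_idle_wait(): mark every online CPU stale,
	 * then wait for each of them to acknowledge. */
	for_each_online_cpu(cpu)
		cpu_set(cpu, cpu_idle_map);
	wmb();
	do {
		schedule_timeout(HZ);
		cpus_and(map, cpu_idle_map, cpu_online_map);
	} while (!cpus_empty(map));

	/* Idle loop -- each iteration: acknowledge, then re-read pm_idle. */
	if (cpu_isset(cpu, cpu_idle_map))
		cpu_clear(cpu, cpu_idle_map);
	rmb();
	idle = pm_idle;

Once the online bits have drained from cpu_idle_map, every online CPU
has re-read pm_idle at least once since cpu_idle_wait() was called, so
none of them can still pick up the routine that is about to be unloaded.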

Signed-off-by: Zwane Mwaikambo <zwane@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/arch/i386/kernel/apm.c       |    2 +-
 25-akpm/arch/i386/kernel/process.c   |   30 +++++++++++++++++++++++-------
 25-akpm/arch/ia64/kernel/process.c   |   30 +++++++++++++++++++++++-------
 25-akpm/arch/x86_64/kernel/process.c |   31 ++++++++++++++++++++++++-------
 25-akpm/drivers/acpi/processor.c     |    2 +-
 25-akpm/include/asm-i386/system.h    |    1 +
 25-akpm/include/asm-ia64/system.h    |    1 +
 25-akpm/include/asm-x86_64/system.h  |    2 ++
 8 files changed, 76 insertions(+), 23 deletions(-)

diff -puN arch/i386/kernel/apm.c~remove-rcu-abuse-in-cpu_idle arch/i386/kernel/apm.c
--- 25/arch/i386/kernel/apm.c~remove-rcu-abuse-in-cpu_idle	2004-12-11 22:29:47.151927368 -0800
+++ 25-akpm/arch/i386/kernel/apm.c	2004-12-11 22:29:47.167924936 -0800
@@ -2369,7 +2369,7 @@ static void __exit apm_exit(void)
 		 * (pm_idle), Wait for all processors to update cached/local
 		 * copies of pm_idle before proceeding.
 		 */
-		synchronize_kernel();
+		cpu_idle_wait();
 	}
 	if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
 	    && (apm_info.connection_version > 0x0100)) {
diff -puN arch/i386/kernel/process.c~remove-rcu-abuse-in-cpu_idle arch/i386/kernel/process.c
--- 25/arch/i386/kernel/process.c~remove-rcu-abuse-in-cpu_idle	2004-12-11 22:29:47.153927064 -0800
+++ 25-akpm/arch/i386/kernel/process.c	2004-12-11 22:29:47.168924784 -0800
@@ -72,6 +72,7 @@ unsigned long thread_saved_pc(struct tas
  * Powermanagement idle function, if any..
  */
 void (*pm_idle)(void);
+static cpumask_t cpu_idle_map;
 
 void disable_hlt(void)
 {
@@ -144,16 +145,16 @@ static void poll_idle (void)
  */
 void cpu_idle (void)
 {
+	int cpu = smp_processor_id();
+
 	/* endless idle loop with no priority at all */
 	while (1) {
 		while (!need_resched()) {
 			void (*idle)(void);
-			/*
-			 * Mark this as an RCU critical section so that
-			 * synchronize_kernel() in the unload path waits
-			 * for our completion.
-			 */
-			rcu_read_lock();
+
+			if (cpu_isset(cpu, cpu_idle_map))
+				cpu_clear(cpu, cpu_idle_map);
+			rmb();
 			idle = pm_idle;
 
 			if (!idle)
@@ -161,12 +162,27 @@ void cpu_idle (void)
 
 			irq_stat[smp_processor_id()].idle_timestamp = jiffies;
 			idle();
-			rcu_read_unlock();
 		}
 		schedule();
 	}
 }
 
+void cpu_idle_wait(void)
+{
+	int cpu;
+	cpumask_t map;
+
+	for_each_online_cpu(cpu)
+		cpu_set(cpu, cpu_idle_map);
+
+	wmb();
+	do {
+		schedule_timeout(HZ);
+		cpus_and(map, cpu_idle_map, cpu_online_map);
+	} while (!cpus_empty(map));
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
 /*
  * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
  * which can obviate IPI to trigger checking of need_resched.
diff -puN arch/ia64/kernel/process.c~remove-rcu-abuse-in-cpu_idle arch/ia64/kernel/process.c
--- 25/arch/ia64/kernel/process.c~remove-rcu-abuse-in-cpu_idle	2004-12-11 22:29:47.154926912 -0800
+++ 25-akpm/arch/ia64/kernel/process.c	2004-12-11 22:29:47.169924632 -0800
@@ -46,6 +46,7 @@
 #include "sigframe.h"
 
 void (*ia64_mark_idle)(int);
+static cpumask_t cpu_idle_map;
 
 unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
@@ -225,10 +226,28 @@ static inline void play_dead(void)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
+
+void cpu_idle_wait(void)
+{
+	int cpu;
+	cpumask_t map;
+
+	for_each_online_cpu(cpu)
+		cpu_set(cpu, cpu_idle_map);
+
+	wmb();
+	do {
+		schedule_timeout(HZ);
+		cpus_and(map, cpu_idle_map, cpu_online_map);
+	} while (!cpus_empty(map));
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
 void __attribute__((noreturn))
 cpu_idle (void *unused)
 {
 	void (*mark_idle)(int) = ia64_mark_idle;
+	int cpu = smp_processor_id();
 
 	/* endless idle loop with no priority at all */
 	while (1) {
@@ -241,17 +260,14 @@ cpu_idle (void *unused)
 
 			if (mark_idle)
 				(*mark_idle)(1);
-			/*
-			 * Mark this as an RCU critical section so that
-			 * synchronize_kernel() in the unload path waits
-			 * for our completion.
-			 */
-			rcu_read_lock();
+
+			if (cpu_isset(cpu, cpu_idle_map))
+				cpu_clear(cpu, cpu_idle_map);
+			rmb();
 			idle = pm_idle;
 			if (!idle)
 				idle = default_idle;
 			(*idle)();
-			rcu_read_unlock();
 		}
 
 		if (mark_idle)
diff -puN arch/x86_64/kernel/process.c~remove-rcu-abuse-in-cpu_idle arch/x86_64/kernel/process.c
--- 25/arch/x86_64/kernel/process.c~remove-rcu-abuse-in-cpu_idle	2004-12-11 22:29:47.156926608 -0800
+++ 25-akpm/arch/x86_64/kernel/process.c	2004-12-11 22:29:47.170924480 -0800
@@ -61,6 +61,7 @@ EXPORT_SYMBOL(boot_option_idle_override)
  * Powermanagement idle function, if any..
  */
 void (*pm_idle)(void);
+static cpumask_t cpu_idle_map;
 
 void disable_hlt(void)
 {
@@ -123,6 +124,23 @@ static void poll_idle (void)
 	}
 }
 
+
+void cpu_idle_wait(void)
+{
+	int cpu;
+	cpumask_t map;
+
+	for_each_online_cpu(cpu)
+		cpu_set(cpu, cpu_idle_map);
+
+	wmb();
+	do {
+		schedule_timeout(HZ);
+		cpus_and(map, cpu_idle_map, cpu_online_map);
+	} while (!cpus_empty(map));
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
 /*
  * The idle thread. There's no useful work to be
  * done, so just try to conserve power and have a
@@ -131,21 +149,20 @@ static void poll_idle (void)
  */
 void cpu_idle (void)
 {
+	int cpu = smp_processor_id();
+
 	/* endless idle loop with no priority at all */
 	while (1) {
 		while (!need_resched()) {
 			void (*idle)(void);
-			/*
-			 * Mark this as an RCU critical section so that
-			 * synchronize_kernel() in the unload path waits
-			 * for our completion.
-			 */
-			rcu_read_lock();
+
+			if (cpu_isset(cpu, cpu_idle_map))
+				cpu_clear(cpu, cpu_idle_map);
+			rmb();
 			idle = pm_idle;
 			if (!idle)
 				idle = default_idle;
 			idle();
-			rcu_read_unlock();
 		}
 		schedule();
 	}
diff -puN drivers/acpi/processor.c~remove-rcu-abuse-in-cpu_idle drivers/acpi/processor.c
--- 25/drivers/acpi/processor.c~remove-rcu-abuse-in-cpu_idle	2004-12-11 22:29:47.158926304 -0800
+++ 25-akpm/drivers/acpi/processor.c	2004-12-11 22:29:47.172924176 -0800
@@ -2578,7 +2578,7 @@ acpi_processor_remove (
 		 * (pm_idle), Wait for all processors to update cached/local
 		 * copies of pm_idle before proceeding.
 		 */
-		synchronize_kernel();
+		cpu_idle_wait();
 	}
 
 	status = acpi_remove_notify_handler(pr->handle, ACPI_DEVICE_NOTIFY, 
diff -puN include/asm-i386/system.h~remove-rcu-abuse-in-cpu_idle include/asm-i386/system.h
--- 25/include/asm-i386/system.h~remove-rcu-abuse-in-cpu_idle	2004-12-11 22:29:47.159926152 -0800
+++ 25-akpm/include/asm-i386/system.h	2004-12-11 22:29:47.173924024 -0800
@@ -543,5 +543,6 @@ void disable_hlt(void);
 void enable_hlt(void);
 
 extern int es7000_plat;
+void cpu_idle_wait(void);
 
 #endif
diff -puN include/asm-ia64/system.h~remove-rcu-abuse-in-cpu_idle include/asm-ia64/system.h
--- 25/include/asm-ia64/system.h~remove-rcu-abuse-in-cpu_idle	2004-12-11 22:29:47.160926000 -0800
+++ 25-akpm/include/asm-ia64/system.h	2004-12-11 22:29:47.173924024 -0800
@@ -284,6 +284,7 @@ do {						\
 
 #define ia64_platform_is(x) (strcmp(x, platform_name) == 0)
 
+void cpu_idle_wait(void);
 #endif /* __KERNEL__ */
 
 #endif /* __ASSEMBLY__ */
diff -puN include/asm-x86_64/system.h~remove-rcu-abuse-in-cpu_idle include/asm-x86_64/system.h
--- 25/include/asm-x86_64/system.h~remove-rcu-abuse-in-cpu_idle	2004-12-11 22:29:47.162925696 -0800
+++ 25-akpm/include/asm-x86_64/system.h	2004-12-11 22:29:47.174923872 -0800
@@ -326,6 +326,8 @@ static inline unsigned long __cmpxchg(vo
 /* For spinlocks etc */
 #define local_irq_save(x) 	do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
 
+void cpu_idle_wait(void);
+
 /*
  * disable hlt during certain critical i/o operations
  */
_