From: Rusty Russell <rusty@rustcorp.com.au>

[ These patches are the version of the hotplug code which atomically
  takes down CPUs, as suggested by Ingo Molnar.  They have been
  test-compiled as a group with CONFIG_SMP on and off,
  CONFIG_HOTPLUG_CPU on and off, CONFIG_MODULE_UNLOAD on and off. ]

The "bogolock" code was introduced in module.c, as a way of freezing the
machine when we wanted to remove a module.  This patch moves it out to
stop_machine.c and stop_machine.h.

Since the code changes affinity and priority (both of which are
visible to, and settable by, other processes), it's impolite to hijack
the current context, so we use a kthread.  This means callers pass in
the function to run, rather than us exposing separate "stop_machine()"
and "restart_machine()" entry points.
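
For illustration, a caller that wants the machine quiescent does
something like the sketch below (my_set_flag() and example() are
made-up names for this changelog, not part of the patch):

	#include <linux/stop_machine.h>

	/* Runs with every other CPU spinning with interrupts disabled,
	 * so nothing is holding a spinlock or sitting in a
	 * preempt-disabled region while we run. */
	static int my_set_flag(void *data)
	{
		*(int *)data = 1;
		return 0;
	}

	int example(void)
	{
		int flag = 0;

		/* NR_CPUS means "run the function on any online cpu". */
		return stop_machine_run(my_set_flag, &flag, NR_CPUS);
	}

stop_machine_run() returns whatever the function returned, or a
negative errno if the per-cpu threads could not be started.  CPU
hotplug uses __stop_machine_run() instead, so it can keep the
"kstopmachine" thread around and collect the function's return value
later via kthread_stop().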



---

 25-akpm/include/linux/stop_machine.h |   52 +++++++++
 25-akpm/init/Kconfig                 |    6 +
 25-akpm/kernel/Makefile              |    1 
 25-akpm/kernel/stop_machine.c        |  205 +++++++++++++++++++++++++++++++++++
 4 files changed, 264 insertions(+)

diff -puN /dev/null include/linux/stop_machine.h
--- /dev/null	Thu Apr 11 07:25:15 2002
+++ 25-akpm/include/linux/stop_machine.h	Wed Feb 25 15:22:59 2004
@@ -0,0 +1,52 @@
+#ifndef _LINUX_STOP_MACHINE
+#define _LINUX_STOP_MACHINE
+/* "Bogolock": stop the entire machine, disable interrupts.  This is a
+   very heavy lock, which is equivalent to grabbing every spinlock
+   (and more).  So the "read" side to such a lock is anything which
+   disables preemption. */
+#include <linux/config.h>
+#include <linux/cpu.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_SMP
+/**
+ * stop_machine_run: freeze the machine on all CPUs and run this function
+ * @fn: the function to run
+ * @data: the data ptr for the @fn()
+ * @cpu: the cpu to run @fn() on (or any, if @cpu == NR_CPUS).
+ *
+ * Description: This causes a thread to be scheduled on every other cpu,
+ * each of which disables interrupts, and finally interrupts are disabled
+ * on the current CPU.  The result is that no one is holding a spinlock
+ * or inside any other preempt-disabled region when @fn() runs.
+ *
+ * This can be thought of as a very heavy write lock, equivalent to
+ * grabbing every spinlock in the kernel. */
+int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
+
+/**
+ * __stop_machine_run: freeze the machine on all CPUs and run this function
+ * @fn: the function to run
+ * @data: the data ptr for the @fn
+ * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS).
+ *
+ * Description: This is a special version of the above, which returns the
+ * thread which ran @fn(): kthread_stop() will return the return value
+ * of @fn().  Used by hotplug cpu.
+ */
+struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
+				       unsigned int cpu);
+
+#else
+
+static inline int stop_machine_run(int (*fn)(void *), void *data,
+				   unsigned int cpu)
+{
+	int ret;
+	local_irq_disable();
+	ret = fn(data);
+	local_irq_enable();
+	return ret;
+}
+#endif /* CONFIG_SMP */
+#endif /* _LINUX_STOP_MACHINE */
diff -puN init/Kconfig~hotplugcpu-generalise-bogolock init/Kconfig
--- 25/init/Kconfig~hotplugcpu-generalise-bogolock	Wed Feb 25 15:22:59 2004
+++ 25-akpm/init/Kconfig	Wed Feb 25 15:22:59 2004
@@ -304,4 +304,10 @@ config KMOD
 	  runs modprobe with the appropriate arguments, thereby
 	  loading the module if it is available.  If unsure, say Y.
 
+config STOP_MACHINE
+	bool
+	default y
+	depends on (SMP && MODULE_UNLOAD) || HOTPLUG_CPU
+	help
+	  Need stop_machine() primitive.
 endmenu
diff -puN kernel/Makefile~hotplugcpu-generalise-bogolock kernel/Makefile
--- 25/kernel/Makefile~hotplugcpu-generalise-bogolock	Wed Feb 25 15:22:59 2004
+++ 25-akpm/kernel/Makefile	Wed Feb 25 15:22:59 2004
@@ -20,6 +20,7 @@ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_COMPAT) += compat.o compat_signal.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_IKCONFIG_PROC) += configs.o
+obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 
 ifneq ($(CONFIG_IA64),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff -puN /dev/null kernel/stop_machine.c
--- /dev/null	Thu Apr 11 07:25:15 2002
+++ 25-akpm/kernel/stop_machine.c	Wed Feb 25 15:22:59 2004
@@ -0,0 +1,205 @@
+#include <linux/stop_machine.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/cpu.h>
+#include <linux/err.h>
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+#include <asm/uaccess.h>
+
+/* Since we affect priority and affinity (both of which are visible
+ * to, and settable by, outside processes) we do the real work from
+ * a kthread. */
+
+/* Thread to stop each CPU in user context. */
+enum stopmachine_state {
+	STOPMACHINE_WAIT,
+	STOPMACHINE_PREPARE,
+	STOPMACHINE_DISABLE_IRQ,
+	STOPMACHINE_EXIT,
+};
+
+static enum stopmachine_state stopmachine_state;
+static unsigned int stopmachine_num_threads;
+static atomic_t stopmachine_thread_ack;
+static DECLARE_MUTEX(stopmachine_mutex);
+
+static int stopmachine(void *cpu)
+{
+	int irqs_disabled = 0;
+	int prepared = 0;
+
+	set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
+
+	/* Ack: we are alive */
+	mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
+	atomic_inc(&stopmachine_thread_ack);
+
+	/* Simple state machine */
+	while (stopmachine_state != STOPMACHINE_EXIT) {
+		if (stopmachine_state == STOPMACHINE_DISABLE_IRQ
+		    && !irqs_disabled) {
+			local_irq_disable();
+			irqs_disabled = 1;
+			/* Ack: irqs disabled. */
+			mb(); /* Must read state first. */
+			atomic_inc(&stopmachine_thread_ack);
+		} else if (stopmachine_state == STOPMACHINE_PREPARE
+			   && !prepared) {
+			/* Everyone is in place, hold CPU. */
+			preempt_disable();
+			prepared = 1;
+			mb(); /* Must read state first. */
+			atomic_inc(&stopmachine_thread_ack);
+		}
+		cpu_relax();
+	}
+
+	/* Ack: we are exiting. */
+	mb(); /* Must read state first. */
+	atomic_inc(&stopmachine_thread_ack);
+
+	if (irqs_disabled)
+		local_irq_enable();
+	if (prepared)
+		preempt_enable();
+
+	return 0;
+}
+
+/* Change the thread state: zero the ack count, publish the new state,
+ * then spin until all the threads have seen it and acked. */
+static void stopmachine_set_state(enum stopmachine_state state)
+{
+	atomic_set(&stopmachine_thread_ack, 0);
+	wmb();
+	stopmachine_state = state;
+	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
+		cpu_relax();
+}
+
+static int stop_machine(void)
+{
+	int i, ret = 0;
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+	mm_segment_t old_fs = get_fs();
+
+	/* One high-prio thread per cpu.  We'll do this one.  The syscall
+	 * copies the param from user space: widen the address limit. */
+	set_fs(KERNEL_DS);
+	sys_sched_setscheduler(current->pid, SCHED_FIFO, &param);
+	set_fs(old_fs);
+
+	atomic_set(&stopmachine_thread_ack, 0);
+	stopmachine_num_threads = 0;
+	stopmachine_state = STOPMACHINE_WAIT;
+
+	for_each_online_cpu(i) {
+		if (i == smp_processor_id())
+			continue;
+		ret = kernel_thread(stopmachine, (void *)(long)i, CLONE_KERNEL);
+		if (ret < 0)
+			break;
+		stopmachine_num_threads++;
+	}
+
+	/* Wait for them all to come to life. */
+	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
+		yield();
+
+	/* If some failed, kill them all. */
+	if (ret < 0) {
+		stopmachine_set_state(STOPMACHINE_EXIT);
+		return ret;
+	}
+
+	/* Don't schedule us away at this point, please. */
+	local_irq_disable();
+
+	/* Now they are all started, make them hold the CPUs, ready. */
+	stopmachine_set_state(STOPMACHINE_PREPARE);
+
+	/* Make them disable irqs. */
+	stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
+
+	return 0;
+}
+
+static void restart_machine(void)
+{
+	stopmachine_set_state(STOPMACHINE_EXIT);
+	local_irq_enable();
+}
+
+struct stop_machine_data {
+	int (*fn)(void *);
+	void *data;
+	struct completion done;
+};
+
+static int do_stop(void *_smdata)
+{
+	struct stop_machine_data *smdata = _smdata;
+	int ret;
+
+	ret = stop_machine();
+	if (ret == 0) {
+		ret = smdata->fn(smdata->data);
+		restart_machine();
+	}
+
+	/* We're done: you can kthread_stop us now */
+	complete(&smdata->done);
+
+	/* Wait for kthread_stop */
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop()) {
+		schedule();
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+	return ret;
+}
+
+struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
+				       unsigned int cpu)
+{
+	struct stop_machine_data smdata;
+	struct task_struct *p;
+
+	smdata.fn = fn;
+	smdata.data = data;
+	init_completion(&smdata.done);
+
+	down(&stopmachine_mutex);
+
+	/* If they don't care which CPU fn runs on, bind to any online one. */
+	if (cpu == NR_CPUS)
+		cpu = smp_processor_id();
+
+	p = kthread_create(do_stop, &smdata, "kstopmachine");
+	if (!IS_ERR(p)) {
+		kthread_bind(p, cpu);
+		wake_up_process(p);
+		wait_for_completion(&smdata.done);
+	}
+	up(&stopmachine_mutex);
+	return p;
+}
+
+int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
+{
+	struct task_struct *p;
+	int ret;
+
+	/* No CPUs can come up or down during this. */
+	lock_cpu_hotplug();
+	p = __stop_machine_run(fn, data, cpu);
+	if (!IS_ERR(p))
+		ret = kthread_stop(p);
+	else
+		ret = PTR_ERR(p);
+	unlock_cpu_hotplug();
+
+	return ret;
+}

_