From: Li Shaohua <shaohua.li@intel.com>

Boot a CPU at runtime.

Signed-off-by: Li Shaohua<shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 arch/i386/kernel/smpboot.c |  112 +++++++++++++++++++++++++++++++++------------
 drivers/base/cpu.c         |    8 ++-
 include/asm-i386/smp.h     |    3 +
 3 files changed, 94 insertions(+), 29 deletions(-)

diff -puN arch/i386/kernel/smpboot.c~physical-cpu-hot-add arch/i386/kernel/smpboot.c
--- 25/arch/i386/kernel/smpboot.c~physical-cpu-hot-add	2005-06-18 02:48:48.000000000 -0700
+++ 25-akpm/arch/i386/kernel/smpboot.c	2005-06-18 02:48:48.000000000 -0700
@@ -91,6 +91,12 @@ cpumask_t cpu_callout_map;
 EXPORT_SYMBOL(cpu_callout_map);
 static cpumask_t smp_commenced_mask;
 
+/* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there
+ * is no way to resync one AP against BP. TBD: for prescott and above, we
+ * should use IA64's algorithm
+ */
+static int __devinitdata tsc_sync_disabled;
+
 /* Per CPU bogomips and other parameters */
 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_data);
@@ -428,7 +434,7 @@ static void __devinit smp_callin(void)
 	/*
 	 *      Synchronize the TSC with the BP
 	 */
-	if (cpu_has_tsc && cpu_khz)
+	if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
 		synchronize_tsc_ap();
 }
 
@@ -830,6 +836,31 @@ static inline int alloc_cpu_id(void)
 	return cpu;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS];
+static inline struct task_struct * alloc_idle_task(int cpu)
+{
+	struct task_struct *idle;
+
+	if ((idle = cpu_idle_tasks[cpu]) != NULL) {
+		/* initialize thread_struct.  we really want to avoid destroy
+		 * idle tread
+		 */
+		idle->thread.esp = (unsigned long)(((struct pt_regs *)
+			(THREAD_SIZE + (unsigned long) idle->thread_info)) - 1);
+		init_idle(idle, cpu);
+		return idle;
+	}
+	idle = fork_idle(cpu);
+
+	if (!IS_ERR(idle))
+		cpu_idle_tasks[cpu] = idle;
+	return idle;
+}
+#else
+#define alloc_idle_task(cpu) fork_idle(cpu)
+#endif
+
 static int __devinit do_boot_cpu(int apicid, int cpu)
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -849,7 +880,7 @@ static int __devinit do_boot_cpu(int api
 	 * We can't use kernel_thread since we must avoid to
 	 * reschedule the child.
 	 */
-	idle = fork_idle(cpu);
+	idle = alloc_idle_task(cpu);
 	if (IS_ERR(idle))
 		panic("failed fork for CPU %d", cpu);
 	idle->thread.eip = (unsigned long) start_secondary;
@@ -952,6 +983,55 @@ void cpu_exit_clear(void)
 	cpu_clear(cpu, smp_commenced_mask);
 	unmap_cpu_to_logical_apicid(cpu);
 }
+
+struct warm_boot_cpu_info {
+	struct completion *complete;
+	int apicid;
+	int cpu;
+};
+
+static void __devinit do_warm_boot_cpu(void *p)
+{
+	struct warm_boot_cpu_info *info = p;
+	do_boot_cpu(info->apicid, info->cpu);
+	complete(info->complete);
+}
+
+int __devinit smp_prepare_cpu(int cpu)
+{
+	DECLARE_COMPLETION(done);
+	struct warm_boot_cpu_info info;
+	struct work_struct task;
+	int	apicid, ret;
+
+	lock_cpu_hotplug();
+	apicid = x86_cpu_to_apicid[cpu];
+	if (apicid == BAD_APICID) {
+		ret = -ENODEV;
+		goto exit;
+	}
+
+	info.complete = &done;
+	info.apicid = apicid;
+	info.cpu = cpu;
+	INIT_WORK(&task, do_warm_boot_cpu, &info);
+
+	tsc_sync_disabled = 1;
+
+	/* init low mem mapping */
+	memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+			sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS);
+	flush_tlb_all();
+	schedule_work(&task);
+	wait_for_completion(&done);
+
+	tsc_sync_disabled = 0;
+	zap_low_mappings();
+	ret = 0;
+exit:
+	unlock_cpu_hotplug();
+	return ret;
+}
 #endif
 
 static void smp_tune_scheduling (void)
@@ -1195,24 +1275,6 @@ void __devinit smp_prepare_boot_cpu(void
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-
-/* must be called with the cpucontrol mutex held */
-static int __devinit cpu_enable(unsigned int cpu)
-{
-	/* get the target out of its holding state */
-	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
-	wmb();
-
-	/* wait for the processor to ack it. timeout? */
-	while (!cpu_online(cpu))
-		cpu_relax();
-
-	fixup_irqs(cpu_online_map);
-	/* counter the disable in fixup_irqs() */
-	local_irq_enable();
-	return 0;
-}
-
 static void
 remove_siblinginfo(int cpu)
 {
@@ -1298,14 +1360,6 @@ int __devinit __cpu_up(unsigned int cpu)
 		return -EIO;
 	}
 
-#ifdef CONFIG_HOTPLUG_CPU
-	/* Already up, and in cpu_quiescent now? */
-	if (cpu_isset(cpu, smp_commenced_mask)) {
-		cpu_enable(cpu);
-		return 0;
-	}
-#endif
-
 	local_irq_enable();
 	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
 	/* Unleash the CPU! */
@@ -1321,10 +1375,12 @@ void __init smp_cpus_done(unsigned int m
 	setup_ioapic_dest();
 #endif
 	zap_low_mappings();
+#ifndef CONFIG_HOTPLUG_CPU
 	/*
 	 * Disable executability of the SMP trampoline:
 	 */
 	set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
+#endif
 }
 
 void __init smp_intr_init(void)
diff -puN drivers/base/cpu.c~physical-cpu-hot-add drivers/base/cpu.c
--- 25/drivers/base/cpu.c~physical-cpu-hot-add	2005-06-18 02:48:48.000000000 -0700
+++ 25-akpm/drivers/base/cpu.c	2005-06-18 02:48:48.000000000 -0700
@@ -16,6 +16,10 @@ struct sysdev_class cpu_sysdev_class = {
 EXPORT_SYMBOL(cpu_sysdev_class);
 
 #ifdef CONFIG_HOTPLUG_CPU
+#ifndef __HAVE_ARCH_SMP_PREPARE_CPU
+#define smp_prepare_cpu(cpu) (0)
+#endif
+
 static ssize_t show_online(struct sys_device *dev, char *buf)
 {
 	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
@@ -36,7 +40,9 @@ static ssize_t store_online(struct sys_d
 			kobject_hotplug(&dev->kobj, KOBJ_OFFLINE);
 		break;
 	case '1':
-		ret = cpu_up(cpu->sysdev.id);
+		ret = smp_prepare_cpu(cpu->sysdev.id);
+		if (ret == 0)
+			ret = cpu_up(cpu->sysdev.id);
 		break;
 	default:
 		ret = -EINVAL;
diff -puN include/asm-i386/smp.h~physical-cpu-hot-add include/asm-i386/smp.h
--- 25/include/asm-i386/smp.h~physical-cpu-hot-add	2005-06-18 02:48:48.000000000 -0700
+++ 25-akpm/include/asm-i386/smp.h	2005-06-18 02:48:48.000000000 -0700
@@ -51,6 +51,9 @@ extern u8 x86_cpu_to_apicid[];
 #ifdef CONFIG_HOTPLUG_CPU
 extern void cpu_exit_clear(void);
 extern void cpu_uninit(void);
+
+#define __HAVE_ARCH_SMP_PREPARE_CPU
+extern int smp_prepare_cpu(int cpu);
 #endif
 
 /*
_