From: Matthew Dobson <colpatch@us.ibm.com>

Here's yet another version of a patch to implement per-arch SD_*_INITs.
This follows the same basic idea as my last patch, but:

1) defines an arch-specific SD_NODE_INIT for the 4 NUMA arches (i386,
   x86_64, IA64 & PPC64), and

2) defines *default* SD_CPU_INIT & SD_SIBLING_INIT for *all* arches,
   with the possibility of them being overridden by simply defining an
   arch-specific version in include/asm/topology.h.

The motivation behind this third version of the patch is that Martin feels
that there should be no "default" NUMA initializer because NUMA
characteristics are *very* arch/platform specific, and hence a "default"
NUMA initializer can only lead to confusion.  I agree with most of that,
but don't quite see as much harm in having a default as he does. 
Nevertheless, to keep him quiet, I've run up this version of the patch. 
Martin, please run this through your magic test suite and make sure I
didn't break anything trivial.

Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/arch/ia64/kernel/domain.c     |    1 
 25-akpm/include/asm-i386/topology.h   |   20 +++++++++
 25-akpm/include/asm-ia64/processor.h  |   21 ---------
 25-akpm/include/asm-ia64/topology.h   |   20 +++++++++
 25-akpm/include/asm-ppc64/topology.h  |   20 +++++++++
 25-akpm/include/asm-x86_64/topology.h |   22 ++++++++++
 25-akpm/include/linux/sched.h         |   74 ----------------------------------
 25-akpm/include/linux/topology.h      |   72 +++++++++++++++++++++++++++++++++
 8 files changed, 156 insertions(+), 94 deletions(-)

diff -puN arch/ia64/kernel/domain.c~sched_domains-make-sd_node_init-per-arch-2 arch/ia64/kernel/domain.c
--- 25/arch/ia64/kernel/domain.c~sched_domains-make-sd_node_init-per-arch-2	2004-10-02 18:33:15.005494352 -0700
+++ 25-akpm/arch/ia64/kernel/domain.c	2004-10-02 18:33:15.020492072 -0700
@@ -11,7 +11,6 @@
 #include <linux/cpumask.h>
 #include <linux/init.h>
 #include <linux/topology.h>
-#include <asm/processor.h>
 
 #define SD_NODES_PER_DOMAIN 6
 
diff -puN include/asm-i386/topology.h~sched_domains-make-sd_node_init-per-arch-2 include/asm-i386/topology.h
--- 25/include/asm-i386/topology.h~sched_domains-make-sd_node_init-per-arch-2	2004-10-02 18:33:15.006494200 -0700
+++ 25-akpm/include/asm-i386/topology.h	2004-10-02 18:33:15.020492072 -0700
@@ -72,6 +72,26 @@ static inline cpumask_t pcibus_to_cpumas
 /* Cross-node load balancing interval. */
 #define NODE_BALANCE_RATE 100
 
+/* sched_domains SD_NODE_INIT for NUMAQ machines */
+#define SD_NODE_INIT (struct sched_domain) {		\
+	.span			= CPU_MASK_NONE,	\
+	.parent			= NULL,			\
+	.groups			= NULL,			\
+	.min_interval		= 8,			\
+	.max_interval		= 32,			\
+	.busy_factor		= 32,			\
+	.imbalance_pct		= 125,			\
+	.cache_hot_time		= (10*1000),		\
+	.cache_nice_tries	= 1,			\
+	.per_cpu_gain		= 100,			\
+	.flags			= SD_LOAD_BALANCE	\
+				| SD_BALANCE_EXEC	\
+				| SD_WAKE_BALANCE,	\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 1,			\
+	.nr_balance_failed	= 0,			\
+}
+
 #else /* !CONFIG_NUMA */
 /*
  * Other i386 platforms should define their own version of the 
diff -puN include/asm-ia64/processor.h~sched_domains-make-sd_node_init-per-arch-2 include/asm-ia64/processor.h
--- 25/include/asm-ia64/processor.h~sched_domains-make-sd_node_init-per-arch-2	2004-10-02 18:33:15.008493896 -0700
+++ 25-akpm/include/asm-ia64/processor.h	2004-10-02 18:33:15.021491920 -0700
@@ -337,27 +337,6 @@ struct task_struct;
 /* Prepare to copy thread state - unlazy all lazy status */
 #define prepare_to_copy(tsk)	do { } while (0)
 
-#ifdef CONFIG_NUMA
-#define SD_NODE_INIT (struct sched_domain) {		\
-	.span			= CPU_MASK_NONE,	\
-	.parent			= NULL,			\
-	.groups			= NULL,			\
-	.min_interval		= 80,			\
-	.max_interval		= 320,			\
-	.busy_factor		= 320,			\
-	.imbalance_pct		= 125,			\
-	.cache_hot_time		= (10*1000000),		\
-	.cache_nice_tries	= 1,			\
-	.per_cpu_gain		= 100,			\
-	.flags			= SD_LOAD_BALANCE	\
-				| SD_BALANCE_EXEC	\
-				| SD_WAKE_BALANCE,	\
-	.last_balance		= jiffies,		\
-	.balance_interval	= 10,			\
-	.nr_balance_failed	= 0,			\
-}
-#endif
-
 /*
  * This is the mechanism for creating a new kernel thread.
  *
diff -puN include/asm-ia64/topology.h~sched_domains-make-sd_node_init-per-arch-2 include/asm-ia64/topology.h
--- 25/include/asm-ia64/topology.h~sched_domains-make-sd_node_init-per-arch-2	2004-10-02 18:33:15.009493744 -0700
+++ 25-akpm/include/asm-ia64/topology.h	2004-10-02 18:33:15.021491920 -0700
@@ -45,6 +45,26 @@
 
 void build_cpu_to_node_map(void);
 
+/* sched_domains SD_NODE_INIT for IA64 NUMA machines */
+#define SD_NODE_INIT (struct sched_domain) {		\
+	.span			= CPU_MASK_NONE,	\
+	.parent			= NULL,			\
+	.groups			= NULL,			\
+	.min_interval		= 80,			\
+	.max_interval		= 320,			\
+	.busy_factor		= 320,			\
+	.imbalance_pct		= 125,			\
+	.cache_hot_time		= (10*1000000),		\
+	.cache_nice_tries	= 1,			\
+	.per_cpu_gain		= 100,			\
+	.flags			= SD_LOAD_BALANCE	\
+				| SD_BALANCE_EXEC	\
+				| SD_WAKE_BALANCE,	\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 10,			\
+	.nr_balance_failed	= 0,			\
+}
+
 #endif /* CONFIG_NUMA */
 
 #include <asm-generic/topology.h>
diff -puN include/asm-ppc64/topology.h~sched_domains-make-sd_node_init-per-arch-2 include/asm-ppc64/topology.h
--- 25/include/asm-ppc64/topology.h~sched_domains-make-sd_node_init-per-arch-2	2004-10-02 18:33:15.011493440 -0700
+++ 25-akpm/include/asm-ppc64/topology.h	2004-10-02 18:33:15.022491768 -0700
@@ -40,6 +40,26 @@ static inline int node_to_first_cpu(int 
 /* Cross-node load balancing interval. */
 #define NODE_BALANCE_RATE 10
 
+/* sched_domains SD_NODE_INIT for PPC64 machines */
+#define SD_NODE_INIT (struct sched_domain) {		\
+	.span			= CPU_MASK_NONE,	\
+	.parent			= NULL,			\
+	.groups			= NULL,			\
+	.min_interval		= 8,			\
+	.max_interval		= 32,			\
+	.busy_factor		= 32,			\
+	.imbalance_pct		= 125,			\
+	.cache_hot_time		= (10*1000),		\
+	.cache_nice_tries	= 1,			\
+	.per_cpu_gain		= 100,			\
+	.flags			= SD_LOAD_BALANCE	\
+				| SD_BALANCE_EXEC	\
+				| SD_WAKE_BALANCE,	\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 1,			\
+	.nr_balance_failed	= 0,			\
+}
+
 #else /* !CONFIG_NUMA */
 
 #include <asm-generic/topology.h>
diff -puN include/asm-x86_64/topology.h~sched_domains-make-sd_node_init-per-arch-2 include/asm-x86_64/topology.h
--- 25/include/asm-x86_64/topology.h~sched_domains-make-sd_node_init-per-arch-2	2004-10-02 18:33:15.012493288 -0700
+++ 25-akpm/include/asm-x86_64/topology.h	2004-10-02 18:33:15.022491768 -0700
@@ -34,6 +34,28 @@ static inline cpumask_t __pcibus_to_cpum
 
 #define NODE_BALANCE_RATE 30	/* CHECKME */ 
 
+#ifdef CONFIG_NUMA
+/* sched_domains SD_NODE_INIT for x86_64 machines */
+#define SD_NODE_INIT (struct sched_domain) {		\
+	.span			= CPU_MASK_NONE,	\
+	.parent			= NULL,			\
+	.groups			= NULL,			\
+	.min_interval		= 8,			\
+	.max_interval		= 32,			\
+	.busy_factor		= 32,			\
+	.imbalance_pct		= 125,			\
+	.cache_hot_time		= (10*1000),		\
+	.cache_nice_tries	= 1,			\
+	.per_cpu_gain		= 100,			\
+	.flags			= SD_LOAD_BALANCE	\
+				| SD_BALANCE_EXEC	\
+				| SD_WAKE_BALANCE,	\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 1,			\
+	.nr_balance_failed	= 0,			\
+}
+#endif
+
 #endif
 
 #include <asm-generic/topology.h>
diff -puN include/linux/sched.h~sched_domains-make-sd_node_init-per-arch-2 include/linux/sched.h
--- 25/include/linux/sched.h~sched_domains-make-sd_node_init-per-arch-2	2004-10-02 18:33:15.014492984 -0700
+++ 25-akpm/include/linux/sched.h	2004-10-02 18:33:15.024491464 -0700
@@ -29,6 +29,7 @@
 #include <linux/completion.h>
 #include <linux/pid.h>
 #include <linux/percpu.h>
+#include <linux/topology.h>
 
 struct exec_domain;
 
@@ -482,78 +483,7 @@ extern cpumask_t cpu_isolated_map;
 extern void init_sched_build_groups(struct sched_group groups[],
 	                        cpumask_t span, int (*group_fn)(int cpu));
 extern void cpu_attach_domain(struct sched_domain *sd, int cpu);
-#endif
-
-#ifndef ARCH_HAS_SCHED_TUNE
-#ifdef CONFIG_SCHED_SMT
-#define ARCH_HAS_SCHED_WAKE_IDLE
-/* Common values for SMT siblings */
-#define SD_SIBLING_INIT (struct sched_domain) {		\
-	.span			= CPU_MASK_NONE,	\
-	.parent			= NULL,			\
-	.groups			= NULL,			\
-	.min_interval		= 1,			\
-	.max_interval		= 2,			\
-	.busy_factor		= 8,			\
-	.imbalance_pct		= 110,			\
-	.cache_hot_time		= 0,			\
-	.cache_nice_tries	= 0,			\
-	.per_cpu_gain		= 25,			\
-	.flags			= SD_LOAD_BALANCE	\
-				| SD_BALANCE_NEWIDLE	\
-				| SD_BALANCE_EXEC	\
-				| SD_WAKE_AFFINE	\
-				| SD_WAKE_IDLE		\
-				| SD_SHARE_CPUPOWER,	\
-	.last_balance		= jiffies,		\
-	.balance_interval	= 1,			\
-	.nr_balance_failed	= 0,			\
-}
-#endif
-
-/* Common values for CPUs */
-#define SD_CPU_INIT (struct sched_domain) {		\
-	.span			= CPU_MASK_NONE,	\
-	.parent			= NULL,			\
-	.groups			= NULL,			\
-	.min_interval		= 1,			\
-	.max_interval		= 4,			\
-	.busy_factor		= 64,			\
-	.imbalance_pct		= 125,			\
-	.cache_hot_time		= (5*1000/2),		\
-	.cache_nice_tries	= 1,			\
-	.per_cpu_gain		= 100,			\
-	.flags			= SD_LOAD_BALANCE	\
-				| SD_BALANCE_NEWIDLE	\
-				| SD_BALANCE_EXEC	\
-				| SD_WAKE_AFFINE	\
-				| SD_WAKE_BALANCE,	\
-	.last_balance		= jiffies,		\
-	.balance_interval	= 1,			\
-	.nr_balance_failed	= 0,			\
-}
-
-#if defined(CONFIG_NUMA) && !defined(SD_NODE_INIT)
-#define SD_NODE_INIT (struct sched_domain) {		\
-	.span			= CPU_MASK_NONE,	\
-	.parent			= NULL,			\
-	.groups			= NULL,			\
-	.min_interval		= 8,			\
-	.max_interval		= 32,			\
-	.busy_factor		= 32,			\
-	.imbalance_pct		= 125,			\
-	.cache_hot_time		= (10*1000),		\
-	.cache_nice_tries	= 1,			\
-	.per_cpu_gain		= 100,			\
-	.flags			= SD_LOAD_BALANCE	\
-				| SD_BALANCE_EXEC	\
-				| SD_WAKE_BALANCE,	\
-	.last_balance		= jiffies,		\
-	.balance_interval	= 1,			\
-	.nr_balance_failed	= 0,			\
-}
-#endif
-#endif /* ARCH_HAS_SCHED_TUNE */
+#endif /* ARCH_HAS_SCHED_DOMAIN */
 #endif /* CONFIG_SMP */
 
 
diff -puN include/linux/topology.h~sched_domains-make-sd_node_init-per-arch-2 include/linux/topology.h
--- 25/include/linux/topology.h~sched_domains-make-sd_node_init-per-arch-2	2004-10-02 18:33:15.015492832 -0700
+++ 25-akpm/include/linux/topology.h	2004-10-02 18:33:15.025491312 -0700
@@ -61,4 +61,76 @@ static inline int __next_node_with_cpus(
 #define PENALTY_FOR_NODE_WITH_CPUS	(1)
 #endif
 
+/*
+ * Below are the 3 major initializers used in building sched_domains:
+ * SD_SIBLING_INIT, for SMT domains
+ * SD_CPU_INIT, for SMP domains
+ * SD_NODE_INIT, for NUMA domains
+ *
+ * Any architecture that cares to do any tuning to these values should do so
+ * by defining their own arch-specific initializer in include/asm/topology.h.
+ * A definition there will automagically override these default initializers
+ * and allow arch-specific performance tuning of sched_domains.
+ */
+#ifdef CONFIG_SCHED_SMT
+/* MCD - Do we really need this?  It is always on if CONFIG_SCHED_SMT is,
+ * so can't we drop this in favor of CONFIG_SCHED_SMT?
+ */
+#define ARCH_HAS_SCHED_WAKE_IDLE
+/* Common values for SMT siblings */
+#ifndef SD_SIBLING_INIT
+#define SD_SIBLING_INIT (struct sched_domain) {		\
+	.span			= CPU_MASK_NONE,	\
+	.parent			= NULL,			\
+	.groups			= NULL,			\
+	.min_interval		= 1,			\
+	.max_interval		= 2,			\
+	.busy_factor		= 8,			\
+	.imbalance_pct		= 110,			\
+	.cache_hot_time		= 0,			\
+	.cache_nice_tries	= 0,			\
+	.per_cpu_gain		= 25,			\
+	.flags			= SD_LOAD_BALANCE	\
+				| SD_BALANCE_NEWIDLE	\
+				| SD_BALANCE_EXEC	\
+				| SD_WAKE_AFFINE	\
+				| SD_WAKE_IDLE		\
+				| SD_SHARE_CPUPOWER,	\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 1,			\
+	.nr_balance_failed	= 0,			\
+}
+#endif
+#endif /* CONFIG_SCHED_SMT */
+
+/* Common values for CPUs */
+#ifndef SD_CPU_INIT
+#define SD_CPU_INIT (struct sched_domain) {		\
+	.span			= CPU_MASK_NONE,	\
+	.parent			= NULL,			\
+	.groups			= NULL,			\
+	.min_interval		= 1,			\
+	.max_interval		= 4,			\
+	.busy_factor		= 64,			\
+	.imbalance_pct		= 125,			\
+	.cache_hot_time		= (5*1000/2),		\
+	.cache_nice_tries	= 1,			\
+	.per_cpu_gain		= 100,			\
+	.flags			= SD_LOAD_BALANCE	\
+				| SD_BALANCE_NEWIDLE	\
+				| SD_BALANCE_EXEC	\
+				| SD_WAKE_AFFINE	\
+				| SD_WAKE_BALANCE,	\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 1,			\
+	.nr_balance_failed	= 0,			\
+}
+#endif
+
+#ifdef CONFIG_NUMA
+#ifndef SD_NODE_INIT
+#error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!!
+#endif
+#endif /* CONFIG_NUMA */
+
 #endif /* _LINUX_TOPOLOGY_H */
_