From: Nick Piggin <nickpiggin@yahoo.com.au>




---

 25-akpm/include/linux/sched.h |    9 ++++++---
 25-akpm/kernel/sched.c        |   35 +++++++++++++++++++----------------
 2 files changed, 25 insertions(+), 19 deletions(-)

diff -puN include/linux/sched.h~sched-damp-passive-balance include/linux/sched.h
--- 25/include/linux/sched.h~sched-damp-passive-balance	2004-04-03 15:20:55.603952496 -0800
+++ 25-akpm/include/linux/sched.h	2004-04-03 15:20:55.609951584 -0800
@@ -543,7 +543,8 @@ do { if (atomic_dec_and_test(&(tsk)->usa
 #define SD_BALANCE_EXEC		2	/* Balance on exec */
 #define SD_WAKE_IDLE		4	/* Wake to idle CPU on task wakeup */
 #define SD_WAKE_AFFINE		8	/* Wake task to waking CPU */
-#define SD_SHARE_CPUPOWER	16	/* Domain members share cpu power */
+#define SD_WAKE_BALANCE		16	/* Perform balancing at task wakeup */
+#define SD_SHARE_CPUPOWER	32	/* Domain members share cpu power */
 
 struct sched_group {
 	struct sched_group *next;	/* Must be a circular list */
@@ -612,7 +613,8 @@ struct sched_domain {
 	.cache_nice_tries	= 1,			\
 	.per_cpu_gain		= 100,			\
 	.flags			= SD_BALANCE_NEWIDLE	\
-				| SD_WAKE_AFFINE,	\
+				| SD_WAKE_AFFINE	\
+				| SD_WAKE_BALANCE,	\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 	.nr_balance_failed	= 0,			\
@@ -631,7 +633,8 @@ struct sched_domain {
 	.cache_hot_time		= (10*1000000),		\
 	.cache_nice_tries	= 1,			\
 	.per_cpu_gain		= 100,			\
-	.flags			= SD_BALANCE_EXEC,	\
+	.flags			= SD_BALANCE_EXEC	\
+				| SD_WAKE_BALANCE,	\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 	.nr_balance_failed	= 0,			\
diff -puN kernel/sched.c~sched-damp-passive-balance kernel/sched.c
--- 25/kernel/sched.c~sched-damp-passive-balance	2004-04-03 15:20:55.605952192 -0800
+++ 25-akpm/kernel/sched.c	2004-04-03 15:20:55.612951128 -0800
@@ -760,25 +760,28 @@ static int try_to_wake_up(task_t * p, un
 	new_cpu = this_cpu; /* Wake to this CPU if we can */
 
 	/*
-	 * Passive load balancing. If the queues are very out of balance
-	 * we might as well balance here rather than the periodic load
-	 * balancing.
-	 */
-	if (load > this_load + SCHED_LOAD_SCALE*2)
-		goto out_set_cpu;
-
-	/*
-	 * Migrate the task to the waking domain.
-	 * Do not violate hard affinity.
+	 * Scan domains for affine wakeup and passive balancing
+	 * possibilities.
 	 */
 	for_each_domain(this_cpu, sd) {
-		if (!(sd->flags & SD_WAKE_AFFINE))
-			break;
-		if (task_hot(p, rq->timestamp_last_tick, sd))
-			break;
+		unsigned int imbalance;
+		/*
+		 * Start passive balancing when half the imbalance_pct
+		 * limit is reached.
+		 */
+		imbalance = sd->imbalance_pct + (sd->imbalance_pct - 100) / 2;
 
-		if (cpu_isset(cpu, sd->span))
-			goto out_set_cpu;
+		if ( ((sd->flags & SD_WAKE_AFFINE) &&
+				!task_hot(p, rq->timestamp_last_tick, sd))
+			|| ((sd->flags & SD_WAKE_BALANCE) &&
+				imbalance*this_load <= 100*load) ) {
+			/*
+			 * Now sd has SD_WAKE_AFFINE and p is cache cold in sd,
+			 * or sd has SD_WAKE_BALANCE and there is an imbalance.
+			 */
+			if (cpu_isset(cpu, sd->span))
+				goto out_set_cpu;
+		}
 	}
 
 	new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */

_