From: Nick Piggin <nickpiggin@yahoo.com.au>

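Make affine wakeups and "passive load balancing" in try_to_wake_up() more
conservative: do not pull a task off an idle CPU onto a busy one, only balance
passively when the runqueues are badly out of balance, and base the cache-hot
test on rq->timestamp_last_tick.  Aggressive affine wakeups were causing huge
regressions in dbt3-pgsql on 8-way non-NUMA systems at OSDL's STP.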


---

 25-akpm/kernel/sched.c |   24 +++++++++++++++++-------
 1 files changed, 17 insertions(+), 7 deletions(-)

diff -puN kernel/sched.c~sched-wakebalance-fixes kernel/sched.c
--- 25/kernel/sched.c~sched-wakebalance-fixes	2004-03-28 23:16:21.699105800 -0800
+++ 25-akpm/kernel/sched.c	2004-03-28 23:16:21.703105192 -0800
@@ -706,15 +706,26 @@ static int try_to_wake_up(task_t * p, un
 	if (unlikely(task_running(rq, p) || cpu_is_offline(this_cpu)))
 		goto out_activate;
 
-	new_cpu = this_cpu; /* Wake to this CPU if we can */
+	new_cpu = cpu;
 
 	if (cpu == this_cpu || unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
 		goto out_set_cpu;
 
-	/* Passive load balancing */
 	load = get_low_cpu_load(cpu);
-	this_load = get_high_cpu_load(this_cpu) + SCHED_LOAD_SCALE;
-	if (load > this_load)
+	this_load = get_high_cpu_load(this_cpu);
+
+	/* Don't pull the task off an idle CPU to a busy one */
+	if (load < SCHED_LOAD_SCALE/2 && this_load > SCHED_LOAD_SCALE/2)
+		goto out_set_cpu;
+
+	new_cpu = this_cpu; /* Wake to this CPU if we can */
+
+	/*
+	 * Passive load balancing. If the queues are very out of balance
+	 * we might as well balance here rather than wait for the periodic
+	 * load balancer.
+	 */
+	if (load > this_load + SCHED_LOAD_SCALE*2)
 		goto out_set_cpu;
 
 	now = sched_clock();
@@ -726,7 +737,7 @@ static int try_to_wake_up(task_t * p, un
 	for_each_domain(this_cpu, sd) {
 		if (!(sd->flags & SD_WAKE_AFFINE))
 			break;
-		if (now - p->timestamp < sd->cache_hot_time)
+		if (rq->timestamp_last_tick - p->timestamp < sd->cache_hot_time)
 			break;
 
 		if (cpu_isset(cpu, sd->span))
@@ -1264,8 +1275,7 @@ int can_migrate_task(task_t *p, runqueue
 	/* Aggressive migration if we've failed balancing */
 	if (idle == NEWLY_IDLE ||
 			sd->nr_balance_failed < sd->cache_nice_tries) {
-		if ((rq->timestamp_last_tick - p->timestamp)
-						< sd->cache_hot_time)
+		if (rq->timestamp_last_tick - p->timestamp < sd->cache_hot_time)
 			return 0;
 	}
 

_