diff -purN -X /home/mbligh/.diff.exclude 390-slabtune/kernel/sched.c 400-less_bouncy/kernel/sched.c
--- 390-slabtune/kernel/sched.c	2003-12-11 17:27:31.000000000 -0800
+++ 400-less_bouncy/kernel/sched.c	2003-12-11 17:29:33.000000000 -0800
@@ -1474,11 +1474,16 @@ static inline void pull_task(runqueue_t 
  */
 
 static inline int
-can_migrate_task(task_t *tsk, runqueue_t *rq, int this_cpu, int idle)
+can_migrate_task(task_t *tsk, runqueue_t *rq, int this_cpu, int idle, int crossnode)
 {
 	unsigned long delta = sched_clock() - tsk->timestamp;
+	int task_is_warm = (delta <= JIFFIES_TO_NS(cache_decay_ticks)) ? 1 : 0;
 
-	if (!idle && (delta <= JIFFIES_TO_NS(cache_decay_ticks)))
+	/* only idle processors may steal warm tasks ... */
+	if (!idle && task_is_warm)
+		return 0;
+	/* ... but no stealing warm tasks cross node on NUMA systems */
+	if (crossnode && task_is_warm)
 		return 0;
 	if (task_running(rq, tsk))
 		return 0;
@@ -1495,7 +1500,7 @@ can_migrate_task(task_t *tsk, runqueue_t
  * We call this with the current runqueue locked,
  * irqs disabled.
  */
-static void load_balance(runqueue_t *this_rq, int idle, cpumask_t cpumask)
+static void load_balance(runqueue_t *this_rq, int idle, int crossnode, cpumask_t cpumask)
 {
 	int imbalance, idx, this_cpu = smp_processor_id();
 	runqueue_t *busiest;
@@ -1559,7 +1564,7 @@ skip_queue:
 
 	curr = curr->prev;
 
-	if (!can_migrate_task(tmp, busiest, this_cpu, idle)) {
+	if (!can_migrate_task(tmp, busiest, this_cpu, idle, crossnode)) {
 		if (curr != head)
 			goto skip_queue;
 		idx++;
@@ -1605,7 +1610,7 @@ static void balance_node(runqueue_t *thi
 		SCHEDSTAT_INC(this_cpu, lb_bnode);
 		cpu_set(this_cpu, cpumask);
 		spin_lock(&this_rq->lock);
-		load_balance(this_rq, idle, cpumask);
+		load_balance(this_rq, idle, 1, cpumask);
 		spin_unlock(&this_rq->lock);
 	}
 }
@@ -1634,7 +1639,7 @@ static void rebalance_tick(runqueue_t *t
 		if (!(j % IDLE_REBALANCE_TICK)) {
 			spin_lock(&this_rq->lock);
 			SCHEDSTAT_INC(this_cpu, lb_idle);
-			load_balance(this_rq, idle, cpu_to_node_mask(this_cpu));
+			load_balance(this_rq, idle, 0, cpu_to_node_mask(this_cpu));
 			spin_unlock(&this_rq->lock);
 		}
 		return;
@@ -1646,7 +1651,7 @@ static void rebalance_tick(runqueue_t *t
 	if (!(j % BUSY_REBALANCE_TICK)) {
 		spin_lock(&this_rq->lock);
 		SCHEDSTAT_INC(this_cpu, lb_busy);
-		load_balance(this_rq, idle, cpu_to_node_mask(this_cpu));
+		load_balance(this_rq, idle, 0, cpu_to_node_mask(this_cpu));
 		spin_unlock(&this_rq->lock);
 	}
 }
@@ -1867,7 +1872,7 @@ pick_next_task:
 	if (unlikely(!rq->nr_running)) {
 #ifdef CONFIG_SMP
 		SCHEDSTAT_INC(this_cpu, lb_resched);
-		load_balance(rq, 1, cpu_to_node_mask(smp_processor_id()));
+		load_balance(rq, 1, 0, cpu_to_node_mask(smp_processor_id()));
 		if (rq->nr_running)
 			goto pick_next_task;
 #endif
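
For reference, the gate this patch builds can be read on its own: a task whose cache is still warm (its last-run timestamp is within cache_decay_ticks of now) may only be stolen by an idle CPU, and never across a NUMA node boundary. The fragment below is a standalone userspace sketch of that decision, not kernel code; the struct, the fixed decay window and the timestamps are simplified stand-ins for task_t, runqueue_t, sched_clock() and JIFFIES_TO_NS(cache_decay_ticks).

	/* Standalone illustration of the can_migrate_task() gate added by this
	 * patch.  Plain C, no kernel headers; the decay window and timestamps
	 * are stand-ins for sched_clock()/cache_decay_ticks. */
	#include <stdio.h>

	#define CACHE_DECAY_NS 10000000ULL	/* stand-in for JIFFIES_TO_NS(cache_decay_ticks) */

	struct fake_task {
		unsigned long long timestamp;	/* last time the task ran, in ns */
		int running;			/* stand-in for task_running(rq, tsk) */
	};

	static int can_migrate(struct fake_task *tsk, unsigned long long now,
			       int idle, int crossnode)
	{
		unsigned long long delta = now - tsk->timestamp;
		int task_is_warm = (delta <= CACHE_DECAY_NS);

		/* only idle processors may steal warm tasks ... */
		if (!idle && task_is_warm)
			return 0;
		/* ... but no stealing warm tasks cross node on NUMA systems */
		if (crossnode && task_is_warm)
			return 0;
		if (tsk->running)
			return 0;
		return 1;
	}

	int main(void)
	{
		struct fake_task warm = { .timestamp = 95000000ULL, .running = 0 };
		unsigned long long now = 100000000ULL;	/* delta = 5ms, inside the decay window */

		/* busy CPU, same node: warm task stays put */
		printf("busy/same-node:  %d\n", can_migrate(&warm, now, 0, 0));
		/* idle CPU, same node: warm task may be pulled */
		printf("idle/same-node:  %d\n", can_migrate(&warm, now, 1, 0));
		/* idle CPU, other node: still refused by the new crossnode check */
		printf("idle/cross-node: %d\n", can_migrate(&warm, now, 1, 1));
		return 0;
	}

As the diff shows, only balance_node() passes crossnode=1, since it is the inter-node balancing path; the per-node callers in rebalance_tick() and the idle-reschedule path in schedule() pass 0, so intra-node stealing of warm tasks by idle CPUs is still allowed.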