From: Rusty Russell <rusty@rustcorp.com.au>

Someone is triggering the WARN_ON() in futex.c.  We know that software
suspend could do it, in theory.  But noone else should be.

This code adds a PF_FUTEX_DEBUG flag, which is set in the futex code
when we sleep, and also when we wake up.  If a task with
PF_FUTEX_DEBUG is woken by a task without PF_FUTEX_DEBUG, we have
found our culprit.



---

 25-akpm/include/linux/sched.h |    1 +
 25-akpm/kernel/futex.c        |   15 +++++++++++++--
 25-akpm/kernel/sched.c        |    8 ++++++++
 25-akpm/kernel/timer.c        |   12 +++++++++++-
 4 files changed, 33 insertions(+), 3 deletions(-)

diff -puN include/linux/sched.h~futex_wait-debug include/linux/sched.h
--- 25/include/linux/sched.h~futex_wait-debug	Thu Feb 19 15:16:48 2004
+++ 25-akpm/include/linux/sched.h	Thu Feb 19 15:16:48 2004
@@ -532,6 +532,7 @@ do { if (atomic_dec_and_test(&(tsk)->usa
 #define PF_SWAPOFF	0x00080000	/* I am in swapoff */
 #define PF_LESS_THROTTLE 0x00100000	/* Throttle me less: I clean memory */
 #define PF_SYNCWRITE	0x00200000	/* I am doing a sync write */
+#define PF_FUTEX_DEBUG	0x00400000
 
 #ifdef CONFIG_SMP
 #define SCHED_LOAD_SHIFT 7	/* increase resolution of load calculations */
diff -puN kernel/futex.c~futex_wait-debug kernel/futex.c
--- 25/kernel/futex.c~futex_wait-debug	Thu Feb 19 15:16:48 2004
+++ 25-akpm/kernel/futex.c	Thu Feb 19 15:16:48 2004
@@ -269,7 +269,11 @@ static void wake_futex(struct futex_q *q
 	 * The lock in wake_up_all() is a crucial memory barrier after the
 	 * list_del_init() and also before assigning to q->lock_ptr.
 	 */
+
+	current->flags |= PF_FUTEX_DEBUG;
 	wake_up_all(&q->waiters);
+	current->flags &= ~PF_FUTEX_DEBUG;
+
 	/*
 	 * The waiting task can free the futex_q as soon as this is written,
 	 * without taking any locks.  This must come last.
@@ -490,8 +494,11 @@ static int futex_wait(unsigned long uadd
 	 * !list_empty() is safe here without any lock.
 	 * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
 	 */
-	if (likely(!list_empty(&q.list)))
+	if (likely(!list_empty(&q.list))) {
+		current->flags |= PF_FUTEX_DEBUG;
 		time = schedule_timeout(time);
+		current->flags &= ~PF_FUTEX_DEBUG;
+	}
 	__set_current_state(TASK_RUNNING);
 
 	/*
@@ -505,7 +512,11 @@ static int futex_wait(unsigned long uadd
 	if (time == 0)
 		return -ETIMEDOUT;
 	/* A spurious wakeup should never happen. */
-	WARN_ON(!signal_pending(current));
+	if (!signal_pending(current)) {
+		printk("futex_wait woken: %lu %i %lu\n",
+		       uaddr, val, time);
+		WARN_ON(1);
+	}
 	return -EINTR;
 
  out_unqueue:
diff -puN kernel/sched.c~futex_wait-debug kernel/sched.c
--- 25/kernel/sched.c~futex_wait-debug	Thu Feb 19 15:16:48 2004
+++ 25-akpm/kernel/sched.c	Thu Feb 19 15:16:48 2004
@@ -762,6 +762,14 @@ static int try_to_wake_up(task_t * p, un
 	}
 	goto out_activate;
 
+	if ((p->flags & PF_FUTEX_DEBUG)
+	    && !(current->flags & PF_FUTEX_DEBUG)) {
+		printk("%s %i waking %s: %i %i\n",
+		       current->comm, (int)in_interrupt(),
+		       p->comm, p->tgid, p->pid);
+		WARN_ON(1);
+	}
+
 repeat_lock_task:
 	task_rq_unlock(rq, &flags);
 	rq = task_rq_lock(p, &flags);
diff -puN kernel/timer.c~futex_wait-debug kernel/timer.c
--- 25/kernel/timer.c~futex_wait-debug	Thu Feb 19 15:16:48 2004
+++ 25-akpm/kernel/timer.c	Thu Feb 19 15:16:48 2004
@@ -971,6 +971,13 @@ static void process_timeout(unsigned lon
 	wake_up_process((task_t *)__data);
 }
 
+static void futex_timeout(unsigned long __data)
+{
+	current->flags |= PF_FUTEX_DEBUG;
+	wake_up_process((task_t *)__data);
+	current->flags &= ~PF_FUTEX_DEBUG;
+}
+
 /**
  * schedule_timeout - sleep until timeout
  * @timeout: timeout value in jiffies
@@ -1037,7 +1044,10 @@ signed long schedule_timeout(signed long
 	init_timer(&timer);
 	timer.expires = expire;
 	timer.data = (unsigned long) current;
-	timer.function = process_timeout;
+	if (current->flags & PF_FUTEX_DEBUG)
+		timer.function = futex_timeout;
+	else
+		timer.function = process_timeout;
 
 	add_timer(&timer);
 	schedule();

_