From: Rusty Russell Someone is triggering the WARN_ON() in futex.c. We know that software suspend could do it, in theory. But no one else should be. This code adds a PF_FUTEX_DEBUG flag, which is set in the futex code when we sleep, and also when we wake up. If a task with PF_FUTEX_DEBUG is woken by a task without PF_FUTEX_DEBUG, we have found our culprit. --- include/linux/sched.h | 1 + kernel/futex.c | 15 +++++++++++++-- kernel/sched.c | 8 ++++++++ kernel/timer.c | 12 +++++++++++- 4 files changed, 33 insertions(+), 3 deletions(-) diff -puN include/linux/sched.h~futex_wait-debug include/linux/sched.h --- 25/include/linux/sched.h~futex_wait-debug 2004-03-08 23:27:30.000000000 -0800 +++ 25-akpm/include/linux/sched.h 2004-03-08 23:27:30.000000000 -0800 @@ -532,6 +532,7 @@ do { if (atomic_dec_and_test(&(tsk)->usa #define PF_SWAPOFF 0x00080000 /* I am in swapoff */ #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ #define PF_SYNCWRITE 0x00200000 /* I am doing a sync write */ +#define PF_FUTEX_DEBUG 0x00400000 #ifdef CONFIG_SMP #define SCHED_LOAD_SHIFT 7 /* increase resolution of load calculations */ diff -puN kernel/futex.c~futex_wait-debug kernel/futex.c --- 25/kernel/futex.c~futex_wait-debug 2004-03-08 23:27:30.000000000 -0800 +++ 25-akpm/kernel/futex.c 2004-03-08 23:27:30.000000000 -0800 @@ -269,7 +269,11 @@ static void wake_futex(struct futex_q *q * The lock in wake_up_all() is a crucial memory barrier after the * list_del_init() and also before assigning to q->lock_ptr. */ + + current->flags |= PF_FUTEX_DEBUG; wake_up_all(&q->waiters); + current->flags &= ~PF_FUTEX_DEBUG; + /* * The waiting task can free the futex_q as soon as this is written, * without taking any locks. This must come last. @@ -490,8 +494,11 @@ static int futex_wait(unsigned long uadd * !list_empty() is safe here without any lock. * q.lock_ptr != 0 is not safe, because of ordering against wakeup. 
*/ - if (likely(!list_empty(&q.list))) + if (likely(!list_empty(&q.list))) { + current->flags |= PF_FUTEX_DEBUG; time = schedule_timeout(time); + current->flags &= ~PF_FUTEX_DEBUG; + } __set_current_state(TASK_RUNNING); /* @@ -505,7 +512,11 @@ static int futex_wait(unsigned long uadd if (time == 0) return -ETIMEDOUT; /* A spurious wakeup should never happen. */ - WARN_ON(!signal_pending(current)); + if (!signal_pending(current)) { + printk("futex_wait woken: %lu %i %lu\n", + uaddr, val, time); + WARN_ON(1); + } return -EINTR; out_unqueue: diff -puN kernel/sched.c~futex_wait-debug kernel/sched.c --- 25/kernel/sched.c~futex_wait-debug 2004-03-08 23:27:30.000000000 -0800 +++ 25-akpm/kernel/sched.c 2004-03-08 23:27:30.000000000 -0800 @@ -761,6 +761,14 @@ static int try_to_wake_up(task_t * p, un } goto out_activate; + if ((p->flags & PF_FUTEX_DEBUG) + && !(current->flags & PF_FUTEX_DEBUG)) { + printk("%s %i waking %s: %i %i\n", + current->comm, (int)in_interrupt(), + p->comm, p->tgid, p->pid); + WARN_ON(1); + } + repeat_lock_task: task_rq_unlock(rq, &flags); rq = task_rq_lock(p, &flags); diff -puN kernel/timer.c~futex_wait-debug kernel/timer.c --- 25/kernel/timer.c~futex_wait-debug 2004-03-08 23:27:30.000000000 -0800 +++ 25-akpm/kernel/timer.c 2004-03-08 23:27:30.000000000 -0800 @@ -971,6 +971,13 @@ static void process_timeout(unsigned lon wake_up_process((task_t *)__data); } +static void futex_timeout(unsigned long __data) +{ + current->flags |= PF_FUTEX_DEBUG; + wake_up_process((task_t *)__data); + current->flags &= ~PF_FUTEX_DEBUG; +} + /** * schedule_timeout - sleep until timeout * @timeout: timeout value in jiffies @@ -1037,7 +1044,10 @@ fastcall signed long schedule_timeout(si init_timer(&timer); timer.expires = expire; timer.data = (unsigned long) current; - timer.function = process_timeout; + if (current->flags & PF_FUTEX_DEBUG) + timer.function = futex_timeout; + else + timer.function = process_timeout; add_timer(&timer); schedule(); _