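Move rwsem's up_read wakeups out of the semaphore's wait_lock

__rwsem_do_wake() now moves the waiters it is going to wake onto a local
list while wait_lock is held, drops wait_lock itself, and only then calls
wake_up_process() on each of them.  Because the function now releases the
lock, its callers unlock only on the paths where they do not call it.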


 linux-2.6-npiggin/lib/rwsem-spinlock.c |   27 ++++++++++-------
 linux-2.6-npiggin/lib/rwsem.c          |   51 +++++++++++++++------------------
 2 files changed, 40 insertions(+), 38 deletions(-)

diff -puN lib/rwsem.c~rwsem-scale lib/rwsem.c
--- linux-2.6/lib/rwsem.c~rwsem-scale	2004-04-12 00:28:52.000000000 +1000
+++ linux-2.6-npiggin/lib/rwsem.c	2004-04-12 00:36:58.000000000 +1000
@@ -36,13 +36,15 @@ void rwsemtrace(struct rw_semaphore *sem
  * - the spinlock must be held by the caller
  * - woken process blocks are discarded from the list after having flags zeroised
  * - writers are only woken if wakewrite is non-zero
+ *
+ * - the spinlock is released by this function before it issues the wakeups
  */
 static inline struct rw_semaphore *__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
 {
+	LIST_HEAD(wake_list);
 	struct rwsem_waiter *waiter;
-	struct list_head *next;
 	signed long oldcount;
-	int woken, loop;
+	int woken;
 
 	rwsemtrace(sem,"Entering __rwsem_do_wake");
 
@@ -64,9 +66,8 @@ static inline struct rw_semaphore *__rws
 	if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
 		goto readers_only;
 
-	list_del(&waiter->list);
+	list_move_tail(&waiter->list, &wake_list);
 	waiter->flags = 0;
-	wake_up_process(waiter->task);
 	goto out;
 
 	/* don't want to wake any writers */
@@ -75,39 +76,36 @@ static inline struct rw_semaphore *__rws
 	if (waiter->flags & RWSEM_WAITING_FOR_WRITE)
 		goto out;
 
-	/* grant an infinite number of read locks to the readers at the front of the queue
-	 * - note we increment the 'active part' of the count by the number of readers (less one
-	 *   for the activity decrement we've already done) before waking any processes up
+	/* grant an infinite number of read locks to the readers at the front
+	 * of the queue - note we increment the 'active part' of the count by
+	 * the number of readers (less one for the activity decrement we've
+	 * already done) before waking any processes up
 	 */
  readers_only:
 	woken = 0;
 	do {
+		list_move_tail(&waiter->list, &wake_list);
+		waiter->flags = 0;
 		woken++;
 
 		if (waiter->list.next==&sem->wait_list)
 			break;
 
-		waiter = list_entry(waiter->list.next,struct rwsem_waiter,list);
+		waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list);
 
 	} while (waiter->flags & RWSEM_WAITING_FOR_READ);
 
-	loop = woken;
 	woken *= RWSEM_ACTIVE_BIAS-RWSEM_WAITING_BIAS;
 	woken -= RWSEM_ACTIVE_BIAS;
 	rwsem_atomic_add(woken,sem);
 
-	next = sem->wait_list.next;
-	for (; loop>0; loop--) {
-		waiter = list_entry(next,struct rwsem_waiter,list);
-		next = waiter->list.next;
-		waiter->flags = 0;
+ out:
+	spin_unlock(&sem->wait_lock);
+	while (!list_empty(&wake_list)) {
+		waiter = list_entry(wake_list.next,struct rwsem_waiter,list);
+		list_del(&waiter->list);
 		wake_up_process(waiter->task);
 	}
-
-	sem->wait_list.next = next;
-	next->prev = &sem->wait_list;
-
- out:
 	rwsemtrace(sem,"Leaving __rwsem_do_wake");
 	return sem;
 
@@ -131,9 +129,8 @@ static inline struct rw_semaphore *rwsem
 	set_task_state(tsk,TASK_UNINTERRUPTIBLE);
 
 	/* set up my own style of waitqueue */
-	spin_lock(&sem->wait_lock);
 	waiter->task = tsk;
-
+	spin_lock(&sem->wait_lock);
 	list_add_tail(&waiter->list,&sem->wait_list);
 
 	/* note that we're now waiting on the lock, but no longer actively read-locking */
@@ -144,8 +141,8 @@ static inline struct rw_semaphore *rwsem
 	 */
 	if (!(count & RWSEM_ACTIVE_MASK))
 		sem = __rwsem_do_wake(sem,1);
-
-	spin_unlock(&sem->wait_lock);
+	else
+		spin_unlock(&sem->wait_lock);
 
 	/* wait to be given the lock */
 	for (;;) {
@@ -205,8 +202,8 @@ struct rw_semaphore fastcall *rwsem_wake
 	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
 		sem = __rwsem_do_wake(sem,1);
-
-	spin_unlock(&sem->wait_lock);
+	else
+		spin_unlock(&sem->wait_lock);
 
 	rwsemtrace(sem,"Leaving rwsem_wake");
 
@@ -227,8 +224,8 @@ struct rw_semaphore fastcall *rwsem_down
 	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
 		sem = __rwsem_do_wake(sem,0);
-
-	spin_unlock(&sem->wait_lock);
+	else
+		spin_unlock(&sem->wait_lock);
 
 	rwsemtrace(sem,"Leaving rwsem_downgrade_wake");
 	return sem;
diff -puN lib/rwsem-spinlock.c~rwsem-scale lib/rwsem-spinlock.c
--- linux-2.6/lib/rwsem-spinlock.c~rwsem-scale	2004-04-12 00:28:52.000000000 +1000
+++ linux-2.6-npiggin/lib/rwsem-spinlock.c	2004-04-12 11:09:54.000000000 +1000
@@ -47,9 +47,12 @@ void fastcall init_rwsem(struct rw_semap
  * - the spinlock must be held by the caller
  * - woken process blocks are discarded from the list after having flags zeroised
  * - writers are only woken if wakewrite is non-zero
+ *
+ * - the spinlock is released by this function before it issues the wakeups
  */
 static inline struct rw_semaphore *__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
 {
+	LIST_HEAD(wake_list);
 	struct rwsem_waiter *waiter;
 	int woken;
 
@@ -69,9 +72,8 @@ static inline struct rw_semaphore *__rws
 	 */
 	if (waiter->flags & RWSEM_WAITING_FOR_WRITE) {
 		sem->activity = -1;
-		list_del(&waiter->list);
+		list_move_tail(&waiter->list, &wake_list);
 		waiter->flags = 0;
-		wake_up_process(waiter->task);
 		goto out;
 	}
 
@@ -79,20 +81,23 @@ static inline struct rw_semaphore *__rws
  dont_wake_writers:
 	woken = 0;
 	while (waiter->flags&RWSEM_WAITING_FOR_READ) {
-		struct list_head *next = waiter->list.next;
-
-		list_del(&waiter->list);
+		list_move_tail(&waiter->list, &wake_list);
 		waiter->flags = 0;
-		wake_up_process(waiter->task);
 		woken++;
 		if (list_empty(&sem->wait_list))
 			break;
-		waiter = list_entry(next,struct rwsem_waiter,list);
+		waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list);
 	}
 
 	sem->activity += woken;
 
  out:
+	spin_unlock(&sem->wait_lock);
+	while (!list_empty(&wake_list)) {
+		waiter = list_entry(wake_list.next, struct rwsem_waiter, list);
+		list_del(&waiter->list);
+		wake_up_process(waiter->task);
+	}
 	rwsemtrace(sem,"Leaving __rwsem_do_wake");
 	return sem;
 }
@@ -278,8 +283,8 @@ void fastcall __up_write(struct rw_semap
 	sem->activity = 0;
 	if (!list_empty(&sem->wait_list))
 		sem = __rwsem_do_wake(sem, 1);
-
-	spin_unlock(&sem->wait_lock);
+	else
+		spin_unlock(&sem->wait_lock);
 
 	rwsemtrace(sem,"Leaving __up_write");
 }
@@ -297,8 +302,8 @@ void fastcall __downgrade_write(struct r
 	sem->activity = 1;
 	if (!list_empty(&sem->wait_list))
 		sem = __rwsem_do_wake(sem,0);
-
-	spin_unlock(&sem->wait_lock);
+	else
+		spin_unlock(&sem->wait_lock);
 
 	rwsemtrace(sem,"Leaving __downgrade_write");
 }

_