From: William Lee Irwin III <wli@holomorphy.com>

Eliminate the inode waitqueue hashtable using bit_waitqueue() via
wait_on_bit() and wake_up_bit() to locate the waitqueue head associated
with a bit.

Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/fs/fs-writeback.c         |   20 +++++++----
 25-akpm/fs/inode.c                |   69 +++++++++++---------------------------
 25-akpm/include/linux/fs.h        |    3 +
 25-akpm/include/linux/writeback.h |    6 +--
 25-akpm/kernel/wait.c             |    1 
 5 files changed, 40 insertions(+), 59 deletions(-)

diff -puN fs/fs-writeback.c~eliminate-inode-waitqueue-hashtable fs/fs-writeback.c
--- 25/fs/fs-writeback.c~eliminate-inode-waitqueue-hashtable	2004-09-21 01:57:14.062300024 -0700
+++ 25-akpm/fs/fs-writeback.c	2004-09-21 01:57:14.072298504 -0700
@@ -244,6 +244,8 @@ static int
 __writeback_single_inode(struct inode *inode,
 			struct writeback_control *wbc)
 {
+	wait_queue_head_t *wqh;
+
 	if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_LOCK)) {
 		list_move(&inode->i_list, &inode->i_sb->s_dirty);
 		return 0;
@@ -252,12 +254,18 @@ __writeback_single_inode(struct inode *i
 	/*
 	 * It's a data-integrity sync.  We must wait.
 	 */
-	while (inode->i_state & I_LOCK) {
-		__iget(inode);
-		spin_unlock(&inode_lock);
-		__wait_on_inode(inode);
-		iput(inode);
-		spin_lock(&inode_lock);
+	if (inode->i_state & I_LOCK) {
+		DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LOCK);
+
+		wqh = bit_waitqueue(&inode->i_state, __I_LOCK);
+		do {
+			__iget(inode);
+			spin_unlock(&inode_lock);
+			__wait_on_bit(wqh, &wq, &inode->i_state, __I_LOCK,
+					inode_wait, TASK_UNINTERRUPTIBLE);
+			iput(inode);
+			spin_lock(&inode_lock);
+		} while (inode->i_state & I_LOCK);
 	}
 	return __sync_single_inode(inode, wbc);
 }
diff -puN fs/inode.c~eliminate-inode-waitqueue-hashtable fs/inode.c
--- 25/fs/inode.c~eliminate-inode-waitqueue-hashtable	2004-09-21 01:57:14.064299720 -0700
+++ 25-akpm/fs/inode.c	2004-09-21 01:57:14.074298200 -0700
@@ -1265,37 +1265,10 @@ void remove_dquot_ref(struct super_block
 
 #endif
 
-/*
- * Hashed waitqueues for wait_on_inode().  The table is pretty small - the
- * kernel doesn't lock many inodes at the same time.
- */
-#define I_WAIT_TABLE_ORDER	3
-static struct i_wait_queue_head {
-	wait_queue_head_t wqh;
-} ____cacheline_aligned_in_smp i_wait_queue_heads[1<<I_WAIT_TABLE_ORDER];
-
-/*
- * Return the address of the waitqueue_head to be used for this inode
- */
-static wait_queue_head_t *i_waitq_head(struct inode *inode)
+int inode_wait(void *word)
 {
-	return &i_wait_queue_heads[hash_ptr(inode, I_WAIT_TABLE_ORDER)].wqh;
-}
-
-void __wait_on_inode(struct inode *inode)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	wait_queue_head_t *wq = i_waitq_head(inode);
-
-	add_wait_queue(wq, &wait);
-repeat:
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	if (inode->i_state & I_LOCK) {
-		schedule();
-		goto repeat;
-	}
-	remove_wait_queue(wq, &wait);
-	__set_current_state(TASK_RUNNING);
+	schedule();
+	return 0;
 }
 
 /*
@@ -1304,36 +1277,39 @@ repeat:
  * that it isn't found.  This is because iget will immediately call
  * ->read_inode, and we want to be sure that evidence of the deletion is found
  * by ->read_inode.
- *
- * This call might return early if an inode which shares the waitq is woken up.
- * This is most easily handled by the caller which will loop around again
- * looking for the inode.
- *
  * This is called with inode_lock held.
  */
 static void __wait_on_freeing_inode(struct inode *inode)
 {
-	DECLARE_WAITQUEUE(wait, current);
-	wait_queue_head_t *wq = i_waitq_head(inode);
+	wait_queue_head_t *wq;
+	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK);
 
-	add_wait_queue(wq, &wait);
-	set_current_state(TASK_UNINTERRUPTIBLE);
+	/*
+	 * I_FREEING and I_CLEAR are cleared in process context under
+	 * inode_lock, so we have to give the tasks who would clear them
+	 * a chance to run and acquire inode_lock.
+	 */
+	if (!(inode->i_state & I_LOCK)) {
+		spin_unlock(&inode_lock);
+		yield();
+		spin_lock(&inode_lock);
+		return;
+	}
+	wq = bit_waitqueue(&inode->i_state, __I_LOCK);
+	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
 	spin_unlock(&inode_lock);
 	schedule();
-	remove_wait_queue(wq, &wait);
+	finish_wait(wq, &wait.wait);
 	spin_lock(&inode_lock);
 }
 
 void wake_up_inode(struct inode *inode)
 {
-	wait_queue_head_t *wq = i_waitq_head(inode);
-
 	/*
 	 * Prevent speculative execution through spin_unlock(&inode_lock);
 	 */
 	smp_mb();
-	if (waitqueue_active(wq))
-		wake_up_all(wq);
+	wake_up_bit(&inode->i_state, __I_LOCK);
 }
 EXPORT_SYMBOL(wake_up_inode);
 
@@ -1369,11 +1345,6 @@ void __init inode_init_early(void)
 
 void __init inode_init(unsigned long mempages)
 {
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(i_wait_queue_heads); i++)
-		init_waitqueue_head(&i_wait_queue_heads[i].wqh);
-
 	/* inode slab cache */
 	inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
 				0, SLAB_PANIC, init_once, NULL);
diff -puN include/linux/fs.h~eliminate-inode-waitqueue-hashtable include/linux/fs.h
--- 25/include/linux/fs.h~eliminate-inode-waitqueue-hashtable	2004-09-21 01:57:14.066299416 -0700
+++ 25-akpm/include/linux/fs.h	2004-09-21 01:57:14.076297896 -0700
@@ -1056,7 +1056,8 @@ struct super_operations {
 #define I_DIRTY_SYNC		1 /* Not dirty enough for O_DATASYNC */
 #define I_DIRTY_DATASYNC	2 /* Data-related inode changes pending */
 #define I_DIRTY_PAGES		4 /* Data-related inode changes pending */
-#define I_LOCK			8
+#define __I_LOCK		3
+#define I_LOCK			(1 << __I_LOCK)
 #define I_FREEING		16
 #define I_CLEAR			32
 #define I_NEW			64
diff -puN include/linux/writeback.h~eliminate-inode-waitqueue-hashtable include/linux/writeback.h
--- 25/include/linux/writeback.h~eliminate-inode-waitqueue-hashtable	2004-09-21 01:57:14.067299264 -0700
+++ 25-akpm/include/linux/writeback.h	2004-09-21 01:57:14.076297896 -0700
@@ -68,7 +68,7 @@ struct writeback_control {
  */	
 void writeback_inodes(struct writeback_control *wbc);
 void wake_up_inode(struct inode *inode);
-void __wait_on_inode(struct inode * inode);
+int inode_wait(void *);
 void sync_inodes_sb(struct super_block *, int wait);
 void sync_inodes(int wait);
 
@@ -76,8 +76,8 @@ void sync_inodes(int wait);
 static inline void wait_on_inode(struct inode *inode)
 {
 	might_sleep();
-	if (inode->i_state & I_LOCK)
-		__wait_on_inode(inode);
+	wait_on_bit(&inode->i_state, __I_LOCK, inode_wait,
+							TASK_UNINTERRUPTIBLE);
 }
 
 /*
diff -puN kernel/wait.c~eliminate-inode-waitqueue-hashtable kernel/wait.c
--- 25/kernel/wait.c~eliminate-inode-waitqueue-hashtable	2004-09-21 01:57:14.069298960 -0700
+++ 25-akpm/kernel/wait.c	2004-09-21 01:57:14.077297744 -0700
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/mm.h>
 #include <linux/wait.h>
 #include <linux/hash.h>
 
_