While path walking we do follow_mount or follow_down, which use 
dcache_lock for serialisation.  vfsmount related operations also use 
dcache_lock for all updates.  I think we can use a separate lock for 
vfsmount related work and thereby improve path walking. 

The following two patches do exactly that.  The first one replaces 
dcache_lock with a new vfsmount_lock in namespace.c.  The lock is 
local to namespace.c and is not required outside it.  The second patch 
uses RCU to make lookup_mnt() lock-free.  The patches are quite simple 
and straightforward.
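
The lookup_mnt-rcu.patch itself is not included below.  As a rough sketch
only (written here with today's RCU helpers, which may differ from the exact
code in the patch), the lock-free read side would look something like this:

/* Illustrative sketch, not the actual lookup_mnt-rcu.patch.  Readers walk
 * the mount hash chain under rcu_read_lock(); writers still serialise on
 * vfsmount_lock and update the chain with the RCU list variants so readers
 * always see a consistent list.  Taking the reference inside the read-side
 * section assumes the vfsmount cannot be freed while it is still visible
 * on the hash chain.
 */
struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
	struct list_head *head = mount_hashtable + hash(mnt, dentry);
	struct list_head *tmp;
	struct vfsmount *found = NULL;

	rcu_read_lock();
	for (tmp = rcu_dereference(head->next); tmp != head;
	     tmp = rcu_dereference(tmp->next)) {
		struct vfsmount *p = list_entry(tmp, struct vfsmount, mnt_hash);
		if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
			found = mntget(p);
			break;
		}
	}
	rcu_read_unlock();
	return found;
}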

The lockmeter results show reduced contention and fewer lock acquisitions 
for dcache_lock while running dcachebench* on a 4-way SMP box:

SPINLOCKS         HOLD            WAIT
UTIL  CON    MEAN(  MAX )   MEAN(  MAX )(% CPU)     TOTAL NOWAIT SPIN RJECT  NAME

baselkm-2569: 	
20.7% 20.9%  0.5us( 146us)  2.9us( 144us)(0.81%)  31590840 79.1% 20.9%    0%  dcache_lock
mntlkm-2569: 	
14.3% 13.6%  0.4us( 170us)  2.9us( 187us)(0.42%)  23071746 86.4% 13.6%    0%  dcache_lock

We get more than an 8% improvement on the 4-way SMP box and a 44% improvement
on the 16-way NUMAQ while running dcachebench*.

		Average (usecs/iteration)	Std. Deviation 
		(lower is better)
4-way SMP
2.5.69		15739.3				470.90
2.5.69-mnt	14459.6				298.51

16-way NUMAQ	
2.5.69		120426.5			363.78
2.5.69-mnt	 63225.8			427.60

*dcachebench is a microbenchmark written by Bill Hartner and is available at
http://www-124.ibm.com/developerworks/opensource/linuxperf/dcachebench/dcachebench.html

vfsmount_lock.patch
-------------------
- Patch replacing dcache_lock with the new vfsmount_lock for all mount 
  related operations.  This removes the need to take dcache_lock while
  doing follow_mount or follow_down operations in path walking.

I re-ran dcachebench with 2.5.70 as the base on the 16-way NUMAQ box.

                	Average (usecs/iteration)       Std. Deviation
                	(lower is better)
16-way NUMAQ
2.5.70 				120710.9		 	230.67
 + vfsmount_lock.patch  	65209.6				242.97
    + lookup_mnt-rcu.patch 	64042.3				416.61
 
So the lock splitting alone (vfsmount_lock.patch) gives almost all of the benefit.



 fs/namei.c     |   24 +++++++++------------
 fs/namespace.c |   64 ++++++++++++++++++++++++++++++++-------------------------
 2 files changed, 48 insertions(+), 40 deletions(-)

diff -puN fs/namei.c~vfsmount_lock fs/namei.c
--- 25/fs/namei.c~vfsmount_lock	2003-06-05 18:51:48.000000000 -0700
+++ 25-akpm/fs/namei.c	2003-06-05 18:51:48.000000000 -0700
@@ -434,19 +434,17 @@ int follow_up(struct vfsmount **mnt, str
 	return 1;
 }
 
+/* no need for dcache_lock, as serialization is taken care of in
+ * namespace.c
+ */
 static int follow_mount(struct vfsmount **mnt, struct dentry **dentry)
 {
 	int res = 0;
 	while (d_mountpoint(*dentry)) {
-		struct vfsmount *mounted;
-		spin_lock(&dcache_lock);
-		mounted = lookup_mnt(*mnt, *dentry);
-		if (!mounted) {
-			spin_unlock(&dcache_lock);
+		struct vfsmount *mounted = lookup_mnt(*mnt, *dentry);
+		if (!mounted) 
 			break;
-		}
-		*mnt = mntget(mounted);
-		spin_unlock(&dcache_lock);
+		*mnt = mounted;
 		dput(*dentry);
 		mntput(mounted->mnt_parent);
 		*dentry = dget(mounted->mnt_root);
@@ -455,21 +453,21 @@ static int follow_mount(struct vfsmount 
 	return res;
 }
 
+/* no need for dcache_lock, as serialization is taken care of in
+ * namespace.c
+ */
 static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
 {
 	struct vfsmount *mounted;
-
-	spin_lock(&dcache_lock);
+	
 	mounted = lookup_mnt(*mnt, *dentry);
 	if (mounted) {
-		*mnt = mntget(mounted);
-		spin_unlock(&dcache_lock);
+		*mnt = mounted;
 		dput(*dentry);
 		mntput(mounted->mnt_parent);
 		*dentry = dget(mounted->mnt_root);
 		return 1;
 	}
-	spin_unlock(&dcache_lock);
 	return 0;
 }
 
diff -puN fs/namespace.c~vfsmount_lock fs/namespace.c
--- 25/fs/namespace.c~vfsmount_lock	2003-06-05 18:51:48.000000000 -0700
+++ 25-akpm/fs/namespace.c	2003-06-05 18:51:48.000000000 -0700
@@ -26,6 +26,8 @@
 extern int __init init_rootfs(void);
 extern int __init sysfs_init(void);
 
+/* spinlock for vfsmount related operations, in place of dcache_lock */
+static spinlock_t vfsmount_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 static struct list_head *mount_hashtable;
 static int hash_mask, hash_bits;
 static kmem_cache_t *mnt_cache; 
@@ -66,30 +68,38 @@ void free_vfsmnt(struct vfsmount *mnt)
 	kmem_cache_free(mnt_cache, mnt);
 }
 
+/*
+ * Now, lookup_mnt increments the ref count before returning
+ * the vfsmount struct.
+ */
 struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 {
 	struct list_head * head = mount_hashtable + hash(mnt, dentry);
 	struct list_head * tmp = head;
-	struct vfsmount *p;
+	struct vfsmount *p, *found = NULL;
 
+	spin_lock(&vfsmount_lock);
 	for (;;) {
 		tmp = tmp->next;
 		p = NULL;
 		if (tmp == head)
 			break;
 		p = list_entry(tmp, struct vfsmount, mnt_hash);
-		if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry)
+		if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
+			found = mntget(p);
 			break;
+		}
 	}
-	return p;
+	spin_unlock(&vfsmount_lock);
+	return found;
 }
 
 static int check_mnt(struct vfsmount *mnt)
 {
-	spin_lock(&dcache_lock);
+	spin_lock(&vfsmount_lock);
 	while (mnt->mnt_parent != mnt)
 		mnt = mnt->mnt_parent;
-	spin_unlock(&dcache_lock);
+	spin_unlock(&vfsmount_lock);
 	return mnt == current->namespace->root;
 }
 
@@ -263,15 +273,15 @@ void umount_tree(struct vfsmount *mnt)
 		mnt = list_entry(kill.next, struct vfsmount, mnt_list);
 		list_del_init(&mnt->mnt_list);
 		if (mnt->mnt_parent == mnt) {
-			spin_unlock(&dcache_lock);
+			spin_unlock(&vfsmount_lock);
 		} else {
 			struct nameidata old_nd;
 			detach_mnt(mnt, &old_nd);
-			spin_unlock(&dcache_lock);
+			spin_unlock(&vfsmount_lock);
 			path_release(&old_nd);
 		}
 		mntput(mnt);
-		spin_lock(&dcache_lock);
+		spin_lock(&vfsmount_lock);
 	}
 }
 
@@ -324,17 +334,17 @@ static int do_umount(struct vfsmount *mn
 	}
 
 	down_write(&current->namespace->sem);
-	spin_lock(&dcache_lock);
+	spin_lock(&vfsmount_lock);
 
 	if (atomic_read(&sb->s_active) == 1) {
 		/* last instance - try to be smart */
-		spin_unlock(&dcache_lock);
+		spin_unlock(&vfsmount_lock);
 		lock_kernel();
 		DQUOT_OFF(sb);
 		acct_auto_close(sb);
 		unlock_kernel();
 		security_sb_umount_close(mnt);
-		spin_lock(&dcache_lock);
+		spin_lock(&vfsmount_lock);
 	}
 	retval = -EBUSY;
 	if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) {
@@ -342,7 +352,7 @@ static int do_umount(struct vfsmount *mn
 			umount_tree(mnt);
 		retval = 0;
 	}
-	spin_unlock(&dcache_lock);
+	spin_unlock(&vfsmount_lock);
 	if (retval)
 		security_sb_umount_busy(mnt);
 	up_write(&current->namespace->sem);
@@ -449,18 +459,18 @@ static struct vfsmount *copy_tree(struct
 			q = clone_mnt(p, p->mnt_root);
 			if (!q)
 				goto Enomem;
-			spin_lock(&dcache_lock);
+			spin_lock(&vfsmount_lock);
 			list_add_tail(&q->mnt_list, &res->mnt_list);
 			attach_mnt(q, &nd);
-			spin_unlock(&dcache_lock);
+			spin_unlock(&vfsmount_lock);
 		}
 	}
 	return res;
  Enomem:
 	if (res) {
-		spin_lock(&dcache_lock);
+		spin_lock(&vfsmount_lock);
 		umount_tree(res);
-		spin_unlock(&dcache_lock);
+		spin_unlock(&vfsmount_lock);
 	}
 	return NULL;
 }
@@ -485,7 +495,7 @@ static int graft_tree(struct vfsmount *m
 		goto out_unlock;
 
 	err = -ENOENT;
-	spin_lock(&dcache_lock);
+	spin_lock(&vfsmount_lock);
 	if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) {
 		struct list_head head;
 
@@ -495,7 +505,7 @@ static int graft_tree(struct vfsmount *m
 		mntget(mnt);
 		err = 0;
 	}
-	spin_unlock(&dcache_lock);
+	spin_unlock(&vfsmount_lock);
 out_unlock:
 	up(&nd->dentry->d_inode->i_sem);
 	if (!err)
@@ -532,9 +542,9 @@ static int do_loopback(struct nameidata 
 	if (mnt) {
 		err = graft_tree(mnt, nd);
 		if (err) {
-			spin_lock(&dcache_lock);
+			spin_lock(&vfsmount_lock);
 			umount_tree(mnt);
-			spin_unlock(&dcache_lock);
+			spin_unlock(&vfsmount_lock);
 		} else
 			mntput(mnt);
 	}
@@ -599,7 +609,7 @@ static int do_move_mount(struct nameidat
 	if (IS_DEADDIR(nd->dentry->d_inode))
 		goto out1;
 
-	spin_lock(&dcache_lock);
+	spin_lock(&vfsmount_lock);
 	if (!IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
 		goto out2;
 
@@ -623,7 +633,7 @@ static int do_move_mount(struct nameidat
 	detach_mnt(old_nd.mnt, &parent_nd);
 	attach_mnt(old_nd.mnt, nd);
 out2:
-	spin_unlock(&dcache_lock);
+	spin_unlock(&vfsmount_lock);
 out1:
 	up(&nd->dentry->d_inode->i_sem);
 out:
@@ -804,9 +814,9 @@ int copy_namespace(int flags, struct tas
 	down_write(&tsk->namespace->sem);
 	/* First pass: copy the tree topology */
 	new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root);
-	spin_lock(&dcache_lock);
+	spin_lock(&vfsmount_lock);
 	list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&vfsmount_lock);
 
 	/* Second pass: switch the tsk->fs->* elements */
 	if (fs) {
@@ -1027,7 +1037,7 @@ asmlinkage long sys_pivot_root(const cha
 	if (new_nd.mnt->mnt_root != new_nd.dentry)
 		goto out2; /* not a mountpoint */
 	tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */
-	spin_lock(&dcache_lock);
+	spin_lock(&vfsmount_lock);
 	if (tmp != new_nd.mnt) {
 		for (;;) {
 			if (tmp->mnt_parent == tmp)
@@ -1044,7 +1054,7 @@ asmlinkage long sys_pivot_root(const cha
 	detach_mnt(user_nd.mnt, &root_parent);
 	attach_mnt(user_nd.mnt, &old_nd);
 	attach_mnt(new_nd.mnt, &root_parent);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&vfsmount_lock);
 	chroot_fs_refs(&user_nd, &new_nd);
 	security_sb_post_pivotroot(&user_nd, &new_nd);
 	error = 0;
@@ -1061,7 +1071,7 @@ out0:
 	unlock_kernel();
 	return error;
 out3:
-	spin_unlock(&dcache_lock);
+	spin_unlock(&vfsmount_lock);
 	goto out2;
 }
 

_