Patch from: Neil Brown <neilb@cse.unsw.edu.au>

Yes, definately a good analysis.  I don't think the fix is quite safe.
The possible problem is that another 'block' could be allocate for the
file+lock while the semaphore is released.

The following patch releases the semaphore before calling
nlmsvc_create_block (which is just as well, because if that fails, we
just return, currently with the semaphore held!!),
and get nlmsvc_create_block to reclaim the semaphore and retest for
existance before creating a new one.

I must admit, it feel very clumsy working with code that depends on
the BKL for sync after working with explicit spinlocks for so long,
but I think this will do the right thing...




 lockd/svclock.c |   22 +++++++++++++++++-----
 1 files changed, 17 insertions(+), 5 deletions(-)

diff -puN fs/lockd/svclock.c~lockd-lockup-fix fs/lockd/svclock.c
--- 25/fs/lockd/svclock.c~lockd-lockup-fix	2003-02-14 00:29:11.000000000 -0800
+++ 25-akpm/fs/lockd/svclock.c	2003-02-14 00:29:11.000000000 -0800
@@ -171,7 +171,7 @@ static inline struct nlm_block *
 nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
 				struct nlm_lock *lock, struct nlm_cookie *cookie)
 {
-	struct nlm_block	*block;
+	struct nlm_block	*block, *temp = NULL;
 	struct nlm_host		*host;
 	struct nlm_rqst		*call;
 
@@ -181,6 +181,14 @@ nlmsvc_create_block(struct svc_rqst *rqs
 	if (host == NULL)
 		return NULL;
 
+	/* claim the semaphore and search for the block again.  This
+	 * avoids the block being allocated twice 
+	 */
+	down(&file->f_sema);
+	temp = nlmsvc_lookup_block(file, lock, 0);
+	if (temp)
+		goto failed; /* actually a round-about success */
+
 	/* Allocate memory for block, and initialize arguments */
 	if (!(block = (struct nlm_block *) kmalloc(sizeof(*block), GFP_KERNEL)))
 		goto failed;
@@ -211,13 +219,15 @@ nlmsvc_create_block(struct svc_rqst *rqs
 	call->a_host    = host;
 	call->a_flags   = RPC_TASK_ASYNC;
 
+	up(&file->f_sema);
 	return block;
 
 failed_free:
 	kfree(block);
 failed:
 	nlm_release_host(host);
-	return NULL;
+	up(&file->f_sema);
+	return temp;
 }
 
 /*
@@ -294,7 +304,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, stru
 			struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
 {
 	struct file_lock	*conflock;
-	struct nlm_block	*block;
+	struct nlm_block	*block = NULL;
 	int			error;
 
 	dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
@@ -305,15 +315,16 @@ nlmsvc_lock(struct svc_rqst *rqstp, stru
 				(long long)lock->fl.fl_end,
 				wait);
 
+ again:
 	/* Lock file against concurrent access */
 	down(&file->f_sema);
 
 	/* Get existing block (in case client is busy-waiting) */
-	block = nlmsvc_lookup_block(file, lock, 0);
+	if (block == NULL)
+		block = nlmsvc_lookup_block(file, lock, 0);
 
 	lock->fl.fl_flags |= FL_LOCKD;
 
-again:
 	if (!(conflock = posix_test_lock(&file->f_file, &lock->fl))) {
 		error = posix_lock_file(&file->f_file, &lock->fl);
 
@@ -347,6 +358,7 @@ again:
 	/* If we don't have a block, create and initialize it. Then
 	 * retry because we may have slept in kmalloc. */
 	if (block == NULL) {
+		up(&file->f_sema);
 		dprintk("lockd: blocking on this lock (allocating).\n");
 		if (!(block = nlmsvc_create_block(rqstp, file, lock, cookie)))
 			return nlm_lck_denied_nolocks;

_