From: Trond Myklebust <trond.myklebust@fys.uio.no>

I can never guarantee you perfect service with soft mounts (a 5 second
network partition/server congestion is all it takes) but I do have a
patch that just went into 2.4.22 that backs out some of the
Van Jacobson exponential backoff changes. This helps stabilize things
a lot.

I haven't yet had time to port that patch to 2.5.x, but the code
should be pretty much identical, so if you want to give it a go, then
here it is...



 net/sunrpc/clnt.c |    4 ++--
 net/sunrpc/xprt.c |   30 ++++--------------------------
 2 files changed, 6 insertions(+), 28 deletions(-)

diff -puN net/sunrpc/clnt.c~nfs-revert-backoff net/sunrpc/clnt.c
--- 25/net/sunrpc/clnt.c~nfs-revert-backoff	2003-07-31 22:08:57.000000000 -0700
+++ 25-akpm/net/sunrpc/clnt.c	2003-07-31 22:08:57.000000000 -0700
@@ -744,14 +744,14 @@ call_timeout(struct rpc_task *task)
 
 	dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid);
 	if (clnt->cl_softrtry) {
-		if (clnt->cl_chatty && !task->tk_exit)
+		if (clnt->cl_chatty)
 			printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
 				clnt->cl_protname, clnt->cl_server);
 		rpc_exit(task, -EIO);
 		return;
 	}
 
-	if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN) && rpc_ntimeo(&clnt->cl_rtt) > 7) {
+	if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) {
 		task->tk_flags |= RPC_CALL_MAJORSEEN;
 		printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
 			clnt->cl_protname, clnt->cl_server);
diff -puN net/sunrpc/xprt.c~nfs-revert-backoff net/sunrpc/xprt.c
--- 25/net/sunrpc/xprt.c~nfs-revert-backoff	2003-07-31 22:08:57.000000000 -0700
+++ 25-akpm/net/sunrpc/xprt.c	2003-07-31 22:08:57.000000000 -0700
@@ -1040,21 +1040,6 @@ out:
 }
 
 /*
- * Exponential backoff for UDP retries
- */
-static inline int
-xprt_expbackoff(struct rpc_task *task, struct rpc_rqst *req)
-{
-	int backoff;
-
-	req->rq_ntimeo++;
-	backoff = min(rpc_ntimeo(&task->tk_client->cl_rtt), XPRT_MAX_BACKOFF);
-	if (req->rq_ntimeo < (1 << backoff))
-		return 1;
-	return 0;
-}
-
-/*
  * RPC receive timeout handler.
  */
 static void
@@ -1067,14 +1052,7 @@ xprt_timer(struct rpc_task *task)
 	if (req->rq_received)
 		goto out;
 
-	if (!xprt->nocong) {
-		if (xprt_expbackoff(task, req)) {
-			rpc_add_timer(task, xprt_timer);
-			goto out_unlock;
-		}
-		rpc_inc_timeo(&task->tk_client->cl_rtt);
-		xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT);
-	}
+	xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT);
 	req->rq_nresend++;
 
 	dprintk("RPC: %4d xprt_timer (%s request)\n",
@@ -1084,7 +1062,6 @@ xprt_timer(struct rpc_task *task)
 out:
 	task->tk_timeout = 0;
 	rpc_wake_up_task(task);
-out_unlock:
 	spin_unlock(&xprt->sock_lock);
 }
 
@@ -1220,16 +1197,17 @@ xprt_transmit(struct rpc_task *task)
 	return;
  out_receive:
 	dprintk("RPC: %4d xmit complete\n", task->tk_pid);
+	spin_lock_bh(&xprt->sock_lock);
 	/* Set the task's receive timeout value */
 	if (!xprt->nocong) {
 		task->tk_timeout = rpc_calc_rto(&clnt->cl_rtt,
 				task->tk_msg.rpc_proc->p_timer);
-		req->rq_ntimeo = 0;
+		task->tk_timeout <<= clnt->cl_timeout.to_retries
+			- req->rq_timeout.to_retries;
 		if (task->tk_timeout > req->rq_timeout.to_maxval)
 			task->tk_timeout = req->rq_timeout.to_maxval;
 	} else
 		task->tk_timeout = req->rq_timeout.to_current;
-	spin_lock_bh(&xprt->sock_lock);
 	/* Don't race with disconnect */
 	if (!xprt_connected(xprt))
 		task->tk_status = -ENOTCONN;

_