patch-2.1.120 linux/net/ipv4/tcp.c

Next file: linux/net/ipv4/tcp_input.c
Previous file: linux/net/ipv4/route.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.1.119/linux/net/ipv4/tcp.c linux/net/ipv4/tcp.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp.c,v 1.116 1998/07/26 03:06:54 davem Exp $
+ * Version:	$Id: tcp.c,v 1.119 1998/08/26 12:04:14 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -201,6 +201,7 @@
  *					tcp_do_sendmsg to avoid burstiness.
  *		Eric Schenk	:	Fix fast close down bug with
  *					shutdown() followed by close().
+ *		Andi Kleen :	Make poll agree with SIGIO
  *					
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -383,13 +384,14 @@
  *
  * ICMP messages (4.2.3.9)
  *   MUST act on ICMP errors. (does)
- *   MUST slow transmission upon receipt of a Source Quench. (does)
+ *   MUST slow transmission upon receipt of a Source Quench. (no longer does,
+ *   because the IETF has deprecated that; it can be turned on)
  *   MUST NOT abort connection upon receipt of soft Destination
  *     Unreachables (0, 1, 5), Time Exceededs and Parameter
  *     Problems. (doesn't)
  *   SHOULD report soft Destination Unreachables etc. to the
- *     application. (does, but may drop them in the ICMP error handler
- *	during an accept())
+ *     application. (does, except during SYN_RECV; may also drop messages
+ *     in some rare cases before accept() - ICMP is unreliable)
  *   SHOULD abort connection upon receipt of hard Destination Unreachable
  *     messages (2, 3, 4). (does, but see above)
  *
@@ -397,7 +399,7 @@
  *   MUST reject as an error OPEN for invalid remote IP address. (does)
  *   MUST ignore SYN with invalid source address. (does)
  *   MUST silently discard incoming SYN for broadcast/multicast
- *     address. (I'm not sure if it does. Someone should check this.)
+ *     address. (does)
  *
  * Asynchronous Reports (4.2.4.1)
  * MUST provide mechanism for reporting soft errors to application
@@ -537,6 +539,21 @@
 }
 
 /*
+ *	Compute the minimum free write space needed to queue new packets.
+ */
+static inline int tcp_min_write_space(struct sock *sk, struct tcp_opt *tp)
+{
+	int space;
+#if 1 /* This needs benchmarking and real world tests */
+	space = max(tp->mss_cache + 128, MIN_WRITE_SPACE);
+#else /* 2.0 way */
+	/* More than half of the socket queue free? */
+	space = atomic_read(&sk->wmem_alloc) / 2;
+#endif
+	return space;
+}
+
+/*
  *	Wait for a TCP event.
  *
  *	Note that we don't need to lock the socket, as the upper poll layers
@@ -556,36 +573,56 @@
 	mask = 0;
 	if (sk->err)
 		mask = POLLERR;
+
+	/*
+	 * POLLHUP is certainly not done right. But poll() doesn't
+	 * have a notion of HUP in just one direction, and for a
+	 * socket the read side is more interesting.
+	 *
+	 * Some poll() documentation says that POLLHUP is incompatible
+	 * with the POLLOUT/POLLWRNORM flags, so somebody should check all
+	 * of this. But be careful: it tends to be safer to return too many
+	 * bits than too few, and you can easily break real applications
+	 * if you don't tell them that something has hung up!
+	 *
+	 * Check-me.
+	 */
+	if (sk->shutdown & RCV_SHUTDOWN)
+		mask |= POLLHUP;
+
 	/* Connected? */
 	if ((1 << sk->state) & ~(TCPF_SYN_SENT|TCPF_SYN_RECV)) {
-		int space;
-
-		if (sk->shutdown & RCV_SHUTDOWN)
-			mask |= POLLHUP;
-		
 		if ((tp->rcv_nxt != tp->copied_seq) &&
 		    (tp->urg_seq != tp->copied_seq ||
 		     tp->rcv_nxt != tp->copied_seq+1 ||
 		     sk->urginline || !tp->urg_data))
 			mask |= POLLIN | POLLRDNORM;
 
-#if 1 /* This needs benchmarking and real world tests */
-		space = (sk->dst_cache ? sk->dst_cache->pmtu : sk->mss) + 128;
-		if (space < 2048) /* XXX */
-			space = 2048;
-#else /* 2.0 way */
-		/* More than half of the socket queue free? */
-		space = atomic_read(&sk->wmem_alloc) / 2;
-#endif
 		/* Always wake the user up when an error occurred */
-		if (sock_wspace(sk) >= space || sk->err)
+		if (sock_wspace(sk) >= tcp_min_write_space(sk, tp) || sk->err)
 			mask |= POLLOUT | POLLWRNORM;
 		if (tp->urg_data & URG_VALID)
-		    	mask |= POLLPRI;
+			mask |= POLLPRI;
 	}
 	return mask;
 }
 
+/*
+ *	Socket write_space callback.
+ *	The sock_wake_async() condition here should agree with poll's POLLOUT test.
+ */
+void tcp_write_space(struct sock *sk)
+{
+	if (sk->dead)
+		return; 
+
+	wake_up_interruptible(sk->sleep);
+	if (sock_wspace(sk) >=
+	    tcp_min_write_space(sk, &(sk->tp_pinfo.af_tcp)))
+		sock_wake_async(sk->socket, 2);
+}
+
+
 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
 	int answ;
@@ -1025,7 +1062,7 @@
 		 * which don't advertize a larger window.
 		 */
 		if((copied >= rcv_window_now) &&
-		   ((rcv_window_now + sk->mss) <= tp->window_clamp))
+		   ((rcv_window_now + tp->mss_cache) <= tp->window_clamp))
 			tcp_read_wakeup(sk);
 	}
 }
@@ -1543,16 +1580,18 @@
 
 	tcp_synq_unlink(tp, req, prev);
 	newsk = req->sk;
+	req->class->destructor(req);
 	tcp_openreq_free(req);
 	sk->ack_backlog--; 
 
-	/* FIXME: need to check here if newsk has already
-	 * an soft_err or err set.
-	 * We have two options here then: reply (this behaviour matches
-	 * Solaris) or return the error to the application (old Linux)
-	 */
+	/*
+	 * This does not pass any already set errors on the new socket
+	 * to the user, but they will be returned on the first socket operation
+	 * after the accept.
+	 */ 
+
 	error = 0;
- out:
+out:
 	release_sock(sk);
 	sk->err = error;
 	return newsk;
@@ -1586,7 +1625,7 @@
  */
 	  		if(val<1||val>MAX_WINDOW)
 				return -EINVAL;
-			sk->user_mss=val;
+			tp->user_mss=val;
 			return 0;
 		case TCP_NODELAY:
 			sk->nonagle=(val==0)?0:1;
@@ -1614,7 +1653,7 @@
 
 	switch(optname) {
 		case TCP_MAXSEG:
-			val=sk->user_mss;
+			val=tp->user_mss;
 			break;
 		case TCP_NODELAY:
 			val=sk->nonagle;
@@ -1640,7 +1679,7 @@
 
 extern void __skb_cb_too_small_for_tcp(int, int);
 
-__initfunc(void tcp_init(void))
+void __init tcp_init(void)
 {
 	struct sk_buff *skb = NULL;
 

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov