patch-2.4.19 linux-2.4.19/net/ipv4/tcp_minisocks.c

Next file: linux-2.4.19/net/ipv4/tcp_output.c
Previous file: linux-2.4.19/net/ipv4/tcp_ipv4.c
Back to the patch index
Back to the overall index

diff -urN linux-2.4.18/net/ipv4/tcp_minisocks.c linux-2.4.19/net/ipv4/tcp_minisocks.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_minisocks.c,v 1.14 2001/09/21 21:27:34 davem Exp $
+ * Version:	$Id: tcp_minisocks.c,v 1.14.2.1 2002/03/05 04:30:08 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -75,17 +75,16 @@
 	/* Disassociate with bind bucket. */
 	bhead = &tcp_bhash[tcp_bhashfn(tw->num)];
 	spin_lock(&bhead->lock);
-	if ((tb = tw->tb) != NULL) {
-		if(tw->bind_next)
-			tw->bind_next->bind_pprev = tw->bind_pprev;
-		*(tw->bind_pprev) = tw->bind_next;
-		tw->tb = NULL;
-		if (tb->owners == NULL) {
-			if (tb->next)
-				tb->next->pprev = tb->pprev;
-			*(tb->pprev) = tb->next;
-			kmem_cache_free(tcp_bucket_cachep, tb);
-		}
+	tb = tw->tb;
+	if(tw->bind_next)
+		tw->bind_next->bind_pprev = tw->bind_pprev;
+	*(tw->bind_pprev) = tw->bind_next;
+	tw->tb = NULL;
+	if (tb->owners == NULL) {
+		if (tb->next)
+			tb->next->pprev = tb->pprev;
+		*(tb->pprev) = tb->next;
+		kmem_cache_free(tcp_bucket_cachep, tb);
 	}
 	spin_unlock(&bhead->lock);
 
@@ -304,9 +303,23 @@
 	struct tcp_bind_hashbucket *bhead;
 	struct sock **head, *sktw;
 
+	/* Step 1: Put TW into bind hash. Original socket stays there too.
+	   Note, that any socket with sk->num!=0 MUST be bound in binding
+	   cache, even if it is closed.
+	 */
+	bhead = &tcp_bhash[tcp_bhashfn(sk->num)];
+	spin_lock(&bhead->lock);
+	tw->tb = (struct tcp_bind_bucket *)sk->prev;
+	BUG_TRAP(sk->prev!=NULL);
+	if ((tw->bind_next = tw->tb->owners) != NULL)
+		tw->tb->owners->bind_pprev = &tw->bind_next;
+	tw->tb->owners = (struct sock*)tw;
+	tw->bind_pprev = &tw->tb->owners;
+	spin_unlock(&bhead->lock);
+
 	write_lock(&ehead->lock);
 
-	/* Step 1: Remove SK from established hash. */
+	/* Step 2: Remove SK from established hash. */
 	if (sk->pprev) {
 		if(sk->next)
 			sk->next->pprev = sk->pprev;
@@ -315,7 +328,7 @@
 		sock_prot_dec_use(sk->prot);
 	}
 
-	/* Step 2: Hash TW into TIMEWAIT half of established hash table. */
+	/* Step 3: Hash TW into TIMEWAIT half of established hash table. */
 	head = &(ehead + tcp_ehash_size)->chain;
 	sktw = (struct sock *)tw;
 	if((sktw->next = *head) != NULL)
@@ -325,20 +338,6 @@
 	atomic_inc(&tw->refcnt);
 
 	write_unlock(&ehead->lock);
-
-	/* Step 3: Put TW into bind hash. Original socket stays there too.
-	   Note, that any socket with sk->num!=0 MUST be bound in binding
-	   cache, even if it is closed.
-	 */
-	bhead = &tcp_bhash[tcp_bhashfn(sk->num)];
-	spin_lock(&bhead->lock);
-	tw->tb = (struct tcp_bind_bucket *)sk->prev;
-	BUG_TRAP(sk->prev!=NULL);
-	if ((tw->bind_next = tw->tb->owners) != NULL)
-		tw->tb->owners->bind_pprev = &tw->bind_next;
-	tw->tb->owners = (struct sock*)tw;
-	tw->bind_pprev = &tw->tb->owners;
-	spin_unlock(&bhead->lock);
 }
 
 /* 
@@ -783,6 +782,8 @@
 			newtp->ack.last_seg_size = skb->len-newtp->tcp_header_len;
 		newtp->mss_clamp = req->mss;
 		TCP_ECN_openreq_child(newtp, req);
+
+		TCP_INC_STATS_BH(TcpPassiveOpens);
 	}
 	return newsk;
 }
@@ -846,8 +847,38 @@
 	/* Further reproduces section "SEGMENT ARRIVES"
 	   for state SYN-RECEIVED of RFC793.
 	   It is broken, however, it does not work only
-	   when SYNs are crossed, which is impossible in our
-	   case.
+	   when SYNs are crossed.
+
+	   You would think that SYN crossing is impossible here, since
+	   we should have a SYN_SENT socket (from connect()) on our end,
+	   but this is not true if the crossed SYNs were sent to both
+	   ends by a malicious third party.  We must defend against this,
+	   and to do that we first verify the ACK (as per RFC793, page
+	   36) and reset if it is invalid.  Is this a true full defense?
+	   To convince ourselves, let us consider a way in which the ACK
+	   test can still pass in this 'malicious crossed SYNs' case.
+	   Malicious sender sends identical SYNs (and thus identical sequence
+	   numbers) to both A and B:
+
+		A: gets SYN, seq=7
+		B: gets SYN, seq=7
+
+	   By our good fortune, both A and B select the same initial
+	   send sequence number of seven :-)
+
+		A: sends SYN|ACK, seq=7, ack_seq=8
+		B: sends SYN|ACK, seq=7, ack_seq=8
+
+	   So we are now A eating this SYN|ACK, ACK test passes.  So
+	   does sequence test, SYN is truncated, and thus we consider
+	   it a bare ACK.
+
+	   If tp->defer_accept, we silently drop this bare ACK.  Otherwise,
+	   we create an established connection.  Both ends (listening sockets)
+	   accept the new incoming connection and try to talk to each other. 8-)
+
+	   Note: This case is both harmless, and rare.  Possibility is about the
+	   same as us discovering intelligent life on another plant tomorrow.
 
 	   But generally, we should (RFC lies!) to accept ACK
 	   from SYNACK both here and in tcp_rcv_state_process().
@@ -859,6 +890,22 @@
 	   before attempt to create socket.
 	 */
 
+	/* RFC793 page 36: "If the connection is in any non-synchronized state ...
+	 *                  and the incoming segment acknowledges something not yet
+	 *                  sent (the segment carries an unaccaptable ACK) ...
+	 *                  a reset is sent."
+	 */
+	if (!(flg & TCP_FLAG_ACK))
+		return NULL;
+
+	/* Invalid ACK: reset will be sent by listening socket */
+	if (TCP_SKB_CB(skb)->ack_seq != req->snt_isn+1)
+		return sk;
+	/* Also, it would be not so bad idea to check rcv_tsecr, which
+	 * is essentially ACK extension and too early or too late values
+	 * should cause reset in unsynchronized states.
+	 */
+
 	/* RFC793: "first check sequence number". */
 
 	if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
@@ -888,19 +935,6 @@
 	if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN))
 		goto embryonic_reset;
 
-	/* RFC793: "fifth check the ACK field" */
-
-	if (!(flg & TCP_FLAG_ACK))
-		return NULL;
-
-	/* Invalid ACK: reset will be sent by listening socket */
-	if (TCP_SKB_CB(skb)->ack_seq != req->snt_isn+1)
-		return sk;
-	/* Also, it would be not so bad idea to check rcv_tsecr, which
-	 * is essentially ACK extension and too early or too late values
-	 * should cause reset in unsynchronized states.
-	 */
-
 	/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
 	if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) {
 		req->acked = 1;

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)