patch-2.1.36 linux/net/ipv4/tcp_output.c

Next file: linux/net/ipv4/tcp_timer.c
Previous file: linux/net/ipv4/tcp_ipv4.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.1.35/linux/net/ipv4/tcp_output.c linux/net/ipv4/tcp_output.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_output.c,v 1.34 1997/04/12 04:32:33 davem Exp $
+ * Version:	$Id: tcp_output.c,v 1.42 1997/04/22 01:06:33 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -34,14 +34,18 @@
 
 #include <net/tcp.h>
 
-/*
- *	Get rid of any delayed acks, we sent one already..
- */
+extern int sysctl_tcp_sack;
+extern int sysctl_tcp_tsack;
+extern int sysctl_tcp_timestamps;
+extern int sysctl_tcp_window_scaling;
+
+/* Get rid of any delayed acks, we sent one already.. */
 static __inline__ void clear_delayed_acks(struct sock * sk)
 {
-	sk->delayed_acks = 0;
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+	tp->delayed_acks = 0;
 	sk->ack_backlog = 0;
-	sk->bytes_rcv = 0;
 	tcp_clear_xmit_timer(sk, TIME_DACK);
 }
 
@@ -50,12 +54,8 @@
 	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 	
 	tp->send_head = tp->send_head->next;
-
 	if (tp->send_head == (struct sk_buff *) &sk->write_queue)
-	{
 		tp->send_head = NULL;
-	}
-
 }
 
 static __inline__ int tcp_snd_test(struct sock *sk, struct sk_buff *skb)
@@ -64,8 +64,7 @@
 	int nagle_check = 1;
 	int len;
 
-	/*
-	 *	RFC 1122 - section 4.2.3.4
+	/*	RFC 1122 - section 4.2.3.4
 	 *
 	 *	We must queue if
 	 *
@@ -76,17 +75,41 @@
 	 *	c) We are retransmiting [Nagle]
 	 *	d) We have too many packets 'in flight'
 	 */
-		
 	len = skb->end_seq - skb->seq;
-
-	if (!sk->nonagle && len < (sk->mss >> 1) && atomic_read(&sk->packets_out))
-	{
+	if (!sk->nonagle && len < (sk->mss >> 1) && tp->packets_out)
 		nagle_check = 0;
-	}
 
-	return (nagle_check && atomic_read(&sk->packets_out) < tp->snd_cwnd &&
+	return (nagle_check && tp->packets_out < tp->snd_cwnd &&
 		!after(skb->end_seq, tp->snd_una + tp->snd_wnd) &&
-		atomic_read(&sk->retransmits) == 0);
+		tp->retransmits == 0);
+}
+
+static __inline__ void tcp_build_options(__u32 *ptr, struct tcp_opt *tp)
+{
+	/* FIXME: We will still need to do SACK here. */
+	if (tp->tstamp_ok) {
+		*ptr++ = ntohl((TCPOPT_NOP << 24)
+			| (TCPOPT_NOP << 16)
+                        | (TCPOPT_TIMESTAMP << 8)
+			| TCPOLEN_TIMESTAMP);
+		/* WARNING: If HZ is ever larger than 1000 on some system,
+	 	 * then we will be violating RFC1323 here because our timestamps
+	 	 * will be moving too fast.
+		 * FIXME: code TCP so it uses at most ~ 1000 ticks a second?
+		 * (I notice alpha is 1024 ticks now). -- erics
+	 	 */
+		*ptr++ = htonl(jiffies);
+		*ptr = htonl(tp->ts_recent);
+	}
+}
+
+static __inline__ void tcp_update_options(__u32 *ptr, struct tcp_opt *tp)
+{
+	/* FIXME: We will still need to do SACK here. */
+	if (tp->tstamp_ok) {
+		*++ptr = htonl(jiffies);
+		*++ptr = htonl(tp->ts_recent);
+	}
 }
 
 /*
@@ -100,75 +123,56 @@
 	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 	int size;
 
-	/*
-	 *	length of packet (not counting length of pre-tcp headers) 
-	 */
-
+	/* Length of packet (not counting length of pre-tcp headers). */
 	size = skb->len - ((unsigned char *) th - skb->data);
 
-	/*
-	 *	Sanity check it..
-	 */
-
-	if (size < sizeof(struct tcphdr) || size > skb->len) 
-	{
-		printk(KERN_DEBUG "tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %u)\n",
-			skb, skb->data, th, skb->len);
+	/* Sanity check it.. */
+	if (size < sizeof(struct tcphdr) || size > skb->len) {
+		printk(KERN_DEBUG "tcp_send_skb: bad skb "
+		       "(skb = %p, data = %p, th = %p, len = %u)\n",
+		       skb, skb->data, th, skb->len);
 		kfree_skb(skb, FREE_WRITE);
 		return 0;
 	}
 
-	/*
-	 *	If we have queued a header size packet.. (these crash a few
-	 *	tcp stacks if ack is not set)
-	 */
-
-	if (size == sizeof(struct tcphdr))
-	{
-		/* 
-                 * If it's got a syn or fin discard
-                 */
-		if(!th->syn && !th->fin) 
-		{
+	/* If we have queued a header size packet.. (these crash a few
+	 * tcp stacks if ack is not set)
+	 * FIXME: What is the equivalent below when we have options?
+	 */
+	if (size == sizeof(struct tcphdr)) {
+		/* If it's got a syn or fin discard. */
+		if(!th->syn && !th->fin) {
 			printk(KERN_DEBUG "tcp_send_skb: attempt to queue a bogon.\n");
 			kfree_skb(skb,FREE_WRITE);
 			return 0;
 		}
 	}
 
-	/*
-	 *	Actual processing.
-	 */
+	/* Actual processing. */
 	skb->seq = ntohl(th->seq);
 	skb->end_seq = skb->seq + size - 4*th->doff;
 
 	skb_queue_tail(&sk->write_queue, skb);
 
-	if (tp->send_head == NULL && tcp_snd_test(sk, skb))
-	{
+	if (tp->send_head == NULL && tcp_snd_test(sk, skb)) {
 		struct sk_buff * buff;
 
-		/*
-		 *	This is going straight out
-		 */
-
-		th->ack_seq = htonl(tp->rcv_nxt);
+		/* This is going straight out. */
+		tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
 		th->window = htons(tcp_select_window(sk));
+		tcp_update_options((__u32 *)(th+1),tp);
 
 		tp->af_specific->send_check(sk, th, size, skb);
 
 		buff = skb_clone(skb, GFP_KERNEL);
-
 		if (buff == NULL)
-		{
 			goto queue;
-		}
 		
 		clear_delayed_acks(sk);
 		skb_set_owner_w(buff, sk);
 
 		tp->snd_nxt = skb->end_seq;
-		atomic_inc(&sk->packets_out);
+		tp->packets_out++;
 
 		skb->when = jiffies;
 
@@ -182,19 +186,13 @@
 	}
 
 queue:
-	/* 
-	 *	Remember where we must start sending
-	 */
-
+	/* Remember where we must start sending. */
 	if (tp->send_head == NULL)
 		tp->send_head = skb;
-
-	if (atomic_read(&sk->packets_out) == 0 && !tp->pending)
-	{
+	if (tp->packets_out == 0 && !tp->pending) {
 		tp->pending = TIME_PROBE0;
 		tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto);
 	}
-
 	return 0;
 }
 
@@ -215,86 +213,61 @@
 
 	th = skb->h.th;
 
-	/* size of new segment */
-	nsize = skb->tail - ((unsigned char *) (th + 1)) - len;
-
-	if (nsize <= 0)
-	{
+	/* Size of new segment. */
+	nsize = skb->tail - ((unsigned char *)(th)+tp->tcp_header_len) - len;
+	if (nsize <= 0) {
 		printk(KERN_DEBUG "tcp_fragment: bug size <= 0\n");
 		return -1;
 	}
 
-	/*
-	 *	Get a new skb... force flag on
-	 */
+	/* Get a new skb... force flag on. */
 	buff = sock_wmalloc(sk, nsize + 128 + sk->prot->max_header + 15, 1, 
 			    GFP_ATOMIC);
-
 	if (buff == NULL)
 		return -1;
 
-	/*
-	 *	Put headers on the new packet
-	 */
-
+	/* Put headers on the new packet. */
 	tmp = tp->af_specific->build_net_header(sk, buff);
-
-	if (tmp < 0)
-	{
+	if (tmp < 0) {
 		kfree_skb(buff, FREE_WRITE);
 		return -1;
 	}
 		
-	/*
-	 *	Move the TCP header over
-	 */
-	
-	nth = (struct tcphdr *) skb_put(buff, sizeof(*th));
-
+	/* Move the TCP header over. */
+	nth = (struct tcphdr *) skb_put(buff, tp->tcp_header_len);
 	buff->h.th = nth;
+	memcpy(nth, th, tp->tcp_header_len);
+
+	/* FIXME: Make sure this gets tcp options right. */
 	
-	memcpy(nth, th, sizeof(*th));
-	
-	/*
-	 *	Correct the new header
-	 */
-	
+	/* Correct the new header. */
 	buff->seq = skb->seq + len;
 	buff->end_seq = skb->end_seq;
 	nth->seq = htonl(buff->seq);
 	nth->check = 0;
-	nth->doff  = 5; 
+	nth->doff  = th->doff;
 	
 	/* urg data is always an headache */
-	if (th->urg)
-	{
-		if (th->urg_ptr > len)
-		{
+	if (th->urg) {
+		if (th->urg_ptr > len) {
 			th->urg = 0;
 			nth->urg_ptr -= len;
-		}
-		else
-		{
+		} else {
 			nth->urg = 0;
 		}
 	}
 
-	/*
-	 *	Copy TCP options and data start to our new buffer
-	 */
-	
-	buff->csum = csum_partial_copy(((u8 *)(th + 1)) + len,
+	/* Copy data tail to our new buffer. */
+	buff->csum = csum_partial_copy(((u8 *)(th)+tp->tcp_header_len) + len,
 				       skb_put(buff, nsize),
 				       nsize, 0);
-       
 
 	skb->end_seq -= nsize;
-
 	skb_trim(skb, skb->len - nsize);
 
-	/* remember to checksum this packet afterwards */
+	/* Remember to checksum this packet afterwards. */
 	th->check = 0;
-	skb->csum = csum_partial((u8*) (th + 1), skb->tail - ((u8 *) (th + 1)),
+	skb->csum = csum_partial((u8*)(th) + tp->tcp_header_len, skb->tail - ((u8 *) (th)+tp->tcp_header_len),
 				 0);
 
 	skb_append(skb, buff);
@@ -304,12 +277,10 @@
 
 static void tcp_wrxmit_prob(struct sock *sk, struct sk_buff *skb)
 {
-	/*
-	 *	This is acked data. We can discard it. This 
-	 *	cannot currently occur.
-	 */
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 
-	atomic_set(&sk->retransmits, 0);
+	/* This is acked data. We can discard it. This cannot currently occur. */
+	tp->retransmits = 0;
 
 	printk(KERN_DEBUG "tcp_write_xmit: bug skb in write queue\n");
 
@@ -329,17 +300,13 @@
 	SOCK_DEBUG(sk, "tcp_write_xmit: frag needed size=%d mss=%d\n",
 		   size, sk->mss);
 
-	if (tcp_fragment(sk, skb, sk->mss))
-	{
+	if (tcp_fragment(sk, skb, sk->mss)) {
 		/* !tcp_frament Failed! */
 		tp->send_head = skb;
-		atomic_dec(&sk->packets_out);
+		tp->packets_out--;
 		return -1;
-	}
-	else
-	{
-		/* 
-		 * If tcp_fragment succeded then
+	} else {
+		/* If tcp_fragment succeeded then
 		 * the send head is the resulting
 		 * fragment
 		 */
@@ -357,69 +324,52 @@
 void tcp_write_xmit(struct sock *sk)
 {
 	struct sk_buff *skb;
-	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 	u16 rcv_wnd;
 	int sent_pkts = 0;
 
-	/*
-	 *	The bytes will have to remain here. In time closedown will
-	 *	empty the write queue and all will be happy
+	/* The bytes will have to remain here. In time closedown will
+	 * empty the write queue and all will be happy.
 	 */
-
 	if(sk->zapped)
 		return;
 
-	/*
-	 *	Anything on the transmit queue that fits the window can
+	/*	Anything on the transmit queue that fits the window can
 	 *	be added providing we are:
 	 *
 	 *	a) following SWS avoidance [and Nagle algorithm]
 	 *	b) not exceeding our congestion window.
 	 *	c) not retransmiting [Nagle]
 	 */
-
 	rcv_wnd = htons(tcp_select_window(sk));
-
-	while((skb = tp->send_head) && tcp_snd_test(sk, skb))
-	{
+	while((skb = tp->send_head) && tcp_snd_test(sk, skb)) {
 		struct tcphdr *th;
 		struct sk_buff *buff;
 		int size;
 
-		IS_SKB(skb);
-
-		/*
-		 *	See if we really need to send the packet.
-		 *	(debugging code)
-		 */
-
-		if (!after(skb->end_seq, tp->snd_una))
-		{
+		/* See if we really need to send the packet. (debugging code) */
+		if (!after(skb->end_seq, tp->snd_una)) {
 			tcp_wrxmit_prob(sk, skb);
 			continue;
-		} 
-
+		}
 
-		/*
-		 *	Put in the ack seq and window at this point rather
+		/*	Put in the ack seq and window at this point rather
 		 *	than earlier, in order to keep them monotonic.
 		 *	We really want to avoid taking back window allocations.
 		 *	That's legal, but RFC1122 says it's frowned on.
 		 *	Ack and window will in general have changed since
 		 *	this packet was put on the write queue.
 		 */
-
 		th = skb->h.th;
 		size = skb->len - (((unsigned char *) th) - skb->data);
-
-		if (size - (th->doff << 2) > sk->mss)
-		{
+		if (size - (th->doff << 2) > sk->mss) {
 			if (tcp_wrxmit_frag(sk, skb, size))
 				break;
 		}
 
-		th->ack_seq = htonl(tp->rcv_nxt);
+		tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
 		th->window = rcv_wnd;
+		tcp_update_options((__u32 *)(th+1),tp);
 
 		tp->af_specific->send_check(sk, th, size, skb);
 
@@ -430,18 +380,14 @@
 #endif
 
 		buff = skb_clone(skb, GFP_ATOMIC);
-
 		if (buff == NULL)
 			break;
 
-		/*
-		 *	Advance the send_head. This one is going out.
-		 */
-
+		/* Advance the send_head.  This one is going out. */
 		update_send_head(sk);
 		clear_delayed_acks(sk);
 
-		atomic_inc(&sk->packets_out);
+		tp->packets_out++;
 		skb_set_owner_w(buff, sk);
 
 		tp->snd_nxt = skb->end_seq;
@@ -450,13 +396,10 @@
 
 		sent_pkts = 1;
 		tp->af_specific->queue_xmit(buff);
-
 	}
 
 	if (sent_pkts && !tcp_timer_is_set(sk, TIME_RETRANS))
-	{
 		tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
-	}
 }
 
 
@@ -469,33 +412,25 @@
  *	2. We limit memory per socket
  */
 
-
 unsigned short tcp_select_window(struct sock *sk)
 {
 	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 	int mss = sk->mss;
 	long free_space = sock_rspace(sk);
-	long window;
-	long cur_win;
-	long usable;
+	long window, cur_win, usable;
 
-	
-	if (sk->window_clamp)
-	{
-		free_space = min(sk->window_clamp, free_space);
-		mss = min(sk->window_clamp, mss);
+	if (tp->window_clamp) {
+		free_space = min(tp->window_clamp, free_space);
+		mss = min(tp->window_clamp, mss);
 	}
 	
-	/*
-	 * compute the actual window i.e.
+	/* compute the actual window i.e.
 	 * old_window - received_bytes_on_that_win
 	 */
-
 	cur_win = tp->rcv_wup - (tp->rcv_nxt - tp->rcv_wnd);
 	window  = tp->rcv_wnd;
 	
-	if ( cur_win < 0 )
-	{
+	if (cur_win < 0) {
 		cur_win = 0;
 		printk(KERN_DEBUG "TSW: win < 0 w=%d 1=%u 2=%u\n",
 		       tp->rcv_wnd, tp->rcv_nxt, tp->rcv_wup);
@@ -511,49 +446,33 @@
 	 * it MSS bytes
 	 */
 
-	/*
-	 * It would be a good idea if it didn't break header prediction.
+	/* It would be a good idea if it didn't break header prediction.
 	 * and BSD made the header predition standard...
 	 * It expects the same value in the header i.e. th->window to be
 	 * constant
 	 */
-
 	usable = free_space - cur_win;
 	if (usable < 0)
-	{
 		usable = 0;
-	}
 
-	if ( window <  usable )
-	{
-		/*
-		 *	Window is not blocking the sender
+	if (window < usable) {
+		/*	Window is not blocking the sender
 		 *	and we have enought free space for it
 		 */
-
 		if (cur_win > (sk->mss << 1))
 			goto out;
 	}
-
        	
-	if (window >= usable)
-	{
-		/*
-		 *	We are offering too much, cut it down... 
+	if (window >= usable) {
+		/*	We are offering too much, cut it down... 
 		 *	but don't shrink the window
 		 */
-		
 		window = max(usable, cur_win);
-	}
-	else
-	{	
+	} else {
 		if ((usable - window) >= mss)
-		{
 			window += mss;
-		}
 	}
-
-  out:
+out:
 	tp->rcv_wnd = window;
 	tp->rcv_wup = tp->rcv_nxt;
 	return window;
@@ -561,6 +480,7 @@
 
 static int tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 	struct tcphdr *th1, *th2;
 	int size1, size2, avail;
 	struct sk_buff *buff = skb->next;
@@ -572,54 +492,35 @@
 
 	avail = skb_tailroom(skb);
 
-	/*
-	 *  size of tcp payload
-	 */
-
-	size1 = skb->tail - (u8 *) (th1 + 1);
+	/* Size of TCP payload. */
+	size1 = skb->tail - ((u8 *) (th1)+(th1->doff<<2));
 	
 	th2 = buff->h.th;
-
-	size2 = buff->tail - (u8 *) (th2 + 1); 
+	size2 = buff->tail - ((u8 *) (th2)+(th2->doff<<2)); 
 
 	if (size2 > avail || size1 + size2 > sk->mss )
 		return -1;
 
-	/*
-	 *  ok. we will be able to collapse the packet
-	 */
-
+	/* Ok.  We will be able to collapse the packet. */
 	skb_unlink(buff);
-
 	memcpy(skb_put(skb, size2), ((char *) th2) + (th2->doff << 2), size2);
 	
-	/*
-	 * update sizes on original skb. both TCP and IP
-	 */
- 
+	/* Update sizes on original skb, both TCP and IP. */
 	skb->end_seq += size2;
-
-	if (th2->urg)
-	{
+	if (th2->urg) {
 		th1->urg = 1;
 		th1->urg_ptr = th2->urg_ptr + size1;
 	}
 
-	/*
-	 * ... and off you go.
-	 */
-
+	/* ... and off you go. */
 	kfree_skb(buff, FREE_WRITE);
-	atomic_dec(&sk->packets_out);
+	tp->packets_out--;
 
-	/* 
-	 *	Header checksum will be set by the retransmit procedure
-	 *	after calling rebuild header
+	/* Header checksum will be set by the retransmit procedure
+	 * after calling rebuild header.
 	 */
-
 	th1->check = 0;
-	skb->csum = csum_partial((u8*) (th1+1), size1 + size2, 0);
-
+	skb->csum = csum_partial((u8*)(th1)+(th1->doff<<2), size1 + size2, 0);
 	return 0;
 }
 
@@ -643,17 +544,13 @@
 	if (tp->retrans_head == tp->send_head)
 		tp->retrans_head = NULL;
 	
-	while ((skb = tp->retrans_head) != NULL)
-	{
+	while ((skb = tp->retrans_head) != NULL) {
 		struct sk_buff *buff;
 		struct tcphdr *th;
 		int tcp_size;
 		int size;
 
-		IS_SKB(skb);
-		
-		/*
-		 * In general it's OK just to use the old packet.  However we
+		/* In general it's OK just to use the old packet.  However we
 		 * need to use the current ack and window fields.  Urg and
 		 * urg_ptr could possibly stand to be updated as well, but we
 		 * don't keep the necessary data.  That shouldn't be a problem,
@@ -663,28 +560,23 @@
 
 		th = skb->h.th;
 
-		tcp_size = skb->tail - ((unsigned char *) (th + 1));
+		tcp_size = skb->tail - ((unsigned char *)(th)+tp->tcp_header_len);
 
-		if (tcp_size > sk->mss)
-		{
-			if (tcp_fragment(sk, skb, sk->mss))
-			{
+		if (tcp_size > sk->mss) {
+			if (tcp_fragment(sk, skb, sk->mss)) {
 				printk(KERN_DEBUG "tcp_fragment failed\n");
 				return;
 			}
-			atomic_inc(&sk->packets_out);
+			tp->packets_out++;
 		}
 
 		if (!th->syn &&
 		    tcp_size < (sk->mss >> 1) &&
 		    skb->next != tp->send_head &&
 		    skb->next != (struct sk_buff *)&sk->write_queue)
-		{
 			tcp_retrans_try_collapse(sk, skb);
-		}	       		
 
-		if (tp->af_specific->rebuild_header(sk, skb)) 
-		{
+		if (tp->af_specific->rebuild_header(sk, skb)) {
 #ifdef TCP_DEBUG
 			printk(KERN_DEBUG "tcp_do_rebuild_header failed\n");
 #endif
@@ -693,12 +585,10 @@
 
 		SOCK_DEBUG(sk, "retransmit sending\n");
 
-		/*
-		 *	update ack and window
-		 */
-
-		th->ack_seq = htonl(tp->rcv_nxt);
+		/* Update ack and window. */
+		tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
 		th->window = ntohs(tcp_select_window(sk));
+		tcp_update_options((__u32 *)(th+1),tp);
 
 		size = skb->tail - (unsigned char *) th;
 		tp->af_specific->send_check(sk, th, size, skb);
@@ -706,49 +596,32 @@
 		skb->when = jiffies;
 
 		buff = skb_clone(skb, GFP_ATOMIC);
-
 		if (buff == NULL)
 			break;
+
 		skb_set_owner_w(buff, sk);
 
 		clear_delayed_acks(sk);
-
 		tp->af_specific->queue_xmit(buff);
 		
-		/*
-		 *	Count retransmissions
-		 */
-		 
+		/* Count retransmissions. */
 		ct++;
-		sk->prot->retransmits++; /* ???: atomic_t necessary here? -DaveM */
+		sk->prot->retransmits++;
 		tcp_statistics.TcpRetransSegs++;
 
-		tp->high_seq = tp->snd_nxt;
-
-		/*
-		 *	Only one retransmit requested.
-		 */
-	
+		/* Only one retransmit requested. */
 		if (!all)
 			break;
 
-		/*
-		 *	This should cut it off before we send too many packets.
-		 */
-
+		/* This should cut it off before we send too many packets. */
 		if (ct >= tp->snd_cwnd)
 			break;
 
-		/*
-		 *	Advance the pointer
-		 */
-		
+		/* Advance the pointer. */
 		tp->retrans_head = skb->next;
 		if ((tp->retrans_head == tp->send_head) ||
 		    (tp->retrans_head == (struct sk_buff *) &sk->write_queue))
-		{
 			tp->retrans_head = NULL;
-		}
 	}
 }
 
@@ -764,53 +637,40 @@
 	struct sk_buff *buff;
 	int tmp;
 	
-		
-	buff = sock_wmalloc(sk, MAX_RESET_SIZE, 1, GFP_KERNEL);
-
-	if (buff == NULL)
-	{
-		/* This is a disaster if it occurs */
+	buff = sock_wmalloc(sk, BASE_ACK_SIZE + tp->tcp_header_len, 1, GFP_KERNEL);
+	if (buff == NULL) {
+		/* FIXME: This is a disaster if it occurs. */
 		printk(KERN_INFO "tcp_send_fin: Impossible malloc failure");
 		return;
 	}
 
-	/*
-	 *	Administrivia
-	 */
-	 
+	/* Administrivia. */
 	buff->csum = 0;
 
-	/*
-	 *	Put in the IP header and routing stuff. 
-	 */
-
+	/* Put in the IP header and routing stuff. */
 	tmp = tp->af_specific->build_net_header(sk, buff);
-
-	if (tmp < 0) 
-	{
+	if (tmp < 0) {
 		int t;
-  		/*
-  		 *	Finish anyway, treat this as a send that got lost. 
-  		 *	(Not good).
+
+  		/* FIXME: We must not throw this out. Eventually we must
+                 * put a FIN into the queue, otherwise it never gets queued.
   		 */
-  		 
 		kfree_skb(buff, FREE_WRITE);
 		sk->write_seq++;
-		t=del_timer(&sk->timer);
-		if(t)
+		t = del_timer(&sk->timer);
+		if (t)
 			add_timer(&sk->timer);
 		else
 			tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 		return;
 	}
 	
-	/*
-	 *	We ought to check if the end of the queue is a buffer and
-	 *	if so simply add the fin to that buffer, not send it ahead.
+	/* We ought to check if the end of the queue is a buffer and
+	 * if so simply add the fin to that buffer, not send it ahead.
 	 */
-
-	t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
+	t1 =(struct tcphdr *)skb_put(buff,tp->tcp_header_len);
 	buff->h.th =  t1;
+	tcp_build_options((__u32 *)(t1+1),tp);
 
 	memcpy(t1, th, sizeof(*t1));
 	buff->seq = sk->write_seq;
@@ -821,26 +681,19 @@
 	t1->window = htons(tcp_select_window(sk));
 	t1->fin = 1;
 
-	tp->af_specific->send_check(sk, t1, sizeof(*t1), buff);
+	tp->af_specific->send_check(sk, t1, tp->tcp_header_len, buff);
 
-	/*
-	 *	The fin can only be transmited after the data.
- 	 */
- 	
+	/* The fin can only be transmitted after the data. */
 	skb_queue_tail(&sk->write_queue, buff);
-
- 	if (tp->send_head == NULL)
-	{
+ 	if (tp->send_head == NULL) {
 		struct sk_buff *skb1;
 
-		atomic_inc(&sk->packets_out);
+		tp->packets_out++;
 		tp->snd_nxt = sk->write_seq;
 		buff->when = jiffies;
 
 		skb1 = skb_clone(buff, GFP_KERNEL);
-
-		if (skb1)
-		{
+		if (skb1) {
 			skb_set_owner_w(skb1, sk);
 			tp->af_specific->queue_xmit(skb1);
 		}
@@ -856,20 +709,14 @@
 	struct sk_buff * skb;	
 	struct sk_buff * buff;
 	struct tcphdr *th;
-	unsigned char *ptr;
 	int tmp;
 	
 	skb = sock_wmalloc(sk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
-
 	if (skb == NULL) 
-	{
 		return -ENOMEM;
-	}
 
 	tmp = tp->af_specific->build_net_header(sk, skb);
-	
-	if (tmp < 0)
-	{
+	if (tmp < 0) {
 		kfree_skb(skb, FREE_WRITE);
 		return tmp;
 	}
@@ -890,27 +737,23 @@
 
 	th->window = ntohs(tp->rcv_wnd);
 
-	th->ack_seq = htonl(tp->rcv_nxt);
-	th->doff = sizeof(*th)/4 + 1;
+	tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
 
-	ptr = skb_put(skb, TCPOLEN_MSS);
-	ptr[0] = TCPOPT_MSS;
-	ptr[1] = TCPOLEN_MSS;
-	ptr[2] = ((sk->mss) >> 8) & 0xff;
-	ptr[3] = (sk->mss) & 0xff;
-	skb->csum = csum_partial(ptr, TCPOLEN_MSS, 0);
+	tmp = tcp_syn_build_options(skb, sk->mss,
+		tp->sack_ok, tp->tstamp_ok,
+		tp->snd_wscale?tp->rcv_wscale:0);
+	skb->csum = 0;
+	th->doff = (sizeof(*th) + tmp)>>2;
 
-	tp->af_specific->send_check(sk, th, sizeof(*th)+4, skb);
+	tp->af_specific->send_check(sk, th, sizeof(*th)+tmp, skb);
 
 	skb_queue_tail(&sk->write_queue, skb);
 	
 	buff = skb_clone(skb, GFP_ATOMIC);
-
-	if (buff)
-	{
+	if (buff) {
 		skb_set_owner_w(buff, sk);
 
-		atomic_inc(&sk->packets_out);
+		tp->packets_out++;
 		skb->when = jiffies;
 
 		tp->af_specific->queue_xmit(buff);
@@ -935,22 +778,19 @@
 	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 	unsigned long timeout, now;
 
-	/* Calculate new timeout */
+	/* Calculate new timeout. */
 	now = jiffies;
 	timeout = tp->ato;
 
-	if (timeout > max_timeout || sk->bytes_rcv > (sk->mss << 2))
-	{
+	if (timeout > max_timeout ||
+	    ((tp->rcv_nxt - tp->rcv_wup) > (sk->mss << 2)))
 		timeout = now;
-	}
 	else
 		timeout += now;
 
-	/* Use new timeout only if there wasn't a older one earlier  */
+	/* Use new timeout only if there wasn't an older one earlier. */
 	if (!del_timer(&tp->delack_timer) || timeout < tp->delack_timer.expires)
-	{
 		tp->delack_timer.expires = timeout;
-	}
 
 	add_timer(&tp->delack_timer);
 }
@@ -968,75 +808,53 @@
 	struct tcphdr *th;
 	int tmp;
 
-	
 	if(sk->zapped)
-	{
-		/* We have been reset, we may not send again */
-		return;		
-	}
+		return;	/* We have been reset, we may not send again. */
 
-	/*
-	 * We need to grab some memory, and put together an ack,
+	/* We need to grab some memory, and put together an ack,
 	 * and then put it into the queue to be sent.
+	 * FIXME: is it better to waste memory here and use a
+	 * constant sized ACK?
 	 */
-
-	buff = sock_wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
-	if (buff == NULL) 
-	{
-		/* 
-		 *	Force it to send an ack. We don't have to do this
-		 *	(ACK is unreliable) but it's much better use of 
+	buff = sock_wmalloc(sk, BASE_ACK_SIZE + tp->tcp_header_len, 1, GFP_ATOMIC);
+	if (buff == NULL) {
+		/*	Force it to send an ack. We don't have to do this
+		 *	(ACK is unreliable) but it's much better use of
 		 *	bandwidth on slow links to send a spare ack than
-		 *	resend packets. 
+		 *	resend packets.
 		 */
-		 
 		tcp_send_delayed_ack(sk, HZ/2);
 		return;
 	}
 
 	clear_delayed_acks(sk);
 
-	/*
-	 *	Assemble a suitable TCP frame
-	 */
-	 
+	/* Assemble a suitable TCP frame. */
 	buff->csum = 0;
 
-	/* 
-	 *	Put in the IP header and routing stuff. 
-	 */
-	 
+	/* Put in the IP header and routing stuff. */
 	tmp = tp->af_specific->build_net_header(sk, buff);
-
-	if (tmp < 0) 
-	{
+	if (tmp < 0) {
 		kfree_skb(buff, FREE_WRITE);
 		return;
 	}
 
-	th =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
-
+	th = (struct tcphdr *)skb_put(buff,tp->tcp_header_len);
 	memcpy(th, &sk->dummy_th, sizeof(struct tcphdr));
+	tcp_build_options((__u32 *)(th+1),tp);
 
-	/*
-	 *	Swap the send and the receive. 
-	 */
-	 
+	/* Swap the send and the receive. */
 	th->window	= ntohs(tcp_select_window(sk));
 	th->seq		= ntohl(tp->snd_nxt);
-	th->ack_seq	= ntohl(tp->rcv_nxt);
-
-  	/*
-  	 *	Fill in the packet and send it
-  	 */
+	tp->last_ack_sent = th->ack_seq	= ntohl(tp->rcv_nxt);
 
-	tp->af_specific->send_check(sk, th, sizeof(struct tcphdr), buff);
+  	/* Fill in the packet and send it. */
+	tp->af_specific->send_check(sk, th, tp->tcp_header_len, buff);
 
 	SOCK_DEBUG(sk, "\rtcp_send_ack: seq %x ack %x\n",
 		   tp->snd_nxt, tp->rcv_nxt);
 
 	tp->af_specific->queue_xmit(buff);
-
   	tcp_statistics.TcpOutSegs++;
 }
 
@@ -1053,61 +871,44 @@
 	int tmp;
 
 	if (sk->zapped)
-		return;	/* After a valid reset we can send no more */
+		return;	/* After a valid reset we can send no more. */
 
-	/*
-	 *	Write data can still be transmitted/retransmitted in the
+	/*	Write data can still be transmitted/retransmitted in the
 	 *	following states.  If any other state is encountered, return.
 	 *	[listen/close will never occur here anyway]
 	 */
-
 	if (sk->state != TCP_ESTABLISHED && 
 	    sk->state != TCP_CLOSE_WAIT &&
 	    sk->state != TCP_FIN_WAIT1 && 
 	    sk->state != TCP_LAST_ACK &&
-	    sk->state != TCP_CLOSING
-	) 
-	{
+	    sk->state != TCP_CLOSING)
 		return;
-	}
-
-	if (before(tp->snd_nxt, tp->snd_una + tp->snd_wnd) && 
-	    (skb=tp->send_head))
-	{
-		/*
-	    	 * We are probing the opening of a window
-	    	 * but the window size is != 0
-	    	 * must have been a result SWS avoidance ( sender )
-	    	 */
 
+	if (before(tp->snd_nxt, tp->snd_una + tp->snd_wnd) && (skb=tp->send_head)) {
 		struct tcphdr *th;
 		unsigned long win_size;
 
+		/* We are probing the opening of a window
+	    	 * but the window size is != 0
+	    	 * must have been a result of SWS avoidance ( sender )
+	    	 */
 		win_size = tp->snd_wnd - (tp->snd_nxt - tp->snd_una);
-
-		if (win_size < skb->end_seq - skb->seq)
-		{
-			if (tcp_fragment(sk, skb, win_size))
-			{
+		if (win_size < skb->end_seq - skb->seq) {
+			if (tcp_fragment(sk, skb, win_size)) {
 				printk(KERN_DEBUG "tcp_write_wakeup: "
 				       "fragment failed\n");
 				return;
 			}
 		}
 
-			    	
 		th = skb->h.th;
-		
-		tp->af_specific->send_check(sk, th, th->doff * 4 + win_size, 
-					    skb);
-
+		tp->af_specific->send_check(sk, th, th->doff * 4 + win_size, skb);
 		buff = skb_clone(skb, GFP_ATOMIC);
 		if (buff == NULL)
-		{
 			return;
-		}
+
 		skb_set_owner_w(buff, sk);
-		atomic_inc(&sk->packets_out);
+		tp->packets_out++;
 
 		clear_delayed_acks(sk);
 
@@ -1115,36 +916,29 @@
 			tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
 
 		skb->when = jiffies;
-
 		update_send_head(sk);
-
 		tp->snd_nxt = skb->end_seq;
-	}
-	else
-	{	
-		buff = sock_wmalloc(sk,MAX_ACK_SIZE, 1, GFP_ATOMIC);
+	} else {
+		buff = sock_wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 		if (buff == NULL) 
 			return;
 
 		buff->csum = 0;
 
-		/*
-		 *	Put in the IP header and routing stuff. 
-		 */
-
+		/* Put in the IP header and routing stuff. */
 		tmp = tp->af_specific->build_net_header(sk, buff);
-
-		if (tmp < 0)
-		{
+		if (tmp < 0) {
 			kfree_skb(buff, FREE_WRITE);
 			return;
 		}
 
 		t1 = (struct tcphdr *) skb_put(buff, sizeof(struct tcphdr));
 		memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
+		/* FIXME: should zero window probes have SACK and/or TIMESTAMP data?
+		 * If so we have to tack them on here.
+		 */
 
-		/*
-		 *	Use a previous sequence.
+		/*	Use a previous sequence.
 		 *	This should cause the other end to send an ack.
 		 */
 	 
@@ -1153,13 +947,13 @@
 		t1->ack_seq = htonl(tp->rcv_nxt);
 		t1->window = htons(tcp_select_window(sk));
 
+		/* Value from dummy_th may be larger. */
+		t1->doff = sizeof(struct tcphdr)/4;
+
 		tp->af_specific->send_check(sk, t1, sizeof(*t1), buff);
 	}
 
-	/*
-	 *	Send it.
-	 */
-
+	/* Send it. */
 	tp->af_specific->queue_xmit(buff);
 	tcp_statistics.TcpOutSegs++;
 }
@@ -1175,16 +969,12 @@
 	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 
 	if (sk->zapped)
-		return;		/* After a valid reset we can send no more */
-
+		return; /* After a valid reset we can send no more. */
 
 	tcp_write_wakeup(sk);
-
 	tp->pending = TIME_PROBE0;
-
 	tp->backoff++;
 	tp->probes_out++;
-
 	tcp_reset_xmit_timer (sk, TIME_PROBE0, 
 			      min(tp->rto << tp->backoff, 120*HZ));
 }

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov