patch-1.3.62 linux/net/ipv4/tcp.c

diff -u --recursive --new-file v1.3.61/linux/net/ipv4/tcp.c linux/net/ipv4/tcp.c
@@ -33,7 +33,7 @@
  *					wakes people on errors. select 
  *					behaves and the icmp error race
  *					has gone by moving it into sock.c
- *		Alan Cox	:	tcp_reset() fixed to work for 
+ *		Alan Cox	:	tcp_send_reset() fixed to work for 
  *					everything not just packets for 
  *					unknown sockets.
  *		Alan Cox	:	tcp option processing.
@@ -410,206 +410,25 @@
  * (Whew. -- MS 950903)
  **/
 
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/time.h>
-#include <linux/string.h>
 #include <linux/config.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/termios.h>
-#include <linux/in.h>
+#include <linux/types.h>
 #include <linux/fcntl.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <net/snmp.h>
-#include <net/ip.h>
-#include <net/protocol.h>
+
 #include <net/icmp.h>
 #include <net/tcp.h>
-#include <net/arp.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/route.h>
-#include <linux/errno.h>
-#include <linux/timer.h>
-#include <asm/system.h>
-#include <asm/segment.h>
-#include <linux/mm.h>
-#include <net/checksum.h>
 
-/*
- *	The MSL timer is the 'normal' timer.
- */
- 
-#define reset_msl_timer(x,y,z)	reset_timer(x,y,z)
+#include <asm/segment.h>
 
-#define SEQ_TICK 3
 unsigned long seq_offset;
 struct tcp_mib	tcp_statistics;
 
-/*
- *	Cached last hit socket
- */
- 
-volatile unsigned long 	th_cache_saddr,th_cache_daddr;
-volatile unsigned short  th_cache_dport, th_cache_sport;
-volatile struct sock *th_cache_sk;
-
-void tcp_cache_zap(void)
-{
-	unsigned long flags;
-	save_flags(flags);
-	cli();
-	th_cache_saddr=0;
-	th_cache_daddr=0;
-	th_cache_dport=0;
-	th_cache_sport=0;
-	th_cache_sk=NULL;
-	restore_flags(flags);
-}
-
 static void tcp_close(struct sock *sk, int timeout);
-static void tcp_read_wakeup(struct sock *sk);
 
 /*
  *	The less said about this the better, but it works and will do for 1.2  (and 1.4 ;))
  */
 
-static struct wait_queue *master_select_wakeup;
-
-static __inline__ int min(unsigned int a, unsigned int b)
-{
-	if (a < b) 
-		return(a);
-	return(b);
-}
-
-#undef STATE_TRACE
-
-#ifdef STATE_TRACE
-static char *statename[]={
-	"Unused","Established","Syn Sent","Syn Recv",
-	"Fin Wait 1","Fin Wait 2","Time Wait", "Close",
-	"Close Wait","Last ACK","Listen","Closing"
-};
-#endif
-
-static __inline__ void tcp_set_state(struct sock *sk, int state)
-{
-	if(sk->state==TCP_ESTABLISHED)
-		tcp_statistics.TcpCurrEstab--;
-#ifdef STATE_TRACE
-	if(sk->debug)
-		printk("TCP sk=%p, State %s -> %s\n",sk, statename[sk->state],statename[state]);
-#endif	
-	/* This is a hack but it doesn't occur often and it's going to
-	   be a real        to fix nicely */
-	   
-	if(state==TCP_ESTABLISHED && sk->state==TCP_SYN_RECV)
-	{
-		wake_up_interruptible(&master_select_wakeup);
-	}
-	sk->state=state;
-	if(state==TCP_ESTABLISHED)
-		tcp_statistics.TcpCurrEstab++;
-	if(sk->state==TCP_CLOSE)
-		tcp_cache_zap();
-}
-
-/*
- *	This routine picks a TCP windows for a socket based on
- *	the following constraints
- *  
- *	1. The window can never be shrunk once it is offered (RFC 793)
- *	2. We limit memory per socket
- */
-
-
-static __inline__ unsigned short tcp_select_window(struct sock *sk)
-{
-	long free_space = sock_rspace(sk);	
-	long window = 0;
-
-	if (free_space > 1024)
-		free_space &= ~0x3FF;  /* make free space a multiple of 1024 */
- 
-	if(sk->window_clamp)
-		free_space = min(sk->window_clamp, free_space);
- 
-	/* 
-         * compute the actual window i.e. 
-         * old_window - received_bytes_on_that_win 
-	 */
-
-	if (sk->mss == 0)
-		sk->mss = sk->mtu;
-
-	window = sk->window - (sk->acked_seq - sk->lastwin_seq);
- 
-	if ( window < 0 ) {	
-		window = 0;
-		printk(KERN_DEBUG "TSW: win < 0 w=%d 1=%u 2=%u\n", 
-		       sk->window, sk->acked_seq, sk->lastwin_seq);
-	}
-
-        /*
-	 * RFC 1122:
-	 * "the suggested [SWS] avoidance algoritm for the receiver is to keep
-	 *  RECV.NEXT + RCV.WIN fixed until:
-	 *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
-	 * 
-	 * i.e. don't raise the right edge of the window until you can't raise
-	 * it MSS bytes
-	 */
-	
-	if ( (free_space - window) >= min(sk->mss, MAX_WINDOW/2) )
-		window += ((free_space - window) / sk->mss) * sk->mss;
-	
-	sk->window = window;
-	sk->lastwin_seq = sk->acked_seq;
-	
-	return sk->window;
-}
-
-/*
- *      This function returns the amount that we can raise the
- *      usable window.
- */
-
-static __inline__ unsigned short tcp_raise_window(struct sock *sk)
-{
-	long free_space = sock_rspace(sk);
-	long window = 0;
-
-	if (free_space > 1024)
-		free_space &= ~0x3FF; /* make free space a multiple of 1024 */
-
-	if(sk->window_clamp)
-		free_space = min(sk->window_clamp, free_space);
- 
-	/* 
-         * compute the actual window i.e. 
-         * old_window - received_bytes_on_that_win 
-	 */
-
-	window = sk->window - (sk->acked_seq - sk->lastwin_seq);
-
-	if (sk->mss == 0)
-		sk->mss = sk->mtu;
- 
-	if ( window < 0 ) {	
-		window = 0;
-		printk(KERN_DEBUG "TRW: win < 0 w=%d 1=%u 2=%u\n", 
-		       sk->window, sk->acked_seq, sk->lastwin_seq);
-	}
-	
-	if ( (free_space - window) >= min(sk->mss, MAX_WINDOW/2) )
-		return ((free_space - window) / sk->mss) * sk->mss;
-
-	return 0;
-}
+struct wait_queue *master_select_wakeup;
 
 /*
  *	Find someone to 'accept'. Must be called with
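
The two receive-window helpers deleted above, tcp_select_window() and tcp_raise_window(), carry the receiver-side SWS avoidance quoted from RFC 1122: the advertised window is only widened in whole-MSS steps, and only once it can be widened by at least min(MSS, MAX_WINDOW/2). Below is a minimal userspace sketch of that calculation; the function and variable names are illustrative and MAX_WINDOW is assumed to be 32767 rather than taken from the kernel headers.

    /*
     * Illustrative sketch of the window-selection rule in the removed
     * tcp_select_window(): round the free receive space down to a 1K
     * multiple, then widen the advertised window only in whole-MSS steps
     * and only when it can grow by at least min(mss, MAX_WINDOW/2).
     */
    #include <stdio.h>

    #define MAX_WINDOW 32767    /* assumed value, for the example only */

    static long select_window(long free_space, long cur_window, long mss)
    {
        long threshold = mss < MAX_WINDOW / 2 ? mss : MAX_WINDOW / 2;

        if (free_space > 1024)
            free_space &= ~0x3FF;           /* multiple of 1024 */

        if (free_space - cur_window >= threshold)
            cur_window += ((free_space - cur_window) / mss) * mss;

        return cur_window;
    }

    int main(void)
    {
        /* 9000 bytes free, 1000 currently advertised, 1460-byte MSS */
        printf("new window = %ld\n", select_window(9000, 1000, 1460));
        return 0;
    }
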
@@ -672,585 +491,141 @@
  *	Enter the time wait state. 
  */
 
-static void tcp_time_wait(struct sock *sk)
+void tcp_time_wait(struct sock *sk)
 {
 	tcp_set_state(sk,TCP_TIME_WAIT);
 	sk->shutdown = SHUTDOWN_MASK;
 	if (!sk->dead)
 		sk->state_change(sk);
-	reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
+	tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 }
 
+
 /*
- *	A socket has timed out on its send queue and wants to do a
- *	little retransmitting. Currently this means TCP.
+ * This routine is called by the ICMP module when it gets some
+ * sort of error condition.  If err < 0 then the socket should
+ * be closed and the error returned to the user.  If err > 0
+ * it's just the icmp type << 8 | icmp code.  After adjustment
+ * header points to the first 8 bytes of the tcp header.  We need
+ * to find the appropriate port.
  */
 
-void tcp_do_retransmit(struct sock *sk, int all)
+void tcp_err(int type, int code, unsigned char *header, __u32 daddr,
+	__u32 saddr, struct inet_protocol *protocol)
 {
-	struct sk_buff * skb;
-	struct proto *prot;
-	struct device *dev;
-	int ct=0;
-	struct rtable *rt;
-
-	prot = sk->prot;
-	skb = sk->send_head;
+	struct tcphdr *th = (struct tcphdr *)header;
+	struct sock *sk;
+	
+	/*
+	 *	This one is _WRONG_. FIXME urgently.
+	 */
+#ifndef CONFIG_NO_PATH_MTU_DISCOVERY	 
+	struct iphdr *iph=(struct iphdr *)(header-sizeof(struct iphdr));
+#endif  
+	th =(struct tcphdr *)header;
+	sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr);
 
-	while (skb != NULL)
+	if (sk == NULL) 
+		return;
+  
+	if (type == ICMP_SOURCE_QUENCH) 
 	{
-		struct tcphdr *th;
-		struct iphdr *iph;
-		int size;
-
-		dev = skb->dev;
-		IS_SKB(skb);
-		skb->when = jiffies;
-		
-		/* dl1bke 960201 - @%$$! Hope this cures strange race conditions    */
-		/*		   with AX.25 mode VC. (esp. DAMA)		    */
-		/*		   if the buffer is locked we should not retransmit */
-		/*		   anyway, so we don't need all the fuss to prepare */
-		/*		   the buffer in this case. 			    */
-		/*		   (the skb_pull() changes skb->data while we may   */
-		/*		   actually try to send the data. Ough. A side	    */
-		/*		   effect is that we'll send some unnecessary data, */
-		/*		   but the alternative is desastrous...		    */
-		
-		if (skb_device_locked(skb))
-			break;
-
-		/*
-		 *	Discard the surplus MAC header
-		 */
-		 
-		skb_pull(skb,((unsigned char *)skb->ip_hdr)-skb->data);
-
-		/*
-		 * In general it's OK just to use the old packet.  However we
-		 * need to use the current ack and window fields.  Urg and
-		 * urg_ptr could possibly stand to be updated as well, but we
-		 * don't keep the necessary data.  That shouldn't be a problem,
-		 * if the other end is doing the right thing.  Since we're
-		 * changing the packet, we have to issue a new IP identifier.
-		 */
-
-		iph = (struct iphdr *)skb->data;
-		th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
-		size = ntohs(iph->tot_len) - (iph->ihl<<2);
-		
-		/*
-		 *	Note: We ought to check for window limits here but
-		 *	currently this is done (less efficiently) elsewhere.
-		 */
-
 		/*
-		 *	Put a MAC header back on (may cause ARPing)
+		 * FIXME:
+		 * For now we will just trigger a linear backoff.
+		 * The slow start code should cause a real backoff here.
 		 */
-		 
-	        {
-			/* ANK: UGLY, but the bug, that was here, should be fixed.
-			 */
-			struct options *  opt = (struct options*)skb->proto_priv;
-			rt = ip_check_route(&sk->ip_route_cache, opt->srr?opt->faddr:iph->daddr, skb->localroute);
-	        }
-
-		iph->id = htons(ip_id_count++);
-#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
-		if (rt && ntohs(iph->tot_len) > rt->rt_mtu)
-			iph->frag_off &= ~htons(IP_DF);
-#endif
-		ip_send_check(iph);
-			
-		if (rt==NULL)	/* Deep poo */
-		{
-			if(skb->sk)
-			{
-				skb->sk->err_soft=ENETUNREACH;
-				skb->sk->error_report(skb->sk);
-			}
-		}
-		else
-		{
-			dev=rt->rt_dev;
-			skb->raddr=rt->rt_gateway;
-			skb->dev=dev;
-			skb->arp=1;
-			if (rt->rt_hh)
-			{
-				memcpy(skb_push(skb,dev->hard_header_len),rt->rt_hh->hh_data,dev->hard_header_len);
-				if (!rt->rt_hh->hh_uptodate)
-				{
-					skb->arp = 0;
-#if RT_CACHE_DEBUG >= 2
-					printk("tcp_do_retransmit: hh miss %08x via %08x\n", iph->daddr, rt->rt_gateway);
-#endif
-				}
-			}
-			else if (dev->hard_header)
-			{
-				if(dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, skb->len)<0)
-					skb->arp=0;
-			}
-		
-			/*
-			 *	This is not the right way to handle this. We have to
-			 *	issue an up to date window and ack report with this 
-			 *	retransmit to keep the odd buggy tcp that relies on 
-			 *	the fact BSD does this happy. 
-			 *	We don't however need to recalculate the entire 
-			 *	checksum, so someone wanting a small problem to play
-			 *	with might like to implement RFC1141/RFC1624 and speed
-			 *	this up by avoiding a full checksum.
-			 */
-		 
-			th->ack_seq = htonl(sk->acked_seq);
-			sk->ack_backlog = 0;
-			sk->bytes_rcv = 0;
-			th->window = ntohs(tcp_select_window(sk));
-			tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
-		
-			/*
-			 *	If the interface is (still) up and running, kick it.
-			 */
+		if (sk->cong_window > 4)
+			sk->cong_window--;
+		return;
+	}
 	
-			if (dev->flags & IFF_UP)
-			{
-				/*
-				 *	If the packet is still being sent by the device/protocol
-				 *	below then don't retransmit. This is both needed, and good -
-				 *	especially with connected mode AX.25 where it stops resends
-				 *	occurring of an as yet unsent anyway frame!
-				 *	We still add up the counts as the round trip time wants
-				 *	adjusting.
-				 */
-				if (sk && !skb_device_locked(skb))
-				{
-					/* Remove it from any existing driver queue first! */
-					skb_unlink(skb);
-					/* Now queue it */
-					ip_statistics.IpOutRequests++;
-					dev_queue_xmit(skb, dev, sk->priority);
-				}
-			}
-		}
-		
-		/*
-		 *	Count retransmissions
-		 */
-		 
-		ct++;
-		sk->prot->retransmits ++;
-		tcp_statistics.TcpRetransSegs++;
-		
+	if (type == ICMP_PARAMETERPROB)
+	{
+		sk->err=EPROTO;
+		sk->error_report(sk);
+	}
 
+#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
+	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
+	{
+		struct rtable * rt;
 		/*
-		 *	Only one retransmit requested.
+		 * Ugly trick to pass MTU to protocol layer.
+		 * Really we should add argument "info" to error handler.
 		 */
-	
-		if (!all)
-			break;
+		unsigned short new_mtu = ntohs(iph->id);
 
-		/*
-		 *	This should cut it off before we send too many packets.
-		 */
+		if ((rt = sk->ip_route_cache) != NULL)
+			if (rt->rt_mtu > new_mtu)
+				rt->rt_mtu = new_mtu;
 
-		if (ct >= sk->cong_window)
-			break;
-		skb = skb->link3;
-	}
-}
+		if (sk->mtu > new_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr)
+			&& new_mtu > sizeof(struct iphdr)+sizeof(struct tcphdr))
+			sk->mtu = new_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
 
-/*
- *	Reset the retransmission timer
- */
- 
-static void reset_xmit_timer(struct sock *sk, int why, unsigned long when)
-{
-	del_timer(&sk->retransmit_timer);
-	sk->ip_xmit_timeout = why;
-	if((long)when < 0)
-	{
-		when=3;
-		printk("Error: Negative timer in xmit_timer\n");
+		return;
 	}
-	sk->retransmit_timer.expires=jiffies+when;
-	add_timer(&sk->retransmit_timer);
-}
-
-/*
- * 	This is the normal code called for timeouts.  It does the retransmission
- * 	and then does backoff.  tcp_do_retransmit is separated out because
- * 	tcp_ack needs to send stuff from the retransmit queue without
- * 	initiating a backoff.
- */
-
-
-void tcp_retransmit_time(struct sock *sk, int all)
-{
-	tcp_do_retransmit(sk, all);
+#endif
 
 	/*
-	 * Increase the timeout each time we retransmit.  Note that
-	 * we do not increase the rtt estimate.  rto is initialized
-	 * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
-	 * that doubling rto each time is the least we can get away with.
-	 * In KA9Q, Karn uses this for the first few times, and then
-	 * goes to quadratic.  netBSD doubles, but only goes up to *64,
-	 * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
-	 * defined in the protocol as the maximum possible RTT.  I guess
-	 * we'll have to use something other than TCP to talk to the
-	 * University of Mars.
-	 *
-	 * PAWS allows us longer timeouts and large windows, so once
-	 * implemented ftp to mars will work nicely. We will have to fix
-	 * the 120 second clamps though!
+	 * If we've already connected we will keep trying
+	 * until we time out, or the user gives up.
 	 */
 
-	sk->retransmits++;
-	sk->prot->retransmits++;
-	sk->backoff++;
-	sk->rto = min(sk->rto << 1, 120*HZ);
-	reset_xmit_timer(sk, TIME_WRITE, sk->rto);
+	if (code < 13)
+	{	
+		if(icmp_err_convert[code].fatal || sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV)
+		{
+			sk->err = icmp_err_convert[code].errno;
+			if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) 
+			{
+				tcp_statistics.TcpAttemptFails++;
+				tcp_set_state(sk,TCP_CLOSE);
+				sk->error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
+			}
+		}
+		else	/* Only an error on timeout */
+			sk->err_soft = icmp_err_convert[code].errno;
+	}
 }
 
 
 /*
- *	A timer event has trigger a tcp retransmit timeout. The
- *	socket xmit queue is ready and set up to send. Because
- *	the ack receive code keeps the queue straight we do
- *	nothing clever here.
+ *	Walk down the receive queue counting readable data until we hit the end or we find a gap
+ *	in the received data queue (ie a frame missing that needs sending to us). Not
+ *	sorting using two queues as data arrives makes life so much harder.
  */
 
-static void tcp_retransmit(struct sock *sk, int all)
+static int tcp_readable(struct sock *sk)
 {
-	if (all) 
-	{
-		tcp_retransmit_time(sk, all);
-		return;
-	}
-
-	sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
-	/* sk->ssthresh in theory can be zero.  I guess that's OK */
-	sk->cong_count = 0;
-
-	sk->cong_window = 1;
-
-	/* Do the actual retransmit. */
-	tcp_retransmit_time(sk, all);
-}
+	unsigned long counted;
+	unsigned long amount;
+	struct sk_buff *skb;
+	int sum;
+	unsigned long flags;
 
-/*
- *	A write timeout has occurred. Process the after effects.
- */
+	if(sk && sk->debug)
+	  	printk("tcp_readable: %p - ",sk);
 
-static int tcp_write_timeout(struct sock *sk)
-{
-	/*
-	 *	Look for a 'soft' timeout.
-	 */
-	if ((sk->state == TCP_ESTABLISHED && sk->retransmits && !(sk->retransmits & 7))
-		|| (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR1)) 
+	save_flags(flags);
+	cli();
+	if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
 	{
-		/*
-		 *	Attempt to recover if arp has changed (unlikely!) or
-		 *	a route has shifted (not supported prior to 1.3).
-		 */
-		ip_rt_advice(&sk->ip_route_cache, 0);
+		restore_flags(flags);
+	  	if(sk && sk->debug) 
+	  		printk("empty\n");
+	  	return(0);
 	}
-	
-	/*
-	 *	Have we tried to SYN too many times (repent repent 8))
+  
+	counted = sk->copied_seq;	/* Where we are at the moment */
+	amount = 0;
+  
+	/* 
+	 *	Do until a push or until we are out of data. 
 	 */
 	 
-	if(sk->retransmits > TCP_SYN_RETRIES && sk->state==TCP_SYN_SENT)
-	{
-		if(sk->err_soft)
-			sk->err=sk->err_soft;
-		else
-			sk->err=ETIMEDOUT;
-		sk->error_report(sk);
-		del_timer(&sk->retransmit_timer);
-		tcp_statistics.TcpAttemptFails++;	/* Is this right ??? - FIXME - */
-		tcp_set_state(sk,TCP_CLOSE);
-		/* Don't FIN, we got nothing back */
-		release_sock(sk);
-		return 0;
-	}
-	/*
-	 *	Has it gone just too far ?
-	 */
-	if (sk->retransmits > TCP_RETR2) 
-	{
-		if(sk->err_soft)
-			sk->err = sk->err_soft;
-		else
-			sk->err = ETIMEDOUT;
-		sk->error_report(sk);
-		del_timer(&sk->retransmit_timer);
-		/*
-		 *	Time wait the socket 
-		 */
-		if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 || sk->state == TCP_CLOSING ) 
-		{
-			tcp_set_state(sk,TCP_TIME_WAIT);
-			reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
-		}
-		else
-		{
-			/*
-			 *	Clean up time.
-			 */
-			tcp_set_state(sk, TCP_CLOSE);
-			release_sock(sk);
-			return 0;
-		}
-	}
-	return 1;
-}
-
-/*
- *	The TCP retransmit timer. This lacks a few small details.
- *
- *	1. 	An initial rtt timeout on the probe0 should cause what we can
- *		of the first write queue buffer to be split and sent.
- *	2.	On a 'major timeout' as defined by RFC1122 we shouldn't report
- *		ETIMEDOUT if we know an additional 'soft' error caused this.
- *		tcp_err should save a 'soft error' for us.
- */
-
-static void retransmit_timer(unsigned long data)
-{
-	struct sock *sk = (struct sock*)data;
-	int why = sk->ip_xmit_timeout;
-
-	/*
-	 *	We are reset. We will send no more retransmits.
-	 */
-	 
-	if(sk->zapped)
-		return;
-		
-	/* 
-	 *	Only process if socket is not in use
-	 */
-
-	cli();
-	if (sk->inuse || in_bh) 
-	{
-		/* Try again in 1 second */
-		sk->retransmit_timer.expires = jiffies+HZ;
-		add_timer(&sk->retransmit_timer);
-		sti();
-		return;
-	}
-
-	sk->inuse = 1;
-	sti();
-
-
-	if (sk->ack_backlog && !sk->dead) 
-		sk->data_ready(sk,0);
-
-	/* Now we need to figure out why the socket was on the timer. */
-
-	switch (why) 
-	{
-		/* Window probing */
-		case TIME_PROBE0:
-			tcp_send_probe0(sk);
-			tcp_write_timeout(sk);
-			break;
-		/* Retransmitting */
-		case TIME_WRITE:
-			/* It could be we got here because we needed to send an ack.
-			 * So we need to check for that.
-			 */
-		{
-			struct sk_buff *skb;
-			unsigned long flags;
-
-			save_flags(flags);
-			cli();
-			skb = sk->send_head;
-			if (!skb) 
-			{
-				if (sk->ack_backlog)
-					tcp_read_wakeup(sk);
-				restore_flags(flags);
-			} 
-			else 
-			{
-				/*
-				 *	Kicked by a delayed ack. Reset timer
-				 *	correctly now
-				 */
-				if (jiffies < skb->when + sk->rto) 
-				{
-					if (sk->ack_backlog)
-						tcp_read_wakeup(sk);
-					reset_xmit_timer (sk, TIME_WRITE, skb->when + sk->rto - jiffies);
-					restore_flags(flags);
-					break;
-				}
-				restore_flags(flags);
-				/*
-				 *	Retransmission
-				 */
-				sk->retransmits++;
-				sk->prot->retransmits++;
-				sk->prot->retransmit (sk, 0);
-				tcp_write_timeout(sk);
-			}
-			break;
-		}
-		/* Sending Keepalives */
-		case TIME_KEEPOPEN:
-			/* 
-			 * this reset_timer() call is a hack, this is not
-			 * how KEEPOPEN is supposed to work.
-			 */
-			reset_xmit_timer (sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
-
-			/* Send something to keep the connection open. */
-			if (sk->prot->write_wakeup)
-				  sk->prot->write_wakeup (sk);
-			sk->retransmits++;
-			sk->prot->retransmits++;
-			tcp_write_timeout(sk);
-			break;
-		default:
-			printk ("rexmit_timer: timer expired - reason unknown\n");
-			break;
-	}
-	release_sock(sk);
-}
-
-/*
- * This routine is called by the ICMP module when it gets some
- * sort of error condition.  If err < 0 then the socket should
- * be closed and the error returned to the user.  If err > 0
- * it's just the icmp type << 8 | icmp code.  After adjustment
- * header points to the first 8 bytes of the tcp header.  We need
- * to find the appropriate port.
- */
-
-void tcp_err(int type, int code, unsigned char *header, __u32 daddr,
-	__u32 saddr, struct inet_protocol *protocol)
-{
-	struct tcphdr *th = (struct tcphdr *)header;
-	struct sock *sk;
-	
-	/*
-	 *	This one is _WRONG_. FIXME urgently.
-	 */
-#ifndef CONFIG_NO_PATH_MTU_DISCOVERY	 
-	struct iphdr *iph=(struct iphdr *)(header-sizeof(struct iphdr));
-#endif  
-	th =(struct tcphdr *)header;
-	sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr);
-
-	if (sk == NULL) 
-		return;
-  
-	if (type == ICMP_SOURCE_QUENCH) 
-	{
-		/*
-		 * FIXME:
-		 * For now we will just trigger a linear backoff.
-		 * The slow start code should cause a real backoff here.
-		 */
-		if (sk->cong_window > 4)
-			sk->cong_window--;
-		return;
-	}
-	
-	if (type == ICMP_PARAMETERPROB)
-	{
-		sk->err=EPROTO;
-		sk->error_report(sk);
-	}
-
-#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
-	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
-	{
-		struct rtable * rt;
-		/*
-		 * Ugly trick to pass MTU to protocol layer.
-		 * Really we should add argument "info" to error handler.
-		 */
-		unsigned short new_mtu = ntohs(iph->id);
-
-		if ((rt = sk->ip_route_cache) != NULL)
-			if (rt->rt_mtu > new_mtu)
-				rt->rt_mtu = new_mtu;
-
-		if (sk->mtu > new_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr)
-			&& new_mtu > sizeof(struct iphdr)+sizeof(struct tcphdr))
-			sk->mtu = new_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
-
-		return;
-	}
-#endif
-
-	/*
-	 * If we've already connected we will keep trying
-	 * until we time out, or the user gives up.
-	 */
-
-	if (code < 13)
-	{	
-		if(icmp_err_convert[code].fatal || sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV)
-		{
-			sk->err = icmp_err_convert[code].errno;
-			if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) 
-			{
-				tcp_statistics.TcpAttemptFails++;
-				tcp_set_state(sk,TCP_CLOSE);
-				sk->error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
-			}
-		}
-		else	/* Only an error on timeout */
-			sk->err_soft = icmp_err_convert[code].errno;
-	}
-}
-
-
-/*
- *	Walk down the receive queue counting readable data until we hit the end or we find a gap
- *	in the received data queue (ie a frame missing that needs sending to us). Not
- *	sorting using two queues as data arrives makes life so much harder.
- */
-
-static int tcp_readable(struct sock *sk)
-{
-	unsigned long counted;
-	unsigned long amount;
-	struct sk_buff *skb;
-	int sum;
-	unsigned long flags;
-
-	if(sk && sk->debug)
-	  	printk("tcp_readable: %p - ",sk);
-
-	save_flags(flags);
-	cli();
-	if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
-	{
-		restore_flags(flags);
-	  	if(sk && sk->debug) 
-	  		printk("empty\n");
-	  	return(0);
-	}
-  
-	counted = sk->copied_seq;	/* Where we are at the moment */
-	amount = 0;
-  
-	/* 
-	 *	Do until a push or until we are out of data. 
-	 */
-	 
-	do 
+	do 
 	{
 		if (before(counted, skb->seq))	 	/* Found a hole so stops here */
 			break;
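
Earlier in this hunk the rewritten tcp_err() recovers the next-hop MTU from the id field of the quoted IP header (the "ugly trick" its comment apologises for) and uses it to shrink both the cached route MTU and sk->mtu, which effectively holds the MSS. A standalone sketch of that clamping arithmetic follows; it assumes plain 20-byte IP and TCP headers and uses invented names.

    /*
     * Userspace sketch of the MSS clamp applied when an ICMP "fragmentation
     * needed" error reports a smaller path MTU.  IP_HDR and TCP_HDR stand in
     * for sizeof(struct iphdr) and sizeof(struct tcphdr).
     */
    #include <stdio.h>

    #define IP_HDR  20
    #define TCP_HDR 20

    static unsigned clamp_mss(unsigned cur_mss, unsigned new_mtu)
    {
        /* Only shrink, and only if the new MTU leaves room for the headers. */
        if (cur_mss > new_mtu - IP_HDR - TCP_HDR &&
            new_mtu > IP_HDR + TCP_HDR)
            cur_mss = new_mtu - IP_HDR - TCP_HDR;
        return cur_mss;
    }

    int main(void)
    {
        /* Ethernet-sized MSS, a router reports a 576-byte path MTU. */
        printf("mss = %u\n", clamp_mss(1460, 576));     /* prints 536 */
        return 0;
    }
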
@@ -1428,12 +803,6 @@
  *	Jorge Cwik <jorge@laser.satlink.net>
  */
  
-unsigned short tcp_check(struct tcphdr *th, int len,
-	  unsigned long saddr, unsigned long daddr, unsigned long base)
-{     
-	return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
-}
-
 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
 		unsigned long daddr, int len, struct sock *sk)
 {
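
The hunk above drops tcp_check(), a one-line wrapper around csum_tcpudp_magic(). What that routine finishes is the ordinary Internet checksum taken over a pseudo-header (source address, destination address, protocol 6, TCP length) followed by the segment itself. The version below is an illustrative userspace rendering of the whole calculation, not the kernel's optimised code, and keeps everything in host byte order for simplicity.

    /* RFC 1071 style ones-complement checksum over the TCP pseudo-header
     * plus segment, written out longhand for illustration. */
    #include <stdint.h>
    #include <stdio.h>

    static uint16_t csum_fold(uint32_t sum)
    {
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
    }

    static uint16_t tcp_checksum(uint32_t saddr, uint32_t daddr,
                                 const uint8_t *seg, uint16_t len)
    {
        uint32_t sum = 0;
        unsigned i;

        /* Pseudo-header: source, destination, protocol (6), TCP length. */
        sum += (saddr >> 16) + (saddr & 0xffff);
        sum += (daddr >> 16) + (daddr & 0xffff);
        sum += 6 + len;

        for (i = 0; i + 1 < len; i += 2)
            sum += (uint32_t)((seg[i] << 8) | seg[i + 1]);
        if (len & 1)
            sum += (uint32_t)(seg[len - 1] << 8);

        return csum_fold(sum);
    }

    int main(void)
    {
        uint8_t seg[20] = { 0 };        /* a zeroed 20-byte TCP header */
        printf("%04x\n", tcp_checksum(0xc0a80001, 0xc0a80002, seg, sizeof seg));
        return 0;
    }
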
@@ -1443,357 +812,56 @@
 	return;
 }
 
-/*
- *	This is the main buffer sending routine. We queue the buffer
- *	having checked it is sane seeming.
+
+/* 
+ *	This routine builds a generic TCP header. 
  */
  
-static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
+extern __inline int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
 {
-	int size;
-	struct tcphdr * th = skb->h.th;
 
-	/*
-	 *	length of packet (not counting length of pre-tcp headers) 
-	 */
-	 
-	size = skb->len - ((unsigned char *) th - skb->data);
+	memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
+	th->seq = htonl(sk->write_seq);
+	th->psh =(push == 0) ? 1 : 0;
+	th->doff = sizeof(*th)/4;
+	th->ack = 1;
+	th->fin = 0;
+	sk->ack_backlog = 0;
+	sk->bytes_rcv = 0;
+	sk->ack_timed = 0;
+	th->ack_seq = htonl(sk->acked_seq);
+	sk->window = tcp_select_window(sk);
+	th->window = htons(sk->window);
 
-	/*
-	 *	Sanity check it.. 
-	 */
-	 
-	if (size < sizeof(struct tcphdr) || size > skb->len) 
-	{
-		printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
-			skb, skb->data, th, skb->len);
-		kfree_skb(skb, FREE_WRITE);
-		return;
-	}
+	return(sizeof(*th));
+}
+
+/*
+ *	This routine copies from a user buffer into a socket,
+ *	and starts the transmit system.
+ */
 
+static int tcp_sendmsg(struct sock *sk, struct msghdr *msg,
+	  int len, int nonblock, int flags)
+{
+	int copied = 0;
+	int copy;
+	int tmp;
+	int seglen;
+	int iovct=0;
+	struct sk_buff *skb;
+	struct sk_buff *send_tmp;
+	struct proto *prot;
+	struct device *dev = NULL;
+	unsigned char *from;
+	
 	/*
-	 *	If we have queued a header size packet.. (these crash a few
-	 *	tcp stacks if ack is not set)
+	 *	Do sanity checking for sendmsg/sendto/send
 	 */
 	 
-	if (size == sizeof(struct tcphdr)) 
-	{
-		/* If it's got a syn or fin it's notionally included in the size..*/
-		if(!th->syn && !th->fin) 
-		{
-			printk("tcp_send_skb: attempt to queue a bogon.\n");
-			kfree_skb(skb,FREE_WRITE);
-			return;
-		}
-	}
-
-	/*
-	 *	Actual processing.
-	 */
-	 
-	tcp_statistics.TcpOutSegs++;  
-	skb->seq = ntohl(th->seq);
-	skb->end_seq = skb->seq + size - 4*th->doff;
-	
-	/*
-	 *	We must queue if
-	 *
-	 *	a) The right edge of this frame exceeds the window
-	 *	b) We are retransmitting (Nagle's rule)
-	 *	c) We have too many packets 'in flight'
-	 */
-	 
-	if (after(skb->end_seq, sk->window_seq) ||
-	    (sk->retransmits && sk->ip_xmit_timeout == TIME_WRITE) ||
-	     sk->packets_out >= sk->cong_window) 
-	{
-		/* checksum will be supplied by tcp_write_xmit.  So
-		 * we shouldn't need to set it at all.  I'm being paranoid */
-		th->check = 0;
-		if (skb->next != NULL) 
-		{
-			printk("tcp_send_partial: next != NULL\n");
-			skb_unlink(skb);
-		}
-		skb_queue_tail(&sk->write_queue, skb);
-		
-		/*
-		 *	If we don't fit we have to start the zero window
-		 *	probes. This is broken - we really need to do a partial
-		 *	send _first_ (This is what causes the Cisco and PC/TCP
-		 *	grief).
-		 */
-		 
-		if (before(sk->window_seq, sk->write_queue.next->end_seq) &&
-		    sk->send_head == NULL && sk->ack_backlog == 0)
-			reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
-	} 
-	else 
-	{
-		/*
-		 *	This is going straight out
-		 */
-		 
-		th->ack_seq = htonl(sk->acked_seq);
-		th->window = htons(tcp_select_window(sk));
-
-		tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
-
-		sk->sent_seq = sk->write_seq;
-		
-		/*
-		 *	This is mad. The tcp retransmit queue is put together
-		 *	by the ip layer. This causes half the problems with
-		 *	unroutable FIN's and other things.
-		 */
-		 
-		sk->prot->queue_xmit(sk, skb->dev, skb, 0);
-		
-		
-		sk->ack_backlog = 0;
-		sk->bytes_rcv = 0;
-
-		/*
-		 *	Set for next retransmit based on expected ACK time.
-		 *	FIXME: We set this every time which means our 
-		 *	retransmits are really about a window behind.
-		 */
-
-		reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-	}
-}
-
-/*
- *	Locking problems lead us to a messy situation where we can have
- *	multiple partially complete buffers queued up. This is really bad
- *	as we don't want to be sending partial buffers. Fix this with
- *	a semaphore or similar to lock tcp_write per socket.
- *
- *	These routines are pretty self descriptive.
- */
- 
-struct sk_buff * tcp_dequeue_partial(struct sock * sk)
-{
-	struct sk_buff * skb;
-	unsigned long flags;
-
-	save_flags(flags);
-	cli();
-	skb = sk->partial;
-	if (skb) {
-		sk->partial = NULL;
-		del_timer(&sk->partial_timer);
-	}
-	restore_flags(flags);
-	return skb;
-}
-
-/*
- *	Empty the partial queue
- */
- 
-static void tcp_send_partial(struct sock *sk)
-{
-	struct sk_buff *skb;
-
-	if (sk == NULL)
-		return;
-	while ((skb = tcp_dequeue_partial(sk)) != NULL)
-		tcp_send_skb(sk, skb);
-}
-
-/*
- *	Queue a partial frame
- */
- 
-void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
-{
-	struct sk_buff * tmp;
-	unsigned long flags;
-
-	save_flags(flags);
-	cli();
-	tmp = sk->partial;
-	if (tmp)
-		del_timer(&sk->partial_timer);
-	sk->partial = skb;
-	init_timer(&sk->partial_timer);
-	/*
-	 *	Wait up to 1 second for the buffer to fill.
-	 */
-	sk->partial_timer.expires = jiffies+HZ;
-	sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
-	sk->partial_timer.data = (unsigned long) sk;
-	add_timer(&sk->partial_timer);
-	restore_flags(flags);
-	if (tmp)
-		tcp_send_skb(sk, tmp);
-}
-
-
-
-/*
- *	This routine sends an ack and also updates the window. 
- */
- 
-static void tcp_send_ack(u32 sequence, u32 ack,
-	     struct sock *sk,
-	     struct tcphdr *th, unsigned long daddr)
-{
-	struct sk_buff *buff;
-	struct tcphdr *t1;
-	struct device *dev = NULL;
-	int tmp;
-
-	if(sk->zapped)
-		return;		/* We have been reset, we may not send again */
-		
-	/*
-	 * We need to grab some memory, and put together an ack,
-	 * and then put it into the queue to be sent.
-	 */
-
-	buff = sock_wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
-	if (buff == NULL) 
-	{
-		/* 
-		 *	Force it to send an ack. We don't have to do this
-		 *	(ACK is unreliable) but it's much better use of 
-		 *	bandwidth on slow links to send a spare ack than
-		 *	resend packets. 
-		 */
-		 
-		sk->ack_backlog++;
-		if (sk->ip_xmit_timeout != TIME_WRITE && tcp_connected(sk->state)) 
-		{
-			reset_xmit_timer(sk, TIME_WRITE, HZ);
-		}
-		return;
-	}
-
-	/*
-	 *	Assemble a suitable TCP frame
-	 */
-	 
-	buff->sk = sk;
-	buff->localroute = sk->localroute;
-
-	/* 
-	 *	Put in the IP header and routing stuff. 
-	 */
-	 
-	tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
-				IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
-	if (tmp < 0) 
-	{
-  		buff->free = 1;
-		sock_wfree(sk, buff);
-		return;
-	}
-	t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
-
-	memcpy(t1, th, sizeof(*t1));
-
-	/*
-	 *	Swap the send and the receive. 
-	 */
-	 
-	t1->dest = th->source;
-	t1->source = th->dest;
-	t1->seq = ntohl(sequence);
-	t1->ack = 1;
-	sk->window = tcp_select_window(sk);
-	t1->window = ntohs(sk->window);
-	t1->res1 = 0;
-	t1->res2 = 0;
-	t1->rst = 0;
-	t1->urg = 0;
-	t1->syn = 0;
-	t1->psh = 0;
-	t1->fin = 0;
-	
-	/*
-	 *	If we have nothing queued for transmit and the transmit timer
-	 *	is on we are just doing an ACK timeout and need to switch
-	 *	to a keepalive.
-	 */
-	 
-	if (ack == sk->acked_seq) {	       	  
-		sk->ack_backlog = 0;
-		sk->bytes_rcv = 0;
-		sk->ack_timed = 0;
-
-		if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
-		    && sk->ip_xmit_timeout == TIME_WRITE) 	
-		  if(sk->keepopen) 
-		    reset_xmit_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
-		  else 
-		    delete_timer(sk);	         		
-	}
-
-  	/*
-  	 *	Fill in the packet and send it
-  	 */
-  	 
-  	t1->ack_seq = htonl(ack);
-  	t1->doff = sizeof(*t1)/4;
-  	tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
-  	if (sk->debug)
-  		 printk("\rtcp_ack: seq %x ack %x\n", sequence, ack);
-  	tcp_statistics.TcpOutSegs++;
-  	sk->prot->queue_xmit(sk, dev, buff, 1);
-}
-
-
-/* 
- *	This routine builds a generic TCP header. 
- */
- 
-extern __inline int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
-{
-
-	memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
-	th->seq = htonl(sk->write_seq);
-	th->psh =(push == 0) ? 1 : 0;
-	th->doff = sizeof(*th)/4;
-	th->ack = 1;
-	th->fin = 0;
-	sk->ack_backlog = 0;
-	sk->bytes_rcv = 0;
-	sk->ack_timed = 0;
-	th->ack_seq = htonl(sk->acked_seq);
-	sk->window = tcp_select_window(sk);
-	th->window = htons(sk->window);
-
-	return(sizeof(*th));
-}
-
-/*
- *	This routine copies from a user buffer into a socket,
- *	and starts the transmit system.
- */
-
-static int tcp_sendmsg(struct sock *sk, struct msghdr *msg,
-	  int len, int nonblock, int flags)
-{
-	int copied = 0;
-	int copy;
-	int tmp;
-	int seglen;
-	int iovct=0;
-	struct sk_buff *skb;
-	struct sk_buff *send_tmp;
-	struct proto *prot;
-	struct device *dev = NULL;
-	unsigned char *from;
-	
-	/*
-	 *	Do sanity checking for sendmsg/sendto/send
-	 */
-	 
-	if (flags & ~(MSG_OOB|MSG_DONTROUTE))
-		return -EINVAL;
-	if (msg->msg_name)
+	if (flags & ~(MSG_OOB|MSG_DONTROUTE))
+		return -EINVAL;
+	if (msg->msg_name)
 	{
 		struct sockaddr_in *addr=(struct sockaddr_in *)msg->msg_name;
 		if(sk->state == TCP_CLOSE)
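
Among the code removed in this hunk is tcp_send_skb(), whose comment spells out when a freshly built segment is queued on the write queue instead of transmitted: when its right edge lies beyond the peer's window, when a retransmit is outstanding (Nagle's rule), or when the congestion window is already full. The sketch below restates that three-way test with the sequence comparison written the way the kernel's after() macro behaves; all parameters are illustrative.

    #include <stdio.h>

    typedef unsigned int u32;

    /* Wrap-safe "a is after b" comparison, in the style of the kernel macro. */
    static int seq_after(u32 a, u32 b)
    {
        return (int)(a - b) > 0;
    }

    static int must_queue(u32 end_seq, u32 window_seq,
                          int retransmitting, int packets_out, int cong_window)
    {
        return seq_after(end_seq, window_seq) ||   /* past the offered window */
               retransmitting ||                   /* Nagle: retransmit pending */
               packets_out >= cong_window;         /* congestion window full */
    }

    int main(void)
    {
        printf("%d\n", must_queue(1000, 900, 0, 1, 4)); /* 1: beyond the window */
        printf("%d\n", must_queue(800, 900, 0, 4, 4));  /* 1: cwnd full */
        printf("%d\n", must_queue(800, 900, 0, 1, 4));  /* 0: send now */
        return 0;
    }
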
@@ -2027,18 +1095,10 @@
 					return(-EAGAIN);
 				}
 
-				/*
-				 *	FIXME: here is another race condition. 
-				 */
-
-				tmp = sk->wmem_alloc;
 				release_sock(sk);
 				cli();
-				/*
-				 *	Again we will try to avoid it. 
-				 */
-				if (tmp <= sk->wmem_alloc &&
-					  (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
+				if (sk->wmem_alloc*2 > sk->sndbuf &&
+				    (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
 					&& sk->err == 0) 
 				{
 					sk->socket->flags &= ~SO_NOSPACE;
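
The change just above replaces the racy "snapshot wmem_alloc, sleep if it has not shrunk" test in tcp_sendmsg() with a direct condition: the writer keeps waiting while more than half of the socket's send buffer is still committed to queued data. The predicate on its own, with made-up buffer sizes:

    #include <stdio.h>

    /* Sleep while over half the send buffer is still allocated. */
    static int still_blocked(unsigned long wmem_alloc, unsigned long sndbuf)
    {
        return wmem_alloc * 2 > sndbuf;
    }

    int main(void)
    {
        printf("%d\n", still_blocked(24 * 1024, 32 * 1024));  /* 1: keep waiting */
        printf("%d\n", still_blocked(12 * 1024, 32 * 1024));  /* 0: wake the writer */
        return 0;
    }
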
@@ -2142,7 +1202,7 @@
  *      This is called for delayed acks also.
  */
  
-static void tcp_read_wakeup(struct sock *sk)
+void tcp_read_wakeup(struct sock *sk)
 {
 	int tmp;
 	struct device *dev = NULL;
@@ -2174,7 +1234,7 @@
 	if (buff == NULL) 
 	{
 		/* Try again real soon. */
-		reset_xmit_timer(sk, TIME_WRITE, HZ);
+		tcp_reset_xmit_timer(sk, TIME_WRITE, HZ);
 		return;
  	}
 
@@ -2301,7 +1361,7 @@
 		int was_active = del_timer(&sk->retransmit_timer);
 		if (!was_active || jiffies+TCP_ACK_TIME < sk->timer.expires) 
 		{
-			reset_xmit_timer(sk, TIME_WRITE, TCP_ACK_TIME);
+			tcp_reset_xmit_timer(sk, TIME_WRITE, TCP_ACK_TIME);
 		} 
 		else
 			add_timer(&sk->retransmit_timer);
@@ -2666,114 +1726,13 @@
 		if(timer_active)
 			add_timer(&sk->timer);
 		else
-			reset_msl_timer(sk, TIME_CLOSE, TCP_FIN_TIMEOUT);
+			tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_FIN_TIMEOUT);
 	}
 	
 	return send_fin;
 }
 
 /*
- *	Send a fin.
- */
-
-static void tcp_send_fin(struct sock *sk)
-{
-	struct proto *prot =(struct proto *)sk->prot;
-	struct tcphdr *th =(struct tcphdr *)&sk->dummy_th;
-	struct tcphdr *t1;
-	struct sk_buff *buff;
-	struct device *dev=NULL;
-	int tmp;
-		
-	release_sock(sk); /* in case the malloc sleeps. */
-	
-	buff = sock_wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
-	sk->inuse = 1;
-
-	if (buff == NULL)
-	{
-		/* This is a disaster if it occurs */
-		printk("tcp_send_fin: Impossible malloc failure");
-		return;
-	}
-
-	/*
-	 *	Administrivia
-	 */
-	 
-	buff->sk = sk;
-	buff->localroute = sk->localroute;
-
-	/*
-	 *	Put in the IP header and routing stuff. 
-	 */
-
-	tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
-			   IPPROTO_TCP, sk->opt,
-			   sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
-	if (tmp < 0) 
-	{
-		int t;
-  		/*
-  		 *	Finish anyway, treat this as a send that got lost. 
-  		 *	(Not good).
-  		 */
-  		 
-	  	buff->free = 1;
-		sock_wfree(sk,buff);
-		sk->write_seq++;
-		t=del_timer(&sk->timer);
-		if(t)
-			add_timer(&sk->timer);
-		else
-			reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
-		return;
-	}
-	
-	/*
-	 *	We ought to check if the end of the queue is a buffer and
-	 *	if so simply add the fin to that buffer, not send it ahead.
-	 */
-
-	t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
-	buff->dev = dev;
-	memcpy(t1, th, sizeof(*t1));
-	buff->seq = sk->write_seq;
-	sk->write_seq++;
-	buff->end_seq = sk->write_seq;
-	t1->seq = htonl(buff->seq);
-	t1->ack = 1;
-	t1->ack_seq = htonl(sk->acked_seq);
-	t1->window = htons(sk->window=tcp_select_window(sk));
-	t1->fin = 1;
-	t1->rst = 0;
-	t1->doff = sizeof(*t1)/4;
-	tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
-
-	/*
-	 * If there is data in the write queue, the fin must be appended to
-	 * the write queue.
- 	 */
- 	
- 	if (skb_peek(&sk->write_queue) != NULL) 
- 	{
-  		buff->free = 0;
-		if (buff->next != NULL) 
-		{
-			printk("tcp_send_fin: next != NULL\n");
-			skb_unlink(buff);
-		}
-		skb_queue_tail(&sk->write_queue, buff);
-  	} 
-  	else 
-  	{
-        	sk->sent_seq = sk->write_seq;
-		sk->prot->queue_xmit(sk, dev, buff, 0);
-		reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-	}
-}
-
-/*
  *	Shutdown the sending side of a connection. Much like close except
  *	that we don't receive shut down or set sk->dead=1.
  */
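
The removed tcp_send_fin() enforced one ordering rule worth keeping in view when reading the new close path: the FIN consumes a sequence number and must travel behind any data still sitting on the write queue, so it is transmitted immediately (with the retransmit timer armed) only when that queue is empty. A toy restatement of the decision, with invented types:

    #include <stdio.h>

    enum fin_action { FIN_QUEUE, FIN_SEND_NOW };

    static enum fin_action place_fin(unsigned queued_bytes, unsigned *write_seq)
    {
        (*write_seq)++;            /* the FIN occupies one sequence number */
        return queued_bytes ? FIN_QUEUE : FIN_SEND_NOW;
    }

    int main(void)
    {
        unsigned seq = 1000;
        enum fin_action act;

        act = place_fin(512, &seq);
        printf("%d %u\n", act, seq);    /* 0 1001: queue behind pending data */
        act = place_fin(0, &seq);
        printf("%d %u\n", act, seq);    /* 1 1002: send at once, arm the timer */
        return 0;
    }
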
@@ -2829,374 +1788,249 @@
 	release_sock(sk);
 }
 
-/*
- *	This routine will send an RST to the other tcp. 
- */
- 
-static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
-	  struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
+static void tcp_close(struct sock *sk, int timeout)
 {
-	struct sk_buff *buff;
-	struct tcphdr *t1;
-	int tmp;
-	struct device *ndev=NULL;
-
-	/*
-	 *	Cannot reset a reset (Think about it).
-	 */
-	 
-	if(th->rst)
-		return;
-  
 	/*
-	 * We need to grab some memory, and put together an RST,
+	 * We need to grab some memory, and put together a FIN,	
 	 * and then put it into the queue to be sent.
 	 */
-
-	buff = sock_wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
-	if (buff == NULL) 
-	  	return;
-
-	buff->sk = NULL;
-	buff->dev = dev;
-	buff->localroute = 0;
-
-	/*
-	 *	Put in the IP header and routing stuff. 
-	 */
-
-	tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
-			   sizeof(struct tcphdr),tos,ttl,NULL);
-	if (tmp < 0) 
+	
+	sk->inuse = 1;
+	
+	tcp_cache_zap();
+	if(sk->state == TCP_LISTEN)
 	{
-  		buff->free = 1;
-		sock_wfree(NULL, buff);
+		/* Special case */
+		tcp_set_state(sk, TCP_CLOSE);
+		tcp_close_pending(sk);
+		release_sock(sk);
 		return;
 	}
+	
+	sk->keepopen = 1;
+	sk->shutdown = SHUTDOWN_MASK;
 
-	t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
-	memcpy(t1, th, sizeof(*t1));
+	if (!sk->dead) 
+	  	sk->state_change(sk);
+
+	if (timeout == 0) 
+	{
+		struct sk_buff *skb;
+		
+		/*
+		 *  We need to flush the recv. buffs.  We do this only on the
+		 *  descriptor close, not protocol-sourced closes, because the
+		 *  reader process may not have drained the data yet!
+		 */
+		 
+		while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
+			kfree_skb(skb, FREE_READ);
+		/*
+		 *	Get rid off any half-completed packets. 
+		 */
+
+		if (sk->partial) 
+			tcp_send_partial(sk);
+	}
 
+		
 	/*
-	 *	Swap the send and the receive. 
+	 *	Timeout is not the same thing - however the code likes
+	 *	to send both the same way (sigh).
 	 */
-
-	t1->dest = th->source;
-	t1->source = th->dest;
-	t1->rst = 1;  
-	t1->window = 0;
-  
-	if(th->ack)
+	 
+	if(timeout)
 	{
-		t1->ack = 0;
-	  	t1->seq = th->ack_seq;
-	  	t1->ack_seq = 0;
+		tcp_set_state(sk, TCP_CLOSE);	/* Dead */
 	}
 	else
 	{
-	  	t1->ack = 1;
-	  	if(!th->syn)
-			t1->ack_seq = th->seq;
-		else
-			t1->ack_seq = htonl(ntohl(th->seq)+1);
-		t1->seq = 0;
+		if(tcp_close_state(sk,1)==1)
+		{
+			tcp_send_fin(sk);
+		}
 	}
-
-	t1->syn = 0;
-	t1->urg = 0;
-	t1->fin = 0;
-	t1->psh = 0;
-	t1->doff = sizeof(*t1)/4;
-	tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
-	prot->queue_xmit(NULL, ndev, buff, 1);
-	tcp_statistics.TcpOutSegs++;
+	release_sock(sk);
 }
 
 
 /*
- *	Look for tcp options. Parses everything but only knows about MSS.
- *	This routine is always called with the packet containing the SYN.
- *	However it may also be called with the ack to the SYN.  So you
- *	can't assume this is always the SYN.  It's always called after
- *	we have set up sk->mtu to our own MTU.
- *
- *	We need at minimum to add PAWS support here. Possibly large windows
- *	as Linux gets deployed on 100Mb/sec networks.
+ *	This will accept the next outstanding connection. 
  */
  
-static void tcp_options(struct sock *sk, struct tcphdr *th)
+static struct sock *tcp_accept(struct sock *sk, int flags)
 {
-	unsigned char *ptr;
-	int length=(th->doff*4)-sizeof(struct tcphdr);
-	int mss_seen = 0;
-    
-	ptr = (unsigned char *)(th + 1);
+	struct sock *newsk;
+	struct sk_buff *skb;
   
-	while(length>0)
+  /*
+   * We need to make sure that this socket is listening,
+   * and that it has something pending.
+   */
+
+	if (sk->state != TCP_LISTEN) 
 	{
-	  	int opcode=*ptr++;
-	  	int opsize=*ptr++;
-	  	switch(opcode)
-	  	{
-	  		case TCPOPT_EOL:
-	  			return;
-	  		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
-	  			length--;
-	  			ptr--;		/* the opsize=*ptr++ above was a mistake */
-	  			continue;
-	  		
-	  		default:
-	  			if(opsize<=2)	/* Avoid silly options looping forever */
-	  				return;
-	  			switch(opcode)
-	  			{
-	  				case TCPOPT_MSS:
-	  					if(opsize==4 && th->syn)
-	  					{
-	  						sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
-							mss_seen = 1;
-	  					}
-	  					break;
-		  				/* Add other options here as people feel the urge to implement stuff like large windows */
-	  			}
-	  			ptr+=opsize-2;
-	  			length-=opsize;
-	  	}
+		sk->err = EINVAL;
+		return(NULL); 
 	}
-	if (th->syn) 
+
+	/* Avoid the race. */
+	cli();
+	sk->inuse = 1;
+
+	while((skb = tcp_dequeue_established(sk)) == NULL) 
 	{
-		if (! mss_seen)
-		      sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
-	}
-#ifdef CONFIG_INET_PCTCP
-	sk->mss = min(sk->max_window >> 1, sk->mtu);
-#else    
-	sk->mss = min(sk->max_window, sk->mtu);
-	sk->max_unacked = 2 * sk->mss;
-#endif  
-}
+		if (flags & O_NONBLOCK) 
+		{
+			sti();
+			release_sock(sk);
+			sk->err = EAGAIN;
+			return(NULL);
+		}
 
-static inline unsigned long default_mask(unsigned long dst)
-{
-	dst = ntohl(dst);
-	if (IN_CLASSA(dst))
-		return htonl(IN_CLASSA_NET);
-	if (IN_CLASSB(dst))
-		return htonl(IN_CLASSB_NET);
-	return htonl(IN_CLASSC_NET);
-}
+		release_sock(sk);
+		interruptible_sleep_on(sk->sleep);
+		if (current->signal & ~current->blocked) 
+		{
+			sti();
+			sk->err = ERESTARTSYS;
+			return(NULL);
+		}
+		sk->inuse = 1;
+  	}
+	sti();
 
-/*
- *	Default sequence number picking algorithm.
- *	As close as possible to RFC 793, which
- *	suggests using a 250kHz clock.
- *	Further reading shows this assumes 2MB/s networks.
- *	For 10MB/s ethernet, a 1MHz clock is appropriate.
- *	That's funny, Linux has one built in!  Use it!
- */
+	/*
+	 *	Now all we need to do is return skb->sk. 
+	 */
 
-extern inline u32 tcp_init_seq(void)
-{
-	struct timeval tv;
-	do_gettimeofday(&tv);
-	return tv.tv_usec+tv.tv_sec*1000000;
+	newsk = skb->sk;
+
+	kfree_skb(skb, FREE_READ);
+	sk->ack_backlog--;
+	release_sock(sk);
+	return(newsk);
 }
 
 /*
- *	This routine handles a connection request.
- *	It should make sure we haven't already responded.
- *	Because of the way BSD works, we have to send a syn/ack now.
- *	This also means it will be harder to close a socket which is
- *	listening.
+ *	This will initiate an outgoing connection. 
  */
  
-static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
-		 unsigned long daddr, unsigned long saddr,
-		 struct options *opt, struct device *dev, u32 seq)
+static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
 {
 	struct sk_buff *buff;
-	struct tcphdr *t1;
+	struct device *dev=NULL;
 	unsigned char *ptr;
-	struct sock *newsk;
-	struct tcphdr *th;
-	struct device *ndev=NULL;
 	int tmp;
+	int atype;
+	struct tcphdr *t1;
 	struct rtable *rt;
-  
-	th = skb->h.th;
 
-	/* If the socket is dead, don't accept the connection. */
-	if (!sk->dead) 
-	{
-  		sk->data_ready(sk,0);
-	}
-	else 
-	{
-		if(sk->debug)
-			printk("Reset on %p: Connect on dead socket.\n",sk);
-		tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
-		tcp_statistics.TcpAttemptFails++;
-		kfree_skb(skb, FREE_READ);
-		return;
-	}
+	if (sk->state != TCP_CLOSE) 
+		return(-EISCONN);
 
 	/*
-	 * Make sure we can accept more.  This will prevent a
-	 * flurry of syns from eating up all our memory.
+	 *	Don't allow a double connect.
 	 */
+	 	
+	if(sk->daddr)
+		return -EINVAL;
+	
+	if (addr_len < 8) 
+		return(-EINVAL);
 
-	if (sk->ack_backlog >= sk->max_ack_backlog) 
-	{
-		tcp_statistics.TcpAttemptFails++;
-		kfree_skb(skb, FREE_READ);
-		return;
-	}
+	if (usin->sin_family && usin->sin_family != AF_INET) 
+		return(-EAFNOSUPPORT);
 
+  	/*
+  	 *	connect() to INADDR_ANY means loopback (BSD'ism).
+  	 */
+  	
+  	if(usin->sin_addr.s_addr==INADDR_ANY)
+		usin->sin_addr.s_addr=ip_my_addr();
+		  
 	/*
-	 * We need to build a new sock struct.
-	 * It is sort of bad to have a socket without an inode attached
-	 * to it, but the wake_up's will just wake up the listening socket,
-	 * and if the listening socket is destroyed before this is taken
-	 * off of the queue, this will take care of it.
+	 *	Don't want a TCP connection going to a broadcast address 
 	 */
 
-	newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
-	if (newsk == NULL) 
-	{
-		/* just ignore the syn.  It will get retransmitted. */
-		tcp_statistics.TcpAttemptFails++;
-		kfree_skb(skb, FREE_READ);
-		return;
-	}
+	if ((atype=ip_chk_addr(usin->sin_addr.s_addr)) == IS_BROADCAST || atype==IS_MULTICAST) 
+		return -ENETUNREACH;
+  
+	sk->inuse = 1;
+	sk->daddr = usin->sin_addr.s_addr;
+	sk->write_seq = tcp_init_seq();
+	sk->window_seq = sk->write_seq;
+	sk->rcv_ack_seq = sk->write_seq -1;
+	sk->err = 0;
+	sk->dummy_th.dest = usin->sin_port;
+	release_sock(sk);
 
-	memcpy(newsk, sk, sizeof(*newsk));
-	newsk->opt = NULL;
-	newsk->ip_route_cache  = NULL;
-	if (opt && opt->optlen) {
-	  sk->opt = (struct options*)kmalloc(sizeof(struct options)+opt->optlen, GFP_ATOMIC);
-	  if (!sk->opt) {
-	        kfree_s(newsk, sizeof(struct sock));
-		tcp_statistics.TcpAttemptFails++;
-		kfree_skb(skb, FREE_READ);
-		return;
-	  }
-	  if (ip_options_echo(sk->opt, opt, daddr, saddr, skb)) {
-		kfree_s(sk->opt, sizeof(struct options)+opt->optlen);
-	        kfree_s(newsk, sizeof(struct sock));
-		tcp_statistics.TcpAttemptFails++;
-		kfree_skb(skb, FREE_READ);
-		return;
-	  }
+	buff = sock_wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
+	if (buff == NULL) 
+	{
+		return(-ENOMEM);
 	}
-	skb_queue_head_init(&newsk->write_queue);
-	skb_queue_head_init(&newsk->receive_queue);
-	newsk->send_head = NULL;
-	newsk->send_tail = NULL;
-	skb_queue_head_init(&newsk->back_log);
-	newsk->rtt = 0;		/*TCP_CONNECT_TIME<<3*/
-	newsk->rto = TCP_TIMEOUT_INIT;
-	newsk->mdev = 0;
-	newsk->max_window = 0;
-	newsk->cong_window = 1;
-	newsk->cong_count = 0;
-	newsk->ssthresh = 0;
-	newsk->backoff = 0;
-	newsk->blog = 0;
-	newsk->intr = 0;
-	newsk->proc = 0;
-	newsk->done = 0;
-	newsk->partial = NULL;
-	newsk->pair = NULL;
-	newsk->wmem_alloc = 0;
-	newsk->rmem_alloc = 0;
-	newsk->localroute = sk->localroute;
-
-	newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
-
-	newsk->err = 0;
-	newsk->shutdown = 0;
-	newsk->ack_backlog = 0;
-	newsk->acked_seq = skb->seq+1;
-	newsk->lastwin_seq = skb->seq+1;
-	newsk->delay_acks = 1;
-	newsk->copied_seq = skb->seq+1;
-	newsk->fin_seq = skb->seq;
-	newsk->state = TCP_SYN_RECV;
-	newsk->timeout = 0;
-	newsk->ip_xmit_timeout = 0;
-	newsk->write_seq = seq; 
-	newsk->window_seq = newsk->write_seq;
-	newsk->rcv_ack_seq = newsk->write_seq;
-	newsk->urg_data = 0;
-	newsk->retransmits = 0;
-	newsk->linger=0;
-	newsk->destroy = 0;
-	init_timer(&newsk->timer);
-	newsk->timer.data = (unsigned long)newsk;
-	newsk->timer.function = &net_timer;
-	init_timer(&newsk->retransmit_timer);
-	newsk->retransmit_timer.data = (unsigned long)newsk;
-	newsk->retransmit_timer.function=&retransmit_timer;
-	newsk->dummy_th.source = skb->h.th->dest;
-	newsk->dummy_th.dest = skb->h.th->source;
+	sk->inuse = 1;
+	buff->sk = sk;
+	buff->free = 0;
+	buff->localroute = sk->localroute;
 	
-	/*
-	 *	Swap these two, they are from our point of view. 
-	 */
-	 
-	newsk->daddr = saddr;
-	newsk->saddr = daddr;
-	newsk->rcv_saddr = daddr;
-
-	put_sock(newsk->num,newsk);
-	newsk->dummy_th.res1 = 0;
-	newsk->dummy_th.doff = 6;
-	newsk->dummy_th.fin = 0;
-	newsk->dummy_th.syn = 0;
-	newsk->dummy_th.rst = 0;	
-	newsk->dummy_th.psh = 0;
-	newsk->dummy_th.ack = 0;
-	newsk->dummy_th.urg = 0;
-	newsk->dummy_th.res2 = 0;
-	newsk->acked_seq = skb->seq + 1;
-	newsk->copied_seq = skb->seq + 1;
-	newsk->socket = NULL;
-
-	/*
-	 *	Grab the ttl and tos values and use them 
-	 */
-
-	newsk->ip_ttl=sk->ip_ttl;
-	newsk->ip_tos=skb->ip_hdr->tos;
 
 	/*
-	 *	Use 512 or whatever user asked for 
+	 *	Put in the IP header and routing stuff.
 	 */
+	 
+	tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
+		IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
+	if (tmp < 0) 
+	{
+		sock_wfree(sk, buff);
+		release_sock(sk);
+		return(-ENETUNREACH);
+	}
+	if ((rt = sk->ip_route_cache) != NULL && !sk->saddr)
+		sk->saddr = rt->rt_src;
+	sk->rcv_saddr = sk->saddr;
 
-	/*
-	 * 	Note use of sk->user_mss, since user has no direct access to newsk 
-	 */
+	t1 = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr));
 
-	rt = ip_rt_route(newsk->opt && newsk->opt->srr ? newsk->opt->faddr : saddr, 0);
-	newsk->ip_route_cache = rt;
+	memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
+	buff->seq = sk->write_seq++;
+	t1->seq = htonl(buff->seq);
+	sk->sent_seq = sk->write_seq;
+	buff->end_seq = sk->write_seq;
+	t1->ack = 0;
+	t1->window = 2;
+	t1->res1=0;
+	t1->res2=0;
+	t1->rst = 0;
+	t1->urg = 0;
+	t1->psh = 0;
+	t1->syn = 1;
+	t1->urg_ptr = 0;
+	t1->doff = 6;
+	/* use 512 or whatever user asked for */
 	
 	if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
-		newsk->window_clamp = rt->rt_window;
+		sk->window_clamp=rt->rt_window;
 	else
-		newsk->window_clamp = 0;
-		
+		sk->window_clamp=0;
+
 	if (sk->user_mss)
-		newsk->mtu = sk->user_mss;
+		sk->mtu = sk->user_mss;
 	else if (rt)
-		newsk->mtu = rt->rt_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
+		sk->mtu = rt->rt_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
 	else 
-		newsk->mtu = 576 - sizeof(struct iphdr) - sizeof(struct tcphdr);
+		sk->mtu = 576 - sizeof(struct iphdr) - sizeof(struct tcphdr);
 
 	/*
-	 *	But not bigger than device MTU 
+	 *	but not bigger than device MTU 
 	 */
 
-	newsk->mtu = min(newsk->mtu, dev->mtu - sizeof(struct iphdr) - sizeof(struct tcphdr));
+	if(sk->mtu <32)
+		sk->mtu = 32;	/* Sanity limit */
+		
+	sk->mtu = min(sk->mtu, dev->mtu - sizeof(struct iphdr) - sizeof(struct tcphdr));
 
 #ifdef CONFIG_SKIP
 	
@@ -3212,2328 +2046,42 @@
 	if(skip_pick_mtu!=NULL)		/* If SKIP is loaded.. */
 		sk->mtu=skip_pick_mtu(sk->mtu,dev);
 #endif
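
The tcp_connect() code in the hunk above derives the initial MSS for an outgoing connection (kept in sk->mtu): an explicit user setting wins, otherwise the route MTU less the IP and TCP headers, otherwise a 576-byte default, with a floor of 32 and a final cap at the device MTU less the same headers. A userspace sketch of that derivation, assuming the usual 20-byte headers and ignoring the SKIP hook:

    #include <stdio.h>

    #define HDRS (20 + 20)   /* assumed sizeof(struct iphdr) + sizeof(struct tcphdr) */

    static unsigned initial_mss(unsigned user_mss, unsigned route_mtu,
                                unsigned dev_mtu)
    {
        unsigned mss;

        if (user_mss)
            mss = user_mss;                 /* user asked for a specific MSS */
        else if (route_mtu)
            mss = route_mtu - HDRS;         /* route carries an MTU */
        else
            mss = 576 - HDRS;               /* classic 536-byte default */

        if (mss < 32)
            mss = 32;                       /* sanity floor */
        if (mss > dev_mtu - HDRS)
            mss = dev_mtu - HDRS;           /* never exceed the device MTU */
        return mss;
    }

    int main(void)
    {
        printf("%u\n", initial_mss(0, 0, 1500));     /* 536 */
        printf("%u\n", initial_mss(0, 1500, 1500));  /* 1460 */
        return 0;
    }
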
+	
 	/*
-	 *	This will min with what arrived in the packet 
+	 *	Put in the TCP options to say MTU. 
 	 */
 
-	tcp_options(newsk,skb->h.th);
-	
-	tcp_cache_zap();
-
-	buff = sock_wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
-	if (buff == NULL) 
-	{
-		sk->err = ENOMEM;
-		newsk->dead = 1;
-		newsk->state = TCP_CLOSE;
-		/* And this will destroy it */
-		release_sock(newsk);
-		kfree_skb(skb, FREE_READ);
-		tcp_statistics.TcpAttemptFails++;
-		return;
-	}
-  
-	buff->sk = newsk;
-	buff->localroute = newsk->localroute;
-
-	/*
-	 *	Put in the IP header and routing stuff. 
-	 */
-
-	tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
-			       IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl,&newsk->ip_route_cache);
-
-	/*
-	 *	Something went wrong. 
-	 */
-
-	if (tmp < 0) 
-	{
-		sk->err = tmp;
-		buff->free = 1;
-		kfree_skb(buff,FREE_WRITE);
-		newsk->dead = 1;
-		newsk->state = TCP_CLOSE;
-		release_sock(newsk);
-		skb->sk = sk;
-		kfree_skb(skb, FREE_READ);
-		tcp_statistics.TcpAttemptFails++;
-		return;
-	}
-
-	t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
-  
-	memcpy(t1, skb->h.th, sizeof(*t1));
-	buff->seq = newsk->write_seq++;
-	buff->end_seq = newsk->write_seq;
-	/*
-	 *	Swap the send and the receive. 
-	 */
-	t1->dest = skb->h.th->source;
-	t1->source = newsk->dummy_th.source;
-	t1->seq = ntohl(buff->seq);
-	t1->ack = 1;
-	newsk->sent_seq = newsk->write_seq;
-	t1->window = ntohs(tcp_select_window(newsk));
-	t1->res1 = 0;
-	t1->res2 = 0;
-	t1->rst = 0;
-	t1->urg = 0;
-	t1->psh = 0;
-	t1->syn = 1;
-	t1->ack_seq = htonl(newsk->acked_seq);
-	t1->doff = sizeof(*t1)/4+1;
 	ptr = skb_put(buff,4);
 	ptr[0] = 2;
 	ptr[1] = 4;
-	ptr[2] = ((newsk->mtu) >> 8) & 0xff;
-	ptr[3] =(newsk->mtu) & 0xff;
-
-	tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
-	newsk->prot->queue_xmit(newsk, ndev, buff, 0);
-	reset_xmit_timer(newsk, TIME_WRITE , TCP_TIMEOUT_INIT);
-	skb->sk = newsk;
-
-	/*
-	 *	Charge the sock_buff to newsk. 
-	 */
-	 
-	sk->rmem_alloc -= skb->truesize;
-	newsk->rmem_alloc += skb->truesize;
-	
-	skb_queue_tail(&sk->receive_queue,skb);
-	sk->ack_backlog++;
-	release_sock(newsk);
-	tcp_statistics.TcpOutSegs++;
-}
-
+	ptr[2] = (sk->mtu) >> 8;
+	ptr[3] = (sk->mtu) & 0xff;
+	tcp_send_check(t1, sk->saddr, sk->daddr,
+		  sizeof(struct tcphdr) + 4, sk);
 
-static void tcp_close(struct sock *sk, int timeout)
-{
 	/*
-	 * We need to grab some memory, and put together a FIN,	
-	 * and then put it into the queue to be sent.
+	 *	This must go first otherwise a really quick response will get reset. 
 	 */
-	
-	sk->inuse = 1;
-	
-	if(th_cache_sk==sk)
-		tcp_cache_zap();
-	if(sk->state == TCP_LISTEN)
-	{
-		/* Special case */
-		tcp_set_state(sk, TCP_CLOSE);
-		tcp_close_pending(sk);
-		release_sock(sk);
-		return;
-	}
-	
-	sk->keepopen = 1;
-	sk->shutdown = SHUTDOWN_MASK;
-
-	if (!sk->dead) 
-	  	sk->state_change(sk);
 
-	if (timeout == 0) 
-	{
-		struct sk_buff *skb;
-		
-		/*
-		 *  We need to flush the recv. buffs.  We do this only on the
-		 *  descriptor close, not protocol-sourced closes, because the
-		 *  reader process may not have drained the data yet!
-		 */
-		 
-		while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
-			kfree_skb(skb, FREE_READ);
-		/*
-		 *	Get rid off any half-completed packets. 
-		 */
-
-		if (sk->partial) 
-			tcp_send_partial(sk);
-	}
-
-		
-	/*
-	 *	Timeout is not the same thing - however the code likes
-	 *	to send both the same way (sigh).
-	 */
-	 
-	if(timeout)
-	{
-		tcp_set_state(sk, TCP_CLOSE);	/* Dead */
-	}
+	tcp_cache_zap();
+	tcp_set_state(sk,TCP_SYN_SENT);
+	if(rt&&rt->rt_flags&RTF_IRTT)
+		sk->rto = rt->rt_irtt;
 	else
-	{
-		if(tcp_close_state(sk,1)==1)
-		{
-			tcp_send_fin(sk);
-		}
-	}
-	release_sock(sk);
-}
-
-
-/*
- * 	This routine takes stuff off of the write queue,
- *	and puts it in the xmit queue. This happens as incoming acks
- *	open up the remote window for us.
- */
- 
-static void tcp_write_xmit(struct sock *sk)
-{
-	struct sk_buff *skb;
-
-	/*
-	 *	The bytes will have to remain here. In time closedown will
-	 *	empty the write queue and all will be happy 
-	 */
-
-	if(sk->zapped)
-		return;
-
-	/*
-	 *	Anything on the transmit queue that fits the window can
-	 *	be added providing we are not
-	 *
-	 *	a) retransmitting (Nagle's rule)
-	 *	b) exceeding our congestion window.
-	 */
-	 
-	while((skb = skb_peek(&sk->write_queue)) != NULL &&
-		before(skb->end_seq, sk->window_seq + 1) &&
-		(sk->retransmits == 0 ||
-		 sk->ip_xmit_timeout != TIME_WRITE ||
-		 before(skb->end_seq, sk->rcv_ack_seq + 1))
-		&& sk->packets_out < sk->cong_window) 
-	{
-		IS_SKB(skb);
-		skb_unlink(skb);
-		
-		/*
-		 *	See if we really need to send the packet. 
-		 */
-		 
-		if (before(skb->end_seq, sk->rcv_ack_seq +1)) 
-		{
-			/*
-			 *	This is acked data. We can discard it. This 
-			 *	cannot currently occur.
-			 */
-			 
-			sk->retransmits = 0;
-			kfree_skb(skb, FREE_WRITE);
-			if (!sk->dead) 
-				sk->write_space(sk);
-		} 
-		else
-		{
-			struct tcphdr *th;
-			struct iphdr *iph;
-			int size;
-/*
- * put in the ack seq and window at this point rather than earlier,
- * in order to keep them monotonic.  We really want to avoid taking
- * back window allocations.  That's legal, but RFC1122 says it's frowned on.
- * Ack and window will in general have changed since this packet was put
- * on the write queue.
- */
-			iph = skb->ip_hdr;
-			th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
-			size = skb->len - (((unsigned char *) th) - skb->data);
-#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
-			if (size > sk->mtu - sizeof(struct iphdr))
-			{
-				iph->frag_off &= ~htons(IP_DF);
-				ip_send_check(iph);
-			}
-#endif
-			
-			th->ack_seq = htonl(sk->acked_seq);
-			th->window = htons(tcp_select_window(sk));
-
-			tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
-
-			sk->sent_seq = skb->end_seq;
-			
-			/*
-			 *	IP manages our queue for some crazy reason
-			 */
-			 
-			sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
-			
-			
-			sk->ack_backlog = 0;
-			sk->bytes_rcv = 0;
-
-			/*
-			 *	Again we slide the timer wrongly
-			 */
-			 
-			reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-		}
-	}
-}
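
The loop above sends a queued segment only when it fits inside the advertised window, when no retransmission is outstanding (or the segment is already acknowledged), and when the packets in flight stay under the congestion window. As a rough standalone sketch of that gate, separate from the patch itself (the struct and field names below are stand-ins for the sock fields, not kernel definitions):

    #include <stdint.h>

    /* Sequence-space comparison in the style of the kernel's before(). */
    static int seq_before(uint32_t a, uint32_t b)
    {
        return (int32_t)(a - b) < 0;
    }

    struct conn {                   /* illustrative stand-in, not struct sock     */
        uint32_t window_seq;        /* right edge of the peer's offered window    */
        uint32_t rcv_ack_seq;       /* highest ACK received so far                */
        unsigned packets_out;       /* segments currently in flight               */
        unsigned cong_window;       /* congestion window, in segments             */
        int      retransmitting;    /* non-zero while the retransmit queue drains */
    };

    /* May a segment ending at end_seq leave the write queue now?
     * Mirrors the transmit conditions used in tcp_write_xmit above. */
    static int may_transmit(const struct conn *c, uint32_t end_seq)
    {
        if (!seq_before(end_seq, c->window_seq + 1))
            return 0;               /* would overrun the offered window           */
        if (c->retransmitting && !seq_before(end_seq, c->rcv_ack_seq + 1))
            return 0;               /* hold new data back while retransmitting    */
        return c->packets_out < c->cong_window;
    }
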
-
-
-/*
- *	This routine deals with incoming acks, but not outgoing ones.
- */
-
-extern __inline__ int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
-{
-	u32 ack;
-	int flag = 0;
-
-	/* 
-	 * 1 - there was data in packet as well as ack or new data is sent or 
-	 *     in shutdown state
-	 * 2 - data from retransmit queue was acked and removed
-	 * 4 - window shrunk or data from retransmit queue was acked and removed
-	 */
-
-	if(sk->zapped)
-		return(1);	/* Dead, can't ack any more so why bother */
-
-	/*
-	 *	Have we discovered a larger window
-	 */
-	 
-	ack = ntohl(th->ack_seq);
-
-	if (ntohs(th->window) > sk->max_window) 
-	{
-  		sk->max_window = ntohs(th->window);
-#ifdef CONFIG_INET_PCTCP
-		/* Hack because we don't send partial packets to non SWS
-		   handling hosts */
-		sk->mss = min(sk->max_window>>1, sk->mtu);
-#else
-		sk->mss = min(sk->max_window, sk->mtu);
-#endif	
-	}
-
-	/*
-	 *	We have dropped back to keepalive timeouts. Thus we have
-	 *	no retransmits pending.
-	 */
-	 
-	if (sk->retransmits && sk->ip_xmit_timeout == TIME_KEEPOPEN)
-	  	sk->retransmits = 0;
-
-	/*
-	 *	If the ack is newer than sent or older than previous acks
-	 *	then we can probably ignore it.
-	 */
-	 
-	if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
-	{
-		if(sk->debug)
-			printk("Ack ignored %u %u\n",ack,sk->sent_seq);
-			
-		/*
-		 *	Keepalive processing.
-		 */
-		 
-		if (after(ack, sk->sent_seq)) 
-		{
-			return(0);
-		}
-		
-		/*
-		 *	Restart the keepalive timer.
-		 */
-		 
-		if (sk->keepopen) 
-		{
-			if(sk->ip_xmit_timeout==TIME_KEEPOPEN)
-				reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
-		}
-		return(1);
-	}
-
-	/*
-	 *	If there is data set flag 1
-	 */
-	 
-	if (len != th->doff*4) 
-		flag |= 1;
-
-	/*
-	 *	See if our window has been shrunk. 
-	 */
-
-	if (after(sk->window_seq, ack+ntohs(th->window))) 
-	{
-		/*
-		 * We may need to move packets from the send queue
-		 * to the write queue, if the window has been shrunk on us.
-		 * The RFC says you are not allowed to shrink your window
-		 * like this, but if the other end does, you must be able
-		 * to deal with it.
-		 */
-		struct sk_buff *skb;
-		struct sk_buff *skb2;
-		struct sk_buff *wskb = NULL;
-  	
-		skb2 = sk->send_head;
-		sk->send_head = NULL;
-		sk->send_tail = NULL;
-	
-		/*
-		 *	This is an artifact of a flawed concept. We want one
-		 *	queue and a smarter send routine when we send all.
-		 */
-	
-		flag |= 4;	/* Window changed */
-	
-		sk->window_seq = ack + ntohs(th->window);
-		cli();
-		while (skb2 != NULL) 
-		{
-			skb = skb2;
-			skb2 = skb->link3;
-			skb->link3 = NULL;
-			if (after(skb->end_seq, sk->window_seq)) 
-			{
-				if (sk->packets_out > 0) 
-					sk->packets_out--;
-				/* We may need to remove this from the dev send list. */
-				if (skb->next != NULL) 
-				{
-					skb_unlink(skb);				
-				}
-				/* Now add it to the write_queue. */
-				if (wskb == NULL)
-					skb_queue_head(&sk->write_queue,skb);
-				else
-					skb_append(wskb,skb);
-				wskb = skb;
-			} 
-			else 
-			{
-				if (sk->send_head == NULL) 
-				{
-					sk->send_head = skb;
-					sk->send_tail = skb;
-				}
-				else
-				{
-					sk->send_tail->link3 = skb;
-					sk->send_tail = skb;
-				}
-				skb->link3 = NULL;
-			}
-		}
-		sti();
-	}
-
-	/*
-	 *	Pipe has emptied
-	 */
-	 
-	if (sk->send_tail == NULL || sk->send_head == NULL) 
-	{
-		sk->send_head = NULL;
-		sk->send_tail = NULL;
-		sk->packets_out= 0;
-	}
-
-	/*
-	 *	Update the right hand window edge of the host
-	 */
-	 
-	sk->window_seq = ack + ntohs(th->window);
+		sk->rto = TCP_TIMEOUT_INIT;
+	sk->retransmit_timer.function=&tcp_retransmit_timer;
+	sk->retransmit_timer.data = (unsigned long)sk;
+	tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);	/* Timer for repeating the SYN until an answer  */
+	sk->retransmits = 0;				/* Now works the right way instead of a hacked 
+											initial setting */
 
-	/*
-	 *	We don't want too many packets out there. 
-	 */
-	 
-	if (sk->ip_xmit_timeout == TIME_WRITE && 
-		sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
-	{
-		/* 
-		 * This is Jacobson's slow start and congestion avoidance. 
-		 * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
-		 * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
-		 * counter and increment it once every cwnd times.  It's possible
-		 * that this should be done only if sk->retransmits == 0.  I'm
-		 * interpreting "new data is acked" as including data that has
-		 * been retransmitted but is just now being acked.
-		 */
-		if (sk->cong_window < sk->ssthresh)  
-			/* 
-			 *	In "safe" area, increase
-			 */
-			sk->cong_window++;
-		else 
-		{
-			/*
-			 *	In dangerous area, increase slowly.  In theory this is
-			 *  	sk->cong_window += 1 / sk->cong_window
-			 */
-			if (sk->cong_count >= sk->cong_window) 
-			{
-				sk->cong_window++;
-				sk->cong_count = 0;
-			}
-			else 
-				sk->cong_count++;
-		}
-	}
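
The block above keeps the congestion window in whole segments, so the congestion-avoidance rule of growing by 1/cwnd per ACK is approximated with a counter. A minimal sketch of that integer bookkeeping, separate from the patch itself (names are illustrative, not the sock fields):

    struct cwnd_state {             /* illustrative stand-in                  */
        unsigned cwnd;              /* congestion window, in segments         */
        unsigned ssthresh;          /* slow start threshold, in segments      */
        unsigned count;             /* ACKs counted towards the next +1       */
    };

    /* Called once per ACK of new data, as in the block above. */
    static void cwnd_on_ack(struct cwnd_state *s)
    {
        if (s->cwnd < s->ssthresh) {
            s->cwnd++;              /* slow start: one segment per ACK        */
        } else if (s->count >= s->cwnd) {
            s->cwnd++;              /* avoidance: one segment per full window */
            s->count = 0;
        } else {
            s->count++;
        }
    }
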
-
-	/*
-	 *	Remember the highest ack received.
-	 */
-	 
-	sk->rcv_ack_seq = ack;
-	
-	/*
-	 *	We passed data and got it acked, remove any soft error
-	 *	log. Something worked...
-	 */
-	 
-	sk->err_soft = 0;
-
-	/*
-	 *	If this ack opens up a zero window, clear backoff.  It was
-	 *	being used to time the probes, and is probably far higher than
-	 *	it needs to be for normal retransmission.
-	 */
-
-	if (sk->ip_xmit_timeout == TIME_PROBE0) 
-	{
-		sk->retransmits = 0;	/* Our probe was answered */
-		
-		/*
-		 *	Was it a usable window open ?
-		 */
-		 
-  		if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
-		    ! before (sk->window_seq, sk->write_queue.next->end_seq)) 
-		{
-			sk->backoff = 0;
-			
-			/*
-			 *	Recompute rto from rtt.  this eliminates any backoff.
-			 */
-
-			sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
-			if (sk->rto > 120*HZ)
-				sk->rto = 120*HZ;
-			if (sk->rto < HZ/5)	/* Was 1*HZ, then 1 - turns out we must allow about
-						   .2 of a second because of BSD delayed acks - on a 100Mb/sec link
-						   .2 of a second is going to need huge windows (SIGH) */
-			sk->rto = HZ/5;
-		}
-	}
-
-	/* 
-	 *	See if we can take anything off of the retransmit queue.
-	 */
-   
-	while(sk->send_head != NULL) 
-	{
-		/* Check for a bug. */
-		if (sk->send_head->link3 &&
-		    after(sk->send_head->end_seq, sk->send_head->link3->end_seq)) 
-			printk("INET: tcp.c: *** bug send_list out of order.\n");
-			
-		/*
-		 *	If our packet is before the ack sequence we can
-		 *	discard it as it's confirmed to have arrived at the other end.
-		 */
-		 
-		if (before(sk->send_head->end_seq, ack+1)) 
-		{
-			struct sk_buff *oskb;	
-			if (sk->retransmits) 
-			{	
-				/*
-				 *	We were retransmitting.  don't count this in RTT est 
-				 */
-				flag |= 2;
-
-				/*
-				 * even though we've gotten an ack, we're still
-				 * retransmitting as long as we're sending from
-				 * the retransmit queue.  Keeping retransmits non-zero
-				 * prevents us from getting new data interspersed with
-				 * retransmissions.
-				 */
-
-				if (sk->send_head->link3)	/* Any more queued retransmits? */
-					sk->retransmits = 1;
-				else
-					sk->retransmits = 0;
-			}
-  			/*
-			 * Note that we only reset backoff and rto in the
-			 * rtt recomputation code.  And that doesn't happen
-			 * if there were retransmissions in effect.  So the
-			 * first new packet after the retransmissions is
-			 * sent with the backoff still in effect.  Not until
-			 * we get an ack from a non-retransmitted packet do
-			 * we reset the backoff and rto.  This allows us to deal
-			 * with a situation where the network delay has increased
-			 * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
-			 */
-
-			/*
-			 *	We have one less packet out there. 
-			 */
-			 
-			if (sk->packets_out > 0) 
-				sk->packets_out --;
-			/* 
-			 *	Wake up the process, it can probably write more. 
-			 */
-			if (!sk->dead) 
-				sk->write_space(sk);
-			oskb = sk->send_head;
-
-			if (!(flag&2)) 	/* Not retransmitting */
-			{
-				long m;
-	
-				/*
-				 *	The following amusing code comes from Jacobson's
-				 *	article in SIGCOMM '88.  Note that rtt and mdev
-				 *	are scaled versions of rtt and mean deviation.
-				 *	This is designed to be as fast as possible 
-				 *	m stands for "measurement".
-				 */
-	
-				m = jiffies - oskb->when;  /* RTT */
-				if(m<=0)
-					m=1;		/* IS THIS RIGHT FOR <0 ??? */
-				m -= (sk->rtt >> 3);    /* m is now error in rtt est */
-				sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
-				if (m < 0)
-					m = -m;		/* m is now abs(error) */
-				m -= (sk->mdev >> 2);   /* similar update on mdev */
-				sk->mdev += m;	    	/* mdev = 3/4 mdev + 1/4 new */
-	
-				/*
-				 *	Now update timeout.  Note that this removes any backoff.
-				 */
-			 
-				sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
-				if (sk->rto > 120*HZ)
-					sk->rto = 120*HZ;
-				if (sk->rto < HZ/5)	/* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
-					sk->rto = HZ/5;
-				sk->backoff = 0;
-			}
-			flag |= (2|4);	/* 2 is really more like 'don't adjust the rtt 
-					   In this case as we just set it up */
-			cli();
-			oskb = sk->send_head;
-			IS_SKB(oskb);
-			sk->send_head = oskb->link3;
-			if (sk->send_head == NULL) 
-			{
-				sk->send_tail = NULL;
-			}
-
-		/*
-		 *	We may need to remove this from the dev send list. 
-		 */
-
-			if (oskb->next)
-				skb_unlink(oskb);
-			sti();
-			kfree_skb(oskb, FREE_WRITE); /* write. */
-			if (!sk->dead) 
-				sk->write_space(sk);
-		}
-		else
-		{
-			break;
-		}
-	}
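
The code above keeps the smoothed RTT scaled by 8 and the mean deviation scaled by 4, so the shifts implement srtt = 7/8*srtt + 1/8*sample and mdev = 3/4*mdev + 1/4*|error| without division, and the resulting RTO works out to roughly rtt + 2*mdev once the scaling is undone; per Karn's rule, samples from retransmitted segments are never fed in. A compact userspace sketch of the same arithmetic, separate from the patch itself (HZ and the field names are assumptions for the sketch):

    #define HZ 100                  /* assumed tick rate for the sketch          */

    struct rtt_state {              /* illustrative stand-in                     */
        long srtt;                  /* smoothed RTT, scaled by 8 (like sk->rtt)  */
        long mdev;                  /* mean deviation, scaled by 4 (sk->mdev)    */
        long rto;                   /* retransmission timeout, in ticks          */
    };

    /* Feed one RTT measurement (in ticks) from a segment that was not
     * retransmitted; ambiguous samples are simply skipped (Karn's rule). */
    static void rtt_sample(struct rtt_state *s, long m)
    {
        if (m <= 0)
            m = 1;
        m -= s->srtt >> 3;          /* error against the scaled estimate         */
        s->srtt += m;               /* srtt = 7/8 srtt + 1/8 new                 */
        if (m < 0)
            m = -m;
        m -= s->mdev >> 2;
        s->mdev += m;               /* mdev = 3/4 mdev + 1/4 |error|             */

        /* srtt is 8x and mdev 4x their real values, so this is ~rtt + 2*mdev.   */
        s->rto = ((s->srtt >> 2) + s->mdev) >> 1;
        if (s->rto > 120 * HZ)
            s->rto = 120 * HZ;      /* same clamp as the code above              */
        if (s->rto < HZ / 5)
            s->rto = HZ / 5;        /* 0.2s floor because of BSD delayed ACKs    */
    }
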
-
-	/*
-	 * XXX someone ought to look at this too.. at the moment, if skb_peek()
- * returns non-NULL, we completely ignore the timer stuff in the else
-	 * clause.  We ought to organize the code so that else clause can
-	 * (should) be executed regardless, possibly moving the PROBE timer
-	 * reset over.  The skb_peek() thing should only move stuff to the
-	 * write queue, NOT also manage the timer functions.
-	 */
-
-	/*
-	 * Maybe we can take some stuff off of the write queue,
-	 * and put it onto the xmit queue.
-	 */
-	if (skb_peek(&sk->write_queue) != NULL) 
-	{
-		if (after (sk->window_seq+1, sk->write_queue.next->end_seq) &&
-			(sk->retransmits == 0 || 
-			 sk->ip_xmit_timeout != TIME_WRITE ||
-			 before(sk->write_queue.next->end_seq, sk->rcv_ack_seq + 1))
-			&& sk->packets_out < sk->cong_window) 
-		{
-			/*
-			 *	Add more data to the send queue.
-			 */
-			flag |= 1;
-			tcp_write_xmit(sk);
-		}
-		else if (before(sk->window_seq, sk->write_queue.next->end_seq) &&
- 			sk->send_head == NULL &&
- 			sk->ack_backlog == 0 &&
- 			sk->state != TCP_TIME_WAIT) 
- 		{
- 			/*
- 			 *	Data to queue but no room.
- 			 */
- 			reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
- 		}		
-	}
-	else
-	{
-		/*
-		 * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
-		 * from TCP_CLOSE we don't do anything
-		 *
-		 * from anything else, if there is write data (or fin) pending,
-		 * we use a TIME_WRITE timeout, else if keepalive we reset to
-		 * a KEEPALIVE timeout, else we delete the timer.
-		 *
-		 * We do not set flag for nominal write data, otherwise we may
-		 * force a state where we start to write itsy bitsy tidbits
-		 * of data.
-		 */
-
-		switch(sk->state) {
-		case TCP_TIME_WAIT:
-			/*
-			 * keep us in TIME_WAIT until we stop getting packets,
-			 * reset the timeout.
-			 */
-			reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
-			break;
-		case TCP_CLOSE:
-			/*
-			 * don't touch the timer.
-			 */
-			break;
-		default:
-			/*
-			 * 	Must check send_head, write_queue, and ack_backlog
-			 * 	to determine which timeout to use.
-			 */
-			if (sk->send_head || skb_peek(&sk->write_queue) != NULL || sk->ack_backlog) {
-				reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-			} else if (sk->keepopen) {
-				reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
-			} else {
-				del_timer(&sk->retransmit_timer);
-				sk->ip_xmit_timeout = 0;
-			}
-			break;
-		}
-	}
-
-	/*
-	 *	We have nothing queued but space to send. Send any partial
-	 *	packets immediately (end of Nagle rule application).
-	 */
-	 
-	if (sk->packets_out == 0 && sk->partial != NULL &&
-		skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
-	{
-		flag |= 1;
-		tcp_send_partial(sk);
-	}
-
-	/*
-	 * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
-	 * we are now waiting for an acknowledge to our FIN.  The other end is
-	 * already in TIME_WAIT.
-	 *
-	 * Move to TCP_CLOSE on success.
-	 */
-
-	if (sk->state == TCP_LAST_ACK) 
-	{
-		if (!sk->dead)
-			sk->state_change(sk);
-		if(sk->debug)
-			printk("rcv_ack_seq: %X==%X, acked_seq: %X==%X\n",
-				sk->rcv_ack_seq,sk->write_seq,sk->acked_seq,sk->fin_seq);
-		if (sk->rcv_ack_seq == sk->write_seq /*&& sk->acked_seq == sk->fin_seq*/) 
-		{
-			flag |= 1;
-			sk->shutdown = SHUTDOWN_MASK;
-			tcp_set_state(sk,TCP_CLOSE);
-			return 1;
-		}
-	}
-
-	/*
-	 *	Incoming ACK to a FIN we sent in the case of our initiating the close.
-	 *
-	 *	Move to FIN_WAIT2 to await a FIN from the other end. Set
-	 *	SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in.
-	 */
-
-	if (sk->state == TCP_FIN_WAIT1) 
-	{
-
-		if (!sk->dead) 
-			sk->state_change(sk);
-		if (sk->rcv_ack_seq == sk->write_seq) 
-		{
-			flag |= 1;
-			sk->shutdown |= SEND_SHUTDOWN;
-			tcp_set_state(sk, TCP_FIN_WAIT2);
-		}
-	}
-
-	/*
-	 *	Incoming ACK to a FIN we sent in the case of a simultaneous close.
-	 *
-	 *	Move to TIME_WAIT
-	 */
-
-	if (sk->state == TCP_CLOSING) 
-	{
-
-		if (!sk->dead) 
-			sk->state_change(sk);
-		if (sk->rcv_ack_seq == sk->write_seq) 
-		{
-			flag |= 1;
-			tcp_time_wait(sk);
-		}
-	}
-	
-	/*
-	 *	Final ack of a three way shake 
-	 */
-	 
-	if(sk->state==TCP_SYN_RECV)
-	{
-		tcp_set_state(sk, TCP_ESTABLISHED);
-		tcp_options(sk,th);
-		sk->dummy_th.dest=th->source;
-		sk->copied_seq = sk->acked_seq;
-		if(!sk->dead)
-			sk->state_change(sk);
-		if(sk->max_window==0)
-		{
-			sk->max_window=32;	/* Sanity check */
-			sk->mss=min(sk->max_window,sk->mtu);
-		}
-	}
-	
-	/*
-	 * I make no guarantees about the first clause in the following
-	 * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
-	 * what conditions "!flag" would be true.  However I think the rest
-	 * of the conditions would prevent that from causing any
-	 * unnecessary retransmission. 
-	 *   Clearly if the first packet has expired it should be 
-	 * retransmitted.  The other alternative, "flag&2 && retransmits", is
-	 * harder to explain:  You have to look carefully at how and when the
-	 * timer is set and with what timeout.  The most recent transmission always
-	 * sets the timer.  So in general if the most recent thing has timed
-	 * out, everything before it has as well.  So we want to go ahead and
-	 * retransmit some more.  If we didn't explicitly test for this
-	 * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
-	 * would not be true.  If you look at the pattern of timing, you can
-	 * show that rto is increased fast enough that the next packet would
-	 * almost never be retransmitted immediately.  Then you'd end up
-	 * waiting for a timeout to send each packet on the retransmission
-	 * queue.  With my implementation of the Karn sampling algorithm,
-	 * the timeout would double each time.  The net result is that it would
-	 * take a hideous amount of time to recover from a single dropped packet.
-	 * It's possible that there should also be a test for TIME_WRITE, but
-	 * I think as long as "send_head != NULL" and "retransmit" is on, we've
-	 * got to be in real retransmission mode.
-	 *   Note that tcp_do_retransmit is called with all==1.  Setting cong_window
-	 * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
-	 * As long as no further losses occur, this seems reasonable.
-	 */
-	
-	if (((!flag) || (flag&4)) && sk->send_head != NULL &&
-	       (((flag&2) && sk->retransmits) ||
-	       (sk->send_head->when + sk->rto < jiffies))) 
-	{
-		if(sk->send_head->when + sk->rto < jiffies)
-			tcp_retransmit(sk,0);	
-		else
-		{
-			tcp_do_retransmit(sk, 1);
-			reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-		}
-	}
-
-	return(1);
-}
-
-
-/*
- * 	Process the FIN bit. This now behaves as it is supposed to work
- *	and the FIN takes effect when it is validly part of sequence
- *	space. Not before when we get holes.
- *
- *	If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
- *	(and thence onto LAST-ACK and finally, CLOSE, we never enter
- *	TIME-WAIT)
- *
- *	If we are in FINWAIT-1, a received FIN indicates simultaneous
- *	close and we go into CLOSING (and later onto TIME-WAIT)
- *
- *	If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
- *
- */
- 
-static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
-{
-	sk->fin_seq = skb->end_seq;
-
-	if (!sk->dead) 
-	{
-		sk->state_change(sk);
-		sock_wake_async(sk->socket, 1);
-	}
-
-	switch(sk->state) 
-	{
-		case TCP_SYN_RECV:
-		case TCP_SYN_SENT:
-		case TCP_ESTABLISHED:
-			/*
-			 * move to CLOSE_WAIT, tcp_data() already handled
-			 * sending the ack.
-			 */
-			tcp_set_state(sk,TCP_CLOSE_WAIT);
-			if (th->rst)
-				sk->shutdown = SHUTDOWN_MASK;
-			break;
-
-		case TCP_CLOSE_WAIT:
-		case TCP_CLOSING:
-			/*
-			 * received a retransmission of the FIN, do
-			 * nothing.
-			 */
-			break;
-		case TCP_TIME_WAIT:
-			/*
-			 * received a retransmission of the FIN,
-			 * restart the TIME_WAIT timer.
-			 */
-			reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
-			return(0);
-		case TCP_FIN_WAIT1:
-			/*
-			 * This case occurs when a simultaneous close
-			 * happens, we must ack the received FIN and
-			 * enter the CLOSING state.
-			 *
-			 * This causes a WRITE timeout, which will either
-			 * move on to TIME_WAIT when we timeout, or resend
-			 * the FIN properly (maybe we get rid of that annoying
-			 * FIN lost hang). The TIME_WRITE code is already correct
-			 * for handling this timeout.
-			 */
-
-			if(sk->ip_xmit_timeout != TIME_WRITE)
-				reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-			tcp_set_state(sk,TCP_CLOSING);
-			break;
-		case TCP_FIN_WAIT2:
-			/*
-			 * received a FIN -- send ACK and enter TIME_WAIT
-			 */
-			reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
-			sk->shutdown|=SHUTDOWN_MASK;
-			tcp_set_state(sk,TCP_TIME_WAIT);
-			break;
-		case TCP_CLOSE:
-			/*
-			 * already in CLOSE
-			 */
-			break;
-		default:
-			tcp_set_state(sk,TCP_LAST_ACK);
-	
-			/* Start the timers. */
-			reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
-			return(0);
-	}
-
-	return(0);
-}
-
-
-
-/*
- *	This routine handles the data.  If there is room in the buffer,
- *	it will have already been moved into it.  If there is no
- *	room, then we will just have to discard the packet.
- */
-
-extern /* __inline__ */ int tcp_data(struct sk_buff *skb, struct sock *sk, 
-	 unsigned long saddr, unsigned short len)
-{
-	struct sk_buff *skb1, *skb2;
-	struct tcphdr *th;
-	int dup_dumped=0;
-	u32 new_seq, shut_seq;
-
-	th = skb->h.th;
-	skb_pull(skb,th->doff*4);
-	skb_trim(skb,len-(th->doff*4));
-
-	/*
-	 *	The bytes in the receive read/assembly queue have increased. Needed for the
-	 *	low memory discard algorithm 
-	 */
-	   
-	sk->bytes_rcv += skb->len;
-	
-	if (skb->len == 0 && !th->fin) 
-	{
-		/* 
-		 *	Don't want to keep passing ack's back and forth. 
-		 *	(someone sent us dataless, boring frame)
-		 */
-		if (!th->ack)
-			tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
-		kfree_skb(skb, FREE_READ);
-		return(0);
-	}
-	
-	/*
-	 *	We no longer have anyone receiving data on this connection.
-	 */
-
-#ifndef TCP_DONT_RST_SHUTDOWN		 
-
-	if(sk->shutdown & RCV_SHUTDOWN)
-	{
-		/*
-		 *	FIXME: BSD has some magic to avoid sending resets to
-		 *	broken 4.2 BSD keepalives. Much to my surprise a few non
-		 *	BSD stacks still have broken keepalives so we want to
-		 *	cope with it.
-		 */
-
-		if(skb->len)	/* We don't care if it's just an ack or
-				   a keepalive/window probe */
-		{
-			new_seq = skb->seq + skb->len + th->syn;	/* Right edge of _data_ part of frame */
-			
-			/* Do this the way 4.4BSD treats it. Not what I'd
-			   regard as the meaning of the spec but it's what BSD
-			   does and clearly they know everything 8) */
-
-			/*
-			 *	This is valid because of two things
-			 *
-			 *	a) The way tcp_data behaves at the bottom.
-			 *	b) A fin takes effect when read not when received.
-			 */
-			 
-			shut_seq = sk->acked_seq+1;	/* Last byte */
-			
-			if(after(new_seq,shut_seq))
-			{
-				if(sk->debug)
-					printk("Data arrived on %p after close [Data right edge %X, Socket shut on %X] %d\n",
-						sk, new_seq, shut_seq, sk->blog);
-				if(sk->dead)
-				{
-					sk->acked_seq = new_seq + th->fin;
-					tcp_reset(sk->saddr, sk->daddr, skb->h.th,
-						sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
-					tcp_statistics.TcpEstabResets++;
-					sk->err = EPIPE;
-					sk->error_report(sk);
-					sk->shutdown = SHUTDOWN_MASK;
-					tcp_set_state(sk,TCP_CLOSE);
-					kfree_skb(skb, FREE_READ);
-					return 0;
-				}
-			}
-		}
-	}
-
-#endif
-
-	/*
-	 * 	Now we have to walk the chain, and figure out where this one
-	 * 	goes into it.  This is set up so that the last packet we received
-	 * 	will be the first one we look at, that way if everything comes
-	 * 	in order, there will be no performance loss, and if they come
-	 * 	out of order we will be able to fit things in nicely.
-	 *
-	 *	[AC: This is wrong. We should assume in order first and then walk
-	 *	 forwards from the first hole based upon real traffic patterns.]
-	 *	
-	 */
-
-	if (skb_peek(&sk->receive_queue) == NULL) 	/* Empty queue is easy case */
-	{
-		skb_queue_head(&sk->receive_queue,skb);
-		skb1= NULL;
-	} 
-	else
-	{
-		for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
-		{
-			if(sk->debug)
-			{
-				printk("skb1=%p :", skb1);
-				printk("skb1->seq = %d: ", skb1->seq);
-				printk("skb->seq = %d\n",skb->seq);
-				printk("copied_seq = %d acked_seq = %d\n", sk->copied_seq,
-						sk->acked_seq);
-			}
-			
-			/*
-			 *	Optimisation: Duplicate frame or extension of previous frame from
-			 *	same sequence point (lost ack case).
-			 *	The frame contains duplicate data or replaces a previous frame;
-			 *	discard the previous frame (safe as sk->inuse is set) and put
-			 *	the new one in its place.
-			 */
-			 
-			if (skb->seq==skb1->seq && skb->len>=skb1->len)
-			{
-				skb_append(skb1,skb);
-				skb_unlink(skb1);
-				kfree_skb(skb1,FREE_READ);
-				dup_dumped=1;
-				skb1=NULL;
-				break;
-			}
-			
-			/*
-			 *	Found where it fits
-			 */
-			 
-			if (after(skb->seq+1, skb1->seq))
-			{
-				skb_append(skb1,skb);
-				break;
-			}
-			
-			/*
-			 *	See if we've hit the start. If so insert.
-			 */
-			if (skb1 == skb_peek(&sk->receive_queue))
-			{
-				skb_queue_head(&sk->receive_queue, skb);
-				break;
-			}
-		}
-  	}
-
-	/*
-	 *	Figure out what the ack value for this frame is
-	 */
-	 
-	if (before(sk->acked_seq, sk->copied_seq)) 
-	{
-		printk("*** tcp.c:tcp_data bug acked < copied\n");
-		sk->acked_seq = sk->copied_seq;
-	}
-
-	/*
-	 *	Now figure out if we can ack anything. This is very messy because we really want two
-	 *	receive queues, a completed and an assembly queue. We also want only one transmit
-	 *	queue.
-	 */
-
-	if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(skb->seq, sk->acked_seq+1)) 
-	{
-		if (before(skb->seq, sk->acked_seq+1)) 
-		{
-
-			if (after(skb->end_seq, sk->acked_seq)) 
-				sk->acked_seq = skb->end_seq;
-
-			skb->acked = 1;
-
-			/*
-			 *	When we ack the fin, we do the FIN 
-			 *	processing.
-			 */
-
-			if (skb->h.th->fin) 
-			{
-				tcp_fin(skb,sk,skb->h.th);
-			}
-	  
-			for(skb2 = skb->next;
-			    skb2 != (struct sk_buff *)&sk->receive_queue;
-			    skb2 = skb2->next) 
-			{
-				if (before(skb2->seq, sk->acked_seq+1)) 
-				{
-					if (after(skb2->end_seq, sk->acked_seq))
-						sk->acked_seq = skb2->end_seq;
-
-					skb2->acked = 1;
-					/*
-					 * 	When we ack the fin, we do
-					 * 	the fin handling.
-					 */
-					if (skb2->h.th->fin) 
-					{
-						tcp_fin(skb,sk,skb->h.th);
-					}
-
-					/*
-					 *	Force an immediate ack.
-					 */
-					 
-					sk->ack_backlog = sk->max_ack_backlog;
-				}
-				else
-				{
-					break;
-				}
-			}
-
-			/*
-			 *	This also takes care of updating the window.
-			 *	This if statement needs to be simplified.
-			 *
-			 *      rules for delaying an ack:
-			 *      - delay time <= 0.5 HZ
-			 *      - we don't have a window update to send
-			 *      - must send at least every 2 full sized packets
-			 */
-			if (!sk->delay_acks ||
-			    sk->ack_backlog >= sk->max_ack_backlog || 
-			    sk->bytes_rcv > sk->max_unacked || th->fin ||
-			    sk->ato > HZ/2 ||
-			    tcp_raise_window(sk)) {
-	/*			tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
-			}
-			else 
-			{
-				sk->ack_backlog++;
-				
-				if(sk->debug)				
-					printk("Ack queued.\n");
-				reset_xmit_timer(sk, TIME_WRITE, sk->ato);
-			}
-		}
-	}
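
The "rules for delaying an ack" listed above reduce to one predicate deciding whether an ACK must go out immediately or may wait on the short delayed-ACK timer. A small sketch of that decision, separate from the patch itself (the struct and HZ value are assumptions, not the sock fields):

    #define HZ 100                     /* assumed tick rate for the sketch        */

    struct ack_state {                 /* illustrative stand-in                   */
        int      delay_acks;           /* are delayed ACKs enabled at all?        */
        unsigned ack_backlog;          /* ACKs currently owed to the peer         */
        unsigned max_ack_backlog;      /* cap: roughly two full-sized segments    */
        unsigned bytes_rcv;            /* bytes received since the last ACK       */
        unsigned max_unacked;          /* byte threshold that forces an ACK       */
        unsigned ato;                  /* estimated inter-arrival time, in ticks  */
        int      fin_seen;             /* did this segment carry a FIN?           */
        int      window_update_due;    /* would the ACK announce a bigger window? */
    };

    /* Non-zero when the ACK must go out now, mirroring the test above. */
    static int must_ack_now(const struct ack_state *a)
    {
        return !a->delay_acks ||
               a->ack_backlog >= a->max_ack_backlog ||
               a->bytes_rcv > a->max_unacked ||
               a->fin_seen ||
               a->ato > HZ / 2 ||      /* arrivals too far apart to risk delaying */
               a->window_update_due;
    }
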
-
-	/*
-	 *	If we've missed a packet, send an ack.
-	 *	Also start a timer to send another.
-	 */
-	 
-	if (!skb->acked) 
-	{
-	
-	/*
-	 *	This is important.  If we don't have much room left,
-	 *	we need to throw out a few packets so we have a good
-	 *	window.  Note that mtu is used, not mss, because mss is really
-	 *	for the send side.  He could be sending us stuff as large as mtu.
-	 */
-		 
-		while (sock_rspace(sk) < sk->mtu) 
-		{
-			skb1 = skb_peek(&sk->receive_queue);
-			if (skb1 == NULL) 
-			{
-				printk("INET: tcp.c:tcp_data memory leak detected.\n");
-				break;
-			}
-
-			/*
-			 *	Don't throw out something that has been acked. 
-			 */
-		 
-			if (skb1->acked) 
-			{
-				break;
-			}
-		
-			skb_unlink(skb1);
-			kfree_skb(skb1, FREE_READ);
-		}
-		tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
-		sk->ack_backlog++;
-		reset_xmit_timer(sk, TIME_WRITE, min(sk->ato, 0.5 * HZ));
-	}
-	else
-	{
-		tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
-	}
-
-	/*
-	 *	Now tell the user we may have some data. 
-	 */
-	 
-	if (!sk->dead) 
-	{
-        	if(sk->debug)
-        		printk("Data wakeup.\n");
-		sk->data_ready(sk,0);
-	} 
-	return(0);
-}
-
-
-/*
- *	This routine is only called when we have urgent data
- *	signalled. It's the 'slow' part of tcp_urg. It could be
- *	moved inline now as tcp_urg is only called from one
- *	place. We handle URGent data wrong. We have to - as
- *	BSD still doesn't use the correction from RFC961.
- */
- 
-static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
-{
-	u32 ptr = ntohs(th->urg_ptr);
-
-	if (ptr)
-		ptr--;
-	ptr += ntohl(th->seq);
-
-	/* ignore urgent data that we've already seen and read */
-	if (after(sk->copied_seq, ptr))
-		return;
-
-	/* do we already have a newer (or duplicate) urgent pointer? */
-	if (sk->urg_data && !after(ptr, sk->urg_seq))
-		return;
-
-	/* tell the world about our new urgent pointer */
-	if (sk->proc != 0) {
-		if (sk->proc > 0) {
-			kill_proc(sk->proc, SIGURG, 1);
-		} else {
-			kill_pg(-sk->proc, SIGURG, 1);
-		}
-	}
-	sk->urg_data = URG_NOTYET;
-	sk->urg_seq = ptr;
-}
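
tcp_check_urg above turns the 16-bit urgent pointer into an absolute sequence number the BSD way: the pointer is treated as naming the byte after the urgent data, so one is subtracted, rather than applying the RFC 961 correction the comment mentions. A tiny sketch of just that conversion, separate from the patch itself, with values already in host byte order:

    #include <stdint.h>

    /* Sequence number of the urgent byte, given the segment's starting
     * sequence number and the urgent pointer field (both in host order),
     * using the BSD interpretation followed above. */
    static uint32_t urgent_seq_bsd(uint32_t seg_seq, uint16_t urg_ptr)
    {
        uint32_t off = urg_ptr;
        if (off)
            off--;                  /* pointer names the byte past the urgent data */
        return seg_seq + off;
    }
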
-
-/*
- *	This is the 'fast' part of urgent handling.
- */
- 
-extern __inline__ int tcp_urg(struct sock *sk, struct tcphdr *th,
-	unsigned long saddr, unsigned long len)
-{
-	u32 ptr;
-
-	/*
-	 *	Check if we get a new urgent pointer - normally not 
-	 */
-	 
-	if (th->urg)
-		tcp_check_urg(sk,th);
-
-	/*
-	 *	Do we wait for any urgent data? - normally not
-	 */
-	 
-	if (sk->urg_data != URG_NOTYET)
-		return 0;
-
-	/*
-	 *	Is the urgent pointer pointing into this packet? 
-	 */
-	 
-	ptr = sk->urg_seq - ntohl(th->seq) + th->doff*4;
-	if (ptr >= len)
-		return 0;
-
-	/*
-	 *	Ok, got the correct packet, update info 
-	 */
-	 
-	sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
-	if (!sk->dead)
-		sk->data_ready(sk,0);
-	return 0;
-}
-
-/*
- *	This will accept the next outstanding connection. 
- */
- 
-static struct sock *tcp_accept(struct sock *sk, int flags)
-{
-	struct sock *newsk;
-	struct sk_buff *skb;
-  
-  /*
-   * We need to make sure that this socket is listening,
-   * and that it has something pending.
-   */
-
-	if (sk->state != TCP_LISTEN) 
-	{
-		sk->err = EINVAL;
-		return(NULL); 
-	}
-
-	/* Avoid the race. */
-	cli();
-	sk->inuse = 1;
-
-	while((skb = tcp_dequeue_established(sk)) == NULL) 
-	{
-		if (flags & O_NONBLOCK) 
-		{
-			sti();
-			release_sock(sk);
-			sk->err = EAGAIN;
-			return(NULL);
-		}
-
-		release_sock(sk);
-		interruptible_sleep_on(sk->sleep);
-		if (current->signal & ~current->blocked) 
-		{
-			sti();
-			sk->err = ERESTARTSYS;
-			return(NULL);
-		}
-		sk->inuse = 1;
-  	}
-	sti();
-
-	/*
-	 *	Now all we need to do is return skb->sk. 
-	 */
-
-	newsk = skb->sk;
-
-	kfree_skb(skb, FREE_READ);
-	sk->ack_backlog--;
-	release_sock(sk);
-	return(newsk);
-}
-
-
-/*
- *	This will initiate an outgoing connection. 
- */
- 
-static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
-{
-	struct sk_buff *buff;
-	struct device *dev=NULL;
-	unsigned char *ptr;
-	int tmp;
-	int atype;
-	struct tcphdr *t1;
-	struct rtable *rt;
-
-	if (sk->state != TCP_CLOSE) 
-		return(-EISCONN);
-
-	/*
-	 *	Don't allow a double connect.
-	 */
-	 	
-	if(sk->daddr)
-		return -EINVAL;
-	
-	if (addr_len < 8) 
-		return(-EINVAL);
-
-	if (usin->sin_family && usin->sin_family != AF_INET) 
-		return(-EAFNOSUPPORT);
-
-  	/*
-  	 *	connect() to INADDR_ANY means loopback (BSD'ism).
-  	 */
-  	
-  	if(usin->sin_addr.s_addr==INADDR_ANY)
-		usin->sin_addr.s_addr=ip_my_addr();
-		  
-	/*
-	 *	Don't want a TCP connection going to a broadcast address 
-	 */
-
-	if ((atype=ip_chk_addr(usin->sin_addr.s_addr)) == IS_BROADCAST || atype==IS_MULTICAST) 
-		return -ENETUNREACH;
-  
-	sk->inuse = 1;
-	sk->daddr = usin->sin_addr.s_addr;
-	sk->write_seq = tcp_init_seq();
-	sk->window_seq = sk->write_seq;
-	sk->rcv_ack_seq = sk->write_seq -1;
-	sk->err = 0;
-	sk->dummy_th.dest = usin->sin_port;
-	release_sock(sk);
-
-	buff = sock_wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
-	if (buff == NULL) 
-	{
-		return(-ENOMEM);
-	}
-	sk->inuse = 1;
-	buff->sk = sk;
-	buff->free = 0;
-	buff->localroute = sk->localroute;
-	
-
-	/*
-	 *	Put in the IP header and routing stuff.
-	 */
-	 
-	tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
-		IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
-	if (tmp < 0) 
-	{
-		sock_wfree(sk, buff);
-		release_sock(sk);
-		return(-ENETUNREACH);
-	}
-	if ((rt = sk->ip_route_cache) != NULL && !sk->saddr)
-		sk->saddr = rt->rt_src;
-	sk->rcv_saddr = sk->saddr;
-
-	t1 = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr));
-
-	memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
-	buff->seq = sk->write_seq++;
-	t1->seq = htonl(buff->seq);
-	sk->sent_seq = sk->write_seq;
-	buff->end_seq = sk->write_seq;
-	t1->ack = 0;
-	t1->window = 2;
-	t1->res1=0;
-	t1->res2=0;
-	t1->rst = 0;
-	t1->urg = 0;
-	t1->psh = 0;
-	t1->syn = 1;
-	t1->urg_ptr = 0;
-	t1->doff = 6;
-	/* use 512 or whatever user asked for */
-	
-	if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
-		sk->window_clamp=rt->rt_window;
-	else
-		sk->window_clamp=0;
-
-	if (sk->user_mss)
-		sk->mtu = sk->user_mss;
-	else if (rt)
-		sk->mtu = rt->rt_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
-	else 
-		sk->mtu = 576 - sizeof(struct iphdr) - sizeof(struct tcphdr);
-
-	/*
-	 *	but not bigger than device MTU 
-	 */
-
-	if(sk->mtu <32)
-		sk->mtu = 32;	/* Sanity limit */
-		
-	sk->mtu = min(sk->mtu, dev->mtu - sizeof(struct iphdr) - sizeof(struct tcphdr));
-
-#ifdef CONFIG_SKIP
-	
-	/*
-	 *	SKIP devices set their MTU to 65535. This is so they can take packets
-	 *	unfragmented to security process then fragment. They could lie to the
- *	TCP layer about a suitable MTU, but it's easier to let SKIP sort it out
-	 *	simply because the final package we want unfragmented is going to be
-	 *
-	 *	[IPHDR][IPSP][Security data][Modified TCP data][Security data]
-	 */
-	 
-	if(skip_pick_mtu!=NULL)		/* If SKIP is loaded.. */
-		sk->mtu=skip_pick_mtu(sk->mtu,dev);
-#endif
-	
-	/*
-	 *	Put in the TCP options to say MTU. 
-	 */
-
-	ptr = skb_put(buff,4);
-	ptr[0] = 2;
-	ptr[1] = 4;
-	ptr[2] = (sk->mtu) >> 8;
-	ptr[3] = (sk->mtu) & 0xff;
-	tcp_send_check(t1, sk->saddr, sk->daddr,
-		  sizeof(struct tcphdr) + 4, sk);
-
-	/*
-	 *	This must go first otherwise a really quick response will get reset. 
-	 */
-
-	tcp_cache_zap();
-	tcp_set_state(sk,TCP_SYN_SENT);
-	if(rt&&rt->rt_flags&RTF_IRTT)
-		sk->rto = rt->rt_irtt;
-	else
-		sk->rto = TCP_TIMEOUT_INIT;
-	sk->retransmit_timer.function=&retransmit_timer;
-	sk->retransmit_timer.data = (unsigned long)sk;
-	reset_xmit_timer(sk, TIME_WRITE, sk->rto);	/* Timer for repeating the SYN until an answer  */
-	sk->retransmits = 0;				/* Now works the right way instead of a hacked 
-											initial setting */
-
-	sk->prot->queue_xmit(sk, dev, buff, 0);  
-	reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-	tcp_statistics.TcpActiveOpens++;
-	tcp_statistics.TcpOutSegs++;
-  
-	release_sock(sk);
-	return(0);
-}
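
The four bytes written by hand above are the only TCP option this SYN carries, the Maximum Segment Size option: kind 2, length 4, then the 16-bit MSS in network byte order, which is also why doff is set to 6 words rather than the bare-header 5. A small sketch of the same encoding, separate from the patch itself (the helper name is made up):

    #include <stddef.h>
    #include <stdint.h>

    /* Write the 4-byte MSS option into buf, byte for byte as the SYN-building
     * code above does, and return the number of option bytes written. */
    static size_t put_mss_option(unsigned char *buf, uint16_t mss)
    {
        buf[0] = 2;                          /* option kind: MSS                */
        buf[1] = 4;                          /* option length, header included  */
        buf[2] = (unsigned char)(mss >> 8);  /* high byte first (network order) */
        buf[3] = (unsigned char)(mss & 0xff);
        return 4;
    }
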
-
-/*
- * React to a out-of-window TCP sequence number in an incoming packet
- */
-static void bad_tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
-	     struct options *opt, unsigned long saddr, struct device *dev)
-{
-	if (th->rst)
-		return;
-
-	/*
-	 *	Send a reset if we get something not ours and we are
-	 *	unsynchronized. Note: We don't do anything to our end. We
-	 *	are just killing the bogus remote connection then we will
-	 *	connect again and it will work (with luck).
-	 */
-  	 
-	if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) 
-	{
-		tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
-		return;
-	}
-
-	/* Try to resync things. */
-	tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
-	return;
-}
-
-/*
- *	This functions checks to see if the tcp header is actually acceptable. 
- */
- 
-extern __inline__ int tcp_sequence(struct sock *sk, u32 seq, u32 end_seq)
-{
-	/* does the packet contain any unseen data AND */
-	/* does the packet start before the window? */
-	return	after(end_seq+1, sk->acked_seq) &&
-		before(seq, sk->acked_seq + sk->window + 1);
-}
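
tcp_sequence above, like most of this file, relies on before()/after() to compare 32-bit sequence numbers that wrap. The trick is to test the sign of the 32-bit difference, so a small value just past the wrap still compares as later. A standalone sketch with a quick self-check, separate from the patch itself:

    #include <assert.h>
    #include <stdint.h>

    /* a is "before" b when the signed 32-bit difference a - b is negative;
     * this stays correct across sequence-number wraparound. */
    static int seq_before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
    static int seq_after(uint32_t a, uint32_t b)  { return seq_before(b, a); }

    int main(void)
    {
        assert(seq_before(100, 200));                 /* ordinary ordering         */
        assert(seq_after(5, 0xfffffff0u));            /* 5 lies just past the wrap */
        assert(!seq_before(0xfffffff0u + 0x20u, 5));  /* still consistent          */
        return 0;
    }
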
-
-/*
- *	When we get a reset we do this.
- */
-
-static int tcp_std_reset(struct sock *sk, struct sk_buff *skb)
-{
-	sk->zapped = 1;
-	sk->err = ECONNRESET;
-	if (sk->state == TCP_SYN_SENT)
-		sk->err = ECONNREFUSED;
-	if (sk->state == TCP_CLOSE_WAIT)
-		sk->err = EPIPE;
-#ifdef TCP_DO_RFC1337		
-	/*
-	 *	Time wait assassination protection [RFC1337]
-	 */
-	if(sk->state!=TCP_TIME_WAIT)
-	{	
-		tcp_set_state(sk,TCP_CLOSE);
-		sk->shutdown = SHUTDOWN_MASK;
-	}
-#else	
-	tcp_set_state(sk,TCP_CLOSE);
-	sk->shutdown = SHUTDOWN_MASK;
-#endif	
-	if (!sk->dead) 
-		sk->state_change(sk);
-	kfree_skb(skb, FREE_READ);
-	release_sock(sk);
-	return(0);
-}
-
-/*
- *	Find the socket, using the last hit cache if applicable.
- */
-static inline struct sock * get_tcp_sock(u32 saddr, u16 sport, u32 daddr, u16 dport)
-{
-	struct sock * sk;
-
-	sk = (struct sock *) th_cache_sk;
-	if (saddr != th_cache_saddr || daddr != th_cache_daddr ||
-	    sport != th_cache_sport || dport != th_cache_dport) {
-		sk = get_sock(&tcp_prot, dport, saddr, sport, daddr);
-		if (sk) {
-			th_cache_saddr=saddr;
-			th_cache_daddr=daddr;
-  			th_cache_dport=dport;
-			th_cache_sport=sport;
-			th_cache_sk=sk;
-		}
-	}
-	return sk;
-}
-
-
-/*
- *	A TCP packet has arrived.
- *		skb->h.raw is the TCP header.
- */
- 
-int tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
-	__u32 daddr, unsigned short len,
-	__u32 saddr, int redo, struct inet_protocol * protocol)
-{
-	struct tcphdr *th;
-	struct sock *sk;
-	int syn_ok=0;
-
-	/*
-	 * "redo" is 1 if we have already seen this skb but couldn't
-	 * use it at that time (the socket was locked).  In that case
-	 * we have already done a lot of the work (looked up the socket
-	 * etc).
-	 */
-	th = skb->h.th;
-	sk = skb->sk;
-	if (!redo) {
-		tcp_statistics.TcpInSegs++;
-		if (skb->pkt_type!=PACKET_HOST)
-		{
-		  	kfree_skb(skb,FREE_READ);
-		  	return(0);
-		}
-		/*
-		 *	Pull up the IP header.
-		 */
-		skb_pull(skb, skb->h.raw-skb->data);
-		/*
-		 *	Try to use the device checksum if provided.
-		 */
-		if (
-			((skb->ip_summed == CHECKSUM_HW) && tcp_check(th, len, saddr, daddr, skb->csum ))||
-		    	((skb->ip_summed == CHECKSUM_NONE) && tcp_check(th, len, saddr, daddr, csum_partial((char *)th, len, 0)))
-		    /* skip if CHECKSUM_UNNECESSARY */
-		    )
-		{
-			skb->sk = NULL;
-			kfree_skb(skb,FREE_READ);
-			/*
-			 *	We don't release the socket because it was
-			 *	never marked in use.
-			 */
-			return(0);
-		}
-		sk = get_tcp_sock(saddr, th->source, daddr, th->dest);
-		if (!sk)
-			goto no_tcp_socket;
-		skb->sk = sk;
-		skb->seq = ntohl(th->seq);
-		skb->end_seq = skb->seq + th->syn + th->fin + len - th->doff*4;
-		skb->ack_seq = ntohl(th->ack_seq);
-
-		skb->acked = 0;
-		skb->used = 0;
-		skb->free = 0;
-		skb->saddr = daddr;
-		skb->daddr = saddr;
-	
-		/* We may need to add it to the backlog here. */
-		cli();
-		if (sk->inuse) 
-		{
-			skb_queue_tail(&sk->back_log, skb);
-			sti();
-			return(0);
-		}
-		sk->inuse = 1;
-		sti();
-	}
-
-	/*
-	 *	If this socket has got a reset it's to all intents and purposes 
-	 *	really dead. Count closed sockets as dead.
-	 *
-	 *	Note: BSD appears to have a bug here. A 'closed' TCP in BSD
-	 *	simply drops data. This seems incorrect as a 'closed' TCP doesn't
-	 *	exist so should cause resets as if the port was unreachable.
-	 */
-
-	if (sk->zapped || sk->state==TCP_CLOSE)
-		goto no_tcp_socket;
-
-	if (!sk->prot) 
-	{
-		printk("IMPOSSIBLE 3\n");
-		return(0);
-	}
-
-
-	/*
-	 *	Charge the memory to the socket. 
-	 */
-	 
-	skb->sk=sk;
-	sk->rmem_alloc += skb->truesize;
-
-	/*
-	 *	This basically follows the flow suggested by RFC793, with the corrections in RFC1122. We
-	 *	don't implement precedence and we process URG incorrectly (deliberately so) for BSD bug
-	 *	compatibility. We also set up variables more thoroughly [Karn notes in the
-	 *	KA9Q code the RFC793 incoming segment rules don't initialise the variables for all paths].
-	 */
-
-	if(sk->state!=TCP_ESTABLISHED)		/* Skip this lot for normal flow */
-	{
-	
-		/*
-		 *	Now deal with unusual cases.
-		 */
-	 
-		if(sk->state==TCP_LISTEN)
-		{
-			if(th->ack)	/* These use the socket TOS.. might want to be the received TOS */
-				tcp_reset(daddr,saddr,th,sk->prot,opt,dev,sk->ip_tos, sk->ip_ttl);
-
-			/*
-			 *	We don't care for RST, and non SYN are absorbed (old segments)
-			 *	Broadcast/multicast SYN isn't allowed. Note - bug if you change the
-			 *	netmask on a running connection it can go broadcast. Even Suns have
-			 *	this problem so I'm ignoring it 
-			 */
-			   
-			if(th->rst || !th->syn || th->ack || ip_chk_addr(daddr)!=IS_MYADDR)
-			{
-				kfree_skb(skb, FREE_READ);
-				release_sock(sk);
-				return 0;
-			}
-		
-			/*	
-			 *	Guess we need to make a new socket up 
-			 */
-		
-			tcp_conn_request(sk, skb, daddr, saddr, opt, dev, tcp_init_seq());
-		
-			/*
-			 *	Now we have several options: In theory there is nothing else
-			 *	in the frame. KA9Q has an option to send data with the syn,
-			 *	BSD accepts data with the syn up to the [to be] advertised window
-			 *	and Solaris 2.1 gives you a protocol error. For now we just ignore
- *	it; that fits the spec precisely and avoids incompatibilities. It
-			 *	would be nice in future to drop through and process the data.
-			 */
-			 
-			release_sock(sk);
-			return 0;
-		}
-	
-		/* retransmitted SYN? */
-		if (sk->state == TCP_SYN_RECV && th->syn && skb->seq+1 == sk->acked_seq)
-		{
-			kfree_skb(skb, FREE_READ);
-			release_sock(sk);
-			return 0;
-		}
-		
-		/*
-		 *	SYN sent means we have to look for a suitable ack and either reset
-		 *	for bad matches or go to connected 
-		 */
-	   
-		if(sk->state==TCP_SYN_SENT)
-		{
-			/* Crossed SYN or previous junk segment */
-			if(th->ack)
-			{
-				/* We got an ack, but it's not a good ack */
-				if(!tcp_ack(sk,th,saddr,len))
-				{
-					/* Reset the ack - its an ack from a 
-					   different connection  [ th->rst is checked in tcp_reset()] */
-					tcp_statistics.TcpAttemptFails++;
-					tcp_reset(daddr, saddr, th,
-						sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
-					kfree_skb(skb, FREE_READ);
-					release_sock(sk);
-					return(0);
-				}
-				if(th->rst)
-					return tcp_std_reset(sk,skb);
-				if(!th->syn)
-				{
-					/* A valid ack from a different connection
-					   start. Shouldn't happen but cover it */
-					tcp_statistics.TcpAttemptFails++;
-					tcp_reset(daddr, saddr, th,
-						sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
-					kfree_skb(skb, FREE_READ);
-					release_sock(sk);
-					return 0;
-				}
-				/*
-				 *	Ok.. it's good. Set up sequence numbers and
-				 *	move to established.
-				 */
-				syn_ok=1;	/* Don't reset this connection for the syn */
-				sk->acked_seq = skb->seq+1;
-				sk->lastwin_seq = skb->seq+1;
-				sk->fin_seq = skb->seq;
-				tcp_send_ack(sk->sent_seq,sk->acked_seq,sk,th,sk->daddr);
-				tcp_set_state(sk, TCP_ESTABLISHED);
-				tcp_options(sk,th);
-				sk->dummy_th.dest=th->source;
-				sk->copied_seq = sk->acked_seq;
-				if(!sk->dead)
-				{
-					sk->state_change(sk);
-					sock_wake_async(sk->socket, 0);
-				}
-				if(sk->max_window==0)
-				{
-					sk->max_window = 32;
-					sk->mss = min(sk->max_window, sk->mtu);
-				}
-			}
-			else
-			{
-				/* See if SYN's cross. Drop if boring */
-				if(th->syn && !th->rst)
-				{
-					/* Crossed SYN's are fine - but talking to
-					   yourself is right out... */
-					if(sk->saddr==saddr && sk->daddr==daddr &&
-						sk->dummy_th.source==th->source &&
-						sk->dummy_th.dest==th->dest)
-					{
-						tcp_statistics.TcpAttemptFails++;
-						return tcp_std_reset(sk,skb);
-					}
-					tcp_set_state(sk,TCP_SYN_RECV);
-					
-					/*
-					 *	FIXME:
-					 *	Must send SYN|ACK here
-					 */
-				}		
-				/* Discard junk segment */
-				kfree_skb(skb, FREE_READ);
-				release_sock(sk);
-				return 0;
-			}
-			/*
-			 *	SYN_RECV with data maybe.. drop through
-			 */
-			goto rfc_step6;
-		}
-
-	/*
-	 *	BSD has a funny hack with TIME_WAIT and fast reuse of a port. There is
-	 *	a more complex suggestion for fixing these reuse issues in RFC1644
- *	but it is not yet ready for general use. Also see RFC1379.
-	 */
-	
-#define BSD_TIME_WAIT
-#ifdef BSD_TIME_WAIT
-		if (sk->state == TCP_TIME_WAIT && th->syn && sk->dead && 
-			after(skb->seq, sk->acked_seq) && !th->rst)
-		{
-			u32 seq = sk->write_seq;
-			if(sk->debug)
-				printk("Doing a BSD time wait\n");
-			tcp_statistics.TcpEstabResets++;	   
-			sk->rmem_alloc -= skb->truesize;
-			skb->sk = NULL;
-			sk->err=ECONNRESET;
-			tcp_set_state(sk, TCP_CLOSE);
-			sk->shutdown = SHUTDOWN_MASK;
-			release_sock(sk);
-			sk=get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
-			if (sk && sk->state==TCP_LISTEN)
-			{
-				sk->inuse=1;
-				skb->sk = sk;
-				sk->rmem_alloc += skb->truesize;
-				tcp_conn_request(sk, skb, daddr, saddr,opt, dev,seq+128000);
-				release_sock(sk);
-				return 0;
-			}
-			kfree_skb(skb, FREE_READ);
-			return 0;
-		}
-#endif	
-	}
-
-	/*
-	 *	We are now in normal data flow (see the step list in the RFC)
-	 *	Note most of these are inline now. I'll inline the lot when
-	 *	I have time to test it hard and look at what gcc outputs 
-	 */
-	
-	if (!tcp_sequence(sk, skb->seq, skb->end_seq))
-	{
-		bad_tcp_sequence(sk, th, len, opt, saddr, dev);
-		kfree_skb(skb, FREE_READ);
-		release_sock(sk);
-		return 0;
-	}
-
-	if(th->rst)
-		return tcp_std_reset(sk,skb);
-	
-	/*
-	 *	!syn_ok is effectively the state test in RFC793.
-	 */
-	 
-	if(th->syn && !syn_ok)
-	{
-		tcp_reset(daddr,saddr,th, &tcp_prot, opt, dev, skb->ip_hdr->tos, 255);
-		return tcp_std_reset(sk,skb);	
-	}
-
-
-	/*
-	 *	Delayed ACK time estimator.
-	 */
-	
-	if (sk->lrcvtime == 0) 
-	{
-		sk->lrcvtime = jiffies;
-		sk->ato = HZ/3;
-	}
-	else 
-	{
-		int m;
-		
-		m = jiffies - sk->lrcvtime;
-
-		sk->lrcvtime = jiffies;
-
-		if (m <= 0)
-			m = 1;
-
-		if (m > (sk->rtt >> 3)) 
-		{
-			sk->ato = sk->rtt >> 3;
-			/*
-			 * printk(KERN_DEBUG "ato: rtt %lu\n", sk->ato);
-			 */
-		}
-		else 
-		{
-			sk->ato = (sk->ato >> 1) + m;
-			/*
-			 * printk(KERN_DEBUG "ato: m %lu\n", sk->ato);
-			 */
-		}
-	}
-	  
-	/*
-	 *	Process the ACK
-	 */
-	 
-
-	if(th->ack && !tcp_ack(sk,th,saddr,len))
-	{
-		/*
-		 *	Our three way handshake failed.
-		 */
-		 
-		if(sk->state==TCP_SYN_RECV)
-		{
-			tcp_reset(daddr, saddr, th,sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
-		}
-		kfree_skb(skb, FREE_READ);
-		release_sock(sk);
-		return 0;
-	}
-	
-rfc_step6:		/* I'll clean this up later */
-
-	/*
-	 *	If the accepted buffer put us over our queue size we
-	 *	now drop it (we must process the ack first to avoid
-	 *	deadlock cases).
-	 */
-	 
-	if (sk->rmem_alloc  >= sk->rcvbuf) 
-	{
-		kfree_skb(skb, FREE_READ);
-		release_sock(sk);
-		return(0);
-	}
-
-
-	/*
-	 *	Process urgent data
-	 */
-	 	
-	if(tcp_urg(sk, th, saddr, len))
-	{
-		kfree_skb(skb, FREE_READ);
-		release_sock(sk);
-		return 0;
-	}
-	
-	/*
-	 *	Process the encapsulated data
-	 */
-	
-	if(tcp_data(skb,sk, saddr, len))
-	{
-		kfree_skb(skb, FREE_READ);
-		release_sock(sk);
-		return 0;
-	}
-
-	/*
-	 *	And done
-	 */	
-	
-	release_sock(sk);
-	return 0;
-
-no_tcp_socket:
-	/*
-	 *	No such TCB. If th->rst is 0 send a reset (checked in tcp_reset)
-	 */
-	tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
-	skb->sk = NULL;
-	/*
-	 *	Discard frame
-	 */
-	kfree_skb(skb, FREE_READ);
-	return 0;
-}
-
-/*
- *	This routine sends a packet with an out of date sequence
- *	number. It assumes the other end will try to ack it.
- */
-
-static void tcp_write_wakeup(struct sock *sk)
-{
-	struct sk_buff *buff,*skb;
-	struct tcphdr *t1;
-	struct device *dev=NULL;
-	int tmp;
-
-	if (sk->zapped)
-		return;	/* After a valid reset we can send no more */
-
-	/*
-	 *	Write data can still be transmitted/retransmitted in the
-	 *	following states.  If any other state is encountered, return.
-	 *	[listen/close will never occur here anyway]
-	 */
-
-	if (sk->state != TCP_ESTABLISHED && 
-	    sk->state != TCP_CLOSE_WAIT &&
-	    sk->state != TCP_FIN_WAIT1 && 
-	    sk->state != TCP_LAST_ACK &&
-	    sk->state != TCP_CLOSING
-	) 
-	{
-		return;
-	}
-	if ( before(sk->sent_seq, sk->window_seq) && 
-	    (skb=skb_peek(&sk->write_queue)))
-	{
-		/*
-	    	 * We are probing the opening of a window
-	    	 * but the window size is != 0; it
-	    	 * must have been a result of SWS avoidance (sender)
-	    	 */
-	    
-	    	struct iphdr *iph;
-	    	struct tcphdr *th;
-	    	struct tcphdr *nth;
-	    	unsigned long win_size;
-#if 0
-		unsigned long ow_size;
-#endif
-	    	void * tcp_data_start;
-	
-		/*
-		 *	How many bytes can we send ?
-		 */
-		 
-		win_size = sk->window_seq - sk->sent_seq;
-
-		/*
-		 *	Recover the buffer pointers
-		 */
-		 
-	    	iph = (struct iphdr *)skb->ip_hdr;
-	    	th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
-
-		/*
-		 *	Grab the data for a temporary frame
-		 */
-		 
-	    	buff = sock_wmalloc(sk, win_size + th->doff * 4 + 
-				     (iph->ihl << 2) +
-				     sk->prot->max_header + 15, 
-				     1, GFP_ATOMIC);
-	    	if ( buff == NULL )
-	    		return;
-
-	 	/* 
-	 	 *	If we strip the packet on the write queue we must
-	 	 *	be ready to retransmit this one 
-	 	 */
-	    
-	    	buff->free = /*0*/1;
-
-	    	buff->sk = sk;
-	    	buff->localroute = sk->localroute;
-	    	
-	    	/*
-	    	 *	Put headers on the new packet
-	    	 */
-
-	    	tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
-					 IPPROTO_TCP, sk->opt, buff->truesize,
-					 sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
-	    	if (tmp < 0) 
-	    	{
-			sock_wfree(sk, buff);
-			return;
-		}
-		
-		/*
-		 *	Move the TCP header over
-		 */
-
-		buff->dev = dev;
-
-		nth = (struct tcphdr *) skb_put(buff,th->doff*4);
-
-		memcpy(nth, th, th->doff * 4);
-		
-		/*
-		 *	Correct the new header
-		 */
-		 
-		nth->ack = 1; 
-		nth->ack_seq = htonl(sk->acked_seq);
-		nth->window = htons(tcp_select_window(sk));
-		nth->check = 0;
-
-		/*
-		 *	Find the first data byte.
-		 */
-		 
-		tcp_data_start = (char *) th + (th->doff << 2);
-
-		/*
-		 *	Add it to our new buffer
-		 */
-		 
-		memcpy(skb_put(buff,win_size), tcp_data_start, win_size);
-		
-		/*
-		 *	Remember our right edge sequence number.
-		 */
-		 
-	    	buff->end_seq = sk->sent_seq + win_size;
-	    	sk->sent_seq = buff->end_seq;		/* Hack */
-		if(th->urg && ntohs(th->urg_ptr) < win_size)
-			nth->urg = 0;
-
-		/*
-		 *	Checksum the split buffer
-		 */
-		 
-	    	tcp_send_check(nth, sk->saddr, sk->daddr, 
-			   nth->doff * 4 + win_size , sk);
-	}
-	else
-	{	
-		buff = sock_wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
-		if (buff == NULL) 
-			return;
-
-		buff->free = 1;
-		buff->sk = sk;
-		buff->localroute = sk->localroute;
-
-		/*
-		 *	Put in the IP header and routing stuff. 
-		 */
-		 
-		tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
-				IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
-		if (tmp < 0) 
-		{
-			sock_wfree(sk, buff);
-			return;
-		}
-
-		t1 = (struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
-		memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
-
-		/*
-		 *	Use a previous sequence.
-		 *	This should cause the other end to send an ack.
-		 */
-	 
-		t1->seq = htonl(sk->sent_seq-1);
-		t1->ack = 1; 
-		t1->res1= 0;
-		t1->res2= 0;
-		t1->rst = 0;
-		t1->urg = 0;
-		t1->psh = 0;
-		t1->fin = 0;	/* We are sending a 'previous' sequence, and 0 bytes of data - thus no FIN bit */
-		t1->syn = 0;
-		t1->ack_seq = htonl(sk->acked_seq);
-		t1->window = htons(tcp_select_window(sk));
-		t1->doff = sizeof(*t1)/4;
-		tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
-
-	}		
-
-	/*
-	 *	Send it.
-	 */
-	
-	sk->prot->queue_xmit(sk, dev, buff, 1);
+	sk->prot->queue_xmit(sk, dev, buff, 0);  
+	tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
+	tcp_statistics.TcpActiveOpens++;
 	tcp_statistics.TcpOutSegs++;
-}
-
-/*
- *	A window probe timeout has occurred.
- */
-
-void tcp_send_probe0(struct sock *sk)
-{
-	if (sk->zapped)
-		return;		/* After a valid reset we can send no more */
-
-	tcp_write_wakeup(sk);
-
-	sk->backoff++;
-	sk->rto = min(sk->rto << 1, 120*HZ);
-	sk->retransmits++;
-	sk->prot->retransmits ++;
-	reset_xmit_timer (sk, TIME_PROBE0, sk->rto);
+  
+	release_sock(sk);
+	return(0);
 }
 
 /*
