patch-2.4.4 linux/net/ipv4/tcp_output.c
- Lines: 408
- Date: Thu Apr 12 12:11:39 2001
- Orig file: v2.4.3/linux/net/ipv4/tcp_output.c
- Orig date: Tue Nov 28 21:53:45 2000
diff -u --recursive --new-file v2.4.3/linux/net/ipv4/tcp_output.c linux/net/ipv4/tcp_output.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_output.c,v 1.129 2000/11/28 17:04:10 davem Exp $
+ * Version: $Id: tcp_output.c,v 1.136 2001/03/06 22:42:56 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -166,20 +166,9 @@
/* RFC1323 scaling applied */
new_win >>= tp->rcv_wscale;
-#ifdef TCP_FORMAL_WINDOW
- if (new_win == 0) {
- /* If we advertise zero window, disable fast path. */
+ /* If we advertise zero window, disable fast path. */
+ if (new_win == 0)
tp->pred_flags = 0;
- } else if (cur_win == 0 && tp->pred_flags == 0 &&
- skb_queue_len(&tp->out_of_order_queue) == 0 &&
- !tp->urg_data) {
- /* If we open zero window, enable fast path.
- Without this it will be open by the first data packet,
- it is too late to merge checksumming to copy.
- */
- tcp_fast_path_on(tp);
- }
-#endif
return new_win;
}
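
For orientation, the "new_win >>= tp->rcv_wscale" above is the RFC 1323 window-scaling step: the 16-bit window field on the wire carries the real window shifted right by the negotiated scale, and the peer shifts it back. A minimal standalone sketch of the wire arithmetic (ordinary userspace C, not kernel code):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t real_window = 256 * 1024;  /* 256 KB receive window */
        int rcv_wscale = 3;                 /* negotiated scale factor */

        /* What tcp_select_window() puts in the 16-bit header field... */
        uint16_t wire = real_window >> rcv_wscale;
        /* ...and what the peer reconstructs from it. */
        uint32_t seen = (uint32_t)wire << rcv_wscale;

        printf("wire field: %u, reconstructed: %u\n",
               (unsigned)wire, (unsigned)seen);
        return 0;
}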
@@ -337,6 +326,91 @@
tp->send_head = skb;
}
+/* Send _single_ skb sitting at the send head. This function requires
+ * a true push_pending_frames to set up the probe timer etc.
+ */
+void tcp_push_one(struct sock *sk, unsigned cur_mss)
+{
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ struct sk_buff *skb = tp->send_head;
+
+ if (tcp_snd_test(tp, skb, cur_mss, 1)) {
+ /* Send it out now. */
+ TCP_SKB_CB(skb)->when = tcp_time_stamp;
+ if (tcp_transmit_skb(sk, skb_clone(skb, sk->allocation)) == 0) {
+ tp->send_head = NULL;
+ tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
+ if (tp->packets_out++ == 0)
+ tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+ return;
+ }
+ }
+}
+
+/* Split a fragmented skb into two parts at offset len. */
+
+static void skb_split(struct sk_buff *skb, struct sk_buff *skb1, u32 len)
+{
+ int i;
+ int pos = skb->len - skb->data_len;
+
+ if (len < pos) {
+ /* Split line is inside header. */
+ memcpy(skb_put(skb1, pos-len), skb->data + len, pos-len);
+
+ /* And move data appendix as is. */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
+
+ skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
+ skb_shinfo(skb)->nr_frags = 0;
+
+ skb1->data_len = skb->data_len;
+ skb1->len += skb1->data_len;
+ skb->data_len = 0;
+ skb->len = len;
+ skb->tail = skb->data+len;
+ } else {
+ int k = 0;
+ int nfrags = skb_shinfo(skb)->nr_frags;
+
+ /* Second chunk has no header, nothing to copy. */
+
+ skb_shinfo(skb)->nr_frags = 0;
+ skb1->len = skb1->data_len = skb->len - len;
+ skb->len = len;
+ skb->data_len = len - pos;
+
+ for (i=0; i<nfrags; i++) {
+ int size = skb_shinfo(skb)->frags[i].size;
+ if (pos + size > len) {
+ skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
+
+ if (pos < len) {
+ /* Split frag.
+ * We have two variants in this case:
+ * 1. Move the whole frag to the second
+ * part, if possible. E.g. this approach
+ * is mandatory for TUX, where splitting
+ * is expensive.
+ * 2. Split the frag accurately. We do
+ * the latter here.
+ */
+ get_page(skb_shinfo(skb)->frags[i].page);
+ skb_shinfo(skb1)->frags[0].page_offset += (len-pos);
+ skb_shinfo(skb1)->frags[0].size -= (len-pos);
+ skb_shinfo(skb)->frags[i].size = len-pos;
+ skb_shinfo(skb)->nr_frags++;
+ }
+ k++;
+ } else {
+ skb_shinfo(skb)->nr_frags++;
+ }
+ pos += size;
+ }
+ skb_shinfo(skb1)->nr_frags = k;
+ }
+}
+
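
The loop above never copies payload once the split point lies beyond the linear header: whole frags move to the second skb by reference, and at most the single straddling frag is shared by both halves (hence the get_page() reference) with adjusted offset and size. A simplified userspace model of that arithmetic, with purely illustrative struct and function names rather than kernel API:

#include <stdio.h>

struct frag { int offset, size; };

/* Model splitting a packet (hdr_len linear bytes + page frags) at
 * byte len, len >= hdr_len: frags ahead of the split stay, frags
 * behind it move, the straddling one is shared with adjusted
 * offset/size.  f1/nfrags1 receive the second packet's frags. */
static void split_frags(struct frag *f, int nfrags, int hdr_len, int len,
                        struct frag *f1, int *nfrags1)
{
        int i, k = 0, keep = 0, pos = hdr_len;

        for (i = 0; i < nfrags; i++) {
                int size = f[i].size;

                if (pos + size > len) {
                        f1[k] = f[i];   /* goes (at least partly) to part 2 */
                        if (pos < len) {
                                /* Split point inside this frag: both
                                 * halves reference it (the kernel takes
                                 * a page ref here). */
                                f1[k].offset += len - pos;
                                f1[k].size   -= len - pos;
                                f[i].size     = len - pos;
                                keep++;
                        }
                        k++;
                } else {
                        keep++;         /* stays whole in part 1 */
                }
                pos += size;
        }
        *nfrags1 = k;
        printf("part 1 keeps %d frags, part 2 gets %d\n", keep, k);
}

int main(void)
{
        /* 100-byte header plus three 1000-byte frags, split at 1600. */
        struct frag f[3] = { {0, 1000}, {0, 1000}, {0, 1000} };
        struct frag f1[3];
        int n1;

        split_frags(f, 3, 100, 1600, f1, &n1);
        return 0;
}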
/* Function to create two new TCP segments. Shrinks the given segment
* to the specified size and appends a new segment with the rest of the
* packet to the list. This won't be called frequently, I hope.
@@ -349,19 +423,22 @@
int nsize = skb->len - len;
u16 flags;
+ if (skb_cloned(skb) &&
+ skb_is_nonlinear(skb) &&
+ pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ return -ENOMEM;
+
/* Get a new skb... force flag on. */
- buff = tcp_alloc_skb(sk, nsize + MAX_TCP_HEADER, GFP_ATOMIC);
+ buff = tcp_alloc_skb(sk, nsize, GFP_ATOMIC);
if (buff == NULL)
return -ENOMEM; /* We'll just try again later. */
tcp_charge_skb(sk, buff);
- /* Reserve space for headers. */
- skb_reserve(buff, MAX_TCP_HEADER);
-
/* Correct the sequence numbers. */
TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
-
+ TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
+
/* PSH and FIN should only be set in the second packet. */
flags = TCP_SKB_CB(skb)->flags;
TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
@@ -371,18 +448,22 @@
tp->lost_out++;
tp->left_out++;
}
- TCP_SKB_CB(buff)->sacked &= ~TCPCB_AT_TAIL;
+ TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
- /* Copy and checksum data tail into the new buffer. */
- buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize),
- nsize, 0);
+ if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) {
+ /* Copy and checksum data tail into the new buffer. */
+ buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize),
+ nsize, 0);
- /* This takes care of the FIN sequence number too. */
- TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
- skb_trim(skb, len);
+ skb_trim(skb, len);
- /* Rechecksum original buffer. */
- skb->csum = csum_partial(skb->data, skb->len, 0);
+ skb->csum = csum_block_sub(skb->csum, buff->csum, len);
+ } else {
+ skb->ip_summed = CHECKSUM_HW;
+ skb_split(skb, buff, len);
+ }
+
+ buff->ip_summed = skb->ip_summed;
/* Looks stupid, but our code really uses the 'when' field of
* skbs which it has never sent before. --ANK
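
The csum_block_sub() call above is what lets the shortened head keep a valid checksum without being rescanned: the Internet checksum is a one's-complement sum, so the checksum of a prefix equals the checksum of the whole minus the checksum of the tail, and csum_block_add() in the collapse hunk further down is the same identity run in reverse. A standalone sketch of the arithmetic, simplified to assume the split offset is even (so none of the odd-offset byte rotation the kernel helpers also handle):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint32_t csum_add(uint32_t a, uint32_t b)
{
        a += b;
        return a + (a < b);          /* end-around carry */
}

static uint32_t csum_sub(uint32_t a, uint32_t b)
{
        return csum_add(a, ~b);      /* subtract = add the complement */
}

/* One's-complement sum of a byte buffer, 16 bits at a time. */
static uint32_t csum_partial(const uint8_t *p, int n, uint32_t sum)
{
        while (n > 1) {
                uint16_t w;
                memcpy(&w, p, 2);
                sum = csum_add(sum, w);
                p += 2;
                n -= 2;
        }
        if (n)
                sum = csum_add(sum, *p);
        return sum;
}

int main(void)
{
        uint8_t data[100];
        int i, len = 60;             /* even split offset */

        for (i = 0; i < 100; i++)
                data[i] = (uint8_t)(i * 7 + 3);

        uint32_t whole = csum_partial(data, 100, 0);
        uint32_t tail  = csum_partial(data + len, 100 - len, 0);

        /* checksum(head) derived without touching head's bytes: */
        printf("head direct: %x, derived: %x\n",
               (unsigned)csum_partial(data, len, 0),
               (unsigned)csum_sub(whole, tail));
        return 0;
}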
@@ -461,7 +542,7 @@
* Returns 1, if no segments are in flight and we have queued segments, but
* cannot send anything now because of SWS or another problem.
*/
-int tcp_write_xmit(struct sock *sk)
+int tcp_write_xmit(struct sock *sk, int nonagle)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
unsigned int mss_now;
@@ -482,7 +563,7 @@
mss_now = tcp_current_mss(sk);
while((skb = tp->send_head) &&
- tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb) ? tp->nonagle : 1)) {
+ tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb) ? nonagle : 1)) {
if (skb->len > mss_now) {
if (tcp_fragment(sk, skb, mss_now))
break;
@@ -568,22 +649,21 @@
* but may be worse for the performance because of rcv_mss
* fluctuations. --SAW 1998/11/1
*/
- unsigned int mss = tp->ack.rcv_mss;
- int free_space;
- u32 window;
-
- /* Sometimes free_space can be < 0. */
- free_space = tcp_space(sk);
- if (tp->window_clamp < mss)
- mss = tp->window_clamp;
+ int mss = tp->ack.rcv_mss;
+ int free_space = tcp_space(sk);
+ int full_space = min(tp->window_clamp, tcp_full_space(sk));
+ int window;
+
+ if (mss > full_space)
+ mss = full_space;
- if (free_space < (int)min(tp->window_clamp, tcp_full_space(sk)) / 2) {
+ if (free_space < full_space/2) {
tp->ack.quick = 0;
if (tcp_memory_pressure)
tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4*tp->advmss);
- if (free_space < ((int)mss))
+ if (free_space < mss)
return 0;
}
@@ -599,9 +679,8 @@
* is too small.
*/
window = tp->rcv_wnd;
- if ((((int) window) <= (free_space - ((int) mss))) ||
- (((int) window) > free_space))
- window = (((unsigned int) free_space)/mss)*mss;
+ if (window <= free_space - mss || window > free_space)
+ window = (free_space/mss)*mss;
return window;
}
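
The rewritten tail of tcp_select_window() keeps the classic receiver-side SWS avoidance: whenever the previously advertised window no longer fits into the free space, or lags more than one MSS behind it, it is recomputed as a whole number of MSS-sized segments, so the peer is never coaxed into sending a fractional segment. A small standalone illustration of the rounding:

#include <stdio.h>

int main(void)
{
        int mss = 1460;
        int free_space = 10000;
        int window = 4096;           /* previously advertised window */

        /* The test and rounding from the hunk above. */
        if (window <= free_space - mss || window > free_space)
                window = (free_space / mss) * mss;

        printf("advertised window: %d (%d full segments)\n",
               window, window / mss);   /* 8760 = 6 * 1460 */
        return 0;
}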
@@ -638,19 +717,14 @@
/* Ok. We will be able to collapse the packet. */
__skb_unlink(next_skb, next_skb->list);
- if(skb->len % 4) {
- /* Must copy and rechecksum all data. */
+ if (next_skb->ip_summed == CHECKSUM_HW)
+ skb->ip_summed = CHECKSUM_HW;
+
+ if (skb->ip_summed != CHECKSUM_HW) {
memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
- skb->csum = csum_partial(skb->data, skb->len, 0);
- } else {
- /* Optimize, actually we could also combine next_skb->csum
- * to skb->csum using a single add w/carry operation too.
- */
- skb->csum = csum_partial_copy_nocheck(next_skb->data,
- skb_put(skb, next_skb_size),
- next_skb_size, skb->csum);
+ skb->csum = csum_block_add(skb->csum, next_skb->csum, skb->len);
}
-
+
/* Update sequence range on original skb. */
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
@@ -668,11 +742,12 @@
tp->lost_out--;
tp->left_out--;
}
+ /* Reno case is special. Sigh... */
if (!tp->sack_ok && tp->sacked_out) {
- /* Reno case is special. Sigh... */
tp->sacked_out--;
tp->left_out--;
}
+
/* Not quite right: it can be > snd.fack, but
* it is better to underestimate fackets.
*/
@@ -712,7 +787,7 @@
if (!lost)
return;
- tp->left_out = tp->sacked_out + tp->lost_out;
+ tcp_sync_left_out(tp);
/* Don't muck with the congestion window here.
* Reason is that we do not increase amount of _data_
@@ -745,6 +820,15 @@
if (atomic_read(&sk->wmem_alloc) > min(sk->wmem_queued+(sk->wmem_queued>>2),sk->sndbuf))
return -EAGAIN;
+ /* If the receiver has shrunk its window, and skb is out of
+ * the new window, do not retransmit it. The exception is the
+ * case when the window is shrunk to zero: then our
+ * retransmit serves as a zero window probe.
+ */
+ if (!before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)
+ && TCP_SKB_CB(skb)->seq != tp->snd_una)
+ return -EAGAIN;
+
if(skb->len > cur_mss) {
if(tcp_fragment(sk, skb, cur_mss))
return -ENOMEM; /* We'll try again later. */
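
The new shrunk-window test depends on modulo-2^32 sequence comparison: in the 2.4 kernel, before(a, b) is a signed 32-bit subtraction, which keeps the check correct even when the window spans the sequence-number wrap. A standalone sketch with made-up values placed across the wrap point:

#include <stdio.h>
#include <stdint.h>

/* Same trick as the kernel's before()/after() macros. */
static int before(uint32_t seq1, uint32_t seq2)
{
        return (int32_t)(seq1 - seq2) < 0;
}

int main(void)
{
        uint32_t snd_una = 0xffffff00u;    /* window spans the 2^32 wrap */
        uint32_t snd_wnd = 0x1000;
        uint32_t seqs[2] = { 0x00000200u,  /* past the wrap, in window */
                             0x00002000u };/* beyond the shrunk window */
        int i;

        for (i = 0; i < 2; i++) {
                uint32_t seq = seqs[i];
                /* The check from the hunk above: skip segments entirely
                 * outside the window, except the one at snd_una, whose
                 * retransmit doubles as a zero window probe. */
                if (!before(seq, snd_una + snd_wnd) && seq != snd_una)
                        printf("0x%x: outside window, skip\n", (unsigned)seq);
                else
                        printf("0x%x: retransmit ok\n", (unsigned)seq);
        }
        return 0;
}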
@@ -758,6 +842,7 @@
(skb->len < (cur_mss >> 1)) &&
(skb->next != tp->send_head) &&
(skb->next != (struct sk_buff *)&sk->write_queue) &&
+ (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) &&
(sysctl_tcp_retrans_collapse != 0))
tcp_retrans_try_collapse(sk, skb, cur_mss);
@@ -771,9 +856,11 @@
if(skb->len > 0 &&
(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
- TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
- skb_trim(skb, 0);
- skb->csum = 0;
+ if (!pskb_trim(skb, 0)) {
+ TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->csum = 0;
+ }
}
/* Make a copy, if the first transmission SKB clone we made
@@ -782,7 +869,7 @@
TCP_SKB_CB(skb)->when = tcp_time_stamp;
err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
- skb_copy(skb, GFP_ATOMIC):
+ pskb_copy(skb, GFP_ATOMIC):
skb_clone(skb, GFP_ATOMIC)));
if (err == 0) {
@@ -912,28 +999,10 @@
*/
mss_now = tcp_current_mss(sk);
- /* Please, find seven differences of 2.3.33 and loook
- * what I broke here. 8) --ANK
- */
-
if(tp->send_head != NULL) {
- /* tcp_write_xmit() takes care of the rest. */
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
TCP_SKB_CB(skb)->end_seq++;
tp->write_seq++;
-
- /* Special case to avoid Nagle bogosity. If this
- * segment is the last segment, and it was queued
- * due to Nagle/SWS-avoidance, send it out now.
- */
- if(tp->send_head == skb &&
- !after(tp->write_seq, tp->snd_una + tp->snd_wnd)) {
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
- if (!tcp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)))
- update_send_head(sk, tp, skb);
- else
- tcp_check_probe_timer(sk, tp);
- }
} else {
/* Socket is locked, keep trying until memory is available. */
for (;;) {
@@ -953,9 +1022,9 @@
/* FIN eats a sequence byte, write_seq advanced by tcp_send_skb(). */
TCP_SKB_CB(skb)->seq = tp->write_seq;
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
- tcp_send_skb(sk, skb, 0, mss_now);
- __tcp_push_pending_frames(sk, tp, mss_now, 1);
+ tcp_send_skb(sk, skb, 1, mss_now);
}
+ __tcp_push_pending_frames(sk, tp, mss_now, 1);
}
/* We get here when a process closes a file descriptor (either due to
@@ -1224,23 +1293,6 @@
tp->ack.timeout = timeout;
if (!mod_timer(&tp->delack_timer, timeout))
sock_hold(sk);
-
-#ifdef TCP_FORMAL_WINDOW
- /* Explanation. Header prediction path does not handle
- * case of zero window. If we send ACK immediately, pred_flags
- * are reset when sending ACK. If rcv_nxt is advanced and
- * ack is not sent, than delayed ack is scheduled.
- * Hence, it is the best place to check for zero window.
- */
- if (tp->pred_flags) {
- if (tcp_receive_window(tp) == 0)
- tp->pred_flags = 0;
- } else {
- if (skb_queue_len(&tp->out_of_order_queue) == 0 &&
- !tp->urg_data)
- tcp_fast_path_on(tp);
- }
-#endif
}
/* This routine sends an ack and also updates the window. */