]> err.no Git - linux-2.6/blobdiff - net/ipv4/tcp_input.c
tcp: TCP connection times out if ICMP frag needed is delayed
[linux-2.6] / net / ipv4 / tcp_input.c
index eda4f4a233f36e940fe81cf83188dcbc2817e0fa..b54d9d37b636a79b26de0f0ee36e9f6024ba453e 100644 (file)
@@ -66,6 +66,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/sysctl.h>
+#include <net/dst.h>
 #include <net/tcp.h>
 #include <net/inet_common.h>
 #include <linux/ipsec.h>
@@ -113,8 +114,6 @@ int sysctl_tcp_abc __read_mostly;
 #define FLAG_FORWARD_PROGRESS  (FLAG_ACKED|FLAG_DATA_SACKED)
 #define FLAG_ANY_PROGRESS      (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)
 
-#define IsSackFrto() (sysctl_tcp_frto == 0x2)
-
 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
 #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
 
@@ -605,7 +604,7 @@ static u32 tcp_rto_min(struct sock *sk)
        u32 rto_min = TCP_RTO_MIN;
 
        if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
-               rto_min = dst->metrics[RTAX_RTO_MIN - 1];
+               rto_min = dst_metric(dst, RTAX_RTO_MIN);
        return rto_min;
 }
 
@@ -769,7 +768,7 @@ void tcp_update_metrics(struct sock *sk)
                                dst->metrics[RTAX_RTTVAR - 1] = m;
                        else
                                dst->metrics[RTAX_RTTVAR-1] -=
-                                       (dst->metrics[RTAX_RTTVAR-1] - m)>>2;
+                                       (dst_metric(dst, RTAX_RTTVAR) - m)>>2;
                }
 
                if (tp->snd_ssthresh >= 0xFFFF) {
@@ -788,21 +787,21 @@ void tcp_update_metrics(struct sock *sk)
                                dst->metrics[RTAX_SSTHRESH-1] =
                                        max(tp->snd_cwnd >> 1, tp->snd_ssthresh);
                        if (!dst_metric_locked(dst, RTAX_CWND))
-                               dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_cwnd) >> 1;
+                               dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1;
                } else {
                        /* Else slow start did not finish, cwnd is non-sense,
                           ssthresh may be also invalid.
                         */
                        if (!dst_metric_locked(dst, RTAX_CWND))
-                               dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_ssthresh) >> 1;
-                       if (dst->metrics[RTAX_SSTHRESH-1] &&
+                               dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1;
+                       if (dst_metric(dst, RTAX_SSTHRESH) &&
                            !dst_metric_locked(dst, RTAX_SSTHRESH) &&
-                           tp->snd_ssthresh > dst->metrics[RTAX_SSTHRESH-1])
+                           tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH))
                                dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh;
                }
 
                if (!dst_metric_locked(dst, RTAX_REORDERING)) {
-                       if (dst->metrics[RTAX_REORDERING-1] < tp->reordering &&
+                       if (dst_metric(dst, RTAX_REORDERING) < tp->reordering &&
                            tp->reordering != sysctl_tcp_reordering)
                                dst->metrics[RTAX_REORDERING-1] = tp->reordering;
                }
@@ -1685,6 +1684,11 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
        tp->sacked_out = 0;
 }
 
+static int tcp_is_sackfrto(const struct tcp_sock *tp)
+{
+       return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
+}
+
 /* F-RTO can only be used if TCP has never retransmitted anything other than
  * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
  */
@@ -1701,7 +1705,7 @@ int tcp_use_frto(struct sock *sk)
        if (icsk->icsk_mtup.probe_size)
                return 0;
 
-       if (IsSackFrto())
+       if (tcp_is_sackfrto(tp))
                return 1;
 
        /* Avoid expensive walking of rexmit queue if possible */
@@ -1791,7 +1795,7 @@ void tcp_enter_frto(struct sock *sk)
        /* Earlier loss recovery underway (see RFC4138; Appendix B).
         * The last condition is necessary at least in tp->frto_counter case.
         */
-       if (IsSackFrto() && (tp->frto_counter ||
+       if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
            ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
            after(tp->high_seq, tp->snd_una)) {
                tp->frto_highmark = tp->high_seq;
@@ -1838,9 +1842,16 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
                        TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
                }
 
-               /* Don't lost mark skbs that were fwd transmitted after RTO */
-               if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) &&
-                   !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) {
+               /* Marking forward transmissions that were made after RTO lost
+                * can cause unnecessary retransmissions in some scenarios,
+                * SACK blocks will mitigate that in some but not in all cases.
+                * We used to not mark them but it was causing break-ups with
+                * receivers that do only in-order receival.
+                *
+                * TODO: we could detect presence of such receiver and select
+                * different behavior per flow.
+                */
+               if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
                        TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
                        tp->lost_out += tcp_skb_pcount(skb);
                }
@@ -1856,7 +1867,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
        tp->reordering = min_t(unsigned int, tp->reordering,
                               sysctl_tcp_reordering);
        tcp_set_ca_state(sk, TCP_CA_Loss);
-       tp->high_seq = tp->frto_highmark;
+       tp->high_seq = tp->snd_nxt;
        TCP_ECN_queue_cwr(tp);
 
        tcp_clear_retrans_hints_partial(tp);
@@ -2478,7 +2489,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
 
        tcp_verify_left_out(tp);
 
-       if (tp->retrans_out == 0)
+       if (!tp->frto_counter && tp->retrans_out == 0)
                tp->retrans_stamp = 0;
 
        if (flag & FLAG_ECE)
@@ -3123,7 +3134,7 @@ static int tcp_process_frto(struct sock *sk, int flag)
                return 1;
        }
 
-       if (!IsSackFrto() || tcp_is_reno(tp)) {
+       if (!tcp_is_sackfrto(tp)) {
                /* RFC4138 shortcoming in step 2; should also have case c):
                 * ACK isn't duplicate nor advances window, e.g., opposite dir
                 * data, winupdate