From 4d1a2d9ec1c17df077ed09a0d135bccf5637a3b7 Mon Sep 17 00:00:00 2001 From: Damian Lukowski Date: Wed, 26 Aug 2009 00:16:27 +0000 Subject: Revert Backoff [v3]: Rename skb to icmp_skb in tcp_v4_err() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This supplementary patch renames skb to icmp_skb in tcp_v4_err() in order to disambiguate from another sk_buff variable, which will be introduced in a separate patch. Signed-off-by: Damian Lukowski Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6d88219c5e2..6ca1bc8c302 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -328,26 +328,26 @@ static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) * */ -void tcp_v4_err(struct sk_buff *skb, u32 info) +void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) { - struct iphdr *iph = (struct iphdr *)skb->data; - struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); + struct iphdr *iph = (struct iphdr *)icmp_skb->data; + struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); struct tcp_sock *tp; struct inet_sock *inet; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; + const int type = icmp_hdr(icmp_skb)->type; + const int code = icmp_hdr(icmp_skb)->code; struct sock *sk; __u32 seq; int err; - struct net *net = dev_net(skb->dev); + struct net *net = dev_net(icmp_skb->dev); - if (skb->len < (iph->ihl << 2) + 8) { + if (icmp_skb->len < (iph->ihl << 2) + 8) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest, - iph->saddr, th->source, inet_iif(skb)); + iph->saddr, th->source, inet_iif(icmp_skb)); if (!sk) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; -- cgit v1.2.3 From f1ecd5d9e7366609d640ff4040304ea197fbc618 Mon Sep 17 00:00:00 2001 From: Damian Lukowski Date: Wed, 26 Aug 2009 00:16:31 +0000 Subject: Revert Backoff [v3]: Revert RTO on ICMP destination unreachable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Here, an ICMP host/network unreachable message, whose payload fits to TCP's SND.UNA, is taken as an indication that the RTO retransmission has not been lost due to congestion, but because of a route failure somewhere along the path. With true congestion, a router won't trigger such a message and the patched TCP will operate as standard TCP. This patch reverts one RTO backoff, if an ICMP host/network unreachable message, whose payload fits to TCP's SND.UNA, arrives. Based on the new RTO, the retransmission timer is reset to reflect the remaining time, or - if the revert clocked out the timer - a retransmission is sent out immediately. Backoffs are only reverted, if TCP is in RTO loss recovery, i.e. if there have been retransmissions and reversible backoffs, already. Changes from v2: 1) Renaming of skb in tcp_v4_err() moved to another patch. 2) Reintroduced tcp_bound_rto() and __tcp_set_rto(). 3) Fixed code comments. Signed-off-by: Damian Lukowski Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6ca1bc8c302..6755e29a6dd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -332,12 +332,15 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) { struct iphdr *iph = (struct iphdr *)icmp_skb->data; struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); + struct inet_connection_sock *icsk; struct tcp_sock *tp; struct inet_sock *inet; const int type = icmp_hdr(icmp_skb)->type; const int code = icmp_hdr(icmp_skb)->code; struct sock *sk; + struct sk_buff *skb; __u32 seq; + __u32 remaining; int err; struct net *net = dev_net(icmp_skb->dev); @@ -367,6 +370,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (sk->sk_state == TCP_CLOSE) goto out; + icsk = inet_csk(sk); tp = tcp_sk(sk); seq = ntohl(th->seq); if (sk->sk_state != TCP_LISTEN && @@ -393,6 +397,39 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) } err = icmp_err_convert[code].errno; + /* check if icmp_skb allows revert of backoff + * (see draft-zimmermann-tcp-lcd) */ + if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH) + break; + if (seq != tp->snd_una || !icsk->icsk_retransmits || + !icsk->icsk_backoff) + break; + + icsk->icsk_backoff--; + inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << + icsk->icsk_backoff; + tcp_bound_rto(sk); + + skb = tcp_write_queue_head(sk); + BUG_ON(!skb); + + remaining = icsk->icsk_rto - min(icsk->icsk_rto, + tcp_time_stamp - TCP_SKB_CB(skb)->when); + + if (remaining) { + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + remaining, TCP_RTO_MAX); + } else if (sock_owned_by_user(sk)) { + /* RTO revert clocked out retransmission, + * but socket is locked. Will defer. */ + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + HZ/20, TCP_RTO_MAX); + } else { + /* RTO revert clocked out retransmission. + * Will retransmit now */ + tcp_retransmit_timer(sk); + } + break; case ICMP_TIME_EXCEEDED: err = EHOSTUNREACH; -- cgit v1.2.3 From b2e4b3debc327a5b53d9622e0b1785eea2ea2aad Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 1 Sep 2009 19:25:03 +0000 Subject: tcp: MD5 operations should be const Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6755e29a6dd..3efbe94f022 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1195,7 +1195,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { }; #ifdef CONFIG_TCP_MD5SIG -static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { +static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .md5_lookup = tcp_v4_reqsk_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, }; @@ -1774,7 +1774,7 @@ struct inet_connection_sock_af_ops ipv4_specific = { }; #ifdef CONFIG_TCP_MD5SIG -static struct tcp_sock_af_ops tcp_sock_ipv4_specific = { +static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { .md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, .md5_add = tcp_v4_md5_add_func, -- cgit v1.2.3 From 3b401a81c0d50ea9c718cf837f62cc2e6e79cc30 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 1 Sep 2009 19:25:04 +0000 Subject: inet: inet_connection_sock_af_ops const The function block inet_connect_sock_af_ops contains no data make it constant. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3efbe94f022..ce7d3b021ff 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1754,7 +1754,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) return 0; } -struct inet_connection_sock_af_ops ipv4_specific = { +const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, -- cgit v1.2.3 From aa1330766c49199bdab4d4a9096d98b072df9044 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 2 Sep 2009 23:45:45 -0700 Subject: tcp: replace hard coded GFP_KERNEL with sk_allocation This fixed a lockdep warning which appeared when doing stress memory tests over NFS: inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage. page reclaim => nfs_writepage => tcp_sendmsg => lock sk_lock mount_root => nfs_root_data => tcp_close => lock sk_lock => tcp_send_fin => alloc_skb_fclone => page reclaim David raised a concern that if the allocation fails in tcp_send_fin(), and it's GFP_ATOMIC, we are going to yield() (which sleeps) and loop endlessly waiting for the allocation to succeed. But fact is, the original GFP_KERNEL also sleeps. GFP_ATOMIC+yield() looks weird, but it is no worse the implicit sleep inside GFP_KERNEL. Both could loop endlessly under memory pressure. CC: Arnaldo Carvalho de Melo CC: David S. Miller CC: Herbert Xu Signed-off-by: Wu Fengguang Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ce7d3b021ff..0543561da99 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -886,7 +886,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, } sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } - if (tcp_alloc_md5sig_pool() == NULL) { + if (tcp_alloc_md5sig_pool(sk) == NULL) { kfree(newkey); return -ENOMEM; } @@ -1007,8 +1007,9 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, if (!tcp_sk(sk)->md5sig_info) { struct tcp_sock *tp = tcp_sk(sk); - struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL); + struct tcp_md5sig_info *p; + p = kzalloc(sizeof(*p), sk->sk_allocation); if (!p) return -EINVAL; @@ -1016,7 +1017,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } - newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); + newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation); if (!newkey) return -ENOMEM; return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr, -- cgit v1.2.3