From f291196979ca80cdef199ca2b55e2758e8c23a0d Mon Sep 17 00:00:00 2001 From: Herbert Xu ~{PmVHI~} Date: Mon, 5 Jun 2006 15:03:37 -0700 Subject: [TCP]: Avoid skb_pull if possible when trimming head Trimming the head of an skb by calling skb_pull can cause the packet to become unaligned if the length pulled is odd. Since the length is entirely arbitrary for a FIN packet carrying data, this is actually quite common. Unaligned data is not the end of the world, but we should avoid it if it's easily done. In this case it is trivial. Since we're discarding all of the head data it doesn't matter whether we move skb->data forward or back. However, it is still possible to have unaligned skb->data in general. So network drivers should be prepared to handle it instead of crashing. This patch also adds an unlikely marking on len < headlen since partial ACKs on head data are extremely rare in the wild. As the return value of __pskb_trim_head is no longer ever NULL that has been removed. Signed-off-by: Herbert Xu ~{PmV>HI~} Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 743016baa04..f33c9dddaa1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -642,7 +642,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss * eventually). The difference is that pulled data not copied, but * immediately discarded. */ -static unsigned char *__pskb_trim_head(struct sk_buff *skb, int len) +static void __pskb_trim_head(struct sk_buff *skb, int len) { int i, k, eat; @@ -667,7 +667,6 @@ static unsigned char *__pskb_trim_head(struct sk_buff *skb, int len) skb->tail = skb->data; skb->data_len -= len; skb->len = skb->data_len; - return skb->tail; } int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) @@ -676,12 +675,11 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) return -ENOMEM; - if (len <= skb_headlen(skb)) { + /* If len == headlen, we avoid __skb_pull to preserve alignment. */ + if (unlikely(len < skb_headlen(skb))) __skb_pull(skb, len); - } else { - if (__pskb_trim_head(skb, len-skb_headlen(skb)) == NULL) - return -ENOMEM; - } + else + __pskb_trim_head(skb, len - skb_headlen(skb)); TCP_SKB_CB(skb)->seq += len; skb->ip_summed = CHECKSUM_HW; -- cgit v1.2.3 From 79320d7e14900c549c3520791a297328f53ff71e Mon Sep 17 00:00:00 2001 From: Aki M Nyrhinen Date: Sun, 11 Jun 2006 21:18:56 -0700 Subject: [TCP]: continued: reno sacked_out count fix From: Aki M Nyrhinen IMHO the current fix to the problem (in_flight underflow in reno) is incorrect. it treats the symptons but ignores the problem. the problem is timing out packets other than the head packet when we don't have sack. i try to explain (sorry if explaining the obvious). with sack, scanning the retransmit queue for timed out packets is fine because we know which packets in our retransmit queue have been acked by the receiver. without sack, we know only how many packets in our retransmit queue the receiver has acknowledged, but no idea which packets. think of a "typical" slow-start overshoot case, where for example every third packet in a window get lost because a router buffer gets full. with sack, we check for timeouts on those every third packet (as the rest have been sacked). the packet counting works out and if there is no reordering, we'll retransmit exactly the packets that were lost. without sack, however, we check for timeout on every packet and end up retransmitting consecutive packets in the retransmit queue. in our slow-start example, 2/3 of those retransmissions are unnecessary. these unnecessary retransmissions eat the congestion window and evetually prevent fast recovery from continuing, if enough packets were lost. Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4a538bc1683..b5521a9d3dc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1649,7 +1649,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) * Hence, we can detect timed out packets during fast * retransmit without falling to slow start. */ - if (tcp_head_timedout(sk, tp)) { + if (!IsReno(tp) && tcp_head_timedout(sk, tp)) { struct sk_buff *skb; skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint @@ -1662,8 +1662,6 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); - if (IsReno(tp)) - tcp_remove_reno_sacks(sk, tp, tcp_skb_pcount(skb) + 1); /* clear xmit_retrans hint */ if (tp->retransmit_skb_hint && -- cgit v1.2.3 From 42d1d52e695d87475846e9a09964cae1209eeecb Mon Sep 17 00:00:00 2001 From: Weidong Date: Mon, 12 Jun 2006 13:09:59 -0700 Subject: [IPV4]: Increment ipInHdrErrors when TTL expires. Signed-off-by: Weidong Signed-off-by: David S. Miller --- net/ipv4/ip_forward.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 0923add122b..9f0bb529ab7 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -116,6 +116,7 @@ sr_failed: too_many_hops: /* Tell the sender its packet died... */ + IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); drop: kfree_skb(skb); -- cgit v1.2.3