From 499923c7a3254971873e55a1690d07d3700eea47 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 9 Apr 2009 17:37:33 +0000 Subject: ipv6: Fix NULL pointer dereference with time-wait sockets Commit b2f5e7cd3dee2ed721bf0675e1a1ddebb849aee6 (ipv6: Fix conflict resolutions during ipv6 binding) introduced a regression where time-wait sockets were not treated correctly. This resulted in the following: BUG: unable to handle kernel NULL pointer dereference at 0000000000000062 IP: [] ipv4_rcv_saddr_equal+0x61/0x70 ... Call Trace: [] ipv6_rcv_saddr_equal+0x1bb/0x250 [ipv6] [] inet6_csk_bind_conflict+0x88/0xd0 [ipv6] [] inet_csk_get_port+0x1ee/0x400 [] inet6_bind+0x1cf/0x3a0 [ipv6] [] ? sockfd_lookup_light+0x3c/0xd0 [] sys_bind+0x89/0x100 [] ? trace_hardirqs_on_thunk+0x3a/0x3c [] system_call_fastpath+0x16/0x1b Tested-by: Brian Haley Tested-by: Ed Tomlinson Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv4/udp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bda08a09357..7a1d1ce22e6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -222,7 +222,7 @@ fail: return error; } -int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); @@ -1823,7 +1823,6 @@ EXPORT_SYMBOL(udp_lib_getsockopt); EXPORT_SYMBOL(udp_lib_setsockopt); EXPORT_SYMBOL(udp_poll); EXPORT_SYMBOL(udp_lib_get_port); -EXPORT_SYMBOL(ipv4_rcv_saddr_equal); #ifdef CONFIG_PROC_FS EXPORT_SYMBOL(udp_proc_register); -- cgit v1.2.3 From 86bcebafc5e7f5163ccf828792fe694b112ed6fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 14 Apr 2009 02:08:53 -0700 Subject: tcp: fix >2 iw selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A long-standing feature in tcp_init_metrics() is such that 
any of its goto reset paths prevents the call to tcp_init_cwnd(). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2bc8e27a163..c96a6bb2543 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -928,6 +928,8 @@ static void tcp_init_metrics(struct sock *sk) tcp_set_rto(sk); if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) goto reset; + +cwnd: tp->snd_cwnd = tcp_init_cwnd(tp, dst); tp->snd_cwnd_stamp = tcp_time_stamp; return; @@ -942,6 +944,7 @@ reset: tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; } + goto cwnd; } static void tcp_update_reordering(struct sock *sk, const int metric, -- cgit v1.2.3 From 98d500d66cb7940747b424b245fc6a51ecfbf005 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 16 Apr 2009 18:33:01 +0200 Subject: netfilter: nf_nat: add support for persistent mappings The removal of the SAME target accidentally removed one feature that is not available from the normal NAT targets so far: having multi-range mappings that use the same mapping for each connection from a single client. The current behaviour is to choose the address from the range based on source and destination IP, which breaks when communicating with sites having multiple addresses that require all connections to originate from the same IP address. Introduce an IP_NAT_RANGE_PERSISTENT option that controls whether the destination address is taken into account for selecting addresses. 
http://bugzilla.kernel.org/show_bug.cgi?id=12954 Signed-off-by: Patrick McHardy --- [Note: when IP_NAT_RANGE_PERSISTENT is set the destination address must be left out of the hash (0 is hashed in its place), so that every connection from one client maps to the same address; without the flag the destination address is hashed as before.] net/ipv4/netfilter/nf_nat_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index fe65187810f..3229e0a81ba 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -211,7 +211,8 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple, minip = ntohl(range->min_ip); maxip = ntohl(range->max_ip); j = jhash_2words((__force u32)tuple->src.u3.ip, - (__force u32)tuple->dst.u3.ip, 0); + range->flags & IP_NAT_RANGE_PERSISTENT ? + 0 : (__force u32)tuple->dst.u3.ip, 0); j = ((u64)j * (maxip - minip + 1)) >> 32; *var_ipp = htonl(minip + j); } -- cgit v1.2.3 From a0a69a0106dab8d20596f97f6674bed3b394d1ee Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 17 Apr 2009 02:34:38 -0700 Subject: gro: Fix use after free in tcp_gro_receive After calling skb_gro_receive skb->len can no longer be relied on since if the skb was merged using frags, then its pages will have been removed and the length reduced. This caused tcp_gro_receive to prematurely end merging which resulted in suboptimal performance with ixgbe. The fix is to store skb->len on the stack. Reported-by: Mark Wagner Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fafbec8b073..1d7f49c6f0c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2511,6 +2511,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) struct sk_buff *p; struct tcphdr *th; struct tcphdr *th2; + unsigned int len; unsigned int thlen; unsigned int flags; unsigned int mss = 1; @@ -2531,6 +2532,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) skb_gro_pull(skb, thlen); + len = skb_gro_len(skb); flags = tcp_flag_word(th); for (; (p = *head); head = &p->next) { @@ -2561,7 +2563,7 @@ found: mss = skb_shinfo(p)->gso_size; - flush |= (skb_gro_len(skb) > mss) | !skb_gro_len(skb); + flush |= (len > mss) | !len; flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); if (flush || skb_gro_receive(head, skb)) { @@ -2574,7 +2576,7 @@ found: tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); out_check_final: - flush = skb_gro_len(skb) < mss; + flush = len < mss; flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | TCP_FLAG_SYN | TCP_FLAG_FIN); -- cgit v1.2.3 From 52cf3cc8acea52ecb93ef1dddb4ef2ae4e35c319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 18 Apr 2009 05:48:48 +0000 Subject: tcp: fix mid-wq adjustment helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just noticed while doing some new work that the recent mid-wq adjustment logic will misbehave when FACK is not in use (happens either due to being sysctl'ed off or due to auto-detected reordering) because I forgot the relevant TCPCB tagbit. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. 
Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 53300fa2359..59aec609cec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -778,7 +778,7 @@ static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr) if (tp->lost_skb_hint && before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) && - (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked)) + (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))) tp->lost_cnt_hint -= decr; tcp_verify_left_out(tp); -- cgit v1.2.3