From 0bfbedb14a8a96c529341bec88991a92b41fac72 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 5 Oct 2009 00:11:22 -0700
Subject: tunnels: Optimize tx path

We currently dirty a cache line to update tunnel device stats
(tx_packets/tx_bytes). We better use the txq->tx_bytes/tx_packets
counters that already are present in cpu cache, in the cache
line shared with txq->_xmit_lock

This patch extends IPTUNNEL_XMIT() macro to use txq pointer
provided by the caller.

Also &tunnel->dev->stats can be replaced by &dev->stats

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index dbd19a78ca7..99da272951d 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -555,7 +555,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 				     struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
-	struct net_device_stats *stats = &tunnel->dev->stats;
+	struct net_device_stats *stats = &dev->stats;
+	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
 	struct iphdr  *tiph = &tunnel->parms.iph;
 	struct ipv6hdr *iph6 = ipv6_hdr(skb);
 	u8     tos = tunnel->parms.iph.tos;
@@ -688,7 +689,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 		if (!new_skb) {
 			ip_rt_put(rt);
-			stats->tx_dropped++;
+			txq->tx_dropped++;
 			dev_kfree_skb(skb);
 			return NETDEV_TX_OK;
 		}
-- 
cgit v1.2.3


From fa857afcf77da669eb6b7031ec07ad14b912c307 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?=
 =?UTF-8?q?=E6=98=8E?= <yoshfuji@linux-ipv6.org>
Date: Tue, 22 Sep 2009 23:43:14 +0000
Subject: ipv6 sit: 6rd (IPv6 Rapid Deployment) Support.

IPv6 Rapid Deployment (6rd; draft-ietf-softwire-ipv6-6rd) builds upon
mechanisms of 6to4 (RFC3056) to enable a service provider to rapidly
deploy IPv6 unicast service to IPv4 sites to which it provides
customer premise equipment.  Like 6to4, it utilizes stateless IPv6 in
IPv4 encapsulation in order to transit IPv4-only network
infrastructure.  Unlike 6to4, a 6rd service provider uses an IPv6
prefix of its own in place of the fixed 6to4 prefix.

With this option enabled, the SIT driver offers 6rd functionality by
providing additional ioctl API to configure the IPv6 Prefix for in
stead of static 2002::/16 for 6to4.

Original patch was done by Alexandre Cassen <acassen@freebox.fr>
based on old Internet-Draft.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/Kconfig |  19 +++++++++
 net/ipv6/sit.c   | 124 +++++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 135 insertions(+), 8 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ead6c7a42f4..f5619982745 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -170,6 +170,25 @@ config IPV6_SIT
 
 	  Saying M here will produce a module called sit. If unsure, say Y.
 
+config IPV6_SIT_6RD
+	bool "IPv6: IPv6 Rapid Development (6RD) (EXPERIMENTAL)"
+	depends on IPV6_SIT && EXPERIMENTAL
+	default n
+	---help---
+	  IPv6 Rapid Deployment (6rd; draft-ietf-softwire-ipv6-6rd) builds upon
+	  mechanisms of 6to4 (RFC3056) to enable a service provider to rapidly
+	  deploy IPv6 unicast service to IPv4 sites to which it provides
+	  customer premise equipment.  Like 6to4, it utilizes stateless IPv6 in
+	  IPv4 encapsulation in order to transit IPv4-only network
+	  infrastructure.  Unlike 6to4, a 6rd service provider uses an IPv6
+	  prefix of its own in place of the fixed 6to4 prefix.
+
+	  With this option enabled, the SIT driver offers 6rd functionality by
+	  providing additional ioctl API to configure the IPv6 Prefix for in
+	  stead of static 2002::/16 for 6to4.
+
+	  If unsure, say N.
+
 config IPV6_NDISC_NODETYPE
 	bool
 
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 99da272951d..6955654262a 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -161,6 +161,21 @@ static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
 	write_unlock_bh(&ipip6_lock);
 }
 
+static void ipip6_tunnel_clone_6rd(struct ip_tunnel *t, struct sit_net *sitn)
+{
+#ifdef CONFIG_IPV6_SIT_6RD
+	if (t->dev == sitn->fb_tunnel_dev) {
+		ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
+		t->ip6rd.relay_prefix = 0;
+		t->ip6rd.prefixlen = 16;
+		t->ip6rd.relay_prefixlen = 0;
+	} else {
+		struct ip_tunnel *t0 = netdev_priv(sitn->fb_tunnel_dev);
+		memcpy(&t->ip6rd, &t0->ip6rd, sizeof(t->ip6rd));
+	}
+#endif
+}
+
 static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
 		struct ip_tunnel_parm *parms, int create)
 {
@@ -213,6 +228,8 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
 
 	dev_hold(dev);
 
+	ipip6_tunnel_clone_6rd(t, sitn);
+
 	ipip6_tunnel_link(sitn, nt);
 	return nt;
 
@@ -532,17 +549,41 @@ out:
 	return 0;
 }
 
-/* Returns the embedded IPv4 address if the IPv6 address
-   comes from 6to4 (RFC 3056) addr space */
-
-static inline __be32 try_6to4(struct in6_addr *v6dst)
+/*
+ * Returns the embedded IPv4 address if the IPv6 address
+ * comes from 6rd / 6to4 (RFC 3056) addr space.
+ */
+static inline
+__be32 try_6rd(struct in6_addr *v6dst, struct ip_tunnel *tunnel)
 {
 	__be32 dst = 0;
 
+#ifdef CONFIG_IPV6_SIT_6RD
+	if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix,
+			      tunnel->ip6rd.prefixlen)) {
+		unsigned pbw0, pbi0;
+		int pbi1;
+		u32 d;
+
+		pbw0 = tunnel->ip6rd.prefixlen >> 5;
+		pbi0 = tunnel->ip6rd.prefixlen & 0x1f;
+
+		d = (ntohl(tunnel->ip6rd.prefix.s6_addr32[pbw0]) << pbi0) >>
+		    tunnel->ip6rd.relay_prefixlen;
+
+		pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen;
+		if (pbi1 > 0)
+			d |= ntohl(tunnel->ip6rd.prefix.s6_addr32[pbw0 + 1]) >>
+			     (32 - pbi1);
+
+		dst = tunnel->ip6rd.relay_prefix | htonl(d);
+	}
+#else
 	if (v6dst->s6_addr16[0] == htons(0x2002)) {
 		/* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
 		memcpy(&dst, &v6dst->s6_addr16[1], 4);
 	}
+#endif
 	return dst;
 }
 
@@ -596,7 +637,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	}
 
 	if (!dst)
-		dst = try_6to4(&iph6->daddr);
+		dst = try_6rd(&iph6->daddr, tunnel);
 
 	if (!dst) {
 		struct neighbour *neigh = NULL;
@@ -786,9 +827,15 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 	struct ip_tunnel *t;
 	struct net *net = dev_net(dev);
 	struct sit_net *sitn = net_generic(net, sit_net_id);
+#ifdef CONFIG_IPV6_SIT_6RD
+	struct ip_tunnel_6rd ip6rd;
+#endif
 
 	switch (cmd) {
 	case SIOCGETTUNNEL:
+#ifdef CONFIG_IPV6_SIT_6RD
+	case SIOCGET6RD:
+#endif
 		t = NULL;
 		if (dev == sitn->fb_tunnel_dev) {
 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
@@ -799,9 +846,25 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 		}
 		if (t == NULL)
 			t = netdev_priv(dev);
-		memcpy(&p, &t->parms, sizeof(p));
-		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
-			err = -EFAULT;
+
+		err = -EFAULT;
+		if (cmd == SIOCGETTUNNEL) {
+			memcpy(&p, &t->parms, sizeof(p));
+			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p,
+					 sizeof(p)))
+				goto done;
+#ifdef CONFIG_IPV6_SIT_6RD
+		} else {
+			ipv6_addr_copy(&ip6rd.prefix, &t->ip6rd.prefix);
+			ip6rd.relay_prefix = t->ip6rd.relay_prefix;
+			ip6rd.prefixlen = t->ip6rd.prefixlen;
+			ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
+			if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd,
+					 sizeof(ip6rd)))
+				goto done;
+#endif
+		}
+		err = 0;
 		break;
 
 	case SIOCADDTUNNEL:
@@ -922,6 +985,51 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 		netdev_state_change(dev);
 		break;
 
+#ifdef CONFIG_IPV6_SIT_6RD
+	case SIOCADD6RD:
+	case SIOCCHG6RD:
+	case SIOCDEL6RD:
+		err = -EPERM;
+		if (!capable(CAP_NET_ADMIN))
+			goto done;
+
+		err = -EFAULT;
+		if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data,
+				   sizeof(ip6rd)))
+			goto done;
+
+		t = netdev_priv(dev);
+
+		if (cmd != SIOCDEL6RD) {
+			struct in6_addr prefix;
+			__be32 relay_prefix;
+
+			err = -EINVAL;
+			if (ip6rd.relay_prefixlen > 32 ||
+			    ip6rd.prefixlen + (32 - ip6rd.relay_prefixlen) > 64)
+				goto done;
+
+			ipv6_addr_prefix(&prefix, &ip6rd.prefix,
+					 ip6rd.prefixlen);
+			if (!ipv6_addr_equal(&prefix, &ip6rd.prefix))
+				goto done;
+			relay_prefix = ip6rd.relay_prefix &
+				       htonl(0xffffffffUL <<
+					     (32 - ip6rd.relay_prefixlen));
+			if (relay_prefix != ip6rd.relay_prefix)
+				goto done;
+
+			ipv6_addr_copy(&t->ip6rd.prefix, &prefix);
+			t->ip6rd.relay_prefix = relay_prefix;
+			t->ip6rd.prefixlen = ip6rd.prefixlen;
+			t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen;
+		} else
+			ipip6_tunnel_clone_6rd(t, sitn);
+
+		err = 0;
+		break;
+#endif
+
 	default:
 		err = -EINVAL;
 	}
-- 
cgit v1.2.3


From 51953d5bc43e468f24cc573a45cde1d32af129b8 Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Mon, 5 Oct 2009 08:24:16 +0000
Subject: Use sk_mark for IPv6 routing lookups

Atis Elsts wrote:
> Not sure if there is need to fill the mark from skb in tunnel xmit functions. In any case, it's not done for GRE or IPIP tunnels at the moment.

Ok, I'll just drop that part, I'm not sure what should be done in this case.

> Also, in this patch you are doing that for SIT (v6-in-v4) tunnels only, and not doing it for v4-in-v6 or v6-in-v6 tunnels. Any reason for that?

I just sent that patch out too quickly, here's a better one with the updates.

Add support for IPv6 route lookups using sk_mark.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c              | 1 +
 net/ipv6/datagram.c              | 1 +
 net/ipv6/inet6_connection_sock.c | 1 +
 net/ipv6/ipv6_sockglue.c         | 1 +
 net/ipv6/syncookies.c            | 1 +
 net/ipv6/tcp_ipv6.c              | 4 ++++
 net/ipv6/udp.c                   | 2 ++
 7 files changed, 11 insertions(+)

(limited to 'net/ipv6')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e127a32f954..da36497ae64 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -654,6 +654,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
 		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 		fl.fl6_flowlabel = np->flow_label;
 		fl.oif = sk->sk_bound_dev_if;
+		fl.mark = sk->sk_mark;
 		fl.fl_ip_dport = inet->dport;
 		fl.fl_ip_sport = inet->sport;
 		security_sk_classify_flow(sk, &fl);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index e2bdc6d83a4..a615b4dea6c 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -147,6 +147,7 @@ ipv4_connected:
 	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
 	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 	fl.oif = sk->sk_bound_dev_if;
+	fl.mark = sk->sk_mark;
 	fl.fl_ip_dport = inet->dport;
 	fl.fl_ip_sport = inet->sport;
 
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index cc4797dd832..a9f4a21b31e 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -194,6 +194,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
 	fl.fl6_flowlabel = np->flow_label;
 	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
 	fl.oif = sk->sk_bound_dev_if;
+	fl.mark = sk->sk_mark;
 	fl.fl_ip_sport = inet->sport;
 	fl.fl_ip_dport = inet->dport;
 	security_sk_classify_flow(sk, &fl);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 14f54eb5a7f..dc0f7366073 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -424,6 +424,7 @@ sticky_done:
 
 		fl.fl6_flowlabel = 0;
 		fl.oif = sk->sk_bound_dev_if;
+		fl.mark = sk->sk_mark;
 
 		if (optlen == 0)
 			goto update;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 6b6ae913b5d..cbe55e5d9f9 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -252,6 +252,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		}
 		ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
+		fl.mark = sk->sk_mark;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 		fl.fl_ip_sport = inet_sk(sk)->sport;
 		security_req_classify_flow(req, &fl);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 21d100b68b1..321aafd40dc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -243,6 +243,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	ipv6_addr_copy(&fl.fl6_src,
 		       (saddr ? saddr : &np->saddr));
 	fl.oif = sk->sk_bound_dev_if;
+	fl.mark = sk->sk_mark;
 	fl.fl_ip_dport = usin->sin6_port;
 	fl.fl_ip_sport = inet->sport;
 
@@ -383,6 +384,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
 			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 			fl.oif = sk->sk_bound_dev_if;
+			fl.mark = sk->sk_mark;
 			fl.fl_ip_dport = inet->dport;
 			fl.fl_ip_sport = inet->sport;
 			security_skb_classify_flow(skb, &fl);
@@ -477,6 +479,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
 	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
 	fl.fl6_flowlabel = 0;
 	fl.oif = treq->iif;
+	fl.mark = sk->sk_mark;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 	fl.fl_ip_sport = inet_rsk(req)->loc_port;
 	security_req_classify_flow(req, &fl);
@@ -1345,6 +1348,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		}
 		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
+		fl.mark = sk->sk_mark;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 		fl.fl_ip_sport = inet_rsk(req)->loc_port;
 		security_req_classify_flow(req, &fl);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3a60f12b34e..3842c557d6f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -879,6 +879,8 @@ do_udp_sendmsg:
 	if (!fl.oif)
 		fl.oif = np->sticky_pktinfo.ipi6_ifindex;
 
+	fl.mark = sk->sk_mark;
+
 	if (msg->msg_controllen) {
 		opt = &opt_space;
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
-- 
cgit v1.2.3


From f7734fdf61ec6bb848e0bafc1fb8bad2c124bb50 Mon Sep 17 00:00:00 2001
From: Octavian Purdila <opurdila@ixiacom.com>
Date: Fri, 2 Oct 2009 11:39:15 +0000
Subject: make TLLAO option for NA packets configurable

On Friday 02 October 2009 20:53:51 you wrote:

> This is good although I would have shortened the name.

Ah, I knew I forgot something :) Here is v4.

tavi

>From 24d96d825b9fa832b22878cc6c990d5711968734 Mon Sep 17 00:00:00 2001
From: Octavian Purdila <opurdila@ixiacom.com>
Date: Fri, 2 Oct 2009 00:51:15 +0300
Subject: [PATCH] ipv6: new sysctl for sending TLLAO with unicast NAs

Neighbor advertisements responding to unicast neighbor solicitations
did not include the target link-layer address option. This patch adds
a new sysctl option (disabled by default) which controls whether this
option should be sent even with unicast NAs.

The need for this arose because certain routers expect the TLLAO in
some situations even as a response to unicast NS packets.

Moreover, RFC 2461 recommends sending this to avoid a race condition
(section 4.4, Target link-layer address)

Signed-off-by: Cosmin Ratiu <cratiu@ixiacom.com>
Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 8 ++++++++
 net/ipv6/ndisc.c    | 1 +
 2 files changed, 9 insertions(+)

(limited to 'net/ipv6')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1fd0a3d775d..bdcee6981c6 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4352,6 +4352,14 @@ static struct addrconf_sysctl_table
 			.mode		=	0644,
 			.proc_handler	=	proc_dointvec,
 		},
+		{
+			.ctl_name       = CTL_UNNUMBERED,
+			.procname       = "force_tllao",
+			.data           = &ipv6_devconf.force_tllao,
+			.maxlen         = sizeof(int),
+			.mode           = 0644,
+			.proc_handler   = proc_dointvec
+		},
 		{
 			.ctl_name	=	0,	/* sentinel */
 		}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f74e4e2cdd0..3507cfe1e7a 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -598,6 +598,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 	icmp6h.icmp6_solicited = solicited;
 	icmp6h.icmp6_override = override;
 
+	inc_opt |= ifp->idev->cnf.force_tllao;
 	__ndisc_send(dev, neigh, daddr, src_addr,
 		     &icmp6h, solicited_addr,
 		     inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
-- 
cgit v1.2.3


From ec1b4cf74c81bfd0fbe5bf62bafc86c45917e72f Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 5 Oct 2009 05:58:39 +0000
Subject: net: mark net_proto_ops as const

All usages of structure net_proto_ops should be declared const.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index da36497ae64..94216519873 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -552,7 +552,7 @@ const struct proto_ops inet6_dgram_ops = {
 #endif
 };
 
-static struct net_proto_family inet6_family_ops = {
+static const struct net_proto_family inet6_family_ops = {
 	.family = PF_INET6,
 	.create = inet6_create,
 	.owner	= THIS_MODULE,
-- 
cgit v1.2.3


From 86c36ce45dc2e2f022562c6481cd778f4cc381a9 Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Wed, 7 Oct 2009 13:58:01 -0700
Subject: IPv6: use ipv6_addr_copy() in ip6_route_redirect()

Change ip6_route_redirect() to use ipv6_addr_copy().

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d6fe7646a8f..df9432a46ff 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1471,9 +1471,10 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
 				},
 			},
 		},
-		.gateway = *gateway,
 	};
 
+	ipv6_addr_copy(&rdfl.gateway, gateway);
+
 	if (rt6_need_strict(dest))
 		flags |= RT6_LOOKUP_F_IFACE;
 
-- 
cgit v1.2.3


From b301e82cf8104cfddbe5452ebe625bab49597c64 Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Wed, 7 Oct 2009 13:58:25 -0700
Subject: IPv6: use ipv6_addr_set_v4mapped()

Might as well use the ipv6_addr_set_v4mapped() inline we created last
year.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/datagram.c | 24 ++++++++++--------------
 net/ipv6/tcp_ipv6.c | 12 ++++--------
 net/ipv6/udp.c      |  4 ++--
 3 files changed, 16 insertions(+), 24 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index a615b4dea6c..dbfec7147aa 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -98,17 +98,14 @@ ipv4_connected:
 		if (err)
 			goto out;
 
-		ipv6_addr_set(&np->daddr, 0, 0, htonl(0x0000ffff), inet->daddr);
+		ipv6_addr_set_v4mapped(inet->daddr, &np->daddr);
 
-		if (ipv6_addr_any(&np->saddr)) {
-			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000ffff),
-				      inet->saddr);
-		}
+		if (ipv6_addr_any(&np->saddr))
+			ipv6_addr_set_v4mapped(inet->saddr, &np->saddr);
+
+		if (ipv6_addr_any(&np->rcv_saddr))
+			ipv6_addr_set_v4mapped(inet->rcv_saddr, &np->rcv_saddr);
 
-		if (ipv6_addr_any(&np->rcv_saddr)) {
-			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000ffff),
-				      inet->rcv_saddr);
-		}
 		goto out;
 	}
 
@@ -330,9 +327,8 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 				sin->sin6_scope_id = IP6CB(skb)->iif;
 		} else {
-			ipv6_addr_set(&sin->sin6_addr, 0, 0,
-				      htonl(0xffff),
-				      *(__be32 *)(nh + serr->addr_offset));
+			ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset),
+					       &sin->sin6_addr);
 		}
 	}
 
@@ -352,8 +348,8 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 		} else {
 			struct inet_sock *inet = inet_sk(sk);
 
-			ipv6_addr_set(&sin->sin6_addr, 0, 0,
-				      htonl(0xffff), ip_hdr(skb)->saddr);
+			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
+					       &sin->sin6_addr);
 			if (inet->cmsg_flags)
 				ip_cmsg_recv(msg, skb);
 		}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 321aafd40dc..45176305914 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -226,10 +226,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 #endif
 			goto failure;
 		} else {
-			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
-				      inet->saddr);
-			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
-				      inet->rcv_saddr);
+			ipv6_addr_set_v4mapped(inet->saddr, &np->saddr);
+			ipv6_addr_set_v4mapped(inet->rcv_saddr, &np->rcv_saddr);
 		}
 
 		return err;
@@ -1293,11 +1291,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
-			      newinet->daddr);
+		ipv6_addr_set_v4mapped(newinet->daddr, &newnp->daddr);
 
-		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
-			      newinet->saddr);
+		ipv6_addr_set_v4mapped(newinet->saddr, &newnp->saddr);
 
 		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3842c557d6f..c6a303ec834 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -265,8 +265,8 @@ try_again:
 		sin6->sin6_scope_id = 0;
 
 		if (is_udp4)
-			ipv6_addr_set(&sin6->sin6_addr, 0, 0,
-				      htonl(0xffff), ip_hdr(skb)->saddr);
+			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
+					       &sin6->sin6_addr);
 		else {
 			ipv6_addr_copy(&sin6->sin6_addr,
 				       &ipv6_hdr(skb)->saddr);
-- 
cgit v1.2.3


From 8a6dfd43d1891882f8ca05d73aa7735fb0edae3b Mon Sep 17 00:00:00 2001
From: Alexandre Cassen <acassen@freebox.fr>
Date: Wed, 7 Oct 2009 14:50:30 -0700
Subject: IPv6: Fix 6RD typo

Following fix a small typo.

Signed-off-by: Alexandre Cassen <acassen@freebox.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index f5619982745..a578096152a 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -171,7 +171,7 @@ config IPV6_SIT
 	  Saying M here will produce a module called sit. If unsure, say Y.
 
 config IPV6_SIT_6RD
-	bool "IPv6: IPv6 Rapid Development (6RD) (EXPERIMENTAL)"
+	bool "IPv6: IPv6 Rapid Deployment (6RD) (EXPERIMENTAL)"
 	depends on IPV6_SIT && EXPERIMENTAL
 	default n
 	---help---
-- 
cgit v1.2.3


From f86dcc5aa8c7908f2c287e7a211228df599e3e71 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 7 Oct 2009 00:37:59 +0000
Subject: udp: dynamically size hash tables at boot time

UDP_HTABLE_SIZE was initialy defined to 128, which is a bit small for
several setups.

4000 active UDP sockets -> 32 sockets per chain in average. An
incoming frame has to lookup all sockets to find best match, so long
chains hurt latency.

Instead of a fixed size hash table that cant be perfect for every
needs, let UDP stack choose its table size at boot time like tcp/ip
route, using alloc_large_system_hash() helper

Add an optional boot parameter, uhash_entries=x so that an admin can
force a size between 256 and 65536 if needed, like thash_entries and
rhash_entries.

dmesg logs two new lines :
[    0.647039] UDP hash table entries: 512 (order: 0, 4096 bytes)
[    0.647099] UDP Lite hash table entries: 512 (order: 0, 4096 bytes)

Maximal size on 64bit arches would be 65536 slots, ie 1 MBytes for non
debugging spinlocks.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c6a303ec834..ff778c172ef 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -132,7 +132,7 @@ static struct sock *__udp6_lib_lookup(struct net *net,
 	struct sock *sk, *result;
 	struct hlist_nulls_node *node;
 	unsigned short hnum = ntohs(dport);
-	unsigned int hash = udp_hashfn(net, hnum);
+	unsigned int hash = udp_hashfn(net, hnum, udptable->mask);
 	struct udp_hslot *hslot = &udptable->hash[hash];
 	int score, badness;
 
@@ -452,7 +452,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 {
 	struct sock *sk, *sk2;
 	const struct udphdr *uh = udp_hdr(skb);
-	struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))];
+	struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
 	int dif;
 
 	spin_lock(&hslot->lock);
@@ -1197,7 +1197,7 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket
 	destp = ntohs(inet->dport);
 	srcp  = ntohs(inet->sport);
 	seq_printf(seq,
-		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+		   "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
 		   bucket,
 		   src->s6_addr32[0], src->s6_addr32[1],
-- 
cgit v1.2.3


From e0c93948154328e13b4c0b0502d66af93f0fdfc4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?=
 =?UTF-8?q?=E6=98=8E?= <yoshfuji@linux-ipv6.org>
Date: Sun, 11 Oct 2009 03:31:34 +0000
Subject: ipv6 sit: Ensure to initialize 6rd parameters.

ipv6 sit: Ensure to initialize 6rd parameters.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 6955654262a..8594451c747 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -161,9 +161,11 @@ static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
 	write_unlock_bh(&ipip6_lock);
 }
 
-static void ipip6_tunnel_clone_6rd(struct ip_tunnel *t, struct sit_net *sitn)
+static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
 {
 #ifdef CONFIG_IPV6_SIT_6RD
+	struct ip_tunnel *t = netdev_priv(dev);
+
 	if (t->dev == sitn->fb_tunnel_dev) {
 		ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
 		t->ip6rd.relay_prefix = 0;
@@ -219,6 +221,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
 
 	nt->parms = *parms;
 	ipip6_tunnel_init(dev);
+	ipip6_tunnel_clone_6rd(dev, sitn);
 
 	if (parms->i_flags & SIT_ISATAP)
 		dev->priv_flags |= IFF_ISATAP;
@@ -228,8 +231,6 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
 
 	dev_hold(dev);
 
-	ipip6_tunnel_clone_6rd(t, sitn);
-
 	ipip6_tunnel_link(sitn, nt);
 	return nt;
 
@@ -1024,7 +1025,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 			t->ip6rd.prefixlen = ip6rd.prefixlen;
 			t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen;
 		} else
-			ipip6_tunnel_clone_6rd(t, sitn);
+			ipip6_tunnel_clone_6rd(dev, sitn);
 
 		err = 0;
 		break;
@@ -1148,6 +1149,7 @@ static int sit_init_net(struct net *net)
 	dev_net_set(sitn->fb_tunnel_dev, net);
 
 	ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
+	ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
 
 	if ((err = register_netdev(sitn->fb_tunnel_dev)))
 		goto err_reg_dev;
-- 
cgit v1.2.3


From e7db38c38fe8df1d890ae772737e27308bdc5956 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?=
 =?UTF-8?q?=E6=98=8E?= <yoshfuji@linux-ipv6.org>
Date: Sun, 11 Oct 2009 03:44:45 +0000
Subject: ipv6 sit: Fix 6rd relay address.

ipv6 sit: Fix 6rd relay address.

Relay's address should be extracted from real IPv6 address
instead of configured prefix.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8594451c747..193d0c6c5ce 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -569,12 +569,12 @@ __be32 try_6rd(struct in6_addr *v6dst, struct ip_tunnel *tunnel)
 		pbw0 = tunnel->ip6rd.prefixlen >> 5;
 		pbi0 = tunnel->ip6rd.prefixlen & 0x1f;
 
-		d = (ntohl(tunnel->ip6rd.prefix.s6_addr32[pbw0]) << pbi0) >>
+		d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >>
 		    tunnel->ip6rd.relay_prefixlen;
 
 		pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen;
 		if (pbi1 > 0)
-			d |= ntohl(tunnel->ip6rd.prefix.s6_addr32[pbw0 + 1]) >>
+			d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >>
 			     (32 - pbi1);
 
 		dst = tunnel->ip6rd.relay_prefix | htonl(d);
-- 
cgit v1.2.3


From 91b2a3f9bb0fa8d64b365a10b0624b0341e1a338 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?=
 =?UTF-8?q?=E6=98=8E?= <yoshfuji@linux-ipv6.org>
Date: Sun, 11 Oct 2009 03:45:13 +0000
Subject: ipv6 sit: Set relay to 0.0.0.0 directly if relay_prefixlen == 0.

ipv6 sit: Set relay to 0.0.0.0 directly if relay_prefixlen == 0.

Do not use bit-shift if relay_prefixlen == 0;
relay_prefix << 32 does not result in 0.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 193d0c6c5ce..510d31f3cb9 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1014,9 +1014,12 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 					 ip6rd.prefixlen);
 			if (!ipv6_addr_equal(&prefix, &ip6rd.prefix))
 				goto done;
-			relay_prefix = ip6rd.relay_prefix &
-				       htonl(0xffffffffUL <<
-					     (32 - ip6rd.relay_prefixlen));
+			if (ip6rd.relay_prefixlen)
+				relay_prefix = ip6rd.relay_prefix &
+					       htonl(0xffffffffUL <<
+						     (32 - ip6rd.relay_prefixlen));
+			else
+				relay_prefix = 0;
 			if (relay_prefix != ip6rd.relay_prefix)
 				goto done;
 
-- 
cgit v1.2.3


From 3b885787ea4112eaa80945999ea0901bf742707f Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 12 Oct 2009 13:26:31 -0700
Subject: net: Generalize socket rx gap / receive queue overflow cmsg

Create a new socket level option to report number of queue overflows

Recently I augmented the AF_PACKET protocol to report the number of frames lost
on the socket receive queue between any two enqueued frames.  This value was
exported via a SOL_PACKET level cmsg.  AFter I completed that work it was
requested that this feature be generalized so that any datagram oriented socket
could make use of this option.  As such I've created this patch, It creates a
new SOL_SOCKET level option called SO_RXQ_OVFL, which when enabled exports a
SOL_SOCKET level cmsg that reports the nubmer of times the sk_receive_queue
overflowed between any two given frames.  It also augments the AF_PACKET
protocol to take advantage of this new feature (as it previously did not touch
sk->sk_drops, which this patch uses to record the overflow count).  Tested
successfully by me.

Notes:

1) Unlike my previous patch, this patch simply records the sk_drops value, which
is not a number of drops between packets, but rather a total number of drops.
Deltas must be computed in user space.

2) While this patch currently works with datagram oriented protocols, it will
also be accepted by non-datagram oriented protocols. I'm not sure if thats
agreeable to everyone, but my argument in favor of doing so is that, for those
protocols which aren't applicable to this option, sk_drops will always be zero,
and reporting no drops on a receive queue that isn't used for those
non-participating protocols seems reasonable to me.  This also saves us having
to code in a per-protocol opt in mechanism.

3) This applies cleanly to net-next assuming that commit
977750076d98c7ff6cbda51858bb5a5894a9d9ab (my af packet cmsg patch) is reverted

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/raw.c | 2 +-
 net/ipv6/udp.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4f24570b086..d8375bc7f2d 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -497,7 +497,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 			sin6->sin6_scope_id = IP6CB(skb)->iif;
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (np->rxopt.all)
 		datagram_recv_ctl(sk, msg, skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ff778c172ef..1f8e2afa449 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -252,7 +252,7 @@ try_again:
 					UDP_MIB_INDATAGRAMS, is_udplite);
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	/* Copy the address. */
 	if (msg->msg_name) {
-- 
cgit v1.2.3


From c3faca053d0a9c877597935b434150b422dbc6fb Mon Sep 17 00:00:00 2001
From: Cosmin Ratiu <cratiu@ixiacom.com>
Date: Fri, 9 Oct 2009 03:11:14 +0000
Subject: ipv6: fix devconf after adding force_tllao option

Signed-off-by: Cosmin Ratiu <cratiu@ixiacom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/ipv6')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index bdcee6981c6..91864840961 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3708,6 +3708,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 #endif
 	array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
 	array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
+	array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
 }
 
 static inline size_t inet6_if_nlmsg_size(void)
-- 
cgit v1.2.3


From f373b53b5fe67aa4a6f28f921a529cc90f88e79b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 9 Oct 2009 00:16:19 +0000
Subject: tcp: replace ehash_size by ehash_mask

Storing the mask (size - 1) instead of the size allows fast path to be
a bit faster.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/inet6_hashtables.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 1bcc3431859..874aed86e1a 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -73,7 +73,7 @@ struct sock *__inet6_lookup_established(struct net *net,
 	 * have wildcards anyways.
 	 */
 	unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
-	unsigned int slot = hash & (hashinfo->ehash_size - 1);
+	unsigned int slot = hash & hashinfo->ehash_mask;
 	struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
 
 
-- 
cgit v1.2.3


From 766e9037cc139ee25ed93ee5ad11e1450c4b99f6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 14 Oct 2009 20:40:11 -0700
Subject: net: sk_drops consolidation

sock_queue_rcv_skb() can update sk_drops itself, removing need for
callers to take care of it. This is more consistent since
sock_queue_rcv_skb() also reads sk_drops when queueing a skb.

This adds sk_drops managment to many protocols that not cared yet.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/raw.c | 3 +--
 net/ipv6/udp.c | 6 ++----
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index d8375bc7f2d..fd737efed96 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -381,8 +381,7 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
 	}
 
 	/* Charge it to the socket. */
-	if (sock_queue_rcv_skb(sk,skb)<0) {
-		atomic_inc(&sk->sk_drops);
+	if (sock_queue_rcv_skb(sk, skb) < 0) {
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1f8e2afa449..b86425b7ea2 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -385,13 +385,11 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 			goto drop;
 	}
 
-	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
+	if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
 		/* Note that an ENOMEM error is charged twice */
-		if (rc == -ENOMEM) {
+		if (rc == -ENOMEM)
 			UDP6_INC_STATS_BH(sock_net(sk),
 					UDP_MIB_RCVBUFERRORS, is_udplite);
-			atomic_inc(&sk->sk_drops);
-		}
 		goto drop;
 	}
 
-- 
cgit v1.2.3


From c720c7e8383aff1cb219bddf474ed89d850336e3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 15 Oct 2009 06:30:45 +0000
Subject: inet: rename some inet_sock fields

In order to have better cache layouts of struct sock (separate zones
for rx/tx paths), we need this preliminary patch.

Goal is to transfert fields used at lookup time in the first
read-mostly cache line (inside struct sock_common) and move sk_refcnt
to a separate cache line (only written by rx path)

This patch adds inet_ prefix to daddr, rcv_saddr, dport, num, saddr,
sport and id fields. This allows a future patch to define these
fields as macros, like sk_refcnt, without name clashes.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c              | 28 ++++++++++++++--------------
 net/ipv6/datagram.c              | 15 ++++++++-------
 net/ipv6/inet6_connection_sock.c |  6 +++---
 net/ipv6/inet6_hashtables.c      | 12 ++++++------
 net/ipv6/ip6mr.c                 |  2 +-
 net/ipv6/ipv6_sockglue.c         |  6 +++---
 net/ipv6/raw.c                   | 30 +++++++++++++++---------------
 net/ipv6/syncookies.c            |  2 +-
 net/ipv6/tcp_ipv6.c              | 32 +++++++++++++++++---------------
 net/ipv6/udp.c                   | 24 ++++++++++++------------
 10 files changed, 80 insertions(+), 77 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 94216519873..b6d05881867 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -185,7 +185,7 @@ lookup_protocol:
 	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
 
 	if (SOCK_RAW == sock->type) {
-		inet->num = protocol;
+		inet->inet_num = protocol;
 		if (IPPROTO_RAW == protocol)
 			inet->hdrincl = 1;
 	}
@@ -228,12 +228,12 @@ lookup_protocol:
 	 */
 	sk_refcnt_debug_inc(sk);
 
-	if (inet->num) {
+	if (inet->inet_num) {
 		/* It assumes that any protocol which allows
 		 * the user to assign a number at socket
 		 * creation time automatically shares.
 		 */
-		inet->sport = htons(inet->num);
+		inet->inet_sport = htons(inet->inet_num);
 		sk->sk_prot->hash(sk);
 	}
 	if (sk->sk_prot->init) {
@@ -281,7 +281,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	lock_sock(sk);
 
 	/* Check these errors (active socket, double bind). */
-	if (sk->sk_state != TCP_CLOSE || inet->num) {
+	if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
 		err = -EINVAL;
 		goto out;
 	}
@@ -353,8 +353,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		}
 	}
 
-	inet->rcv_saddr = v4addr;
-	inet->saddr = v4addr;
+	inet->inet_rcv_saddr = v4addr;
+	inet->inet_saddr = v4addr;
 
 	ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
 
@@ -375,9 +375,9 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	}
 	if (snum)
 		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
-	inet->sport = htons(inet->num);
-	inet->dport = 0;
-	inet->daddr = 0;
+	inet->inet_sport = htons(inet->inet_num);
+	inet->inet_dport = 0;
+	inet->inet_daddr = 0;
 out:
 	release_sock(sk);
 	return err;
@@ -441,12 +441,12 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 	sin->sin6_flowinfo = 0;
 	sin->sin6_scope_id = 0;
 	if (peer) {
-		if (!inet->dport)
+		if (!inet->inet_dport)
 			return -ENOTCONN;
 		if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
 		    peer == 1)
 			return -ENOTCONN;
-		sin->sin6_port = inet->dport;
+		sin->sin6_port = inet->inet_dport;
 		ipv6_addr_copy(&sin->sin6_addr, &np->daddr);
 		if (np->sndflow)
 			sin->sin6_flowinfo = np->flow_label;
@@ -456,7 +456,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 		else
 			ipv6_addr_copy(&sin->sin6_addr, &np->rcv_saddr);
 
-		sin->sin6_port = inet->sport;
+		sin->sin6_port = inet->inet_sport;
 	}
 	if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 		sin->sin6_scope_id = sk->sk_bound_dev_if;
@@ -655,8 +655,8 @@ int inet6_sk_rebuild_header(struct sock *sk)
 		fl.fl6_flowlabel = np->flow_label;
 		fl.oif = sk->sk_bound_dev_if;
 		fl.mark = sk->sk_mark;
-		fl.fl_ip_dport = inet->dport;
-		fl.fl_ip_sport = inet->sport;
+		fl.fl_ip_dport = inet->inet_dport;
+		fl.fl_ip_sport = inet->inet_sport;
 		security_sk_classify_flow(sk, &fl);
 
 		if (np->opt && np->opt->srcrt) {
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index dbfec7147aa..9f70452a69e 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -98,13 +98,14 @@ ipv4_connected:
 		if (err)
 			goto out;
 
-		ipv6_addr_set_v4mapped(inet->daddr, &np->daddr);
+		ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr);
 
 		if (ipv6_addr_any(&np->saddr))
-			ipv6_addr_set_v4mapped(inet->saddr, &np->saddr);
+			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
 
 		if (ipv6_addr_any(&np->rcv_saddr))
-			ipv6_addr_set_v4mapped(inet->rcv_saddr, &np->rcv_saddr);
+			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
+					       &np->rcv_saddr);
 
 		goto out;
 	}
@@ -133,7 +134,7 @@ ipv4_connected:
 	ipv6_addr_copy(&np->daddr, daddr);
 	np->flow_label = fl.fl6_flowlabel;
 
-	inet->dport = usin->sin6_port;
+	inet->inet_dport = usin->sin6_port;
 
 	/*
 	 *	Check for a route to destination an obtain the
@@ -145,8 +146,8 @@ ipv4_connected:
 	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 	fl.oif = sk->sk_bound_dev_if;
 	fl.mark = sk->sk_mark;
-	fl.fl_ip_dport = inet->dport;
-	fl.fl_ip_sport = inet->sport;
+	fl.fl_ip_dport = inet->inet_dport;
+	fl.fl_ip_sport = inet->inet_sport;
 
 	if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
 		fl.oif = np->mcast_oif;
@@ -188,7 +189,7 @@ ipv4_connected:
 
 	if (ipv6_addr_any(&np->rcv_saddr)) {
 		ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src);
-		inet->rcv_saddr = LOOPBACK4_IPV6;
+		inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 	}
 
 	ip6_dst_store(sk, dst,
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index a9f4a21b31e..19dceef4fcc 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -132,7 +132,7 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
 
 	sin6->sin6_family = AF_INET6;
 	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
-	sin6->sin6_port	= inet_sk(sk)->dport;
+	sin6->sin6_port	= inet_sk(sk)->inet_dport;
 	/* We do not store received flowlabel for TCP */
 	sin6->sin6_flowinfo = 0;
 	sin6->sin6_scope_id = 0;
@@ -195,8 +195,8 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
 	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
 	fl.oif = sk->sk_bound_dev_if;
 	fl.mark = sk->sk_mark;
-	fl.fl_ip_sport = inet->sport;
-	fl.fl_ip_dport = inet->dport;
+	fl.fl_ip_sport = inet->inet_sport;
+	fl.fl_ip_dport = inet->inet_dport;
 	security_sk_classify_flow(sk, &fl);
 
 	if (np->opt && np->opt->srcrt) {
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 874aed86e1a..00c6a3e6cdd 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -125,7 +125,7 @@ static int inline compute_score(struct sock *sk, struct net *net,
 {
 	int score = -1;
 
-	if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum &&
+	if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
 	    sk->sk_family == PF_INET6) {
 		const struct ipv6_pinfo *np = inet6_sk(sk);
 
@@ -214,10 +214,10 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 	const struct in6_addr *daddr = &np->rcv_saddr;
 	const struct in6_addr *saddr = &np->daddr;
 	const int dif = sk->sk_bound_dev_if;
-	const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
+	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
 	struct net *net = sock_net(sk);
 	const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
-						inet->dport);
+						inet->inet_dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
 	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
 	struct sock *sk2;
@@ -248,8 +248,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 unique:
 	/* Must record num and sport now. Otherwise we will see
 	 * in hash table socket with a funny identity. */
-	inet->num = lport;
-	inet->sport = htons(lport);
+	inet->inet_num = lport;
+	inet->inet_sport = htons(lport);
 	WARN_ON(!sk_unhashed(sk));
 	__sk_nulls_add_node_rcu(sk, &head->chain);
 	sk->sk_hash = hash;
@@ -279,7 +279,7 @@ static inline u32 inet6_sk_port_offset(const struct sock *sk)
 	const struct ipv6_pinfo *np = inet6_sk(sk);
 	return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32,
 					  np->daddr.s6_addr32,
-					  inet->dport);
+					  inet->inet_dport);
 }
 
 int inet6_hash_connect(struct inet_timewait_death_row *death_row,
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 716153941fc..85849b4f5a3 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1297,7 +1297,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 	switch (optname) {
 	case MRT6_INIT:
 		if (sk->sk_type != SOCK_RAW ||
-		    inet_sk(sk)->num != IPPROTO_ICMPV6)
+		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
 			return -EOPNOTSUPP;
 		if (optlen < sizeof(int))
 			return -EINVAL;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index dc0f7366073..39e10ac8801 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -64,7 +64,7 @@ int ip6_ra_control(struct sock *sk, int sel)
 	struct ip6_ra_chain *ra, *new_ra, **rap;
 
 	/* RA packet may be delivered ONLY to IPPROTO_RAW socket */
-	if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_RAW)
+	if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_RAW)
 		return -ENOPROTOOPT;
 
 	new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
@@ -106,7 +106,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
 	if (inet_sk(sk)->is_icsk) {
 		if (opt &&
 		    !((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
-		    inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
+		    inet_sk(sk)->inet_daddr != LOOPBACK4_IPV6) {
 			struct inet_connection_sock *icsk = inet_csk(sk);
 			icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
 			icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
@@ -234,7 +234,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 
 	case IPV6_V6ONLY:
 		if (optlen < sizeof(int) ||
-		    inet_sk(sk)->num)
+		    inet_sk(sk)->inet_num)
 			goto e_inval;
 		np->ipv6only = valbool;
 		retv = 0;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index fd737efed96..52ed7d7f9da 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -72,7 +72,7 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
 	int is_multicast = ipv6_addr_is_multicast(loc_addr);
 
 	sk_for_each_from(sk, node)
-		if (inet_sk(sk)->num == num) {
+		if (inet_sk(sk)->inet_num == num) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
 
 			if (!net_eq(sock_net(sk), net))
@@ -298,7 +298,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 			dev_put(dev);
 	}
 
-	inet->rcv_saddr = inet->saddr = v4addr;
+	inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
 	ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
 	if (!(addr_type & IPV6_ADDR_MULTICAST))
 		ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
@@ -415,14 +415,14 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
 				   skb_network_header_len(skb));
 		if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 				     &ipv6_hdr(skb)->daddr,
-				     skb->len, inet->num, skb->csum))
+				     skb->len, inet->inet_num, skb->csum))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 	if (!skb_csum_unnecessary(skb))
 		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 							 &ipv6_hdr(skb)->daddr,
 							 skb->len,
-							 inet->num, 0));
+							 inet->inet_num, 0));
 
 	if (inet->hdrincl) {
 		if (skb_checksum_complete(skb)) {
@@ -765,8 +765,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		proto = ntohs(sin6->sin6_port);
 
 		if (!proto)
-			proto = inet->num;
-		else if (proto != inet->num)
+			proto = inet->inet_num;
+		else if (proto != inet->inet_num)
 			return(-EINVAL);
 
 		if (proto > 255)
@@ -799,7 +799,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		if (sk->sk_state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
 
-		proto = inet->num;
+		proto = inet->inet_num;
 		daddr = &np->daddr;
 		fl.fl6_flowlabel = np->flow_label;
 	}
@@ -966,7 +966,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
 
 	switch (optname) {
 		case IPV6_CHECKSUM:
-			if (inet_sk(sk)->num == IPPROTO_ICMPV6 &&
+			if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 &&
 			    level == IPPROTO_IPV6) {
 				/*
 				 * RFC3542 tells that IPV6_CHECKSUM socket
@@ -1006,7 +1006,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
 			break;
 
 		case SOL_ICMPV6:
-			if (inet_sk(sk)->num != IPPROTO_ICMPV6)
+			if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
 				return -EOPNOTSUPP;
 			return rawv6_seticmpfilter(sk, level, optname, optval,
 						   optlen);
@@ -1029,7 +1029,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
 	case SOL_RAW:
 		break;
 	case SOL_ICMPV6:
-		if (inet_sk(sk)->num != IPPROTO_ICMPV6)
+		if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
 			return -EOPNOTSUPP;
 		return rawv6_seticmpfilter(sk, level, optname, optval, optlen);
 	case SOL_IPV6:
@@ -1086,7 +1086,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
 			break;
 
 		case SOL_ICMPV6:
-			if (inet_sk(sk)->num != IPPROTO_ICMPV6)
+			if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
 				return -EOPNOTSUPP;
 			return rawv6_geticmpfilter(sk, level, optname, optval,
 						   optlen);
@@ -1109,7 +1109,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
 	case SOL_RAW:
 		break;
 	case SOL_ICMPV6:
-		if (inet_sk(sk)->num != IPPROTO_ICMPV6)
+		if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
 			return -EOPNOTSUPP;
 		return rawv6_geticmpfilter(sk, level, optname, optval, optlen);
 	case SOL_IPV6:
@@ -1156,7 +1156,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
 
 static void rawv6_close(struct sock *sk, long timeout)
 {
-	if (inet_sk(sk)->num == IPPROTO_RAW)
+	if (inet_sk(sk)->inet_num == IPPROTO_RAW)
 		ip6_ra_control(sk, -1);
 	ip6mr_sk_done(sk);
 	sk_common_release(sk);
@@ -1175,7 +1175,7 @@ static int rawv6_init_sk(struct sock *sk)
 {
 	struct raw6_sock *rp = raw6_sk(sk);
 
-	switch (inet_sk(sk)->num) {
+	switch (inet_sk(sk)->inet_num) {
 	case IPPROTO_ICMPV6:
 		rp->checksum = 1;
 		rp->offset   = 2;
@@ -1225,7 +1225,7 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
 	dest  = &np->daddr;
 	src   = &np->rcv_saddr;
 	destp = 0;
-	srcp  = inet_sk(sp)->num;
+	srcp  = inet_sk(sp)->inet_num;
 	seq_printf(seq,
 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index cbe55e5d9f9..c46da533888 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -254,7 +254,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		fl.oif = sk->sk_bound_dev_if;
 		fl.mark = sk->sk_mark;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-		fl.fl_ip_sport = inet_sk(sk)->sport;
+		fl.fl_ip_sport = inet_sk(sk)->inet_sport;
 		security_req_classify_flow(req, &fl);
 		if (ip6_dst_lookup(sk, &dst, &fl))
 			goto out_free;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 45176305914..c54ec3615de 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -226,8 +226,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 #endif
 			goto failure;
 		} else {
-			ipv6_addr_set_v4mapped(inet->saddr, &np->saddr);
-			ipv6_addr_set_v4mapped(inet->rcv_saddr, &np->rcv_saddr);
+			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
+			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
+					       &np->rcv_saddr);
 		}
 
 		return err;
@@ -243,7 +244,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	fl.oif = sk->sk_bound_dev_if;
 	fl.mark = sk->sk_mark;
 	fl.fl_ip_dport = usin->sin6_port;
-	fl.fl_ip_sport = inet->sport;
+	fl.fl_ip_sport = inet->inet_sport;
 
 	if (np->opt && np->opt->srcrt) {
 		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
@@ -275,7 +276,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	/* set the source address */
 	ipv6_addr_copy(&np->saddr, saddr);
-	inet->rcv_saddr = LOOPBACK4_IPV6;
+	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 
 	sk->sk_gso_type = SKB_GSO_TCPV6;
 	__ip6_dst_store(sk, dst, NULL, NULL);
@@ -287,7 +288,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 
-	inet->dport = usin->sin6_port;
+	inet->inet_dport = usin->sin6_port;
 
 	tcp_set_state(sk, TCP_SYN_SENT);
 	err = inet6_hash_connect(&tcp_death_row, sk);
@@ -297,8 +298,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	if (!tp->write_seq)
 		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
 							     np->daddr.s6_addr32,
-							     inet->sport,
-							     inet->dport);
+							     inet->inet_sport,
+							     inet->inet_dport);
 
 	err = tcp_connect(sk);
 	if (err)
@@ -310,7 +311,7 @@ late_failure:
 	tcp_set_state(sk, TCP_CLOSE);
 	__sk_dst_reset(sk);
 failure:
-	inet->dport = 0;
+	inet->inet_dport = 0;
 	sk->sk_route_caps = 0;
 	return err;
 }
@@ -383,8 +384,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 			fl.oif = sk->sk_bound_dev_if;
 			fl.mark = sk->sk_mark;
-			fl.fl_ip_dport = inet->dport;
-			fl.fl_ip_sport = inet->sport;
+			fl.fl_ip_dport = inet->inet_dport;
+			fl.fl_ip_sport = inet->inet_sport;
 			security_skb_classify_flow(skb, &fl);
 
 			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
@@ -1291,9 +1292,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-		ipv6_addr_set_v4mapped(newinet->daddr, &newnp->daddr);
+		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
 
-		ipv6_addr_set_v4mapped(newinet->saddr, &newnp->saddr);
+		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
 
 		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
 
@@ -1431,7 +1432,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
 	tcp_initialize_rcv_mss(newsk);
 
-	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
+	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
+	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Copy over the MD5 key from the original socket */
@@ -1931,8 +1933,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 
 	dest  = &np->daddr;
 	src   = &np->rcv_saddr;
-	destp = ntohs(inet->dport);
-	srcp  = ntohs(inet->sport);
+	destp = ntohs(inet->inet_dport);
+	srcp  = ntohs(inet->inet_sport);
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
 		timer_active	= 1;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b86425b7ea2..829d300a6f3 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -53,7 +53,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 {
 	const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
 	const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
-	__be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
+	__be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr;
 	__be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
 	int sk_ipv6only = ipv6_only_sock(sk);
 	int sk2_ipv6only = inet_v6_ipv6only(sk2);
@@ -63,8 +63,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 	/* if both are mapped, treat as IPv4 */
 	if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
 		return (!sk2_ipv6only &&
-			(!sk_rcv_saddr || !sk2_rcv_saddr ||
-			  sk_rcv_saddr == sk2_rcv_saddr));
+			(!sk1_rcv_saddr || !sk2_rcv_saddr ||
+			  sk1_rcv_saddr == sk2_rcv_saddr));
 
 	if (addr_type2 == IPV6_ADDR_ANY &&
 	    !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
@@ -100,8 +100,8 @@ static inline int compute_score(struct sock *sk, struct net *net,
 		struct inet_sock *inet = inet_sk(sk);
 
 		score = 0;
-		if (inet->dport) {
-			if (inet->dport != sport)
+		if (inet->inet_dport) {
+			if (inet->inet_dport != sport)
 				return -1;
 			score++;
 		}
@@ -417,8 +417,8 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
 
 		if (s->sk_hash == num && s->sk_family == PF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(s);
-			if (inet->dport) {
-				if (inet->dport != rmt_port)
+			if (inet->inet_dport) {
+				if (inet->inet_dport != rmt_port)
 					continue;
 			}
 			if (!ipv6_addr_any(&np->daddr) &&
@@ -792,7 +792,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		if (ipv6_addr_v4mapped(daddr)) {
 			struct sockaddr_in sin;
 			sin.sin_family = AF_INET;
-			sin.sin_port = sin6 ? sin6->sin6_port : inet->dport;
+			sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport;
 			sin.sin_addr.s_addr = daddr->s6_addr32[3];
 			msg->msg_name = &sin;
 			msg->msg_namelen = sizeof(sin);
@@ -865,7 +865,7 @@ do_udp_sendmsg:
 		if (sk->sk_state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
 
-		fl.fl_ip_dport = inet->dport;
+		fl.fl_ip_dport = inet->inet_dport;
 		daddr = &np->daddr;
 		fl.fl6_flowlabel = np->flow_label;
 		connected = 1;
@@ -911,7 +911,7 @@ do_udp_sendmsg:
 		fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
 	if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
 		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
-	fl.fl_ip_sport = inet->sport;
+	fl.fl_ip_sport = inet->inet_sport;
 
 	/* merge ip6_build_xmit from ip6_output */
 	if (opt && opt->srcrt) {
@@ -1192,8 +1192,8 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket
 
 	dest  = &np->daddr;
 	src   = &np->rcv_saddr;
-	destp = ntohs(inet->dport);
-	srcp  = ntohs(inet->sport);
+	destp = ntohs(inet->inet_dport);
+	srcp  = ntohs(inet->inet_sport);
 	seq_printf(seq,
 		   "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
-- 
cgit v1.2.3


From 8edf19c2fe028563fc6ea9cb1995b8ee4172d4b6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 15 Oct 2009 00:12:40 +0000
Subject: net: sk_drops consolidation part 2

- skb_kill_datagram() can increment sk->sk_drops itself, not callers.

- UDP on IPV4 & IPV6 dropped frames (because of bad checksum or policy checks) increment sk_drops

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/raw.c | 1 -
 net/ipv6/udp.c | 4 +++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 52ed7d7f9da..cb834ab7f07 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -517,7 +517,6 @@ csum_copy_err:
 	   as some normal condition.
 	 */
 	err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
-	atomic_inc(&sk->sk_drops);
 	goto out;
 }
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 829d300a6f3..d3b59d73f50 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -390,11 +390,13 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 		if (rc == -ENOMEM)
 			UDP6_INC_STATS_BH(sock_net(sk),
 					UDP_MIB_RCVBUFERRORS, is_udplite);
-		goto drop;
+		goto drop_no_sk_drops_inc;
 	}
 
 	return 0;
 drop:
+	atomic_inc(&sk->sk_drops);
+drop_no_sk_drops_inc:
 	UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
 	kfree_skb(skb);
 	return -1;
-- 
cgit v1.2.3


From 8631e9bdfea189b2e5efe3b03825cc24ebb6cfb7 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 7 Oct 2009 22:49:01 +0000
Subject: ah6: convert to ahash

This patch converts ah6 to the new ahash interface.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ah6.c | 352 ++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 272 insertions(+), 80 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index c1589e2f1dc..0f526f8ea51 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -24,18 +24,92 @@
  * 	This file is derived from net/ipv4/ah.c.
  */
 
+#include <crypto/hash.h>
 #include <linux/module.h>
 #include <net/ip.h>
 #include <net/ah.h>
 #include <linux/crypto.h>
 #include <linux/pfkeyv2.h>
-#include <linux/spinlock.h>
 #include <linux/string.h>
+#include <linux/scatterlist.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
 #include <net/protocol.h>
 #include <net/xfrm.h>
 
+#define IPV6HDR_BASELEN 8
+
+struct tmp_ext {
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+		struct in6_addr saddr;
+#endif
+		struct in6_addr daddr;
+		char hdrs[0];
+};
+
+struct ah_skb_cb {
+	struct xfrm_skb_cb xfrm;
+	void *tmp;
+};
+
+#define AH_SKB_CB(__skb) ((struct ah_skb_cb *)&((__skb)->cb[0]))
+
+static void *ah_alloc_tmp(struct crypto_ahash *ahash, int nfrags,
+			  unsigned int size)
+{
+	unsigned int len;
+
+	len = size + crypto_ahash_digestsize(ahash) +
+	      (crypto_ahash_alignmask(ahash) &
+	       ~(crypto_tfm_ctx_alignment() - 1));
+
+	len = ALIGN(len, crypto_tfm_ctx_alignment());
+
+	len += sizeof(struct ahash_request) + crypto_ahash_reqsize(ahash);
+	len = ALIGN(len, __alignof__(struct scatterlist));
+
+	len += sizeof(struct scatterlist) * nfrags;
+
+	return kmalloc(len, GFP_ATOMIC);
+}
+
+static inline struct tmp_ext *ah_tmp_ext(void *base)
+{
+	return base + IPV6HDR_BASELEN;
+}
+
+static inline u8 *ah_tmp_auth(u8 *tmp, unsigned int offset)
+{
+	return tmp + offset;
+}
+
+static inline u8 *ah_tmp_icv(struct crypto_ahash *ahash, void *tmp,
+			     unsigned int offset)
+{
+	return PTR_ALIGN((u8 *)tmp + offset, crypto_ahash_alignmask(ahash) + 1);
+}
+
+static inline struct ahash_request *ah_tmp_req(struct crypto_ahash *ahash,
+					       u8 *icv)
+{
+	struct ahash_request *req;
+
+	req = (void *)PTR_ALIGN(icv + crypto_ahash_digestsize(ahash),
+				crypto_tfm_ctx_alignment());
+
+	ahash_request_set_tfm(req, ahash);
+
+	return req;
+}
+
+static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash,
+					     struct ahash_request *req)
+{
+	return (void *)ALIGN((unsigned long)(req + 1) +
+			     crypto_ahash_reqsize(ahash),
+			     __alignof__(struct scatterlist));
+}
+
 static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
 {
 	u8 *opt = (u8 *)opthdr;
@@ -218,24 +292,85 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
 	return 0;
 }
 
+static void ah6_output_done(struct crypto_async_request *base, int err)
+{
+	int extlen;
+	u8 *iph_base;
+	u8 *icv;
+	struct sk_buff *skb = base->data;
+	struct xfrm_state *x = skb_dst(skb)->xfrm;
+	struct ah_data *ahp = x->data;
+	struct ipv6hdr *top_iph = ipv6_hdr(skb);
+	struct ip_auth_hdr *ah = ip_auth_hdr(skb);
+	struct tmp_ext *iph_ext;
+
+	extlen = skb_network_header_len(skb) - sizeof(struct ipv6hdr);
+	if (extlen)
+		extlen += sizeof(*iph_ext);
+
+	iph_base = AH_SKB_CB(skb)->tmp;
+	iph_ext = ah_tmp_ext(iph_base);
+	icv = ah_tmp_icv(ahp->ahash, iph_ext, extlen);
+
+	memcpy(ah->auth_data, icv, ahp->icv_trunc_len);
+	memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
+
+	if (extlen) {
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+		memcpy(&top_iph->saddr, iph_ext, extlen);
+#else
+		memcpy(&top_iph->daddr, iph_ext, extlen);
+#endif
+	}
+
+	err = ah->nexthdr;
+
+	kfree(AH_SKB_CB(skb)->tmp);
+	xfrm_output_resume(skb, err);
+}
+
 static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
+	int nfrags;
 	int extlen;
+	u8 *iph_base;
+	u8 *icv;
+	u8 nexthdr;
+	struct sk_buff *trailer;
+	struct crypto_ahash *ahash;
+	struct ahash_request *req;
+	struct scatterlist *sg;
 	struct ipv6hdr *top_iph;
 	struct ip_auth_hdr *ah;
 	struct ah_data *ahp;
-	u8 nexthdr;
-	char tmp_base[8];
-	struct {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
-		struct in6_addr saddr;
-#endif
-		struct in6_addr daddr;
-		char hdrs[0];
-	} *tmp_ext;
+	struct tmp_ext *iph_ext;
+
+	ahp = x->data;
+	ahash = ahp->ahash;
+
+	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
+		goto out;
+	nfrags = err;
 
 	skb_push(skb, -skb_network_offset(skb));
+	extlen = skb_network_header_len(skb) - sizeof(struct ipv6hdr);
+	if (extlen)
+		extlen += sizeof(*iph_ext);
+
+	err = -ENOMEM;
+	iph_base = ah_alloc_tmp(ahash, nfrags, IPV6HDR_BASELEN + extlen);
+	if (!iph_base)
+		goto out;
+
+	iph_ext = ah_tmp_ext(iph_base);
+	icv = ah_tmp_icv(ahash, iph_ext, extlen);
+	req = ah_tmp_req(ahash, icv);
+	sg = ah_req_sg(ahash, req);
+
+	ah = ip_auth_hdr(skb);
+	memset(ah->auth_data, 0, ahp->icv_trunc_len);
+
 	top_iph = ipv6_hdr(skb);
 	top_iph->payload_len = htons(skb->len - sizeof(*top_iph));
 
@@ -245,31 +380,22 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	/* When there are no extension headers, we only need to save the first
 	 * 8 bytes of the base IP header.
 	 */
-	memcpy(tmp_base, top_iph, sizeof(tmp_base));
+	memcpy(iph_base, top_iph, IPV6HDR_BASELEN);
 
-	tmp_ext = NULL;
-	extlen = skb_transport_offset(skb) - sizeof(struct ipv6hdr);
 	if (extlen) {
-		extlen += sizeof(*tmp_ext);
-		tmp_ext = kmalloc(extlen, GFP_ATOMIC);
-		if (!tmp_ext) {
-			err = -ENOMEM;
-			goto error;
-		}
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
-		memcpy(tmp_ext, &top_iph->saddr, extlen);
+		memcpy(iph_ext, &top_iph->saddr, extlen);
 #else
-		memcpy(tmp_ext, &top_iph->daddr, extlen);
+		memcpy(iph_ext, &top_iph->daddr, extlen);
 #endif
 		err = ipv6_clear_mutable_options(top_iph,
-						 extlen - sizeof(*tmp_ext) +
+						 extlen - sizeof(*iph_ext) +
 						 sizeof(*top_iph),
 						 XFRM_POLICY_OUT);
 		if (err)
-			goto error_free_iph;
+			goto out_free;
 	}
 
-	ah = ip_auth_hdr(skb);
 	ah->nexthdr = nexthdr;
 
 	top_iph->priority    = 0;
@@ -278,36 +404,80 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph->flow_lbl[2] = 0;
 	top_iph->hop_limit   = 0;
 
-	ahp = x->data;
 	ah->hdrlen  = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
 
 	ah->reserved = 0;
 	ah->spi = x->id.spi;
 	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output);
 
-	spin_lock_bh(&x->lock);
-	err = ah_mac_digest(ahp, skb, ah->auth_data);
-	memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len);
-	spin_unlock_bh(&x->lock);
+	sg_init_table(sg, nfrags);
+	skb_to_sgvec(skb, sg, 0, skb->len);
 
-	if (err)
-		goto error_free_iph;
+	ahash_request_set_crypt(req, sg, icv, skb->len);
+	ahash_request_set_callback(req, 0, ah6_output_done, skb);
+
+	AH_SKB_CB(skb)->tmp = iph_base;
 
-	memcpy(top_iph, tmp_base, sizeof(tmp_base));
-	if (tmp_ext) {
+	err = crypto_ahash_digest(req);
+	if (err) {
+		if (err == -EINPROGRESS)
+			goto out;
+
+		if (err == -EBUSY)
+			err = NET_XMIT_DROP;
+		goto out_free;
+	}
+
+	memcpy(ah->auth_data, icv, ahp->icv_trunc_len);
+	memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
+
+	if (extlen) {
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
-		memcpy(&top_iph->saddr, tmp_ext, extlen);
+		memcpy(&top_iph->saddr, iph_ext, extlen);
 #else
-		memcpy(&top_iph->daddr, tmp_ext, extlen);
+		memcpy(&top_iph->daddr, iph_ext, extlen);
 #endif
-error_free_iph:
-		kfree(tmp_ext);
 	}
 
-error:
+out_free:
+	kfree(iph_base);
+out:
 	return err;
 }
 
+static void ah6_input_done(struct crypto_async_request *base, int err)
+{
+	u8 *auth_data;
+	u8 *icv;
+	u8 *work_iph;
+	struct sk_buff *skb = base->data;
+	struct xfrm_state *x = xfrm_input_state(skb);
+	struct ah_data *ahp = x->data;
+	struct ip_auth_hdr *ah = ip_auth_hdr(skb);
+	int hdr_len = skb_network_header_len(skb);
+	int ah_hlen = (ah->hdrlen + 2) << 2;
+
+	work_iph = AH_SKB_CB(skb)->tmp;
+	auth_data = ah_tmp_auth(work_iph, hdr_len);
+	icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len);
+
+	err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0;
+	if (err)
+		goto out;
+
+	skb->network_header += ah_hlen;
+	memcpy(skb_network_header(skb), work_iph, hdr_len);
+	__skb_pull(skb, ah_hlen + hdr_len);
+	skb_set_transport_header(skb, -hdr_len);
+
+	err = ah->nexthdr;
+out:
+	kfree(AH_SKB_CB(skb)->tmp);
+	xfrm_input_resume(skb, err);
+}
+
+
+
 static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	/*
@@ -325,14 +495,21 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	 * There is offset of AH before IPv6 header after the process.
 	 */
 
+	u8 *auth_data;
+	u8 *icv;
+	u8 *work_iph;
+	struct sk_buff *trailer;
+	struct crypto_ahash *ahash;
+	struct ahash_request *req;
+	struct scatterlist *sg;
 	struct ip_auth_hdr *ah;
 	struct ipv6hdr *ip6h;
 	struct ah_data *ahp;
-	unsigned char *tmp_hdr = NULL;
 	u16 hdr_len;
 	u16 ah_hlen;
 	int nexthdr;
-	int err = -EINVAL;
+	int nfrags;
+	int err = -ENOMEM;
 
 	if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
 		goto out;
@@ -345,9 +522,11 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb->ip_summed = CHECKSUM_NONE;
 
-	hdr_len = skb->data - skb_network_header(skb);
+	hdr_len = skb_network_header_len(skb);
 	ah = (struct ip_auth_hdr *)skb->data;
 	ahp = x->data;
+	ahash = ahp->ahash;
+
 	nexthdr = ah->nexthdr;
 	ah_hlen = (ah->hdrlen + 2) << 2;
 
@@ -358,48 +537,67 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (!pskb_may_pull(skb, ah_hlen))
 		goto out;
 
-	tmp_hdr = kmemdup(skb_network_header(skb), hdr_len, GFP_ATOMIC);
-	if (!tmp_hdr)
-		goto out;
 	ip6h = ipv6_hdr(skb);
+
+	skb_push(skb, hdr_len);
+
+	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
+		goto out;
+	nfrags = err;
+
+	work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len);
+	if (!work_iph)
+		goto out;
+
+	auth_data = ah_tmp_auth(work_iph, hdr_len);
+	icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
+	req = ah_tmp_req(ahash, icv);
+	sg = ah_req_sg(ahash, req);
+
+	memcpy(work_iph, ip6h, hdr_len);
+	memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
+	memset(ah->auth_data, 0, ahp->icv_trunc_len);
+
 	if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN))
-		goto free_out;
+		goto out_free;
+
 	ip6h->priority    = 0;
 	ip6h->flow_lbl[0] = 0;
 	ip6h->flow_lbl[1] = 0;
 	ip6h->flow_lbl[2] = 0;
 	ip6h->hop_limit   = 0;
 
-	spin_lock(&x->lock);
-	{
-		u8 auth_data[MAX_AH_AUTH_LEN];
+	sg_init_table(sg, nfrags);
+	skb_to_sgvec(skb, sg, 0, skb->len);
 
-		memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
-		memset(ah->auth_data, 0, ahp->icv_trunc_len);
-		skb_push(skb, hdr_len);
-		err = ah_mac_digest(ahp, skb, ah->auth_data);
-		if (err)
-			goto unlock;
-		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len))
-			err = -EBADMSG;
+	ahash_request_set_crypt(req, sg, icv, skb->len);
+	ahash_request_set_callback(req, 0, ah6_input_done, skb);
+
+	AH_SKB_CB(skb)->tmp = work_iph;
+
+	err = crypto_ahash_digest(req);
+	if (err) {
+		if (err == -EINPROGRESS)
+			goto out;
+
+		if (err == -EBUSY)
+			err = NET_XMIT_DROP;
+		goto out_free;
 	}
-unlock:
-	spin_unlock(&x->lock);
 
+	err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0;
 	if (err)
-		goto free_out;
+		goto out_free;
 
 	skb->network_header += ah_hlen;
-	memcpy(skb_network_header(skb), tmp_hdr, hdr_len);
+	memcpy(skb_network_header(skb), work_iph, hdr_len);
 	skb->transport_header = skb->network_header;
 	__skb_pull(skb, ah_hlen + hdr_len);
 
-	kfree(tmp_hdr);
+	err = nexthdr;
 
-	return nexthdr;
-
-free_out:
-	kfree(tmp_hdr);
+out_free:
+	kfree(work_iph);
 out:
 	return err;
 }
@@ -430,7 +628,7 @@ static int ah6_init_state(struct xfrm_state *x)
 {
 	struct ah_data *ahp = NULL;
 	struct xfrm_algo_desc *aalg_desc;
-	struct crypto_hash *tfm;
+	struct crypto_ahash *ahash;
 
 	if (!x->aalg)
 		goto error;
@@ -442,12 +640,12 @@ static int ah6_init_state(struct xfrm_state *x)
 	if (ahp == NULL)
 		return -ENOMEM;
 
-	tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(tfm))
+	ahash = crypto_alloc_ahash(x->aalg->alg_name, 0, 0);
+	if (IS_ERR(ahash))
 		goto error;
 
-	ahp->tfm = tfm;
-	if (crypto_hash_setkey(tfm, x->aalg->alg_key,
+	ahp->ahash = ahash;
+	if (crypto_ahash_setkey(ahash, x->aalg->alg_key,
 			       (x->aalg->alg_key_len + 7) / 8))
 		goto error;
 
@@ -461,9 +659,9 @@ static int ah6_init_state(struct xfrm_state *x)
 	BUG_ON(!aalg_desc);
 
 	if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
-	    crypto_hash_digestsize(tfm)) {
+	    crypto_ahash_digestsize(ahash)) {
 		printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
-		       x->aalg->alg_name, crypto_hash_digestsize(tfm),
+		       x->aalg->alg_name, crypto_ahash_digestsize(ahash),
 		       aalg_desc->uinfo.auth.icv_fullbits/8);
 		goto error;
 	}
@@ -473,10 +671,6 @@ static int ah6_init_state(struct xfrm_state *x)
 
 	BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
 
-	ahp->work_icv = kmalloc(ahp->icv_full_len, GFP_KERNEL);
-	if (!ahp->work_icv)
-		goto error;
-
 	x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) +
 					  ahp->icv_trunc_len);
 	switch (x->props.mode) {
@@ -495,8 +689,7 @@ static int ah6_init_state(struct xfrm_state *x)
 
 error:
 	if (ahp) {
-		kfree(ahp->work_icv);
-		crypto_free_hash(ahp->tfm);
+		crypto_free_ahash(ahp->ahash);
 		kfree(ahp);
 	}
 	return -EINVAL;
@@ -509,8 +702,7 @@ static void ah6_destroy(struct xfrm_state *x)
 	if (!ahp)
 		return;
 
-	kfree(ahp->work_icv);
-	crypto_free_hash(ahp->tfm);
+	crypto_free_ahash(ahp->ahash);
 	kfree(ahp);
 }
 
-- 
cgit v1.2.3


From 0eae750e6019a93643063924209c1daf9cb9b4a7 Mon Sep 17 00:00:00 2001
From: John Dykstra <john.dykstra1@gmail.com>
Date: Mon, 19 Oct 2009 21:53:53 -0700
Subject: IP: Cleanups

Use symbols instead of magic constants while checking PMTU discovery
setsockopt.

Remove redundant test in ip_rt_frag_needed() (done by caller).

Signed-off-by: John Dykstra <john.dykstra1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ipv6_sockglue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 39e10ac8801..68566de4bcc 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -662,7 +662,7 @@ done:
 	case IPV6_MTU_DISCOVER:
 		if (optlen < sizeof(int))
 			goto e_inval;
-		if (val<0 || val>3)
+		if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
 			goto e_inval;
 		np->pmtudisc = val;
 		retv = 0;
-- 
cgit v1.2.3


From f04c8276248d3dd3e15a9a72f9711ba5e4069049 Mon Sep 17 00:00:00 2001
From: Krishna Kumar <krkumar2@in.ibm.com>
Date: Mon, 19 Oct 2009 23:46:32 +0000
Subject: net: IPv6 changes

IPv6: Reset sk_tx_queue_mapping when dst_cache is reset. Use existing
macro to do the work.

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/inet6_connection_sock.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 19dceef4fcc..3516e6fe2e5 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -168,8 +168,7 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
 	if (dst) {
 		struct rt6_info *rt = (struct rt6_info *)dst;
 		if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) {
-			sk->sk_dst_cache = NULL;
-			dst_release(dst);
+			__sk_dst_reset(sk);
 			dst = NULL;
 		}
 	}
-- 
cgit v1.2.3


From ef9a9d1183b36fbf3de327f44596533b770c3005 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 23 Oct 2009 17:51:26 +0000
Subject: ipv6 sit: RCU conversion phase I

SIT tunnels use one rwlock to protect their prl entries.

This first patch adds RCU locking for prl management,
with standard call_rcu() calls.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 73 ++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 50 insertions(+), 23 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 510d31f3cb9..8cdcc2ad048 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -240,15 +240,22 @@ failed:
 	return NULL;
 }
 
+static DEFINE_SPINLOCK(ipip6_prl_lock);
+
+#define for_each_prl_rcu(start)			\
+	for (prl = rcu_dereference(start);	\
+	     prl;				\
+	     prl = rcu_dereference(prl->next))
+
 static struct ip_tunnel_prl_entry *
 __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
 {
-	struct ip_tunnel_prl_entry *p = (struct ip_tunnel_prl_entry *)NULL;
+	struct ip_tunnel_prl_entry *prl;
 
-	for (p = t->prl; p; p = p->next)
-		if (p->addr == addr)
+	for_each_prl_rcu(t->prl)
+		if (prl->addr == addr)
 			break;
-	return p;
+	return prl;
 
 }
 
@@ -273,7 +280,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
 		kcalloc(cmax, sizeof(*kp), GFP_KERNEL) :
 		NULL;
 
-	read_lock(&ipip6_lock);
+	rcu_read_lock();
 
 	ca = t->prl_count < cmax ? t->prl_count : cmax;
 
@@ -291,7 +298,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
 	}
 
 	c = 0;
-	for (prl = t->prl; prl; prl = prl->next) {
+	for_each_prl_rcu(t->prl) {
 		if (c >= cmax)
 			break;
 		if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr)
@@ -303,7 +310,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
 			break;
 	}
 out:
-	read_unlock(&ipip6_lock);
+	rcu_read_unlock();
 
 	len = sizeof(*kp) * c;
 	ret = 0;
@@ -324,12 +331,14 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
 	if (a->addr == htonl(INADDR_ANY))
 		return -EINVAL;
 
-	write_lock(&ipip6_lock);
+	spin_lock(&ipip6_prl_lock);
 
 	for (p = t->prl; p; p = p->next) {
 		if (p->addr == a->addr) {
-			if (chg)
-				goto update;
+			if (chg) {
+				p->flags = a->flags;
+				goto out;
+			}
 			err = -EEXIST;
 			goto out;
 		}
@@ -346,46 +355,63 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
 		goto out;
 	}
 
+	INIT_RCU_HEAD(&p->rcu_head);
 	p->next = t->prl;
-	t->prl = p;
-	t->prl_count++;
-update:
 	p->addr = a->addr;
 	p->flags = a->flags;
+	t->prl_count++;
+	rcu_assign_pointer(t->prl, p);
 out:
-	write_unlock(&ipip6_lock);
+	spin_unlock(&ipip6_prl_lock);
 	return err;
 }
 
+static void prl_entry_destroy_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct ip_tunnel_prl_entry, rcu_head));
+}
+
+static void prl_list_destroy_rcu(struct rcu_head *head)
+{
+	struct ip_tunnel_prl_entry *p, *n;
+
+	p = container_of(head, struct ip_tunnel_prl_entry, rcu_head);
+	do {
+		n = p->next;
+		kfree(p);
+		p = n;
+	} while (p);
+}
+
 static int
 ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
 {
 	struct ip_tunnel_prl_entry *x, **p;
 	int err = 0;
 
-	write_lock(&ipip6_lock);
+	spin_lock(&ipip6_prl_lock);
 
 	if (a && a->addr != htonl(INADDR_ANY)) {
 		for (p = &t->prl; *p; p = &(*p)->next) {
 			if ((*p)->addr == a->addr) {
 				x = *p;
 				*p = x->next;
-				kfree(x);
+				call_rcu(&x->rcu_head, prl_entry_destroy_rcu);
 				t->prl_count--;
 				goto out;
 			}
 		}
 		err = -ENXIO;
 	} else {
-		while (t->prl) {
+		if (t->prl) {
+			t->prl_count = 0;
 			x = t->prl;
-			t->prl = t->prl->next;
-			kfree(x);
-			t->prl_count--;
+			call_rcu(&x->rcu_head, prl_list_destroy_rcu);
+			t->prl = NULL;
 		}
 	}
 out:
-	write_unlock(&ipip6_lock);
+	spin_unlock(&ipip6_prl_lock);
 	return err;
 }
 
@@ -395,7 +421,7 @@ isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t)
 	struct ip_tunnel_prl_entry *p;
 	int ok = 1;
 
-	read_lock(&ipip6_lock);
+	rcu_read_lock();
 	p = __ipip6_tunnel_locate_prl(t, iph->saddr);
 	if (p) {
 		if (p->flags & PRL_DEFAULT)
@@ -411,7 +437,7 @@ isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t)
 		else
 			ok = 0;
 	}
-	read_unlock(&ipip6_lock);
+	rcu_read_unlock();
 	return ok;
 }
 
@@ -1192,6 +1218,7 @@ static void __exit sit_cleanup(void)
 	xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
 
 	unregister_pernet_gen_device(sit_net_id, &sit_net_ops);
+	rcu_barrier(); /* Wait for completion of call_rcu()'s */
 }
 
 static int __init sit_init(void)
-- 
cgit v1.2.3


From 4543c10de267bdd4f9acdb7708456909ed7eed3c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 23 Oct 2009 17:52:14 +0000
Subject: ipv6 sit: RCU conversion phase II

SIT tunnels use one rwlock to protect their hash tables.

This locking scheme can be converted to RCU for free, since netdevice
already must wait for a RCU grace period at dismantle time.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 45 +++++++++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 18 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8cdcc2ad048..b6b16264b30 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -77,8 +77,17 @@ struct sit_net {
 	struct net_device *fb_tunnel_dev;
 };
 
-static DEFINE_RWLOCK(ipip6_lock);
+/*
+ * Locking : hash tables are protected by RCU and a spinlock
+ */
+static DEFINE_SPINLOCK(ipip6_lock);
+
+#define for_each_ip_tunnel_rcu(start) \
+	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 
+/*
+ * Must be invoked with rcu_read_lock
+ */
 static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
 		struct net_device *dev, __be32 remote, __be32 local)
 {
@@ -87,26 +96,26 @@ static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
 	struct ip_tunnel *t;
 	struct sit_net *sitn = net_generic(net, sit_net_id);
 
-	for (t = sitn->tunnels_r_l[h0^h1]; t; t = t->next) {
+	for_each_ip_tunnel_rcu(sitn->tunnels_r_l[h0 ^ h1]) {
 		if (local == t->parms.iph.saddr &&
 		    remote == t->parms.iph.daddr &&
 		    (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
 		    (t->dev->flags & IFF_UP))
 			return t;
 	}
-	for (t = sitn->tunnels_r[h0]; t; t = t->next) {
+	for_each_ip_tunnel_rcu(sitn->tunnels_r[h0]) {
 		if (remote == t->parms.iph.daddr &&
 		    (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
 		    (t->dev->flags & IFF_UP))
 			return t;
 	}
-	for (t = sitn->tunnels_l[h1]; t; t = t->next) {
+	for_each_ip_tunnel_rcu(sitn->tunnels_l[h1]) {
 		if (local == t->parms.iph.saddr &&
 		    (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
 		    (t->dev->flags & IFF_UP))
 			return t;
 	}
-	t = sitn->tunnels_wc[0];
+	t = rcu_dereference(sitn->tunnels_wc[0]);
 	if ((t != NULL) && (t->dev->flags & IFF_UP))
 		return t;
 	return NULL;
@@ -143,9 +152,9 @@ static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
 
 	for (tp = ipip6_bucket(sitn, t); *tp; tp = &(*tp)->next) {
 		if (t == *tp) {
-			write_lock_bh(&ipip6_lock);
+			spin_lock_bh(&ipip6_lock);
 			*tp = t->next;
-			write_unlock_bh(&ipip6_lock);
+			spin_unlock_bh(&ipip6_lock);
 			break;
 		}
 	}
@@ -155,10 +164,10 @@ static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
 {
 	struct ip_tunnel **tp = ipip6_bucket(sitn, t);
 
+	spin_lock_bh(&ipip6_lock);
 	t->next = *tp;
-	write_lock_bh(&ipip6_lock);
-	*tp = t;
-	write_unlock_bh(&ipip6_lock);
+	rcu_assign_pointer(*tp, t);
+	spin_unlock_bh(&ipip6_lock);
 }
 
 static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
@@ -447,9 +456,9 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
 	struct sit_net *sitn = net_generic(net, sit_net_id);
 
 	if (dev == sitn->fb_tunnel_dev) {
-		write_lock_bh(&ipip6_lock);
+		spin_lock_bh(&ipip6_lock);
 		sitn->tunnels_wc[0] = NULL;
-		write_unlock_bh(&ipip6_lock);
+		spin_unlock_bh(&ipip6_lock);
 		dev_put(dev);
 	} else {
 		ipip6_tunnel_unlink(sitn, netdev_priv(dev));
@@ -502,7 +511,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
 
 	err = -ENOENT;
 
-	read_lock(&ipip6_lock);
+	rcu_read_lock();
 	t = ipip6_tunnel_lookup(dev_net(skb->dev),
 				skb->dev,
 				iph->daddr,
@@ -520,7 +529,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
 		t->err_count = 1;
 	t->err_time = jiffies;
 out:
-	read_unlock(&ipip6_lock);
+	rcu_read_unlock();
 	return err;
 }
 
@@ -540,7 +549,7 @@ static int ipip6_rcv(struct sk_buff *skb)
 
 	iph = ip_hdr(skb);
 
-	read_lock(&ipip6_lock);
+	rcu_read_lock();
 	tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
 				     iph->saddr, iph->daddr);
 	if (tunnel != NULL) {
@@ -554,7 +563,7 @@ static int ipip6_rcv(struct sk_buff *skb)
 		if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
 		    !isatap_chksrc(skb, iph, tunnel)) {
 			tunnel->dev->stats.rx_errors++;
-			read_unlock(&ipip6_lock);
+			rcu_read_unlock();
 			kfree_skb(skb);
 			return 0;
 		}
@@ -565,12 +574,12 @@ static int ipip6_rcv(struct sk_buff *skb)
 		nf_reset(skb);
 		ipip6_ecn_decapsulate(iph, skb);
 		netif_rx(skb);
-		read_unlock(&ipip6_lock);
+		rcu_read_unlock();
 		return 0;
 	}
 
 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
-	read_unlock(&ipip6_lock);
+	rcu_read_unlock();
 out:
 	kfree_skb(skb);
 	return 0;
-- 
cgit v1.2.3


From 91cc3bb0b04ffef49bb044e06b221ea5de053e91 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 23 Oct 2009 18:19:19 +0000
Subject: xfrm6_tunnel: RCU conversion

xfrm6_tunnels use one rwlock to protect their hash tables.

Plain and straightforward conversion to RCU locking to permit better SMP
performance.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/xfrm6_tunnel.c | 47 ++++++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 19 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 81a95c00e50..438831d3359 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -23,7 +23,7 @@
  */
 #include <linux/module.h>
 #include <linux/xfrm.h>
-#include <linux/list.h>
+#include <linux/rculist.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
 #include <net/ipv6.h>
@@ -36,14 +36,15 @@
  * per xfrm_address_t.
  */
 struct xfrm6_tunnel_spi {
-	struct hlist_node list_byaddr;
-	struct hlist_node list_byspi;
-	xfrm_address_t addr;
-	u32 spi;
-	atomic_t refcnt;
+	struct hlist_node	list_byaddr;
+	struct hlist_node	list_byspi;
+	xfrm_address_t		addr;
+	u32			spi;
+	atomic_t		refcnt;
+	struct rcu_head		rcu_head;
 };
 
-static DEFINE_RWLOCK(xfrm6_tunnel_spi_lock);
+static DEFINE_SPINLOCK(xfrm6_tunnel_spi_lock);
 
 static u32 xfrm6_tunnel_spi;
 
@@ -107,6 +108,7 @@ static void xfrm6_tunnel_spi_fini(void)
 		if (!hlist_empty(&xfrm6_tunnel_spi_byspi[i]))
 			return;
 	}
+	rcu_barrier();
 	kmem_cache_destroy(xfrm6_tunnel_spi_kmem);
 	xfrm6_tunnel_spi_kmem = NULL;
 }
@@ -116,7 +118,7 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
 	struct xfrm6_tunnel_spi *x6spi;
 	struct hlist_node *pos;
 
-	hlist_for_each_entry(x6spi, pos,
+	hlist_for_each_entry_rcu(x6spi, pos,
 			     &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
 			     list_byaddr) {
 		if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0)
@@ -131,10 +133,10 @@ __be32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
 	struct xfrm6_tunnel_spi *x6spi;
 	u32 spi;
 
-	read_lock_bh(&xfrm6_tunnel_spi_lock);
+	rcu_read_lock_bh();
 	x6spi = __xfrm6_tunnel_spi_lookup(saddr);
 	spi = x6spi ? x6spi->spi : 0;
-	read_unlock_bh(&xfrm6_tunnel_spi_lock);
+	rcu_read_unlock_bh();
 	return htonl(spi);
 }
 
@@ -185,14 +187,15 @@ alloc_spi:
 	if (!x6spi)
 		goto out;
 
+	INIT_RCU_HEAD(&x6spi->rcu_head);
 	memcpy(&x6spi->addr, saddr, sizeof(x6spi->addr));
 	x6spi->spi = spi;
 	atomic_set(&x6spi->refcnt, 1);
 
-	hlist_add_head(&x6spi->list_byspi, &xfrm6_tunnel_spi_byspi[index]);
+	hlist_add_head_rcu(&x6spi->list_byspi, &xfrm6_tunnel_spi_byspi[index]);
 
 	index = xfrm6_tunnel_spi_hash_byaddr(saddr);
-	hlist_add_head(&x6spi->list_byaddr, &xfrm6_tunnel_spi_byaddr[index]);
+	hlist_add_head_rcu(&x6spi->list_byaddr, &xfrm6_tunnel_spi_byaddr[index]);
 out:
 	return spi;
 }
@@ -202,26 +205,32 @@ __be32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr)
 	struct xfrm6_tunnel_spi *x6spi;
 	u32 spi;
 
-	write_lock_bh(&xfrm6_tunnel_spi_lock);
+	spin_lock_bh(&xfrm6_tunnel_spi_lock);
 	x6spi = __xfrm6_tunnel_spi_lookup(saddr);
 	if (x6spi) {
 		atomic_inc(&x6spi->refcnt);
 		spi = x6spi->spi;
 	} else
 		spi = __xfrm6_tunnel_alloc_spi(saddr);
-	write_unlock_bh(&xfrm6_tunnel_spi_lock);
+	spin_unlock_bh(&xfrm6_tunnel_spi_lock);
 
 	return htonl(spi);
 }
 
 EXPORT_SYMBOL(xfrm6_tunnel_alloc_spi);
 
+static void x6spi_destroy_rcu(struct rcu_head *head)
+{
+	kmem_cache_free(xfrm6_tunnel_spi_kmem,
+			container_of(head, struct xfrm6_tunnel_spi, rcu_head));
+}
+
 void xfrm6_tunnel_free_spi(xfrm_address_t *saddr)
 {
 	struct xfrm6_tunnel_spi *x6spi;
 	struct hlist_node *pos, *n;
 
-	write_lock_bh(&xfrm6_tunnel_spi_lock);
+	spin_lock_bh(&xfrm6_tunnel_spi_lock);
 
 	hlist_for_each_entry_safe(x6spi, pos, n,
 				  &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
@@ -229,14 +238,14 @@ void xfrm6_tunnel_free_spi(xfrm_address_t *saddr)
 	{
 		if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) {
 			if (atomic_dec_and_test(&x6spi->refcnt)) {
-				hlist_del(&x6spi->list_byaddr);
-				hlist_del(&x6spi->list_byspi);
-				kmem_cache_free(xfrm6_tunnel_spi_kmem, x6spi);
+				hlist_del_rcu(&x6spi->list_byaddr);
+				hlist_del_rcu(&x6spi->list_byspi);
+				call_rcu(&x6spi->rcu_head, x6spi_destroy_rcu);
 				break;
 			}
 		}
 	}
-	write_unlock_bh(&xfrm6_tunnel_spi_lock);
+	spin_unlock_bh(&xfrm6_tunnel_spi_lock);
 }
 
 EXPORT_SYMBOL(xfrm6_tunnel_free_spi);
-- 
cgit v1.2.3


From 2922bc8aedfcd41ca6171cfe1a79ff111ad72019 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 23 Oct 2009 06:34:34 +0000
Subject: ip6tnl: convert hash tables locking to RCU

ip6_tunnels use one rwlock to protect their hash tables.

This locking scheme can be converted to RCU for free, since netdevice
already must wait for a RCU grace period at dismantle time.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 44 +++++++++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 19 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index c595bbe1ed9..670c291d256 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -88,8 +88,10 @@ struct ip6_tnl_net {
 	struct ip6_tnl **tnls[2];
 };
 
-/* lock for the tunnel lists */
-static DEFINE_RWLOCK(ip6_tnl_lock);
+/*
+ * Locking : hash tables are protected by RCU and a spinlock
+ */
+static DEFINE_SPINLOCK(ip6_tnl_lock);
 
 static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
 {
@@ -130,6 +132,9 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
  *   else %NULL
  **/
 
+#define for_each_ip6_tunnel_rcu(start) \
+	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
+
 static struct ip6_tnl *
 ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
 {
@@ -138,13 +143,14 @@ ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
 	struct ip6_tnl *t;
 	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 
-	for (t = ip6n->tnls_r_l[h0 ^ h1]; t; t = t->next) {
+	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[h0 ^ h1]) {
 		if (ipv6_addr_equal(local, &t->parms.laddr) &&
 		    ipv6_addr_equal(remote, &t->parms.raddr) &&
 		    (t->dev->flags & IFF_UP))
 			return t;
 	}
-	if ((t = ip6n->tnls_wc[0]) != NULL && (t->dev->flags & IFF_UP))
+	t = rcu_dereference(ip6n->tnls_wc[0]);
+	if (t && (t->dev->flags & IFF_UP))
 		return t;
 
 	return NULL;
@@ -186,10 +192,10 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
 {
 	struct ip6_tnl **tp = ip6_tnl_bucket(ip6n, &t->parms);
 
+	spin_lock_bh(&ip6_tnl_lock);
 	t->next = *tp;
-	write_lock_bh(&ip6_tnl_lock);
-	*tp = t;
-	write_unlock_bh(&ip6_tnl_lock);
+	rcu_assign_pointer(*tp, t);
+	spin_unlock_bh(&ip6_tnl_lock);
 }
 
 /**
@@ -204,9 +210,9 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
 
 	for (tp = ip6_tnl_bucket(ip6n, &t->parms); *tp; tp = &(*tp)->next) {
 		if (t == *tp) {
-			write_lock_bh(&ip6_tnl_lock);
+			spin_lock_bh(&ip6_tnl_lock);
 			*tp = t->next;
-			write_unlock_bh(&ip6_tnl_lock);
+			spin_unlock_bh(&ip6_tnl_lock);
 			break;
 		}
 	}
@@ -313,9 +319,9 @@ ip6_tnl_dev_uninit(struct net_device *dev)
 	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 
 	if (dev == ip6n->fb_tnl_dev) {
-		write_lock_bh(&ip6_tnl_lock);
+		spin_lock_bh(&ip6_tnl_lock);
 		ip6n->tnls_wc[0] = NULL;
-		write_unlock_bh(&ip6_tnl_lock);
+		spin_unlock_bh(&ip6_tnl_lock);
 	} else {
 		ip6_tnl_unlink(ip6n, t);
 	}
@@ -409,7 +415,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
 	   in trouble since we might need the source address for further
 	   processing of the error. */
 
-	read_lock(&ip6_tnl_lock);
+	rcu_read_lock();
 	if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr,
 					&ipv6h->saddr)) == NULL)
 		goto out;
@@ -482,7 +488,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
 	*msg = rel_msg;
 
 out:
-	read_unlock(&ip6_tnl_lock);
+	rcu_read_unlock();
 	return err;
 }
 
@@ -693,23 +699,23 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
 	struct ip6_tnl *t;
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 
-	read_lock(&ip6_tnl_lock);
+	rcu_read_lock();
 
 	if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
 					&ipv6h->daddr)) != NULL) {
 		if (t->parms.proto != ipproto && t->parms.proto != 0) {
-			read_unlock(&ip6_tnl_lock);
+			rcu_read_unlock();
 			goto discard;
 		}
 
 		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-			read_unlock(&ip6_tnl_lock);
+			rcu_read_unlock();
 			goto discard;
 		}
 
 		if (!ip6_tnl_rcv_ctl(t)) {
 			t->dev->stats.rx_dropped++;
-			read_unlock(&ip6_tnl_lock);
+			rcu_read_unlock();
 			goto discard;
 		}
 		secpath_reset(skb);
@@ -727,10 +733,10 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
 		t->dev->stats.rx_packets++;
 		t->dev->stats.rx_bytes += skb->len;
 		netif_rx(skb);
-		read_unlock(&ip6_tnl_lock);
+		rcu_read_unlock();
 		return 0;
 	}
-	read_unlock(&ip6_tnl_lock);
+	rcu_read_unlock();
 	return 1;
 
 discard:
-- 
cgit v1.2.3


From cf4432f550a0fe4e08e7cd522568cfbae754582c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 28 Oct 2009 05:16:51 +0000
Subject: ip6tnl: Optimize multiple unregistration

Speedup module unloading by factorizing synchronize_rcu() calls

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 670c291d256..6c1b5c98e81 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1393,14 +1393,19 @@ static void ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
 {
 	int h;
 	struct ip6_tnl *t;
+	LIST_HEAD(list);
 
 	for (h = 0; h < HASH_SIZE; h++) {
-		while ((t = ip6n->tnls_r_l[h]) != NULL)
-			unregister_netdevice(t->dev);
+		t = ip6n->tnls_r_l[h];
+		while (t != NULL) {
+			unregister_netdevice_queue(t->dev, &list);
+			t = t->next;
+		}
 	}
 
 	t = ip6n->tnls_wc[0];
-	unregister_netdevice(t->dev);
+	unregister_netdevice_queue(t->dev, &list);
+	unregister_netdevice_many(&list);
 }
 
 static int ip6_tnl_init_net(struct net *net)
-- 
cgit v1.2.3


From 62808f91237181dbac74c2b5be4fa92adfbbbe40 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 28 Oct 2009 04:37:43 +0000
Subject: ipv6 sit: Optimize multiple unregistration

Speedup module unloading by factorizing synchronize_rcu() calls

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index b6b16264b30..2362a3397e9 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1145,16 +1145,19 @@ static struct xfrm_tunnel sit_handler = {
 	.priority	=	1,
 };
 
-static void sit_destroy_tunnels(struct sit_net *sitn)
+static void sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head)
 {
 	int prio;
 
 	for (prio = 1; prio < 4; prio++) {
 		int h;
 		for (h = 0; h < HASH_SIZE; h++) {
-			struct ip_tunnel *t;
-			while ((t = sitn->tunnels[prio][h]) != NULL)
-				unregister_netdevice(t->dev);
+			struct ip_tunnel *t = sitn->tunnels[prio][h];
+
+			while (t != NULL) {
+				unregister_netdevice_queue(t->dev, head);
+				t = t->next;
+			}
 		}
 	}
 }
@@ -1208,11 +1211,13 @@ err_alloc:
 static void sit_exit_net(struct net *net)
 {
 	struct sit_net *sitn;
+	LIST_HEAD(list);
 
 	sitn = net_generic(net, sit_net_id);
 	rtnl_lock();
-	sit_destroy_tunnels(sitn);
-	unregister_netdevice(sitn->fb_tunnel_dev);
+	sit_destroy_tunnels(sitn, &list);
+	unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
+	unregister_netdevice_many(&list);
 	rtnl_unlock();
 	kfree(sitn);
 }
-- 
cgit v1.2.3


From c871e664ea39363c2a1011ec2dc4bc172dc396a0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 28 Oct 2009 04:48:11 +0000
Subject: ip6mr: Optimize multiple unregistration

Speedup module unloading by factorizing synchronize_rcu() calls

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6mr.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 85849b4f5a3..52e0f74fdfe 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -477,7 +477,7 @@ failure:
  *	Delete a VIF entry
  */
 
-static int mif6_delete(struct net *net, int vifi)
+static int mif6_delete(struct net *net, int vifi, struct list_head *head)
 {
 	struct mif_device *v;
 	struct net_device *dev;
@@ -519,7 +519,7 @@ static int mif6_delete(struct net *net, int vifi)
 		in6_dev->cnf.mc_forwarding--;
 
 	if (v->flags & MIFF_REGISTER)
-		unregister_netdevice(dev);
+		unregister_netdevice_queue(dev, head);
 
 	dev_put(dev);
 	return 0;
@@ -976,6 +976,7 @@ static int ip6mr_device_event(struct notifier_block *this,
 	struct net *net = dev_net(dev);
 	struct mif_device *v;
 	int ct;
+	LIST_HEAD(list);
 
 	if (event != NETDEV_UNREGISTER)
 		return NOTIFY_DONE;
@@ -983,8 +984,10 @@ static int ip6mr_device_event(struct notifier_block *this,
 	v = &net->ipv6.vif6_table[0];
 	for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) {
 		if (v->dev == dev)
-			mif6_delete(net, ct);
+			mif6_delete(net, ct, &list);
 	}
+	unregister_netdevice_many(&list);
+
 	return NOTIFY_DONE;
 }
 
@@ -1188,14 +1191,16 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 static void mroute_clean_tables(struct net *net)
 {
 	int i;
+	LIST_HEAD(list);
 
 	/*
 	 *	Shut down all active vif entries
 	 */
 	for (i = 0; i < net->ipv6.maxvif; i++) {
 		if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC))
-			mif6_delete(net, i);
+			mif6_delete(net, i, &list);
 	}
+	unregister_netdevice_many(&list);
 
 	/*
 	 *	Wipe the cache
@@ -1325,7 +1330,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
 			return -EFAULT;
 		rtnl_lock();
-		ret = mif6_delete(net, mifi);
+		ret = mif6_delete(net, mifi, NULL);
 		rtnl_unlock();
 		return ret;
 
-- 
cgit v1.2.3


From 022c3f7d82f0f1c68018696f2f027b87b9bb45c2 Mon Sep 17 00:00:00 2001
From: Gilad Ben-Yossef <gilad@codefidence.com>
Date: Wed, 28 Oct 2009 04:15:22 +0000
Subject: Allow tcp_parse_options to consult dst entry

We need tcp_parse_options to be aware of dst_entry to
take into account per dst_entry TCP options settings

Signed-off-by: Gilad Ben-Yossef <gilad@codefidence.com>
Sigend-off-by: Ori Finkelman <ori@comsleep.com>
Sigend-off-by: Yony Amit <yony@comsleep.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/syncookies.c | 28 +++++++++++++++-------------
 net/ipv6/tcp_ipv6.c   |  3 ++-
 2 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index c46da533888..612fc53e0bb 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -184,13 +184,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
 
-	/* check for timestamp cookie support */
-	memset(&tcp_opt, 0, sizeof(tcp_opt));
-	tcp_parse_options(skb, &tcp_opt, 0);
-
-	if (tcp_opt.saw_tstamp)
-		cookie_check_timestamp(&tcp_opt);
-
 	ret = NULL;
 	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
 	if (!req)
@@ -224,12 +217,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	req->expires = 0UL;
 	req->retrans = 0;
 	ireq->ecn_ok		= 0;
-	ireq->snd_wscale	= tcp_opt.snd_wscale;
-	ireq->rcv_wscale	= tcp_opt.rcv_wscale;
-	ireq->sack_ok		= tcp_opt.sack_ok;
-	ireq->wscale_ok		= tcp_opt.wscale_ok;
-	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
-	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
 	treq->rcv_isn = ntohl(th->seq) - 1;
 	treq->snt_isn = cookie;
 
@@ -265,6 +252,21 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 			goto out_free;
 	}
 
+	/* check for timestamp cookie support */
+	memset(&tcp_opt, 0, sizeof(tcp_opt));
+	tcp_parse_options(skb, &tcp_opt, 0, dst);
+
+	if (tcp_opt.saw_tstamp)
+		cookie_check_timestamp(&tcp_opt);
+
+	req->ts_recent          = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
+
+	ireq->snd_wscale        = tcp_opt.snd_wscale;
+	ireq->rcv_wscale        = tcp_opt.rcv_wscale;
+	ireq->sack_ok           = tcp_opt.sack_ok;
+	ireq->wscale_ok         = tcp_opt.wscale_ok;
+	ireq->tstamp_ok         = tcp_opt.saw_tstamp;
+
 	req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
 				  &req->rcv_wnd, &req->window_clamp,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c54ec3615de..34925f089e0 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1167,6 +1167,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct request_sock *req = NULL;
 	__u32 isn = TCP_SKB_CB(skb)->when;
+	struct dst_entry *dst = __sk_dst_get(sk);
 #ifdef CONFIG_SYN_COOKIES
 	int want_cookie = 0;
 #else
@@ -1205,7 +1206,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 	tmp_opt.user_mss = tp->rx_opt.user_mss;
 
-	tcp_parse_options(skb, &tmp_opt, 0);
+	tcp_parse_options(skb, &tmp_opt, 0, dst);
 
 	if (want_cookie && !tmp_opt.saw_tstamp)
 		tcp_clear_options(&tmp_opt);
-- 
cgit v1.2.3


From f1a28eab20076542322fcab3efa016834bd732f2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 2 Nov 2009 11:21:37 +0100
Subject: ip6tnl: less dev_put() calls

Using dev_get_by_index_rcu() in ip6_tnl_rcv_ctl() & ip6_tnl_xmit_ctl()
avoids touching device refcount.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6c1b5c98e81..1d614113a4b 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -658,6 +658,7 @@ static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
 		IP6_ECN_set_ce(ipv6_hdr(skb));
 }
 
+/* called with rcu_read_lock() */
 static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
 {
 	struct ip6_tnl_parm *p = &t->parms;
@@ -668,15 +669,13 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
 		struct net_device *ldev = NULL;
 
 		if (p->link)
-			ldev = dev_get_by_index(net, p->link);
+			ldev = dev_get_by_index_rcu(net, p->link);
 
 		if ((ipv6_addr_is_multicast(&p->laddr) ||
 		     likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) &&
 		    likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0)))
 			ret = 1;
 
-		if (ldev)
-			dev_put(ldev);
 	}
 	return ret;
 }
@@ -804,8 +803,9 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
 	if (p->flags & IP6_TNL_F_CAP_XMIT) {
 		struct net_device *ldev = NULL;
 
+		rcu_read_lock();
 		if (p->link)
-			ldev = dev_get_by_index(net, p->link);
+			ldev = dev_get_by_index_rcu(net, p->link);
 
 		if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0)))
 			printk(KERN_WARNING
@@ -819,8 +819,7 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
 			       p->name);
 		else
 			ret = 1;
-		if (ldev)
-			dev_put(ldev);
+		rcu_read_unlock();
 	}
 	return ret;
 }
-- 
cgit v1.2.3


From 16ba5e8e7c01d2da87ff1d17e83545f164665b5c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 2 Nov 2009 12:10:39 +0100
Subject: ipv6: no more dev_put() in inet6_bind()

Avoids touching device refcount in inet6_bind(), thanks to RCU

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b6d05881867..9105b25defe 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -314,6 +314,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		if (addr_type != IPV6_ADDR_ANY) {
 			struct net_device *dev = NULL;
 
+			rcu_read_lock();
 			if (addr_type & IPV6_ADDR_LINKLOCAL) {
 				if (addr_len >= sizeof(struct sockaddr_in6) &&
 				    addr->sin6_scope_id) {
@@ -326,12 +327,12 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 				/* Binding to link-local address requires an interface */
 				if (!sk->sk_bound_dev_if) {
 					err = -EINVAL;
-					goto out;
+					goto out_unlock;
 				}
-				dev = dev_get_by_index(net, sk->sk_bound_dev_if);
+				dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
 				if (!dev) {
 					err = -ENODEV;
-					goto out;
+					goto out_unlock;
 				}
 			}
 
@@ -342,14 +343,11 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 			if (!(addr_type & IPV6_ADDR_MULTICAST))	{
 				if (!ipv6_chk_addr(net, &addr->sin6_addr,
 						   dev, 0)) {
-					if (dev)
-						dev_put(dev);
 					err = -EADDRNOTAVAIL;
-					goto out;
+					goto out_unlock;
 				}
 			}
-			if (dev)
-				dev_put(dev);
+			rcu_read_unlock();
 		}
 	}
 
@@ -381,6 +379,9 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 out:
 	release_sock(sk);
 	return err;
+out_unlock:
+	rcu_read_unlock();
+	goto out;
 }
 
 EXPORT_SYMBOL(inet6_bind);
-- 
cgit v1.2.3


From 536b2e92f1b7a86e177aeced097e4c051eeebe7d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 2 Nov 2009 12:21:06 +0100
Subject: ipv6: no more dev_put() in datagram_send_ctl()

Avoids touching device refcount in datagram_send_ctl(), thanks to RCU

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/datagram.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 9f70452a69e..e6f9cdf780f 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -537,12 +537,17 @@ int datagram_send_ctl(struct net *net,
 
 			addr_type = __ipv6_addr_type(&src_info->ipi6_addr);
 
+			rcu_read_lock();
 			if (fl->oif) {
-				dev = dev_get_by_index(net, fl->oif);
-				if (!dev)
+				dev = dev_get_by_index_rcu(net, fl->oif);
+				if (!dev) {
+					rcu_read_unlock();
 					return -ENODEV;
-			} else if (addr_type & IPV6_ADDR_LINKLOCAL)
+				}
+			} else if (addr_type & IPV6_ADDR_LINKLOCAL) {
+				rcu_read_unlock();
 				return -EINVAL;
+			}
 
 			if (addr_type != IPV6_ADDR_ANY) {
 				int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
@@ -553,8 +558,7 @@ int datagram_send_ctl(struct net *net,
 					ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr);
 			}
 
-			if (dev)
-				dev_put(dev);
+			rcu_read_unlock();
 
 			if (err)
 				goto exit_f;
-- 
cgit v1.2.3


From c6d14c84566d6b70ad9dc1618db0dec87cca9300 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 4 Nov 2009 05:43:23 -0800
Subject: net: Introduce for_each_netdev_rcu() iterator

Adds RCU management to the list of netdevices.

Convert some for_each_netdev() users to RCU version, if
it can avoid read_lock-ing dev_base_lock

Ie:
	read_lock(&dev_base_loack);
	for_each_netdev(net, dev)
		some_action();
	read_unlock(&dev_base_lock);

becomes :

	rcu_read_lock();
	for_each_netdev_rcu(net, dev)
		some_action();
	rcu_read_unlock();


Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 20 +++++++-------------
 net/ipv6/anycast.c  |  6 +++---
 2 files changed, 10 insertions(+), 16 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 91864840961..024bba30de2 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -481,9 +481,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
 	struct net_device *dev;
 	struct inet6_dev *idev;
 
-	read_lock(&dev_base_lock);
-	for_each_netdev(net, dev) {
-		rcu_read_lock();
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev) {
 		idev = __in6_dev_get(dev);
 		if (idev) {
 			int changed = (!idev->cnf.forwarding) ^ (!newf);
@@ -491,9 +490,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
 			if (changed)
 				dev_forward_change(idev);
 		}
-		rcu_read_unlock();
 	}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
 static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
@@ -1137,10 +1135,9 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
 	hiscore->rule = -1;
 	hiscore->ifa = NULL;
 
-	read_lock(&dev_base_lock);
 	rcu_read_lock();
 
-	for_each_netdev(net, dev) {
+	for_each_netdev_rcu(net, dev) {
 		struct inet6_dev *idev;
 
 		/* Candidate Source Address (section 4)
@@ -1235,7 +1232,6 @@ try_nextdev:
 		read_unlock_bh(&idev->lock);
 	}
 	rcu_read_unlock();
-	read_unlock(&dev_base_lock);
 
 	if (!hiscore->ifa)
 		return -EADDRNOTAVAIL;
@@ -4052,9 +4048,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
 	struct net_device *dev;
 	struct inet6_dev *idev;
 
-	read_lock(&dev_base_lock);
-	for_each_netdev(net, dev) {
-		rcu_read_lock();
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev) {
 		idev = __in6_dev_get(dev);
 		if (idev) {
 			int changed = (!idev->cnf.disable_ipv6) ^ (!newf);
@@ -4062,9 +4057,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
 			if (changed)
 				dev_disable_change(idev);
 		}
-		rcu_read_unlock();
 	}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
 static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old)
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 1ae58bec1de..2f00ca83f04 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -404,13 +404,13 @@ int ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 
 	if (dev)
 		return ipv6_chk_acast_dev(dev, addr);
-	read_lock(&dev_base_lock);
-	for_each_netdev(net, dev)
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev)
 		if (ipv6_chk_acast_dev(dev, addr)) {
 			found = 1;
 			break;
 		}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 	return found;
 }
 
-- 
cgit v1.2.3


From 9481721be10a1bfd1ee9ccf507eecd7f37caa5ec Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 4 Nov 2009 21:14:31 +0100
Subject: netfilter: remove synchronize_net() calls in ip_queue/ip6_queue

nf_unregister_queue_handlers() already does a synchronize_rcu()
call, we dont need to do it again in callers.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/ip6_queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 1cf3f0c6a95..a82016fd5d6 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -625,7 +625,7 @@ cleanup_netlink_notifier:
 static void __exit ip6_queue_fini(void)
 {
 	nf_unregister_queue_handlers(&nfqh);
-	synchronize_net();
+
 	ipq_flush(NULL, 0);
 
 #ifdef CONFIG_SYSCTL
-- 
cgit v1.2.3


From 13f18aa05f5abe135f47b6417537ae2b2fedc18c Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 5 Nov 2009 20:44:37 -0800
Subject: net: drop capability from protocol definitions

struct can_proto had a capability field which wasn't ever used.  It is
dropped entirely.

struct inet_protosw had a capability field which can be more clearly
expressed in the code by just checking if sock->type = SOCK_RAW.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 2 +-
 net/ipv6/raw.c      | 1 -
 net/ipv6/tcp_ipv6.c | 1 -
 net/ipv6/udp.c      | 1 -
 net/ipv6/udplite.c  | 1 -
 5 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 9105b25defe..1b388935659 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -158,7 +158,7 @@ lookup_protocol:
 	}
 
 	err = -EPERM;
-	if (answer->capability > 0 && !capable(answer->capability))
+	if (sock->type == SOCK_RAW && !capable(CAP_NET_RAW))
 		goto out_rcu_unlock;
 
 	sock->ops = answer->ops;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index cb834ab7f07..818ef21ba76 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1336,7 +1336,6 @@ static struct inet_protosw rawv6_protosw = {
 	.protocol	= IPPROTO_IP,	/* wild card */
 	.prot		= &rawv6_prot,
 	.ops		= &inet6_sockraw_ops,
-	.capability	= CAP_NET_RAW,
 	.no_check	= UDP_CSUM_DEFAULT,
 	.flags		= INET_PROTOSW_REUSE,
 };
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 34925f089e0..696a22f034e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2112,7 +2112,6 @@ static struct inet_protosw tcpv6_protosw = {
 	.protocol	=	IPPROTO_TCP,
 	.prot		=	&tcpv6_prot,
 	.ops		=	&inet6_stream_ops,
-	.capability	=	-1,
 	.no_check	=	0,
 	.flags		=	INET_PROTOSW_PERMANENT |
 				INET_PROTOSW_ICSK,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d3b59d73f50..bbe2f3e445f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1286,7 +1286,6 @@ static struct inet_protosw udpv6_protosw = {
 	.protocol =  IPPROTO_UDP,
 	.prot =      &udpv6_prot,
 	.ops =       &inet6_dgram_ops,
-	.capability =-1,
 	.no_check =  UDP_CSUM_DEFAULT,
 	.flags =     INET_PROTOSW_PERMANENT,
 };
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index d737a27ee01..6ea6938919e 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -62,7 +62,6 @@ static struct inet_protosw udplite6_protosw = {
 	.protocol	= IPPROTO_UDPLITE,
 	.prot		= &udplitev6_prot,
 	.ops		= &inet6_dgram_ops,
-	.capability	= -1,
 	.no_check	= 0,
 	.flags		= INET_PROTOSW_PERMANENT,
 };
-- 
cgit v1.2.3


From 3f378b684453f2a028eda463ce383370545d9cc9 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 5 Nov 2009 22:18:14 -0800
Subject: net: pass kern to net_proto_family create function

The generic __sock_create function has a kern argument which allows the
security system to make decisions based on if a socket is being created by
the kernel or by userspace.  This patch passes that flag to the
net_proto_family specific create function, so it can do the same thing.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 1b388935659..45ed5e05ab3 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -95,7 +95,8 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
 	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
 }
 
-static int inet6_create(struct net *net, struct socket *sock, int protocol)
+static int inet6_create(struct net *net, struct socket *sock, int protocol,
+			int kern)
 {
 	struct inet_sock *inet;
 	struct ipv6_pinfo *np;
-- 
cgit v1.2.3


From c84b3268da3b85c9d8a9e504e1001a14ed829e94 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 5 Nov 2009 20:45:52 -0800
Subject: net: check kern before calling security subsystem

Before calling capable(CAP_NET_RAW) check if this operations is on behalf
of the kernel or on behalf of userspace.  Do not do the security check if
it is on behalf of the kernel.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 45ed5e05ab3..12e69d364dd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -159,7 +159,7 @@ lookup_protocol:
 	}
 
 	err = -EPERM;
-	if (sock->type == SOCK_RAW && !capable(CAP_NET_RAW))
+	if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
 		goto out_rcu_unlock;
 
 	sock->ops = answer->ops;
-- 
cgit v1.2.3


From 69df9d5993bd7dd7499ad0e98fe824147fbe5667 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 5 Nov 2009 20:59:47 -0800
Subject: ip_frag: dont touch device refcount

When sending fragmentation expiration ICMP V4/V6 messages,
we can avoid touching device refcount, thanks to RCU

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/reassembly.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index da5bd0ed83d..dce699fb267 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -208,18 +208,17 @@ static void ip6_frag_expire(unsigned long data)
 	fq_kill(fq);
 
 	net = container_of(fq->q.net, struct net, ipv6.frags);
-	dev = dev_get_by_index(net, fq->iif);
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, fq->iif);
 	if (!dev)
-		goto out;
+		goto out_rcu_unlock;
 
-	rcu_read_lock();
 	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
 	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
-	rcu_read_unlock();
 
 	/* Don't send error if the first segment did not arrive. */
 	if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments)
-		goto out;
+		goto out_rcu_unlock;
 
 	/*
 	   But use as source device on which LAST ARRIVED
@@ -228,9 +227,9 @@ static void ip6_frag_expire(unsigned long data)
 	 */
 	fq->q.fragments->dev = dev;
 	icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev);
+out_rcu_unlock:
+	rcu_read_unlock();
 out:
-	if (dev)
-		dev_put(dev);
 	spin_unlock(&fq->q.lock);
 	fq_put(fq);
 }
-- 
cgit v1.2.3


From dee5817e88ac8195e5938d6671f434a071e35698 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 6 Nov 2009 17:04:00 +0100
Subject: netfilter: remove unneccessary checks from netlink notifiers

The NETLINK_URELEASE notifier is only invoked for bound sockets, so
there is no need to check ->pid again.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/ip6_queue.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index a82016fd5d6..47a3623e711 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -499,8 +499,7 @@ ipq_rcv_nl_event(struct notifier_block *this,
 {
 	struct netlink_notify *n = ptr;
 
-	if (event == NETLINK_URELEASE &&
-	    n->protocol == NETLINK_IP6_FW && n->pid) {
+	if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) {
 		write_lock_bh(&queue_lock);
 		if ((n->net == &init_net) && (n->pid == peer_pid))
 			__ipq_reset();
-- 
cgit v1.2.3


From fd5c00276127661f12e0315e3bbec41a1c0be376 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 6 Nov 2009 07:01:17 +0000
Subject: ipv6: avoid dev_hold()/dev_put() in rawv6_bind()

Using RCU helps not touching device refcount in rawv6_bind()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/raw.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 818ef21ba76..926ce8eeffa 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -249,7 +249,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	/* Raw sockets are IPv6 only */
 	if (addr_type == IPV6_ADDR_MAPPED)
-		return(-EADDRNOTAVAIL);
+		return -EADDRNOTAVAIL;
 
 	lock_sock(sk);
 
@@ -257,6 +257,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (sk->sk_state != TCP_CLOSE)
 		goto out;
 
+	rcu_read_lock();
 	/* Check if the address belongs to the host. */
 	if (addr_type != IPV6_ADDR_ANY) {
 		struct net_device *dev = NULL;
@@ -272,13 +273,13 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 			/* Binding to link-local address requires an interface */
 			if (!sk->sk_bound_dev_if)
-				goto out;
+				goto out_unlock;
 
-			dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if);
-			if (!dev) {
-				err = -ENODEV;
-				goto out;
-			}
+			err = -ENODEV;
+			dev = dev_get_by_index_rcu(sock_net(sk),
+						   sk->sk_bound_dev_if);
+			if (!dev)
+				goto out_unlock;
 		}
 
 		/* ipv4 addr of the socket is invalid.  Only the
@@ -289,13 +290,9 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 			err = -EADDRNOTAVAIL;
 			if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr,
 					   dev, 0)) {
-				if (dev)
-					dev_put(dev);
-				goto out;
+				goto out_unlock;
 			}
 		}
-		if (dev)
-			dev_put(dev);
 	}
 
 	inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
@@ -303,6 +300,8 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (!(addr_type & IPV6_ADDR_MULTICAST))
 		ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
 	err = 0;
+out_unlock:
+	rcu_read_unlock();
 out:
 	release_sock(sk);
 	return err;
-- 
cgit v1.2.3


From d4cada4ae1c012815f95fa507eb86a0ae9d607d7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 8 Nov 2009 10:17:30 +0000
Subject: udp: split sk_hash into two u16 hashes

Union sk_hash with two u16 hashes for udp (no extra memory taken)

One 16 bits hash on (local port) value (the previous udp 'hash')

One 16 bits hash on (local address, local port) values, initialized
but not yet used. This second hash is using jenkin hash for better
distribution.

Because the 'port' is xored later, a partial hash is performed
on local address + net_hash_mix(net)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5bc7cdbf030..1e5fadd997b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -81,8 +81,30 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 	return 0;
 }
 
+static unsigned int udp6_portaddr_hash(struct net *net,
+				       const struct in6_addr *addr6,
+				       unsigned int port)
+{
+	unsigned int hash, mix = net_hash_mix(net);
+
+	if (ipv6_addr_any(addr6))
+		hash = jhash_1word(0, mix);
+	else if (ipv6_addr_type(addr6) == IPV6_ADDR_MAPPED)
+		hash = jhash_1word(addr6->s6_addr32[3], mix);
+	else
+		hash = jhash2(addr6->s6_addr32, 4, mix);
+
+	return hash ^ port;
+}
+
+
 int udp_v6_get_port(struct sock *sk, unsigned short snum)
 {
+	/* precompute partial secondary hash */
+	udp_sk(sk)->udp_portaddr_hash =
+		udp6_portaddr_hash(sock_net(sk),
+				   &inet6_sk(sk)->rcv_saddr,
+				   0);
 	return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal);
 }
 
@@ -94,7 +116,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
 {
 	int score = -1;
 
-	if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
+	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
 			sk->sk_family == PF_INET6) {
 		struct ipv6_pinfo *np = inet6_sk(sk);
 		struct inet_sock *inet = inet_sk(sk);
@@ -415,7 +437,8 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
 		if (!net_eq(sock_net(s), net))
 			continue;
 
-		if (s->sk_hash == num && s->sk_family == PF_INET6) {
+		if (udp_sk(s)->udp_port_hash == num &&
+		    s->sk_family == PF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(s);
 			if (inet->inet_dport) {
 				if (inet->inet_dport != rmt_port)
-- 
cgit v1.2.3


From fddc17defa22d8caba1cdfb2e22b50bb4b9f35c0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 8 Nov 2009 10:18:30 +0000
Subject: ipv6: udp: optimize unicast RX path

We first locate the (local port) hash chain head
If few sockets are in this chain, we proceed with previous lookup algo.

If too many sockets are listed, we take a look at the secondary
(port, address) hash chain.

We choose the shortest chain and proceed with a RCU lookup on the elected chain.

But, if we chose (port, address) chain, and fail to find a socket on given address,
 we must try another lookup on (port, in6addr_any) chain to find sockets not bound
to a particular IP.

-> No extra cost for typical setups, where the first lookup will probabbly
be performed.

RCU lookups everywhere, we dont acquire spinlock.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 109 insertions(+), 3 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1e5fadd997b..f580cf92511 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -146,6 +146,88 @@ static inline int compute_score(struct sock *sk, struct net *net,
 	return score;
 }
 
+#define SCORE2_MAX (1 + 1 + 1)
+static inline int compute_score2(struct sock *sk, struct net *net,
+				const struct in6_addr *saddr, __be16 sport,
+				const struct in6_addr *daddr, unsigned short hnum,
+				int dif)
+{
+	int score = -1;
+
+	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
+			sk->sk_family == PF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+		struct inet_sock *inet = inet_sk(sk);
+
+		if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+			return -1;
+		score = 0;
+		if (inet->inet_dport) {
+			if (inet->inet_dport != sport)
+				return -1;
+			score++;
+		}
+		if (!ipv6_addr_any(&np->daddr)) {
+			if (!ipv6_addr_equal(&np->daddr, saddr))
+				return -1;
+			score++;
+		}
+		if (sk->sk_bound_dev_if) {
+			if (sk->sk_bound_dev_if != dif)
+				return -1;
+			score++;
+		}
+	}
+	return score;
+}
+
+#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \
+	hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node)
+
+/* called with read_rcu_lock() */
+static struct sock *udp6_lib_lookup2(struct net *net,
+		const struct in6_addr *saddr, __be16 sport,
+		const struct in6_addr *daddr, unsigned int hnum, int dif,
+		struct udp_hslot *hslot2, unsigned int slot2)
+{
+	struct sock *sk, *result;
+	struct hlist_nulls_node *node;
+	int score, badness;
+
+begin:
+	result = NULL;
+	badness = -1;
+	udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
+		score = compute_score2(sk, net, saddr, sport,
+				      daddr, hnum, dif);
+		if (score > badness) {
+			result = sk;
+			badness = score;
+			if (score == SCORE2_MAX)
+				goto exact_match;
+		}
+	}
+	/*
+	 * if the nulls value we got at the end of this lookup is
+	 * not the expected one, we must restart lookup.
+	 * We probably met an item that was moved to another chain.
+	 */
+	if (get_nulls_value(node) != slot2)
+		goto begin;
+
+	if (result) {
+exact_match:
+		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+			result = NULL;
+		else if (unlikely(compute_score2(result, net, saddr, sport,
+				  daddr, hnum, dif) < badness)) {
+			sock_put(result);
+			goto begin;
+		}
+	}
+	return result;
+}
+
 static struct sock *__udp6_lib_lookup(struct net *net,
 				      struct in6_addr *saddr, __be16 sport,
 				      struct in6_addr *daddr, __be16 dport,
@@ -154,11 +236,35 @@ static struct sock *__udp6_lib_lookup(struct net *net,
 	struct sock *sk, *result;
 	struct hlist_nulls_node *node;
 	unsigned short hnum = ntohs(dport);
-	unsigned int hash = udp_hashfn(net, hnum, udptable->mask);
-	struct udp_hslot *hslot = &udptable->hash[hash];
+	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
+	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
 	int score, badness;
 
 	rcu_read_lock();
+	if (hslot->count > 10) {
+		hash2 = udp6_portaddr_hash(net, daddr, hnum);
+		slot2 = hash2 & udptable->mask;
+		hslot2 = &udptable->hash2[slot2];
+		if (hslot->count < hslot2->count)
+			goto begin;
+
+		result = udp6_lib_lookup2(net, saddr, sport,
+					  daddr, hnum, dif,
+					  hslot2, slot2);
+		if (!result) {
+			hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
+			slot2 = hash2 & udptable->mask;
+			hslot2 = &udptable->hash2[slot2];
+			if (hslot->count < hslot2->count)
+				goto begin;
+
+			result = udp6_lib_lookup2(net, &in6addr_any, sport,
+						  daddr, hnum, dif,
+						  hslot2, slot2);
+		}
+		rcu_read_unlock();
+		return result;
+	}
 begin:
 	result = NULL;
 	badness = -1;
@@ -174,7 +280,7 @@ begin:
 	 * not the expected one, we must restart lookup.
 	 * We probably met an item that was moved to another chain.
 	 */
-	if (get_nulls_value(node) != hash)
+	if (get_nulls_value(node) != slot)
 		goto begin;
 
 	if (result) {
-- 
cgit v1.2.3


From a1ab77f97ed03f5dae66ae4c64375beffab83772 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 8 Nov 2009 10:18:52 +0000
Subject: ipv6: udp: Optimise multicast reception

IPV6 UDP multicast rx path is a bit complex and can hold a spinlock
for a long time.

Using a small (32 or 64 entries) stack of socket pointers can help
to perform expensive operations (skb_clone(), udp_queue_rcv_skb())
outside of the lock, in most cases.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 71 ++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 47 insertions(+), 24 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f580cf92511..948e823d70c 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -569,6 +569,27 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
 	return NULL;
 }
 
+static void flush_stack(struct sock **stack, unsigned int count,
+			struct sk_buff *skb, unsigned int final)
+{
+	unsigned int i;
+	struct sock *sk;
+	struct sk_buff *skb1;
+
+	for (i = 0; i < count; i++) {
+		skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
+
+		if (skb1) {
+			sk = stack[i];
+			bh_lock_sock(sk);
+			if (!sock_owned_by_user(sk))
+				udpv6_queue_rcv_skb(sk, skb1);
+			else
+				sk_add_backlog(sk, skb1);
+			bh_unlock_sock(sk);
+		}
+	}
+}
 /*
  * Note: called only from the BH handler context,
  * so we don't need to lock the hashes.
@@ -577,41 +598,43 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 		struct in6_addr *saddr, struct in6_addr *daddr,
 		struct udp_table *udptable)
 {
-	struct sock *sk, *sk2;
+	struct sock *sk, *stack[256 / sizeof(struct sock *)];
 	const struct udphdr *uh = udp_hdr(skb);
 	struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
 	int dif;
+	unsigned int i, count = 0;
 
 	spin_lock(&hslot->lock);
 	sk = sk_nulls_head(&hslot->head);
 	dif = inet6_iif(skb);
 	sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
-	if (!sk) {
-		kfree_skb(skb);
-		goto out;
-	}
-
-	sk2 = sk;
-	while ((sk2 = udp_v6_mcast_next(net, sk_nulls_next(sk2), uh->dest, daddr,
-					uh->source, saddr, dif))) {
-		struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC);
-		if (buff) {
-			bh_lock_sock(sk2);
-			if (!sock_owned_by_user(sk2))
-				udpv6_queue_rcv_skb(sk2, buff);
-			else
-				sk_add_backlog(sk2, buff);
-			bh_unlock_sock(sk2);
+	while (sk) {
+		stack[count++] = sk;
+		sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
+				       uh->source, saddr, dif);
+		if (unlikely(count == ARRAY_SIZE(stack))) {
+			if (!sk)
+				break;
+			flush_stack(stack, count, skb, ~0);
+			count = 0;
 		}
 	}
-	bh_lock_sock(sk);
-	if (!sock_owned_by_user(sk))
-		udpv6_queue_rcv_skb(sk, skb);
-	else
-		sk_add_backlog(sk, skb);
-	bh_unlock_sock(sk);
-out:
+	/*
+	 * before releasing the lock, we must take reference on sockets
+	 */
+	for (i = 0; i < count; i++)
+		sock_hold(stack[i]);
+
 	spin_unlock(&hslot->lock);
+
+	if (count) {
+		flush_stack(stack, count, skb, count - 1);
+
+		for (i = 0; i < count; i++)
+			sock_put(stack[i]);
+	} else {
+		kfree_skb(skb);
+	}
 	return 0;
 }
 
-- 
cgit v1.2.3


From f6b8f32ca71406de718391369490f6b1e81fe0bb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 8 Nov 2009 10:20:19 +0000
Subject: udp: multicast RX should increment SNMP/sk_drops counter in
 allocation failures

When skb_clone() fails, we should increment sk_drops and SNMP counters.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 948e823d70c..2915e1dad72 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -579,14 +579,20 @@ static void flush_stack(struct sock **stack, unsigned int count,
 	for (i = 0; i < count; i++) {
 		skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
 
+		sk = stack[i];
 		if (skb1) {
-			sk = stack[i];
 			bh_lock_sock(sk);
 			if (!sock_owned_by_user(sk))
 				udpv6_queue_rcv_skb(sk, skb1);
 			else
 				sk_add_backlog(sk, skb1);
 			bh_unlock_sock(sk);
+		} else {
+			atomic_inc(&sk->sk_drops);
+			UDP6_INC_STATS_BH(sock_net(sk),
+					UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk));
+			UDP6_INC_STATS_BH(sock_net(sk),
+					UDP_MIB_INERRORS, IS_UDPLITE(sk));
 		}
 	}
 }
-- 
cgit v1.2.3


From 30fff9231fad757c061285e347b33c5149c2c2e4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 9 Nov 2009 05:26:33 +0000
Subject: udp: bind() optimisation

UDP bind() can be O(N^2) in some pathological cases.

Thanks to secondary hash tables, we can make it O(N)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 2915e1dad72..f4c85b20005 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -100,12 +100,14 @@ static unsigned int udp6_portaddr_hash(struct net *net,
 
 int udp_v6_get_port(struct sock *sk, unsigned short snum)
 {
+	unsigned int hash2_nulladdr =
+		udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
+	unsigned int hash2_partial = 
+		udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0);
+
 	/* precompute partial secondary hash */
-	udp_sk(sk)->udp_portaddr_hash =
-		udp6_portaddr_hash(sock_net(sk),
-				   &inet6_sk(sk)->rcv_saddr,
-				   0);
-	return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal);
+	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
+	return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr);
 }
 
 static inline int compute_score(struct sock *sk, struct net *net,
@@ -181,8 +183,6 @@ static inline int compute_score2(struct sock *sk, struct net *net,
 	return score;
 }
 
-#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \
-	hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node)
 
 /* called with read_rcu_lock() */
 static struct sock *udp6_lib_lookup2(struct net *net,
-- 
cgit v1.2.3


From 84d2697d9649339215675551eae28ba04068dea1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 9 Nov 2009 12:11:28 +0000
Subject: ipv6: speedup inet6_dump_ifinfo()

When handling large number of netdevice, inet6_dump_ifinfo()
is very slow because it has O(N^2) complexity.

Instead of scanning one single list, we can use the 256 sub lists
of the dev_index hash table, and RCU lookups.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 45 ++++++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 17 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 024bba30de2..f9f7fd6ee1f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3823,28 +3823,39 @@ nla_put_failure:
 static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
-	int idx, err;
-	int s_idx = cb->args[0];
+	int h, s_h;
+	int idx = 0, err, s_idx;
 	struct net_device *dev;
 	struct inet6_dev *idev;
+	struct hlist_head *head;
+	struct hlist_node *node;
 
-	read_lock(&dev_base_lock);
-	idx = 0;
-	for_each_netdev(net, dev) {
-		if (idx < s_idx)
-			goto cont;
-		if ((idev = in6_dev_get(dev)) == NULL)
-			goto cont;
-		err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid,
-				cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI);
-		in6_dev_put(idev);
-		if (err <= 0)
-			break;
+	s_h = cb->args[0];
+	s_idx = cb->args[1];
+
+	rcu_read_lock();
+	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+		idx = 0;
+		head = &net->dev_index_head[h];
+		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
+			if (idx < s_idx)
+				goto cont;
+			idev = __in6_dev_get(dev);
+			if (!idev)
+				goto cont;
+			if (inet6_fill_ifinfo(skb, idev,
+					      NETLINK_CB(cb->skb).pid,
+					      cb->nlh->nlmsg_seq,
+					      RTM_NEWLINK, NLM_F_MULTI) <= 0)
+				goto out;
 cont:
-		idx++;
+			idx++;
+		}
 	}
-	read_unlock(&dev_base_lock);
-	cb->args[0] = idx;
+out:
+	rcu_read_unlock();
+	cb->args[1] = idx;
+	cb->args[0] = h;
 
 	return skb->len;
 }
-- 
cgit v1.2.3


From bcd323262a94b14b84341982b90443a76a477861 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 9 Nov 2009 07:40:17 +0000
Subject: ipv6: Allow inet6_dump_addr() to handle more than 64 addresses

Apparently, inet6_dump_addr() is not able to handle more than
64 ipv6 addresses per device. We must break from inner loops
in case skb is full, or else cursor is put at the end of list.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net/ipv6')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f9f7fd6ee1f..0ab39fedd2d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3519,6 +3519,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 							cb->nlh->nlmsg_seq,
 							RTM_NEWADDR,
 							NLM_F_MULTI);
+				if (err <= 0)
+					break;
 			}
 			break;
 		case MULTICAST_ADDR:
@@ -3532,6 +3534,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 							  cb->nlh->nlmsg_seq,
 							  RTM_GETMULTICAST,
 							  NLM_F_MULTI);
+				if (err <= 0)
+					break;
 			}
 			break;
 		case ANYCAST_ADDR:
@@ -3545,6 +3549,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 							  cb->nlh->nlmsg_seq,
 							  RTM_GETANYCAST,
 							  NLM_F_MULTI);
+				if (err <= 0)
+					break;
 			}
 			break;
 		default:
-- 
cgit v1.2.3


From 292f4f3ce4b57f17a667cb34c72bca081dcc0281 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 9 Nov 2009 08:42:01 +0000
Subject: sit: Clean up DF code by copying from IPIP

This patch rearranges the SIT DF bit handling using the new IPIP DF
code.  The only externally visible effect should be the case where
PMTU is enabled and the MTU is exactly 1280 bytes.  In this case the
previous code would send packets out with DF off while the new code
would set the DF bit.  This is inline with RFC 4213.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Thanks,
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 2362a3397e9..b6e145a673a 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -637,6 +637,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	struct iphdr  *tiph = &tunnel->parms.iph;
 	struct ipv6hdr *iph6 = ipv6_hdr(skb);
 	u8     tos = tunnel->parms.iph.tos;
+	__be16 df = tiph->frag_off;
 	struct rtable *rt;     			/* Route to the other host */
 	struct net_device *tdev;			/* Device to other host */
 	struct iphdr  *iph;			/* Our new IP header */
@@ -726,25 +727,28 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 		goto tx_error;
 	}
 
-	if (tiph->frag_off)
+	if (df) {
 		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
-	else
-		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
-	if (mtu < 68) {
-		stats->collisions++;
-		ip_rt_put(rt);
-		goto tx_error;
-	}
-	if (mtu < IPV6_MIN_MTU)
-		mtu = IPV6_MIN_MTU;
-	if (tunnel->parms.iph.daddr && skb_dst(skb))
-		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+		if (mtu < 68) {
+			stats->collisions++;
+			ip_rt_put(rt);
+			goto tx_error;
+		}
 
-	if (skb->len > mtu) {
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
-		ip_rt_put(rt);
-		goto tx_error;
+		if (mtu < IPV6_MIN_MTU) {
+			mtu = IPV6_MIN_MTU;
+			df = 0;
+		}
+
+		if (tunnel->parms.iph.daddr && skb_dst(skb))
+			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+
+		if (skb->len > mtu) {
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+			ip_rt_put(rt);
+			goto tx_error;
+		}
 	}
 
 	if (tunnel->err_count > 0) {
@@ -792,11 +796,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	iph 			=	ip_hdr(skb);
 	iph->version		=	4;
 	iph->ihl		=	sizeof(struct iphdr)>>2;
-	if (mtu > IPV6_MIN_MTU)
-		iph->frag_off	=	tiph->frag_off;
-	else
-		iph->frag_off	=	0;
-
+	iph->frag_off		=	df;
 	iph->protocol		=	IPPROTO_IPV6;
 	iph->tos		=	INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
 	iph->daddr		=	rt->rt_dst;
-- 
cgit v1.2.3


From 856540ee3116ac04a49bc06c2f30f54dd3faf7db Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Mon, 9 Nov 2009 12:05:53 +0000
Subject: IPv6: use ipv6_addr_v4mapped()

Change udp6_portaddr_hash() to use ipv6_addr_v4mapped()
inline instead of ipv6_addr_type().

Signed-off-by: Brian Haley <brian.haley@hp.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f4c85b20005..69ebdbe78c4 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -89,7 +89,7 @@ static unsigned int udp6_portaddr_hash(struct net *net,
 
 	if (ipv6_addr_any(addr6))
 		hash = jhash_1word(0, mix);
-	else if (ipv6_addr_type(addr6) == IPV6_ADDR_MAPPED)
+	else if (ipv6_addr_v4mapped(addr6))
 		hash = jhash_1word(addr6->s6_addr32[3], mix);
 	else
 		hash = jhash2(addr6->s6_addr32, 4, mix);
-- 
cgit v1.2.3


From 434a8a58d75faa7170807a7ac2fcf7f3d85a0dc3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 11 Nov 2009 18:53:00 -0800
Subject: ipv6: Remove unused var in inet6_dump_ifinfo()

Reported by Stephen Rothwell:

--------------------
Today's linux-next build (x86_64 allmodconfig) produced this warning:

net/ipv6/addrconf.c: In function 'inet6_dump_ifinfo':
net/ipv6/addrconf.c:3833: warning: unused variable 'err'

Introduced by commit 84d2697d9649339215675551eae28ba04068dea1 ("ipv6:
speedup inet6_dump_ifinfo()").
--------------------

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0ab39fedd2d..9ff8ab9a154 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3830,7 +3830,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
 	int h, s_h;
-	int idx = 0, err, s_idx;
+	int idx = 0, s_idx;
 	struct net_device *dev;
 	struct inet6_dev *idev;
 	struct hlist_head *head;
-- 
cgit v1.2.3


From bee7ca9ec03a26676ea2b1c28dc4039348eff3e1 Mon Sep 17 00:00:00 2001
From: William Allen Simpson <william.allen.simpson@gmail.com>
Date: Tue, 10 Nov 2009 09:51:18 +0000
Subject: net: TCP_MSS_DEFAULT, TCP_MSS_DESIRED

Define two symbols needed in both kernel and user space.

Remove old (somewhat incorrect) kernel variant that wasn't used in
most cases.  Default should apply to both RMSS and SMSS (RFC2581).

Replace numeric constants with defined symbols.

Stand-alone patch, originally developed for TCPCT.

Signed-off-by: William.Allen.Simpson@gmail.com
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 696a22f034e..de709091b26 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1851,7 +1851,7 @@ static int tcp_v6_init_sock(struct sock *sk)
 	 */
 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 	tp->snd_cwnd_clamp = ~0;
-	tp->mss_cache = 536;
+	tp->mss_cache = TCP_MSS_DEFAULT;
 
 	tp->reordering = sysctl_tcp_reordering;
 
-- 
cgit v1.2.3


From ce81b76a39835a721cd168e0c0bcfe7132f1f66b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 11 Nov 2009 17:34:30 +0000
Subject: ipv6: use RCU to walk list of network devices

No longer need read_lock(&dev_base_lock), use RCU instead.
We also can avoid taking references on inet6_dev structs.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/anycast.c | 29 ++++++++++++++---------------
 net/ipv6/mcast.c   | 51 +++++++++++++++++++++++----------------------------
 2 files changed, 37 insertions(+), 43 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 2f00ca83f04..f1c74c8ef9d 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -431,9 +431,9 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
 	struct net *net = seq_file_net(seq);
 
 	state->idev = NULL;
-	for_each_netdev(net, state->dev) {
+	for_each_netdev_rcu(net, state->dev) {
 		struct inet6_dev *idev;
-		idev = in6_dev_get(state->dev);
+		idev = __in6_dev_get(state->dev);
 		if (!idev)
 			continue;
 		read_lock_bh(&idev->lock);
@@ -443,7 +443,6 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
 			break;
 		}
 		read_unlock_bh(&idev->lock);
-		in6_dev_put(idev);
 	}
 	return im;
 }
@@ -454,16 +453,15 @@ static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im
 
 	im = im->aca_next;
 	while (!im) {
-		if (likely(state->idev != NULL)) {
+		if (likely(state->idev != NULL))
 			read_unlock_bh(&state->idev->lock);
-			in6_dev_put(state->idev);
-		}
-		state->dev = next_net_device(state->dev);
+
+		state->dev = next_net_device_rcu(state->dev);
 		if (!state->dev) {
 			state->idev = NULL;
 			break;
 		}
-		state->idev = in6_dev_get(state->dev);
+		state->idev = __in6_dev_get(state->dev);
 		if (!state->idev)
 			continue;
 		read_lock_bh(&state->idev->lock);
@@ -482,29 +480,30 @@ static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *ac6_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(dev_base_lock)
+	__acquires(RCU)
 {
-	read_lock(&dev_base_lock);
+	rcu_read_lock();
 	return ac6_get_idx(seq, *pos);
 }
 
 static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct ifacaddr6 *im;
-	im = ac6_get_next(seq, v);
+	struct ifacaddr6 *im = ac6_get_next(seq, v);
+
 	++*pos;
 	return im;
 }
 
 static void ac6_seq_stop(struct seq_file *seq, void *v)
-	__releases(dev_base_lock)
+	__releases(RCU)
 {
 	struct ac6_iter_state *state = ac6_seq_private(seq);
+
 	if (likely(state->idev != NULL)) {
 		read_unlock_bh(&state->idev->lock);
-		in6_dev_put(state->idev);
+		state->idev = NULL;
 	}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
 static int ac6_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index f9fcf690bd5..1f9c44442e6 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2375,9 +2375,9 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq)
 	struct net *net = seq_file_net(seq);
 
 	state->idev = NULL;
-	for_each_netdev(net, state->dev) {
+	for_each_netdev_rcu(net, state->dev) {
 		struct inet6_dev *idev;
-		idev = in6_dev_get(state->dev);
+		idev = __in6_dev_get(state->dev);
 		if (!idev)
 			continue;
 		read_lock_bh(&idev->lock);
@@ -2387,7 +2387,6 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq)
 			break;
 		}
 		read_unlock_bh(&idev->lock);
-		in6_dev_put(idev);
 	}
 	return im;
 }
@@ -2398,16 +2397,15 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr
 
 	im = im->next;
 	while (!im) {
-		if (likely(state->idev != NULL)) {
+		if (likely(state->idev != NULL))
 			read_unlock_bh(&state->idev->lock);
-			in6_dev_put(state->idev);
-		}
-		state->dev = next_net_device(state->dev);
+
+		state->dev = next_net_device_rcu(state->dev);
 		if (!state->dev) {
 			state->idev = NULL;
 			break;
 		}
-		state->idev = in6_dev_get(state->dev);
+		state->idev = __in6_dev_get(state->dev);
 		if (!state->idev)
 			continue;
 		read_lock_bh(&state->idev->lock);
@@ -2426,31 +2424,31 @@ static struct ifmcaddr6 *igmp6_mc_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(dev_base_lock)
+	__acquires(RCU)
 {
-	read_lock(&dev_base_lock);
+	rcu_read_lock();
 	return igmp6_mc_get_idx(seq, *pos);
 }
 
 static void *igmp6_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct ifmcaddr6 *im;
-	im = igmp6_mc_get_next(seq, v);
+	struct ifmcaddr6 *im = igmp6_mc_get_next(seq, v);
+
 	++*pos;
 	return im;
 }
 
 static void igmp6_mc_seq_stop(struct seq_file *seq, void *v)
-	__releases(dev_base_lock)
+	__releases(RCU)
 {
 	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+
 	if (likely(state->idev != NULL)) {
 		read_unlock_bh(&state->idev->lock);
-		in6_dev_put(state->idev);
 		state->idev = NULL;
 	}
 	state->dev = NULL;
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
 static int igmp6_mc_seq_show(struct seq_file *seq, void *v)
@@ -2507,9 +2505,9 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
 
 	state->idev = NULL;
 	state->im = NULL;
-	for_each_netdev(net, state->dev) {
+	for_each_netdev_rcu(net, state->dev) {
 		struct inet6_dev *idev;
-		idev = in6_dev_get(state->dev);
+		idev = __in6_dev_get(state->dev);
 		if (unlikely(idev == NULL))
 			continue;
 		read_lock_bh(&idev->lock);
@@ -2525,7 +2523,6 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
 			spin_unlock_bh(&im->mca_lock);
 		}
 		read_unlock_bh(&idev->lock);
-		in6_dev_put(idev);
 	}
 	return psf;
 }
@@ -2539,16 +2536,15 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s
 		spin_unlock_bh(&state->im->mca_lock);
 		state->im = state->im->next;
 		while (!state->im) {
-			if (likely(state->idev != NULL)) {
+			if (likely(state->idev != NULL))
 				read_unlock_bh(&state->idev->lock);
-				in6_dev_put(state->idev);
-			}
-			state->dev = next_net_device(state->dev);
+
+			state->dev = next_net_device_rcu(state->dev);
 			if (!state->dev) {
 				state->idev = NULL;
 				goto out;
 			}
-			state->idev = in6_dev_get(state->dev);
+			state->idev = __in6_dev_get(state->dev);
 			if (!state->idev)
 				continue;
 			read_lock_bh(&state->idev->lock);
@@ -2573,9 +2569,9 @@ static struct ip6_sf_list *igmp6_mcf_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(dev_base_lock)
+	__acquires(RCU)
 {
-	read_lock(&dev_base_lock);
+	rcu_read_lock();
 	return *pos ? igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 }
 
@@ -2591,7 +2587,7 @@ static void *igmp6_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v)
-	__releases(dev_base_lock)
+	__releases(RCU)
 {
 	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
 	if (likely(state->im != NULL)) {
@@ -2600,11 +2596,10 @@ static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v)
 	}
 	if (likely(state->idev != NULL)) {
 		read_unlock_bh(&state->idev->lock);
-		in6_dev_put(state->idev);
 		state->idev = NULL;
 	}
 	state->dev = NULL;
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
 static int igmp6_mcf_seq_show(struct seq_file *seq, void *v)
-- 
cgit v1.2.3


From 234b27c3fd58fc0e15c04dd0fbf4337fac9c2a06 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 12 Nov 2009 04:11:50 +0000
Subject: ipv6: speedup inet6_dump_addr()

When handling large number of netdevices, inet6_dump_addr()
is very slow because it has O(N^2) complexity.

Instead of scanning one single list, we can use the NETDEV_HASHENTRIES
sub lists of the dev_index hash table, and RCU lookups.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 171 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 97 insertions(+), 74 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9ff8ab9a154..522bdc77206 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3481,91 +3481,114 @@ enum addr_type_t
 	ANYCAST_ADDR,
 };
 
+/* called with rcu_read_lock() */
+static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
+			  struct netlink_callback *cb, enum addr_type_t type,
+			  int s_ip_idx, int *p_ip_idx)
+{
+	struct inet6_ifaddr *ifa;
+	struct ifmcaddr6 *ifmca;
+	struct ifacaddr6 *ifaca;
+	int err = 1;
+	int ip_idx = *p_ip_idx;
+
+	read_lock_bh(&idev->lock);
+	switch (type) {
+	case UNICAST_ADDR:
+		/* unicast address incl. temp addr */
+		for (ifa = idev->addr_list; ifa;
+		     ifa = ifa->if_next, ip_idx++) {
+			if (ip_idx < s_ip_idx)
+				continue;
+			err = inet6_fill_ifaddr(skb, ifa,
+						NETLINK_CB(cb->skb).pid,
+						cb->nlh->nlmsg_seq,
+						RTM_NEWADDR,
+						NLM_F_MULTI);
+			if (err <= 0)
+				break;
+		}
+		break;
+	case MULTICAST_ADDR:
+		/* multicast address */
+		for (ifmca = idev->mc_list; ifmca;
+		     ifmca = ifmca->next, ip_idx++) {
+			if (ip_idx < s_ip_idx)
+				continue;
+			err = inet6_fill_ifmcaddr(skb, ifmca,
+						  NETLINK_CB(cb->skb).pid,
+						  cb->nlh->nlmsg_seq,
+						  RTM_GETMULTICAST,
+						  NLM_F_MULTI);
+			if (err <= 0)
+				break;
+		}
+		break;
+	case ANYCAST_ADDR:
+		/* anycast address */
+		for (ifaca = idev->ac_list; ifaca;
+		     ifaca = ifaca->aca_next, ip_idx++) {
+			if (ip_idx < s_ip_idx)
+				continue;
+			err = inet6_fill_ifacaddr(skb, ifaca,
+						  NETLINK_CB(cb->skb).pid,
+						  cb->nlh->nlmsg_seq,
+						  RTM_GETANYCAST,
+						  NLM_F_MULTI);
+			if (err <= 0)
+				break;
+		}
+		break;
+	default:
+		break;
+	}
+	read_unlock_bh(&idev->lock);
+	*p_ip_idx = ip_idx;
+	return err;
+}
+
 static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 			   enum addr_type_t type)
 {
+	struct net *net = sock_net(skb->sk);
+	int h, s_h;
 	int idx, ip_idx;
 	int s_idx, s_ip_idx;
-	int err = 1;
 	struct net_device *dev;
-	struct inet6_dev *idev = NULL;
-	struct inet6_ifaddr *ifa;
-	struct ifmcaddr6 *ifmca;
-	struct ifacaddr6 *ifaca;
-	struct net *net = sock_net(skb->sk);
+	struct inet6_dev *idev;
+	struct hlist_head *head;
+	struct hlist_node *node;
 
-	s_idx = cb->args[0];
-	s_ip_idx = ip_idx = cb->args[1];
+	s_h = cb->args[0];
+	s_idx = idx = cb->args[1];
+	s_ip_idx = ip_idx = cb->args[2];
 
-	idx = 0;
-	for_each_netdev(net, dev) {
-		if (idx < s_idx)
-			goto cont;
-		if (idx > s_idx)
-			s_ip_idx = 0;
-		ip_idx = 0;
-		if ((idev = in6_dev_get(dev)) == NULL)
-			goto cont;
-		read_lock_bh(&idev->lock);
-		switch (type) {
-		case UNICAST_ADDR:
-			/* unicast address incl. temp addr */
-			for (ifa = idev->addr_list; ifa;
-			     ifa = ifa->if_next, ip_idx++) {
-				if (ip_idx < s_ip_idx)
-					continue;
-				err = inet6_fill_ifaddr(skb, ifa,
-							NETLINK_CB(cb->skb).pid,
-							cb->nlh->nlmsg_seq,
-							RTM_NEWADDR,
-							NLM_F_MULTI);
-				if (err <= 0)
-					break;
-			}
-			break;
-		case MULTICAST_ADDR:
-			/* multicast address */
-			for (ifmca = idev->mc_list; ifmca;
-			     ifmca = ifmca->next, ip_idx++) {
-				if (ip_idx < s_ip_idx)
-					continue;
-				err = inet6_fill_ifmcaddr(skb, ifmca,
-							  NETLINK_CB(cb->skb).pid,
-							  cb->nlh->nlmsg_seq,
-							  RTM_GETMULTICAST,
-							  NLM_F_MULTI);
-				if (err <= 0)
-					break;
-			}
-			break;
-		case ANYCAST_ADDR:
-			/* anycast address */
-			for (ifaca = idev->ac_list; ifaca;
-			     ifaca = ifaca->aca_next, ip_idx++) {
-				if (ip_idx < s_ip_idx)
-					continue;
-				err = inet6_fill_ifacaddr(skb, ifaca,
-							  NETLINK_CB(cb->skb).pid,
-							  cb->nlh->nlmsg_seq,
-							  RTM_GETANYCAST,
-							  NLM_F_MULTI);
-				if (err <= 0)
-					break;
-			}
-			break;
-		default:
-			break;
-		}
-		read_unlock_bh(&idev->lock);
-		in6_dev_put(idev);
+	rcu_read_lock();
+	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+		idx = 0;
+		head = &net->dev_index_head[h];
+		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
+			if (idx < s_idx)
+				goto cont;
+			if (idx > s_idx)
+				s_ip_idx = 0;
+			ip_idx = 0;
+			if ((idev = __in6_dev_get(dev)) == NULL)
+				goto cont;
 
-		if (err <= 0)
-			break;
+			if (in6_dump_addrs(idev, skb, cb, type,
+					   s_ip_idx, &ip_idx) <= 0)
+				goto done;
 cont:
-		idx++;
+			idx++;
+		}
 	}
-	cb->args[0] = idx;
-	cb->args[1] = ip_idx;
+done:
+	rcu_read_unlock();
+	cb->args[0] = h;
+	cb->args[1] = idx;
+	cb->args[2] = ip_idx;
+
 	return skb->len;
 }
 
-- 
cgit v1.2.3


From f99189b186f3922ede4fa33c02f6edc735b8c981 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 17 Nov 2009 10:42:49 +0000
Subject: netns: net_identifiers should be read_mostly

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 2 +-
 net/ipv6/sit.c        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 1d614113a4b..e5c0f6bb831 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -78,7 +78,7 @@ static void ip6_fb_tnl_dev_init(struct net_device *dev);
 static void ip6_tnl_dev_init(struct net_device *dev);
 static void ip6_tnl_dev_setup(struct net_device *dev);
 
-static int ip6_tnl_net_id;
+static int ip6_tnl_net_id __read_mostly;
 struct ip6_tnl_net {
 	/* the IPv6 tunnel fallback device */
 	struct net_device *fb_tnl_dev;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index b6e145a673a..d9deaa7753e 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -66,7 +66,7 @@ static void ipip6_fb_tunnel_init(struct net_device *dev);
 static void ipip6_tunnel_init(struct net_device *dev);
 static void ipip6_tunnel_setup(struct net_device *dev);
 
-static int sit_net_id;
+static int sit_net_id __read_mostly;
 struct sit_net {
 	struct ip_tunnel *tunnels_r_l[HASH_SIZE];
 	struct ip_tunnel *tunnels_r[HASH_SIZE];
-- 
cgit v1.2.3


From 3666ed1c4837fd6906da0224c5373d7a2186a193 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 23 Nov 2009 23:17:06 +0100
Subject: netfilter: net/ipv[46]/netfilter: Move && and || to end of previous
 line

Compile tested only.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/ip6_tables.c                | 42 +++++++++++------------
 net/ipv6/netfilter/ip6t_LOG.c                  |  4 +--
 net/ipv6/netfilter/ip6t_REJECT.c               |  4 +--
 net/ipv6/netfilter/ip6t_ah.c                   | 19 +++++------
 net/ipv6/netfilter/ip6t_frag.c                 | 47 ++++++++++++--------------
 net/ipv6/netfilter/ip6t_rt.c                   |  9 ++---
 net/ipv6/netfilter/ip6table_filter.c           |  4 +--
 net/ipv6/netfilter/ip6table_mangle.c           | 14 ++++----
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 12 +++----
 9 files changed, 72 insertions(+), 83 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index cc9f8ef303f..480d7f8c980 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -105,9 +105,9 @@ ip6_packet_match(const struct sk_buff *skb,
 #define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg)))
 
 	if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
-				       &ip6info->src), IP6T_INV_SRCIP)
-	    || FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk,
-					  &ip6info->dst), IP6T_INV_DSTIP)) {
+				       &ip6info->src), IP6T_INV_SRCIP) ||
+	    FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk,
+				       &ip6info->dst), IP6T_INV_DSTIP)) {
 		dprintf("Source or dest mismatch.\n");
 /*
 		dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
@@ -277,11 +277,11 @@ get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e,
 	} else if (s == e) {
 		(*rulenum)++;
 
-		if (s->target_offset == sizeof(struct ip6t_entry)
-		   && strcmp(t->target.u.kernel.target->name,
-			     IP6T_STANDARD_TARGET) == 0
-		   && t->verdict < 0
-		   && unconditional(&s->ipv6)) {
+		if (s->target_offset == sizeof(struct ip6t_entry) &&
+		    strcmp(t->target.u.kernel.target->name,
+			   IP6T_STANDARD_TARGET) == 0 &&
+		    t->verdict < 0 &&
+		    unconditional(&s->ipv6)) {
 			/* Tail of chains: STANDARD target (return/policy) */
 			*comment = *chainname == hookname
 				? comments[NF_IP6_TRACE_COMMENT_POLICY]
@@ -418,8 +418,8 @@ ip6t_do_table(struct sk_buff *skb,
 				back = get_entry(table_base, back->comefrom);
 				continue;
 			}
-			if (table_base + v != ip6t_next_entry(e)
-			    && !(e->ipv6.flags & IP6T_F_GOTO)) {
+			if (table_base + v != ip6t_next_entry(e) &&
+			    !(e->ipv6.flags & IP6T_F_GOTO)) {
 				/* Save old back ptr in next entry */
 				struct ip6t_entry *next = ip6t_next_entry(e);
 				next->comefrom = (void *)back - table_base;
@@ -505,11 +505,11 @@ mark_source_chains(struct xt_table_info *newinfo,
 			e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
 
 			/* Unconditional return/END. */
-			if ((e->target_offset == sizeof(struct ip6t_entry)
-			    && (strcmp(t->target.u.user.name,
-				       IP6T_STANDARD_TARGET) == 0)
-			    && t->verdict < 0
-			    && unconditional(&e->ipv6)) || visited) {
+			if ((e->target_offset == sizeof(struct ip6t_entry) &&
+			     (strcmp(t->target.u.user.name,
+				     IP6T_STANDARD_TARGET) == 0) &&
+			     t->verdict < 0 &&
+			     unconditional(&e->ipv6)) || visited) {
 				unsigned int oldpos, size;
 
 				if ((strcmp(t->target.u.user.name,
@@ -556,8 +556,8 @@ mark_source_chains(struct xt_table_info *newinfo,
 				int newpos = t->verdict;
 
 				if (strcmp(t->target.u.user.name,
-					   IP6T_STANDARD_TARGET) == 0
-				    && newpos >= 0) {
+					   IP6T_STANDARD_TARGET) == 0 &&
+				    newpos >= 0) {
 					if (newpos > newinfo->size -
 						sizeof(struct ip6t_entry)) {
 						duprintf("mark_source_chains: "
@@ -767,8 +767,8 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
 {
 	unsigned int h;
 
-	if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0
-	    || (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) {
+	if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 ||
+	    (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) {
 		duprintf("Bad offset %p\n", e);
 		return -EINVAL;
 	}
@@ -1584,8 +1584,8 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
 	int ret, off, h;
 
 	duprintf("check_compat_entry_size_and_hooks %p\n", e);
-	if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0
-	    || (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) {
+	if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 ||
+	    (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) {
 		duprintf("Bad offset %p, limit = %p\n", e, limit);
 		return -EINVAL;
 	}
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 7018cac4fdd..b285fdf1905 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -249,8 +249,8 @@ static void dump_packet(const struct nf_loginfo *info,
 		/* Max length: 11 "URGP=65535 " */
 		printk("URGP=%u ", ntohs(th->urg_ptr));
 
-		if ((logflags & IP6T_LOG_TCPOPT)
-		    && th->doff * 4 > sizeof(struct tcphdr)) {
+		if ((logflags & IP6T_LOG_TCPOPT) &&
+		    th->doff * 4 > sizeof(struct tcphdr)) {
 			u_int8_t _opt[60 - sizeof(struct tcphdr)];
 			const u_int8_t *op;
 			unsigned int i;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 5a7f00cd15c..8311ca31816 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -223,8 +223,8 @@ static bool reject_tg6_check(const struct xt_tgchk_param *par)
 		return false;
 	} else if (rejinfo->with == IP6T_TCP_RESET) {
 		/* Must specify that it's a TCP packet */
-		if (e->ipv6.proto != IPPROTO_TCP
-		    || (e->ipv6.invflags & XT_INV_PROTO)) {
+		if (e->ipv6.proto != IPPROTO_TCP ||
+		    (e->ipv6.invflags & XT_INV_PROTO)) {
 			printk("ip6t_REJECT: TCP_RESET illegal for non-tcp\n");
 			return false;
 		}
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 3a82f24746b..ac0b7c629d7 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -77,17 +77,14 @@ static bool ah_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 		 ahinfo->hdrres, ah->reserved,
 		 !(ahinfo->hdrres && ah->reserved));
 
-	return (ah != NULL)
-	       &&
-	       spi_match(ahinfo->spis[0], ahinfo->spis[1],
-			 ntohl(ah->spi),
-			 !!(ahinfo->invflags & IP6T_AH_INV_SPI))
-	       &&
-	       (!ahinfo->hdrlen ||
-		(ahinfo->hdrlen == hdrlen) ^
-		!!(ahinfo->invflags & IP6T_AH_INV_LEN))
-	       &&
-	       !(ahinfo->hdrres && ah->reserved);
+	return (ah != NULL) &&
+		spi_match(ahinfo->spis[0], ahinfo->spis[1],
+			  ntohl(ah->spi),
+			  !!(ahinfo->invflags & IP6T_AH_INV_SPI)) &&
+		(!ahinfo->hdrlen ||
+		 (ahinfo->hdrlen == hdrlen) ^
+		 !!(ahinfo->invflags & IP6T_AH_INV_LEN)) &&
+		!(ahinfo->hdrres && ah->reserved);
 }
 
 static bool ah_mt6_check(const struct xt_mtchk_param *par)
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 673aa0a5084..7b91c2598ed 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -70,41 +70,36 @@ frag_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	pr_debug("res %02X %02X%04X %02X ",
 		 fraginfo->flags & IP6T_FRAG_RES, fh->reserved,
 		 ntohs(fh->frag_off) & 0x6,
-		 !((fraginfo->flags & IP6T_FRAG_RES)
-		   && (fh->reserved || (ntohs(fh->frag_off) & 0x06))));
+		 !((fraginfo->flags & IP6T_FRAG_RES) &&
+		   (fh->reserved || (ntohs(fh->frag_off) & 0x06))));
 	pr_debug("first %02X %02X %02X ",
 		 fraginfo->flags & IP6T_FRAG_FST,
 		 ntohs(fh->frag_off) & ~0x7,
-		 !((fraginfo->flags & IP6T_FRAG_FST)
-		   && (ntohs(fh->frag_off) & ~0x7)));
+		 !((fraginfo->flags & IP6T_FRAG_FST) &&
+		   (ntohs(fh->frag_off) & ~0x7)));
 	pr_debug("mf %02X %02X %02X ",
 		 fraginfo->flags & IP6T_FRAG_MF,
 		 ntohs(fh->frag_off) & IP6_MF,
-		 !((fraginfo->flags & IP6T_FRAG_MF)
-		   && !((ntohs(fh->frag_off) & IP6_MF))));
+		 !((fraginfo->flags & IP6T_FRAG_MF) &&
+		   !((ntohs(fh->frag_off) & IP6_MF))));
 	pr_debug("last %02X %02X %02X\n",
 		 fraginfo->flags & IP6T_FRAG_NMF,
 		 ntohs(fh->frag_off) & IP6_MF,
-		 !((fraginfo->flags & IP6T_FRAG_NMF)
-		   && (ntohs(fh->frag_off) & IP6_MF)));
-
-	return (fh != NULL)
-	       &&
-	       id_match(fraginfo->ids[0], fraginfo->ids[1],
-			ntohl(fh->identification),
-			!!(fraginfo->invflags & IP6T_FRAG_INV_IDS))
-	       &&
-	       !((fraginfo->flags & IP6T_FRAG_RES)
-		 && (fh->reserved || (ntohs(fh->frag_off) & 0x6)))
-	       &&
-	       !((fraginfo->flags & IP6T_FRAG_FST)
-		 && (ntohs(fh->frag_off) & ~0x7))
-	       &&
-	       !((fraginfo->flags & IP6T_FRAG_MF)
-		 && !(ntohs(fh->frag_off) & IP6_MF))
-	       &&
-	       !((fraginfo->flags & IP6T_FRAG_NMF)
-		 && (ntohs(fh->frag_off) & IP6_MF));
+		 !((fraginfo->flags & IP6T_FRAG_NMF) &&
+		   (ntohs(fh->frag_off) & IP6_MF)));
+
+	return (fh != NULL) &&
+		id_match(fraginfo->ids[0], fraginfo->ids[1],
+			 ntohl(fh->identification),
+			 !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)) &&
+		!((fraginfo->flags & IP6T_FRAG_RES) &&
+		  (fh->reserved || (ntohs(fh->frag_off) & 0x6))) &&
+		!((fraginfo->flags & IP6T_FRAG_FST) &&
+		  (ntohs(fh->frag_off) & ~0x7)) &&
+		!((fraginfo->flags & IP6T_FRAG_MF) &&
+		  !(ntohs(fh->frag_off) & IP6_MF)) &&
+		!((fraginfo->flags & IP6T_FRAG_NMF) &&
+		  (ntohs(fh->frag_off) & IP6_MF));
 }
 
 static bool frag_mt6_check(const struct xt_mtchk_param *par)
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 356b8d6f6ba..b77307fc874 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -92,16 +92,13 @@ static bool rt_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 		 !((rtinfo->flags & IP6T_RT_RES) &&
 		   (((const struct rt0_hdr *)rh)->reserved)));
 
-	ret = (rh != NULL)
-	      &&
+	ret = (rh != NULL) &&
 	      (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
 			      rh->segments_left,
-			      !!(rtinfo->invflags & IP6T_RT_INV_SGS)))
-	      &&
+			      !!(rtinfo->invflags & IP6T_RT_INV_SGS))) &&
 	      (!(rtinfo->flags & IP6T_RT_LEN) ||
 	       ((rtinfo->hdrlen == hdrlen) ^
-		!!(rtinfo->invflags & IP6T_RT_INV_LEN)))
-	      &&
+		!!(rtinfo->invflags & IP6T_RT_INV_LEN))) &&
 	      (!(rtinfo->flags & IP6T_RT_TYP) ||
 	       ((rtinfo->rt_type == rh->type) ^
 		!!(rtinfo->invflags & IP6T_RT_INV_TYP)));
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 6f4383ad86f..ad378efd0eb 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -79,8 +79,8 @@ ip6t_local_out_hook(unsigned int hook,
 {
 #if 0
 	/* root is playing with raw sockets. */
-	if (skb->len < sizeof(struct iphdr)
-	    || ip_hdrlen(skb) < sizeof(struct iphdr)) {
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ip6t_hook: happy cracking.\n");
 		return NF_ACCEPT;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 0ad91433ed6..a929c19d30e 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -102,8 +102,8 @@ ip6t_local_out_hook(unsigned int hook,
 
 #if 0
 	/* root is playing with raw sockets. */
-	if (skb->len < sizeof(struct iphdr)
-	    || ip_hdrlen(skb) < sizeof(struct iphdr)) {
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ip6t_hook: happy cracking.\n");
 		return NF_ACCEPT;
@@ -122,11 +122,11 @@ ip6t_local_out_hook(unsigned int hook,
 	ret = ip6t_do_table(skb, hook, in, out,
 			    dev_net(out)->ipv6.ip6table_mangle);
 
-	if (ret != NF_DROP && ret != NF_STOLEN
-		&& (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr))
-		    || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr))
-		    || skb->mark != mark
-		    || ipv6_hdr(skb)->hop_limit != hop_limit))
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
+	     memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
+	     skb->mark != mark ||
+	     ipv6_hdr(skb)->hop_limit != hop_limit))
 		return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
 
 	return ret;
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 642dcb127ba..0f3df45718a 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -244,18 +244,18 @@ static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = {
 static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
 				struct nf_conntrack_tuple *tuple)
 {
-	if (!tb[CTA_PROTO_ICMPV6_TYPE]
-	    || !tb[CTA_PROTO_ICMPV6_CODE]
-	    || !tb[CTA_PROTO_ICMPV6_ID])
+	if (!tb[CTA_PROTO_ICMPV6_TYPE] ||
+	    !tb[CTA_PROTO_ICMPV6_CODE] ||
+	    !tb[CTA_PROTO_ICMPV6_ID])
 		return -EINVAL;
 
 	tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
 	tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
 	tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
 
-	if (tuple->dst.u.icmp.type < 128
-	    || tuple->dst.u.icmp.type - 128 >= sizeof(invmap)
-	    || !invmap[tuple->dst.u.icmp.type - 128])
+	if (tuple->dst.u.icmp.type < 128 ||
+	    tuple->dst.u.icmp.type - 128 >= sizeof(invmap) ||
+	    !invmap[tuple->dst.u.icmp.type - 128])
 		return -EINVAL;
 
 	return 0;
-- 
cgit v1.2.3


From 35700212b45ea9f98fa682cfc1bc1a67c9ccc34b Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 24 Nov 2009 14:52:52 -0800
Subject: net/ipv6: Move && and || to end of previous line

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_flowlabel.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 7712578bdc6..593a67e8d3f 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -377,8 +377,8 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
 		goto done;
 	fl->share = freq->flr_share;
 	addr_type = ipv6_addr_type(&freq->flr_dst);
-	if ((addr_type&IPV6_ADDR_MAPPED)
-	    || addr_type == IPV6_ADDR_ANY) {
+	if ((addr_type & IPV6_ADDR_MAPPED) ||
+	    addr_type == IPV6_ADDR_ANY) {
 		err = -EINVAL;
 		goto done;
 	}
@@ -421,8 +421,8 @@ static int mem_check(struct sock *sk)
 
 	if (room <= 0 ||
 	    ((count >= FL_MAX_PER_SOCK ||
-	     (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4)
-	     && !capable(CAP_NET_ADMIN)))
+	      (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
+	     !capable(CAP_NET_ADMIN)))
 		return -ENOBUFS;
 
 	return 0;
-- 
cgit v1.2.3


From 09ad9bc752519cc167d0a573e1acf69b5c707c67 Mon Sep 17 00:00:00 2001
From: Octavian Purdila <opurdila@ixiacom.com>
Date: Wed, 25 Nov 2009 15:14:13 -0800
Subject: net: use net_eq to compare nets

Generated with the following semantic patch

@@
struct net *n1;
struct net *n2;
@@
- n1 == n2
+ net_eq(n1, n2)

@@
struct net *n1;
struct net *n2;
@@
- n1 != n2
+ !net_eq(n1, n2)

applied over {include,net,drivers/net}.

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c            | 4 ++--
 net/ipv6/ip6_flowlabel.c       | 9 +++++----
 net/ipv6/netfilter/ip6_queue.c | 2 +-
 net/ipv6/reassembly.c          | 4 ++--
 4 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 522bdc77206..b1ce8fc6204 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4498,7 +4498,7 @@ static int addrconf_init_net(struct net *net)
 	all = &ipv6_devconf;
 	dflt = &ipv6_devconf_dflt;
 
-	if (net != &init_net) {
+	if (!net_eq(net, &init_net)) {
 		all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL);
 		if (all == NULL)
 			goto err_alloc_all;
@@ -4546,7 +4546,7 @@ static void addrconf_exit_net(struct net *net)
 	__addrconf_sysctl_unregister(net->ipv6.devconf_dflt);
 	__addrconf_sysctl_unregister(net->ipv6.devconf_all);
 #endif
-	if (net != &init_net) {
+	if (!net_eq(net, &init_net)) {
 		kfree(net->ipv6.devconf_dflt);
 		kfree(net->ipv6.devconf_all);
 	}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 593a67e8d3f..6e7bffa2205 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -67,7 +67,7 @@ static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
 	struct ip6_flowlabel *fl;
 
 	for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) {
-		if (fl->label == label && fl->fl_net == net)
+		if (fl->label == label && net_eq(fl->fl_net, net))
 			return fl;
 	}
 	return NULL;
@@ -163,7 +163,8 @@ static void ip6_fl_purge(struct net *net)
 		struct ip6_flowlabel *fl, **flp;
 		flp = &fl_ht[i];
 		while ((fl = *flp) != NULL) {
-			if (fl->fl_net == net && atomic_read(&fl->users) == 0) {
+			if (net_eq(fl->fl_net, net) &&
+			    atomic_read(&fl->users) == 0) {
 				*flp = fl->next;
 				fl_free(fl);
 				atomic_dec(&fl_size);
@@ -630,7 +631,7 @@ static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
 	for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
 		fl = fl_ht[state->bucket];
 
-		while (fl && fl->fl_net != net)
+		while (fl && !net_eq(fl->fl_net, net))
 			fl = fl->next;
 		if (fl)
 			break;
@@ -645,7 +646,7 @@ static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flo
 
 	fl = fl->next;
 try_again:
-	while (fl && fl->fl_net != net)
+	while (fl && !net_eq(fl->fl_net, net))
 		fl = fl->next;
 
 	while (!fl) {
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 1cf3f0c6a95..4c7a18abcaf 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -502,7 +502,7 @@ ipq_rcv_nl_event(struct notifier_block *this,
 	if (event == NETLINK_URELEASE &&
 	    n->protocol == NETLINK_IP6_FW && n->pid) {
 		write_lock_bh(&queue_lock);
-		if ((n->net == &init_net) && (n->pid == peer_pid))
+		if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
 			__ipq_reset();
 		write_unlock_bh(&queue_lock);
 	}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index dce699fb267..45efc39753e 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -681,7 +681,7 @@ static int ip6_frags_ns_sysctl_register(struct net *net)
 	struct ctl_table_header *hdr;
 
 	table = ip6_frags_ns_ctl_table;
-	if (net != &init_net) {
+	if (!net_eq(net, &init_net)) {
 		table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL);
 		if (table == NULL)
 			goto err_alloc;
@@ -699,7 +699,7 @@ static int ip6_frags_ns_sysctl_register(struct net *net)
 	return 0;
 
 err_reg:
-	if (net != &init_net)
+	if (!net_eq(net, &init_net))
 		kfree(table);
 err_alloc:
 	return -ENOMEM;
-- 
cgit v1.2.3


From 8f8a088c2127c729638da8f2d33860e346c01eda Mon Sep 17 00:00:00 2001
From: Martin Willi <martin@strongswan.org>
Date: Wed, 25 Nov 2009 00:29:53 +0000
Subject: xfrm: Use the user specified truncation length in ESP and AH

Instead of using the hardcoded truncation for authentication
algorithms, use the truncation length specified on xfrm_state.

Signed-off-by: Martin Willi <martin@strongswan.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ah6.c  | 2 +-
 net/ipv6/esp6.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 0f526f8ea51..c2f300c314b 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -667,7 +667,7 @@ static int ah6_init_state(struct xfrm_state *x)
 	}
 
 	ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
-	ahp->icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
+	ahp->icv_trunc_len = x->aalg->alg_trunc_len/8;
 
 	BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
 
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index af597c73ebe..668a46b655e 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -473,7 +473,7 @@ static int esp_init_authenc(struct xfrm_state *x)
 		}
 
 		err = crypto_aead_setauthsize(
-			aead, aalg_desc->uinfo.auth.icv_truncbits / 8);
+			aead, x->aalg->alg_trunc_len / 8);
 		if (err)
 			goto free_key;
 	}
-- 
cgit v1.2.3


From ac31cd3cbadc45d6ed2a0ae5d116918b15bb6c22 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 29 Nov 2009 15:46:15 +0000
Subject: net: Simplify ip6_tunnel pernet operations.

Take advantage of the new pernet automatic storage management,
and stop using compatibility network namespace functions.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index e5c0f6bb831..d453d07b0df 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1409,17 +1409,8 @@ static void ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
 
 static int ip6_tnl_init_net(struct net *net)
 {
+	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 	int err;
-	struct ip6_tnl_net *ip6n;
-
-	err = -ENOMEM;
-	ip6n = kzalloc(sizeof(struct ip6_tnl_net), GFP_KERNEL);
-	if (ip6n == NULL)
-		goto err_alloc;
-
-	err = net_assign_generic(net, ip6_tnl_net_id, ip6n);
-	if (err < 0)
-		goto err_assign;
 
 	ip6n->tnls[0] = ip6n->tnls_wc;
 	ip6n->tnls[1] = ip6n->tnls_r_l;
@@ -1442,27 +1433,23 @@ static int ip6_tnl_init_net(struct net *net)
 err_register:
 	free_netdev(ip6n->fb_tnl_dev);
 err_alloc_dev:
-	/* nothing */
-err_assign:
-	kfree(ip6n);
-err_alloc:
 	return err;
 }
 
 static void ip6_tnl_exit_net(struct net *net)
 {
-	struct ip6_tnl_net *ip6n;
+	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 
-	ip6n = net_generic(net, ip6_tnl_net_id);
 	rtnl_lock();
 	ip6_tnl_destroy_tunnels(ip6n);
 	rtnl_unlock();
-	kfree(ip6n);
 }
 
 static struct pernet_operations ip6_tnl_net_ops = {
 	.init = ip6_tnl_init_net,
 	.exit = ip6_tnl_exit_net,
+	.id   = &ip6_tnl_net_id,
+	.size = sizeof(struct ip6_tnl_net),
 };
 
 /**
@@ -1487,7 +1474,7 @@ static int __init ip6_tunnel_init(void)
 		goto unreg_ip4ip6;
 	}
 
-	err = register_pernet_gen_device(&ip6_tnl_net_id, &ip6_tnl_net_ops);
+	err = register_pernet_device(&ip6_tnl_net_ops);
 	if (err < 0)
 		goto err_pernet;
 	return 0;
@@ -1511,7 +1498,7 @@ static void __exit ip6_tunnel_cleanup(void)
 	if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
 		printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n");
 
-	unregister_pernet_gen_device(ip6_tnl_net_id, &ip6_tnl_net_ops);
+	unregister_pernet_device(&ip6_tnl_net_ops);
 }
 
 module_init(ip6_tunnel_init);
-- 
cgit v1.2.3


From 671011720baa222b6de667cd688aed4dc8908924 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 29 Nov 2009 15:46:16 +0000
Subject: net: Simplify ipip6 aka sit pernet operations.

Take advantage of the new pernet automatic storage management,
and stop using compatibility network namespace functions.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index d9deaa7753e..976e68244b9 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1164,17 +1164,8 @@ static void sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head)
 
 static int sit_init_net(struct net *net)
 {
+	struct sit_net *sitn = net_generic(net, sit_net_id);
 	int err;
-	struct sit_net *sitn;
-
-	err = -ENOMEM;
-	sitn = kzalloc(sizeof(struct sit_net), GFP_KERNEL);
-	if (sitn == NULL)
-		goto err_alloc;
-
-	err = net_assign_generic(net, sit_net_id, sitn);
-	if (err < 0)
-		goto err_assign;
 
 	sitn->tunnels[0] = sitn->tunnels_wc;
 	sitn->tunnels[1] = sitn->tunnels_l;
@@ -1201,37 +1192,33 @@ err_reg_dev:
 	dev_put(sitn->fb_tunnel_dev);
 	free_netdev(sitn->fb_tunnel_dev);
 err_alloc_dev:
-	/* nothing */
-err_assign:
-	kfree(sitn);
-err_alloc:
 	return err;
 }
 
 static void sit_exit_net(struct net *net)
 {
-	struct sit_net *sitn;
+	struct sit_net *sitn = net_generic(net, sit_net_id);
 	LIST_HEAD(list);
 
-	sitn = net_generic(net, sit_net_id);
 	rtnl_lock();
 	sit_destroy_tunnels(sitn, &list);
 	unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
 	unregister_netdevice_many(&list);
 	rtnl_unlock();
-	kfree(sitn);
 }
 
 static struct pernet_operations sit_net_ops = {
 	.init = sit_init_net,
 	.exit = sit_exit_net,
+	.id   = &sit_net_id,
+	.size = sizeof(struct sit_net),
 };
 
 static void __exit sit_cleanup(void)
 {
 	xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
 
-	unregister_pernet_gen_device(sit_net_id, &sit_net_ops);
+	unregister_pernet_device(&sit_net_ops);
 	rcu_barrier(); /* Wait for completion of call_rcu()'s */
 }
 
@@ -1246,7 +1233,7 @@ static int __init sit_init(void)
 		return -EAGAIN;
 	}
 
-	err = register_pernet_gen_device(&sit_net_id, &sit_net_ops);
+	err = register_pernet_device(&sit_net_ops);
 	if (err < 0)
 		xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
 
-- 
cgit v1.2.3


From e6b4d11367519bc71729c09d05a126b133c755be Mon Sep 17 00:00:00 2001
From: William Allen Simpson <william.allen.simpson@gmail.com>
Date: Wed, 2 Dec 2009 18:07:39 +0000
Subject: TCPCT part 1a: add request_values parameter for sending SYNACK

Add optional function parameters associated with sending SYNACK.
These parameters are not needed after sending SYNACK, and are not
used for retransmission.  Avoids extending struct tcp_request_sock,
and avoids allocating kernel memory.

Also affects DCCP as it uses common struct request_sock_ops,
but this parameter is currently reserved for future use.

Signed-off-by: William.Allen.Simpson@gmail.com
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index de709091b26..da6e24416d7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -461,7 +461,8 @@ out:
 }
 
 
-static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
+static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
+			      struct request_values *rvp)
 {
 	struct inet6_request_sock *treq = inet6_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -499,7 +500,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
 	if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
 		goto done;
 
-	skb = tcp_make_synack(sk, dst, req);
+	skb = tcp_make_synack(sk, dst, req, rvp);
 	if (skb) {
 		struct tcphdr *th = tcp_hdr(skb);
 
@@ -1161,13 +1162,13 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
  */
 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_options_received tmp_opt;
+	struct request_sock *req;
 	struct inet6_request_sock *treq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct tcp_options_received tmp_opt;
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct request_sock *req = NULL;
-	__u32 isn = TCP_SKB_CB(skb)->when;
 	struct dst_entry *dst = __sk_dst_get(sk);
+	__u32 isn = TCP_SKB_CB(skb)->when;
 #ifdef CONFIG_SYN_COOKIES
 	int want_cookie = 0;
 #else
@@ -1239,23 +1240,19 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 
 		isn = tcp_v6_init_sequence(skb);
 	}
-
 	tcp_rsk(req)->snt_isn = isn;
 
 	security_inet_conn_request(sk, skb, req);
 
-	if (tcp_v6_send_synack(sk, req))
-		goto drop;
+	if (tcp_v6_send_synack(sk, req, NULL) || want_cookie)
+		goto drop_and_free;
 
-	if (!want_cookie) {
-		inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-		return 0;
-	}
+	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+	return 0;
 
+drop_and_free:
+	reqsk_free(req);
 drop:
-	if (req)
-		reqsk_free(req);
-
 	return 0; /* don't send reset */
 }
 
-- 
cgit v1.2.3


From 435cf559f02ea3a3159eb316f97dc88bdebe9432 Mon Sep 17 00:00:00 2001
From: William Allen Simpson <william.allen.simpson@gmail.com>
Date: Wed, 2 Dec 2009 18:17:05 +0000
Subject: TCPCT part 1d: define TCP cookie option, extend existing struct's

Data structures are carefully composed to require minimal additions.
For example, the struct tcp_options_received cookie_plus variable fits
between existing 16-bit and 8-bit variables, requiring no additional
space (taking alignment into consideration).  There are no additions to
tcp_request_sock, and only 1 pointer in tcp_sock.

This is a significantly revised implementation of an earlier (year-old)
patch that no longer applies cleanly, with permission of the original
author (Adam Langley):

    http://thread.gmane.org/gmane.linux.network/102586

The principle difference is using a TCP option to carry the cookie nonce,
instead of a user configured offset in the data.  This is more flexible and
less subject to user configuration error.  Such a cookie option has been
suggested for many years, and is also useful without SYN data, allowing
several related concepts to use the same extension option.

    "Re: SYN floods (was: does history repeat itself?)", September 9, 1996.
    http://www.merit.net/mail.archives/nanog/1996-09/msg00235.html

    "Re: what a new TCP header might look like", May 12, 1998.
    ftp://ftp.isi.edu/end2end/end2end-interest-1998.mail

These functions will also be used in subsequent patches that implement
additional features.

Requires:
   TCPCT part 1a: add request_values parameter for sending SYNACK
   TCPCT part 1b: generate Responder Cookie secret
   TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS

Signed-off-by: William.Allen.Simpson@gmail.com
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'net/ipv6')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index da6e24416d7..f2ec38289a4 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1864,6 +1864,19 @@ static int tcp_v6_init_sock(struct sock *sk)
 	tp->af_specific = &tcp_sock_ipv6_specific;
 #endif
 
+	/* TCP Cookie Transactions */
+	if (sysctl_tcp_cookie_size > 0) {
+		/* Default, cookies without s_data_payload. */
+		tp->cookie_values =
+			kzalloc(sizeof(*tp->cookie_values),
+				sk->sk_allocation);
+		if (tp->cookie_values != NULL)
+			kref_init(&tp->cookie_values->kref);
+	}
+	/* Presumed zeroed, in order of appearance:
+	 *	cookie_in_always, cookie_out_never,
+	 *	s_data_constant, s_data_in, s_data_out
+	 */
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
-- 
cgit v1.2.3


From 4957faade11b3a278c3b3cade3411ddc20afa791 Mon Sep 17 00:00:00 2001
From: William Allen Simpson <william.allen.simpson@gmail.com>
Date: Wed, 2 Dec 2009 18:25:27 +0000
Subject: TCPCT part 1g: Responder Cookie => Initiator

Parse incoming TCP_COOKIE option(s).

Calculate <SYN,ACK> TCP_COOKIE option.

Send optional <SYN,ACK> data.

This is a significantly revised implementation of an earlier (year-old)
patch that no longer applies cleanly, with permission of the original
author (Adam Langley):

    http://thread.gmane.org/gmane.linux.network/102586

Requires:
   TCPCT part 1a: add request_values parameter for sending SYNACK
   TCPCT part 1b: generate Responder Cookie secret
   TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
   TCPCT part 1d: define TCP cookie option, extend existing struct's
   TCPCT part 1e: implement socket option TCP_COOKIE_TRANSACTIONS
   TCPCT part 1f: Initiator Cookie => Responder

Signed-off-by: William.Allen.Simpson@gmail.com
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/syncookies.c |  5 +++--
 net/ipv6/tcp_ipv6.c   | 52 +++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 53 insertions(+), 4 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 612fc53e0bb..5b9af508b8f 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -159,6 +159,8 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
 
 struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_options_received tcp_opt;
+	u8 *hash_location;
 	struct inet_request_sock *ireq;
 	struct inet6_request_sock *ireq6;
 	struct tcp_request_sock *treq;
@@ -171,7 +173,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	int mss;
 	struct dst_entry *dst;
 	__u8 rcv_wscale;
-	struct tcp_options_received tcp_opt;
 
 	if (!sysctl_tcp_syncookies || !th->ack)
 		goto out;
@@ -254,7 +255,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	/* check for timestamp cookie support */
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
-	tcp_parse_options(skb, &tcp_opt, 0, dst);
+	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, dst);
 
 	if (tcp_opt.saw_tstamp)
 		cookie_check_timestamp(&tcp_opt);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f2ec38289a4..fc0a4e5895e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1162,7 +1162,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
  */
 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_extend_values tmp_ext;
 	struct tcp_options_received tmp_opt;
+	u8 *hash_location;
 	struct request_sock *req;
 	struct inet6_request_sock *treq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1206,8 +1208,52 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 	tmp_opt.user_mss = tp->rx_opt.user_mss;
+	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst);
+
+	if (tmp_opt.cookie_plus > 0 &&
+	    tmp_opt.saw_tstamp &&
+	    !tp->rx_opt.cookie_out_never &&
+	    (sysctl_tcp_cookie_size > 0 ||
+	     (tp->cookie_values != NULL &&
+	      tp->cookie_values->cookie_desired > 0))) {
+		u8 *c;
+		u32 *d;
+		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
+		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
+
+		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
+			goto drop_and_free;
+
+		/* Secret recipe starts with IP addresses */
+		d = &ipv6_hdr(skb)->daddr.s6_addr32[0];
+		*mess++ ^= *d++;
+		*mess++ ^= *d++;
+		*mess++ ^= *d++;
+		*mess++ ^= *d++;
+		d = &ipv6_hdr(skb)->saddr.s6_addr32[0];
+		*mess++ ^= *d++;
+		*mess++ ^= *d++;
+		*mess++ ^= *d++;
+		*mess++ ^= *d++;
+
+		/* plus variable length Initiator Cookie */
+		c = (u8 *)mess;
+		while (l-- > 0)
+			*c++ ^= *hash_location++;
 
-	tcp_parse_options(skb, &tmp_opt, 0, dst);
+#ifdef CONFIG_SYN_COOKIES
+		want_cookie = 0;	/* not our kind of cookie */
+#endif
+		tmp_ext.cookie_out_never = 0; /* false */
+		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
+	} else if (!tp->rx_opt.cookie_in_always) {
+		/* redundant indications, but ensure initialization. */
+		tmp_ext.cookie_out_never = 1; /* true */
+		tmp_ext.cookie_plus = 0;
+	} else {
+		goto drop_and_free;
+	}
+	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
 
 	if (want_cookie && !tmp_opt.saw_tstamp)
 		tcp_clear_options(&tmp_opt);
@@ -1244,7 +1290,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 
 	security_inet_conn_request(sk, skb, req);
 
-	if (tcp_v6_send_synack(sk, req, NULL) || want_cookie)
+	if (tcp_v6_send_synack(sk, req,
+			       (struct request_values *)&tmp_ext) ||
+	    want_cookie)
 		goto drop_and_free;
 
 	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-- 
cgit v1.2.3


From 5adef1809147a9c39119ffd5a13a1ca4fe23a411 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 3 Dec 2009 01:25:57 +0000
Subject: net 04/05: fib_rules: allow to delete local rule

commit d124356ce314fff22a047ea334379d5105b2d834
Author: Patrick McHardy <kaber@trash.net>
Date:   Thu Dec 3 12:16:35 2009 +0100

    net: fib_rules: allow to delete local rule

    Allow to delete the local rule and recreate it with a higher priority. This
    can be used to force packets with a local destination out on the wire instead
    of routing them to loopback. Additionally this patch allows to recreate rules
    with a priority of 0.

    Combined with the previous patch to allow oif classification, a socket can
    be bound to the desired interface and packets routed to the wire like this:

    # move local rule to lower priority
    ip rule add pref 1000 lookup local
    ip rule del pref 0

    # route packets of sockets bound to eth0 to the wire independant
    # of the destination address
    ip rule add pref 100 oif eth0 lookup 100
    ip route add default dev eth0 table 100

    Signed-off-by: Patrick McHardy <kaber@trash.net>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/fib6_rules.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 00a7a5e4ac9..3b38f49f2c2 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -276,7 +276,7 @@ static int fib6_rules_net_init(struct net *net)
 	INIT_LIST_HEAD(&net->ipv6.fib6_rules_ops->rules_list);
 
 	err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0,
-				   RT6_TABLE_LOCAL, FIB_RULE_PERMANENT);
+				   RT6_TABLE_LOCAL, 0);
 	if (err)
 		goto out_fib6_rules_ops;
 
-- 
cgit v1.2.3


From e9c5158ac26affd5d8ce006521bdfb7148090e18 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 3 Dec 2009 12:22:55 -0800
Subject: net: Allow fib_rule_unregister to batch

Refactor the code so fib_rules_register always takes a template instead
of the actual fib_rules_ops structure that will be used.  This is
required for network namespace support so 2 out of the 3 callers already
do this, it allows the error handling to be made common, and it allows
fib_rules_unregister to free the template for hte caller.

Modify fib_rules_unregister to use call_rcu instead of syncrhonize_rcu
to allw multiple namespaces to be cleaned up in the same rcu grace
period.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/fib6_rules.c | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 3b38f49f2c2..b7aa7c64cc4 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -264,16 +264,14 @@ static struct fib_rules_ops fib6_rules_ops_template = {
 
 static int fib6_rules_net_init(struct net *net)
 {
+	struct fib_rules_ops *ops;
 	int err = -ENOMEM;
 
-	net->ipv6.fib6_rules_ops = kmemdup(&fib6_rules_ops_template,
-					   sizeof(*net->ipv6.fib6_rules_ops),
-					   GFP_KERNEL);
-	if (!net->ipv6.fib6_rules_ops)
-		goto out;
+	ops = fib_rules_register(&fib6_rules_ops_template, net);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+	net->ipv6.fib6_rules_ops = ops;
 
-	net->ipv6.fib6_rules_ops->fro_net = net;
-	INIT_LIST_HEAD(&net->ipv6.fib6_rules_ops->rules_list);
 
 	err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0,
 				   RT6_TABLE_LOCAL, 0);
@@ -283,25 +281,19 @@ static int fib6_rules_net_init(struct net *net)
 	err = fib_default_rule_add(net->ipv6.fib6_rules_ops,
 				   0x7FFE, RT6_TABLE_MAIN, 0);
 	if (err)
-		goto out_fib6_default_rule_add;
+		goto out_fib6_rules_ops;
 
-	err = fib_rules_register(net->ipv6.fib6_rules_ops);
-	if (err)
-		goto out_fib6_default_rule_add;
 out:
 	return err;
 
-out_fib6_default_rule_add:
-	fib_rules_cleanup_ops(net->ipv6.fib6_rules_ops);
 out_fib6_rules_ops:
-	kfree(net->ipv6.fib6_rules_ops);
+	fib_rules_unregister(ops);
 	goto out;
 }
 
 static void fib6_rules_net_exit(struct net *net)
 {
 	fib_rules_unregister(net->ipv6.fib6_rules_ops);
-	kfree(net->ipv6.fib6_rules_ops);
 }
 
 static struct pernet_operations fib6_rules_net_ops = {
-- 
cgit v1.2.3


From b099ce2602d806deb41caaa578731848995cdb2a Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 3 Dec 2009 02:29:09 +0000
Subject: net: Batch inet_twsk_purge

This function walks the whole hashtable so there is no point in
passing it a network namespace.  Instead I purge all timewait
sockets from dead network namespaces that I find.  If the namespace
is one of the once I am trying to purge I am guaranteed no new timewait
sockets can be formed so this will get them all.  If the namespace
is one I am not acting for it might form a few more but I will
call inet_twsk_purge again and  shortly to get rid of them.  In
any even if the network namespace is dead timewait sockets are
useless.

Move the calls of inet_twsk_purge into batch_exit routines so
that if I am killing a bunch of namespaces at once I will just
call inet_twsk_purge once and save a lot of redundant unnecessary
work.

My simple 4k network namespace exit test the cleanup time dropped from
roughly 8.2s to 1.6s.  While the time spent running inet_twsk_purge fell
to about 2ms.  1ms for ipv4 and 1ms for ipv6.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index fc0a4e5895e..aadd7cef73b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2184,12 +2184,17 @@ static int tcpv6_net_init(struct net *net)
 static void tcpv6_net_exit(struct net *net)
 {
 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
-	inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET6);
+}
+
+static void tcpv6_net_exit_batch(struct list_head *net_exit_list)
+{
+	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
 }
 
 static struct pernet_operations tcpv6_net_ops = {
-	.init = tcpv6_net_init,
-	.exit = tcpv6_net_exit,
+	.init	    = tcpv6_net_init,
+	.exit	    = tcpv6_net_exit,
+	.exit_batch = tcpv6_net_exit_batch,
 };
 
 int __init tcpv6_init(void)
-- 
cgit v1.2.3


From 13475a30b66cdb9250a34052c19ac98847373030 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 2 Dec 2009 22:31:19 +0000
Subject: tcp: connect() race with timewait reuse

Its currently possible that several threads issuing a connect() find
the same timewait socket and try to reuse it, leading to list
corruptions.

Condition for bug is that these threads bound their socket on same
address/port of to-be-find timewait socket, and connected to same
target. (SO_REUSEADDR needed)

To fix this problem, we could unhash timewait socket while holding
ehash lock, to make sure lookups/changes will be serialized. Only
first thread finds the timewait socket, other ones find the
established socket and return an EADDRNOTAVAIL error.

This second version takes into account Evgeniy's review and makes sure
inet_twsk_put() is called outside of locked sections.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/inet6_hashtables.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 00c6a3e6cdd..c813e294ec0 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -223,6 +223,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 	struct sock *sk2;
 	const struct hlist_nulls_node *node;
 	struct inet_timewait_sock *tw;
+	int twrefcnt = 0;
 
 	spin_lock(lock);
 
@@ -250,19 +251,23 @@ unique:
 	 * in hash table socket with a funny identity. */
 	inet->inet_num = lport;
 	inet->inet_sport = htons(lport);
+	sk->sk_hash = hash;
 	WARN_ON(!sk_unhashed(sk));
 	__sk_nulls_add_node_rcu(sk, &head->chain);
-	sk->sk_hash = hash;
+	if (tw) {
+		twrefcnt = inet_twsk_unhash(tw);
+		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
+	}
 	spin_unlock(lock);
+	if (twrefcnt)
+		inet_twsk_put(tw);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 
-	if (twp != NULL) {
+	if (twp) {
 		*twp = tw;
-		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
-	} else if (tw != NULL) {
+	} else if (tw) {
 		/* Silly. Should hash-dance instead... */
 		inet_twsk_deschedule(tw, death_row);
-		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
 
 		inet_twsk_put(tw);
 	}
-- 
cgit v1.2.3