aboutsummaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Kconfig19
-rw-r--r--net/ipv6/addrconf.c29
-rw-r--r--net/ipv6/af_inet6.c53
-rw-r--r--net/ipv6/ah6.c352
-rw-r--r--net/ipv6/anycast.c6
-rw-r--r--net/ipv6/datagram.c48
-rw-r--r--net/ipv6/inet6_connection_sock.c10
-rw-r--r--net/ipv6/inet6_hashtables.c14
-rw-r--r--net/ipv6/ip6_tunnel.c66
-rw-r--r--net/ipv6/ip6mr.c17
-rw-r--r--net/ipv6/ipv6_sockglue.c9
-rw-r--r--net/ipv6/ndisc.c1
-rw-r--r--net/ipv6/raw.c60
-rw-r--r--net/ipv6/reassembly.c13
-rw-r--r--net/ipv6/route.c3
-rw-r--r--net/ipv6/sit.c269
-rw-r--r--net/ipv6/syncookies.c31
-rw-r--r--net/ipv6/tcp_ipv6.c44
-rw-r--r--net/ipv6/udp.c263
-rw-r--r--net/ipv6/udplite.c1
-rw-r--r--net/ipv6/xfrm6_tunnel.c47
21 files changed, 957 insertions, 398 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ead6c7a42f4..a578096152a 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -170,6 +170,25 @@ config IPV6_SIT
Saying M here will produce a module called sit. If unsure, say Y.
+config IPV6_SIT_6RD
+ bool "IPv6: IPv6 Rapid Deployment (6RD) (EXPERIMENTAL)"
+ depends on IPV6_SIT && EXPERIMENTAL
+ default n
+ ---help---
+ IPv6 Rapid Deployment (6rd; draft-ietf-softwire-ipv6-6rd) builds upon
+ mechanisms of 6to4 (RFC3056) to enable a service provider to rapidly
+ deploy IPv6 unicast service to IPv4 sites to which it provides
+ customer premise equipment. Like 6to4, it utilizes stateless IPv6 in
+ IPv4 encapsulation in order to transit IPv4-only network
+ infrastructure. Unlike 6to4, a 6rd service provider uses an IPv6
+ prefix of its own in place of the fixed 6to4 prefix.
+
+ With this option enabled, the SIT driver offers 6rd functionality by
+ providing an additional ioctl API to configure the IPv6 prefix used
+ instead of the static 2002::/16 prefix of 6to4.
+
+ If unsure, say N.
+
config IPV6_NDISC_NODETYPE
bool
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1fd0a3d775d..024bba30de2 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -481,9 +481,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
struct net_device *dev;
struct inet6_dev *idev;
- read_lock(&dev_base_lock);
- for_each_netdev(net, dev) {
- rcu_read_lock();
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
idev = __in6_dev_get(dev);
if (idev) {
int changed = (!idev->cnf.forwarding) ^ (!newf);
@@ -491,9 +490,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
if (changed)
dev_forward_change(idev);
}
- rcu_read_unlock();
}
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
}
static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
@@ -1137,10 +1135,9 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
hiscore->rule = -1;
hiscore->ifa = NULL;
- read_lock(&dev_base_lock);
rcu_read_lock();
- for_each_netdev(net, dev) {
+ for_each_netdev_rcu(net, dev) {
struct inet6_dev *idev;
/* Candidate Source Address (section 4)
@@ -1235,7 +1232,6 @@ try_nextdev:
read_unlock_bh(&idev->lock);
}
rcu_read_unlock();
- read_unlock(&dev_base_lock);
if (!hiscore->ifa)
return -EADDRNOTAVAIL;
@@ -3708,6 +3704,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
#endif
array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
+ array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
}
static inline size_t inet6_if_nlmsg_size(void)
@@ -4051,9 +4048,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
struct net_device *dev;
struct inet6_dev *idev;
- read_lock(&dev_base_lock);
- for_each_netdev(net, dev) {
- rcu_read_lock();
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
idev = __in6_dev_get(dev);
if (idev) {
int changed = (!idev->cnf.disable_ipv6) ^ (!newf);
@@ -4061,9 +4057,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
if (changed)
dev_disable_change(idev);
}
- rcu_read_unlock();
}
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
}
static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old)
@@ -4353,6 +4348,14 @@ static struct addrconf_sysctl_table
.proc_handler = proc_dointvec,
},
{
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "force_tllao",
+ .data = &ipv6_devconf.force_tllao,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.ctl_name = 0, /* sentinel */
}
},
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e127a32f954..12e69d364dd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -95,7 +95,8 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
-static int inet6_create(struct net *net, struct socket *sock, int protocol)
+static int inet6_create(struct net *net, struct socket *sock, int protocol,
+ int kern)
{
struct inet_sock *inet;
struct ipv6_pinfo *np;
@@ -158,7 +159,7 @@ lookup_protocol:
}
err = -EPERM;
- if (answer->capability > 0 && !capable(answer->capability))
+ if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
goto out_rcu_unlock;
sock->ops = answer->ops;
@@ -185,7 +186,7 @@ lookup_protocol:
inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
if (SOCK_RAW == sock->type) {
- inet->num = protocol;
+ inet->inet_num = protocol;
if (IPPROTO_RAW == protocol)
inet->hdrincl = 1;
}
@@ -228,12 +229,12 @@ lookup_protocol:
*/
sk_refcnt_debug_inc(sk);
- if (inet->num) {
+ if (inet->inet_num) {
/* It assumes that any protocol which allows
* the user to assign a number at socket
* creation time automatically shares.
*/
- inet->sport = htons(inet->num);
+ inet->inet_sport = htons(inet->inet_num);
sk->sk_prot->hash(sk);
}
if (sk->sk_prot->init) {
@@ -281,7 +282,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
lock_sock(sk);
/* Check these errors (active socket, double bind). */
- if (sk->sk_state != TCP_CLOSE || inet->num) {
+ if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
err = -EINVAL;
goto out;
}
@@ -314,6 +315,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (addr_type != IPV6_ADDR_ANY) {
struct net_device *dev = NULL;
+ rcu_read_lock();
if (addr_type & IPV6_ADDR_LINKLOCAL) {
if (addr_len >= sizeof(struct sockaddr_in6) &&
addr->sin6_scope_id) {
@@ -326,12 +328,12 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
/* Binding to link-local address requires an interface */
if (!sk->sk_bound_dev_if) {
err = -EINVAL;
- goto out;
+ goto out_unlock;
}
- dev = dev_get_by_index(net, sk->sk_bound_dev_if);
+ dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
if (!dev) {
err = -ENODEV;
- goto out;
+ goto out_unlock;
}
}
@@ -342,19 +344,16 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (!(addr_type & IPV6_ADDR_MULTICAST)) {
if (!ipv6_chk_addr(net, &addr->sin6_addr,
dev, 0)) {
- if (dev)
- dev_put(dev);
err = -EADDRNOTAVAIL;
- goto out;
+ goto out_unlock;
}
}
- if (dev)
- dev_put(dev);
+ rcu_read_unlock();
}
}
- inet->rcv_saddr = v4addr;
- inet->saddr = v4addr;
+ inet->inet_rcv_saddr = v4addr;
+ inet->inet_saddr = v4addr;
ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
@@ -375,12 +374,15 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
}
if (snum)
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
- inet->sport = htons(inet->num);
- inet->dport = 0;
- inet->daddr = 0;
+ inet->inet_sport = htons(inet->inet_num);
+ inet->inet_dport = 0;
+ inet->inet_daddr = 0;
out:
release_sock(sk);
return err;
+out_unlock:
+ rcu_read_unlock();
+ goto out;
}
EXPORT_SYMBOL(inet6_bind);
@@ -441,12 +443,12 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin6_flowinfo = 0;
sin->sin6_scope_id = 0;
if (peer) {
- if (!inet->dport)
+ if (!inet->inet_dport)
return -ENOTCONN;
if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
peer == 1)
return -ENOTCONN;
- sin->sin6_port = inet->dport;
+ sin->sin6_port = inet->inet_dport;
ipv6_addr_copy(&sin->sin6_addr, &np->daddr);
if (np->sndflow)
sin->sin6_flowinfo = np->flow_label;
@@ -456,7 +458,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
else
ipv6_addr_copy(&sin->sin6_addr, &np->rcv_saddr);
- sin->sin6_port = inet->sport;
+ sin->sin6_port = inet->inet_sport;
}
if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
sin->sin6_scope_id = sk->sk_bound_dev_if;
@@ -552,7 +554,7 @@ const struct proto_ops inet6_dgram_ops = {
#endif
};
-static struct net_proto_family inet6_family_ops = {
+static const struct net_proto_family inet6_family_ops = {
.family = PF_INET6,
.create = inet6_create,
.owner = THIS_MODULE,
@@ -654,8 +656,9 @@ int inet6_sk_rebuild_header(struct sock *sk)
ipv6_addr_copy(&fl.fl6_src, &np->saddr);
fl.fl6_flowlabel = np->flow_label;
fl.oif = sk->sk_bound_dev_if;
- fl.fl_ip_dport = inet->dport;
- fl.fl_ip_sport = inet->sport;
+ fl.mark = sk->sk_mark;
+ fl.fl_ip_dport = inet->inet_dport;
+ fl.fl_ip_sport = inet->inet_sport;
security_sk_classify_flow(sk, &fl);
if (np->opt && np->opt->srcrt) {
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index c1589e2f1dc..0f526f8ea51 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -24,18 +24,92 @@
* This file is derived from net/ipv4/ah.c.
*/
+#include <crypto/hash.h>
#include <linux/module.h>
#include <net/ip.h>
#include <net/ah.h>
#include <linux/crypto.h>
#include <linux/pfkeyv2.h>
-#include <linux/spinlock.h>
#include <linux/string.h>
+#include <linux/scatterlist.h>
#include <net/icmp.h>
#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/xfrm.h>
+#define IPV6HDR_BASELEN 8
+
+/*
+ * Layout of the IPv6 header fields saved in the temporary buffer right
+ * after its first IPV6HDR_BASELEN bytes (see ah_tmp_ext()).  saddr is
+ * only part of the layout when Mobile IPv6 is configured; hdrs[] marks
+ * where the header bytes that follow the addresses are stored.
+ */
+struct tmp_ext {
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ struct in6_addr saddr;
+#endif
+ struct in6_addr daddr;
+ char hdrs[0];
+};
+
+/*
+ * AH view of skb->cb: the generic xfrm control block followed by a
+ * pointer to the temporary buffer, so that the asynchronous completion
+ * callbacks can find and free it.
+ */
+struct ah_skb_cb {
+ struct xfrm_skb_cb xfrm;
+ void *tmp;
+};
+
+#define AH_SKB_CB(__skb) ((struct ah_skb_cb *)&((__skb)->cb[0]))
+
+/*
+ * Allocate one atomic buffer large enough to hold, in order:
+ *   - @size bytes of caller data (saved header bytes),
+ *   - the ICV, with slack so it can be aligned for @ahash,
+ *   - the ahash request and its per-transform context,
+ *   - a scatterlist of @nfrags entries.
+ * Returns the kmalloc() result, i.e. NULL on allocation failure.
+ */
+static void *ah_alloc_tmp(struct crypto_ahash *ahash, int nfrags,
+			  unsigned int size)
+{
+	unsigned int total = size;
+
+	/* ICV plus the padding needed to bring it to the hash alignment */
+	total += crypto_ahash_digestsize(ahash);
+	total += crypto_ahash_alignmask(ahash) &
+		 ~(crypto_tfm_ctx_alignment() - 1);
+	total = ALIGN(total, crypto_tfm_ctx_alignment());
+
+	/* request header followed by the transform's request context */
+	total += sizeof(struct ahash_request) + crypto_ahash_reqsize(ahash);
+	total = ALIGN(total, __alignof__(struct scatterlist));
+
+	/* one scatterlist entry per skb fragment */
+	total += sizeof(struct scatterlist) * nfrags;
+
+	return kmalloc(total, GFP_ATOMIC);
+}
+
+/*
+ * Return the saved-address area of the temporary buffer, which starts
+ * IPV6HDR_BASELEN bytes past @base.
+ */
+static inline struct tmp_ext *ah_tmp_ext(void *base)
+{
+	u8 *start = base;
+
+	return (struct tmp_ext *)(start + IPV6HDR_BASELEN);
+}
+
+/* Return the address @offset bytes into the temporary buffer @tmp. */
+static inline u8 *ah_tmp_auth(u8 *tmp, unsigned int offset)
+{
+	return &tmp[offset];
+}
+
+/*
+ * Return where the ICV lives: @offset bytes past @tmp, rounded up to the
+ * alignment @ahash requires (alignmask + 1).
+ */
+static inline u8 *ah_tmp_icv(struct crypto_ahash *ahash, void *tmp,
+			     unsigned int offset)
+{
+	u8 *unaligned = (u8 *)tmp + offset;
+
+	return PTR_ALIGN(unaligned, crypto_ahash_alignmask(ahash) + 1);
+}
+
+/*
+ * Return the ahash request placed after the ICV in the temporary buffer,
+ * aligned to the crypto context alignment, and bind it to @ahash.
+ */
+static inline struct ahash_request *ah_tmp_req(struct crypto_ahash *ahash,
+					       u8 *icv)
+{
+	u8 *after_icv = icv + crypto_ahash_digestsize(ahash);
+	struct ahash_request *req;
+
+	req = (void *)PTR_ALIGN(after_icv, crypto_tfm_ctx_alignment());
+	ahash_request_set_tfm(req, ahash);
+
+	return req;
+}
+
+/*
+ * Return the scatterlist array that follows @req and its request
+ * context, rounded up to the alignment of struct scatterlist.
+ */
+static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash,
+					    struct ahash_request *req)
+{
+	unsigned long end_of_req = (unsigned long)(req + 1) +
+				   crypto_ahash_reqsize(ahash);
+
+	return (void *)ALIGN(end_of_req, __alignof__(struct scatterlist));
+}
+
static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
{
u8 *opt = (u8 *)opthdr;
@@ -218,24 +292,85 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
return 0;
}
+/*
+ * Completion callback for an asynchronous ICV computation on output.
+ *
+ * @base: async request; base->data is the skb registered by ah6_output()
+ * @err:  status of the digest operation, handed on unchanged to
+ *        xfrm_output_resume()
+ *
+ * Copies the computed ICV into the AH header, restores the IPv6 header
+ * bytes that were saved in the temporary buffer, frees that buffer and
+ * resumes the xfrm output path.
+ */
+static void ah6_output_done(struct crypto_async_request *base, int err)
+{
+	int extlen;
+	u8 *iph_base;
+	u8 *icv;
+	struct sk_buff *skb = base->data;
+	struct xfrm_state *x = skb_dst(skb)->xfrm;
+	struct ah_data *ahp = x->data;
+	struct ipv6hdr *top_iph = ipv6_hdr(skb);
+	struct ip_auth_hdr *ah = ip_auth_hdr(skb);
+	struct tmp_ext *iph_ext;
+
+	/* Same buffer layout computation as in ah6_output() */
+	extlen = skb_network_header_len(skb) - sizeof(struct ipv6hdr);
+	if (extlen)
+		extlen += sizeof(*iph_ext);
+
+	iph_base = AH_SKB_CB(skb)->tmp;
+	iph_ext = ah_tmp_ext(iph_base);
+	icv = ah_tmp_icv(ahp->ahash, iph_ext, extlen);
+
+	memcpy(ah->auth_data, icv, ahp->icv_trunc_len);
+	memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
+
+	if (extlen) {
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+		memcpy(&top_iph->saddr, iph_ext, extlen);
+#else
+		memcpy(&top_iph->daddr, iph_ext, extlen);
+#endif
+	}
+
+	kfree(AH_SKB_CB(skb)->tmp);
+	/*
+	 * Forward the digest status as-is.  The output path expects an
+	 * error code here, not the next-header value.
+	 */
+	xfrm_output_resume(skb, err);
+}
+
static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
{
int err;
+ int nfrags;
int extlen;
+ u8 *iph_base;
+ u8 *icv;
+ u8 nexthdr;
+ struct sk_buff *trailer;
+ struct crypto_ahash *ahash;
+ struct ahash_request *req;
+ struct scatterlist *sg;
struct ipv6hdr *top_iph;
struct ip_auth_hdr *ah;
struct ah_data *ahp;
- u8 nexthdr;
- char tmp_base[8];
- struct {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- struct in6_addr saddr;
-#endif
- struct in6_addr daddr;
- char hdrs[0];
- } *tmp_ext;
+ struct tmp_ext *iph_ext;
+
+ ahp = x->data;
+ ahash = ahp->ahash;
+
+ if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
+ goto out;
+ nfrags = err;
skb_push(skb, -skb_network_offset(skb));
+ extlen = skb_network_header_len(skb) - sizeof(struct ipv6hdr);
+ if (extlen)
+ extlen += sizeof(*iph_ext);
+
+ err = -ENOMEM;
+ iph_base = ah_alloc_tmp(ahash, nfrags, IPV6HDR_BASELEN + extlen);
+ if (!iph_base)
+ goto out;
+
+ iph_ext = ah_tmp_ext(iph_base);
+ icv = ah_tmp_icv(ahash, iph_ext, extlen);
+ req = ah_tmp_req(ahash, icv);
+ sg = ah_req_sg(ahash, req);
+
+ ah = ip_auth_hdr(skb);
+ memset(ah->auth_data, 0, ahp->icv_trunc_len);
+
top_iph = ipv6_hdr(skb);
top_iph->payload_len = htons(skb->len - sizeof(*top_iph));
@@ -245,31 +380,22 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
/* When there are no extension headers, we only need to save the first
* 8 bytes of the base IP header.
*/
- memcpy(tmp_base, top_iph, sizeof(tmp_base));
+ memcpy(iph_base, top_iph, IPV6HDR_BASELEN);
- tmp_ext = NULL;
- extlen = skb_transport_offset(skb) - sizeof(struct ipv6hdr);
if (extlen) {
- extlen += sizeof(*tmp_ext);
- tmp_ext = kmalloc(extlen, GFP_ATOMIC);
- if (!tmp_ext) {
- err = -ENOMEM;
- goto error;
- }
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- memcpy(tmp_ext, &top_iph->saddr, extlen);
+ memcpy(iph_ext, &top_iph->saddr, extlen);
#else
- memcpy(tmp_ext, &top_iph->daddr, extlen);
+ memcpy(iph_ext, &top_iph->daddr, extlen);
#endif
err = ipv6_clear_mutable_options(top_iph,
- extlen - sizeof(*tmp_ext) +
+ extlen - sizeof(*iph_ext) +
sizeof(*top_iph),
XFRM_POLICY_OUT);
if (err)
- goto error_free_iph;
+ goto out_free;
}
- ah = ip_auth_hdr(skb);
ah->nexthdr = nexthdr;
top_iph->priority = 0;
@@ -278,36 +404,80 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->flow_lbl[2] = 0;
top_iph->hop_limit = 0;
- ahp = x->data;
ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
ah->reserved = 0;
ah->spi = x->id.spi;
ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output);
- spin_lock_bh(&x->lock);
- err = ah_mac_digest(ahp, skb, ah->auth_data);
- memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len);
- spin_unlock_bh(&x->lock);
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg, 0, skb->len);
- if (err)
- goto error_free_iph;
+ ahash_request_set_crypt(req, sg, icv, skb->len);
+ ahash_request_set_callback(req, 0, ah6_output_done, skb);
+
+ AH_SKB_CB(skb)->tmp = iph_base;
- memcpy(top_iph, tmp_base, sizeof(tmp_base));
- if (tmp_ext) {
+ err = crypto_ahash_digest(req);
+ if (err) {
+ if (err == -EINPROGRESS)
+ goto out;
+
+ if (err == -EBUSY)
+ err = NET_XMIT_DROP;
+ goto out_free;
+ }
+
+ memcpy(ah->auth_data, icv, ahp->icv_trunc_len);
+ memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
+
+ if (extlen) {
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- memcpy(&top_iph->saddr, tmp_ext, extlen);
+ memcpy(&top_iph->saddr, iph_ext, extlen);
#else
- memcpy(&top_iph->daddr, tmp_ext, extlen);
+ memcpy(&top_iph->daddr, iph_ext, extlen);
#endif
-error_free_iph:
- kfree(tmp_ext);
}
-error:
+out_free:
+ kfree(iph_base);
+out:
return err;
}
+/*
+ * Completion callback for an asynchronous ICV computation on input.
+ *
+ * @base: async request; base->data is the skb registered by ah6_input()
+ * @err:  status of the digest operation
+ *
+ * On success, compares the computed ICV with the one received, restores
+ * the saved IPv6 header past the AH header and resumes xfrm input with
+ * the next-header value.  On any failure, resumes with a negative error.
+ * The temporary buffer is freed in both cases.
+ */
+static void ah6_input_done(struct crypto_async_request *base, int err)
+{
+	u8 *auth_data;
+	u8 *icv;
+	u8 *work_iph;
+	struct sk_buff *skb = base->data;
+	struct xfrm_state *x = xfrm_input_state(skb);
+	struct ah_data *ahp = x->data;
+	struct ip_auth_hdr *ah = ip_auth_hdr(skb);
+	int hdr_len = skb_network_header_len(skb);
+	int ah_hlen = (ah->hdrlen + 2) << 2;
+
+	/* The digest itself failed: there is no valid ICV to compare. */
+	if (err)
+		goto out;
+
+	work_iph = AH_SKB_CB(skb)->tmp;
+	auth_data = ah_tmp_auth(work_iph, hdr_len);
+	icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len);
+
+	err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0;
+	if (err)
+		goto out;
+
+	/*
+	 * Fetch nexthdr before restoring the IPv6 header: the memcpy()
+	 * below writes over the area that holds the AH header.
+	 */
+	err = ah->nexthdr;
+
+	skb->network_header += ah_hlen;
+	memcpy(skb_network_header(skb), work_iph, hdr_len);
+	__skb_pull(skb, ah_hlen + hdr_len);
+	skb_set_transport_header(skb, -hdr_len);
+out:
+	kfree(AH_SKB_CB(skb)->tmp);
+	xfrm_input_resume(skb, err);
+}
+
+
+
static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
{
/*
@@ -325,14 +495,21 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
* There is offset of AH before IPv6 header after the process.
*/
+ u8 *auth_data;
+ u8 *icv;
+ u8 *work_iph;
+ struct sk_buff *trailer;
+ struct crypto_ahash *ahash;
+ struct ahash_request *req;
+ struct scatterlist *sg;
struct ip_auth_hdr *ah;
struct ipv6hdr *ip6h;
struct ah_data *ahp;
- unsigned char *tmp_hdr = NULL;
u16 hdr_len;
u16 ah_hlen;
int nexthdr;
- int err = -EINVAL;
+ int nfrags;
+ int err = -ENOMEM;
if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
goto out;
@@ -345,9 +522,11 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
skb->ip_summed = CHECKSUM_NONE;
- hdr_len = skb->data - skb_network_header(skb);
+ hdr_len = skb_network_header_len(skb);
ah = (struct ip_auth_hdr *)skb->data;
ahp = x->data;
+ ahash = ahp->ahash;
+
nexthdr = ah->nexthdr;
ah_hlen = (ah->hdrlen + 2) << 2;
@@ -358,48 +537,67 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
if (!pskb_may_pull(skb, ah_hlen))
goto out;
- tmp_hdr = kmemdup(skb_network_header(skb), hdr_len, GFP_ATOMIC);
- if (!tmp_hdr)
- goto out;
ip6h = ipv6_hdr(skb);
+
+ skb_push(skb, hdr_len);
+
+ if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
+ goto out;
+ nfrags = err;
+
+ work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len);
+ if (!work_iph)
+ goto out;
+
+ auth_data = ah_tmp_auth(work_iph, hdr_len);
+ icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
+ req = ah_tmp_req(ahash, icv);
+ sg = ah_req_sg(ahash, req);
+
+ memcpy(work_iph, ip6h, hdr_len);
+ memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
+ memset(ah->auth_data, 0, ahp->icv_trunc_len);
+
if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN))
- goto free_out;
+ goto out_free;
+
ip6h->priority = 0;
ip6h->flow_lbl[0] = 0;
ip6h->flow_lbl[1] = 0;
ip6h->flow_lbl[2] = 0;
ip6h->hop_limit = 0;
- spin_lock(&x->lock);
- {
- u8 auth_data[MAX_AH_AUTH_LEN];
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg, 0, skb->len);
- memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
- memset(ah->auth_data, 0, ahp->icv_trunc_len);
- skb_push(skb, hdr_len);
- err = ah_mac_digest(ahp, skb, ah->auth_data);
- if (err)
- goto unlock;
- if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len))
- err = -EBADMSG;
+ ahash_request_set_crypt(req, sg, icv, skb->len);
+ ahash_request_set_callback(req, 0, ah6_input_done, skb);
+
+ AH_SKB_CB(skb)->tmp = work_iph;
+
+ err = crypto_ahash_digest(req);
+ if (err) {
+ if (err == -EINPROGRESS)
+ goto out;
+
+ if (err == -EBUSY)
+ err = NET_XMIT_DROP;
+ goto out_free;
}
-unlock:
- spin_unlock(&x->lock);
+ err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0;
if (err)
- goto free_out;
+ goto out_free;
skb->network_header += ah_hlen;
- memcpy(skb_network_header(skb), tmp_hdr, hdr_len);
+ memcpy(skb_network_header(skb), work_iph, hdr_len);
skb->transport_header = skb->network_header;
__skb_pull(skb, ah_hlen + hdr_len);
- kfree(tmp_hdr);
+ err = nexthdr;
- return nexthdr;
-
-free_out:
- kfree(tmp_hdr);
+out_free:
+ kfree(work_iph);
out:
return err;
}
@@ -430,7 +628,7 @@ static int ah6_init_state(struct xfrm_state *x)
{
struct ah_data *ahp = NULL;
struct xfrm_algo_desc *aalg_desc;
- struct crypto_hash *tfm;
+ struct crypto_ahash *ahash;
if (!x->aalg)
goto error;
@@ -442,12 +640,12 @@ static int ah6_init_state(struct xfrm_state *x)
if (ahp == NULL)
return -ENOMEM;
- tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(tfm))
+ ahash = crypto_alloc_ahash(x->aalg->alg_name, 0, 0);
+ if (IS_ERR(ahash))
goto error;
- ahp->tfm = tfm;
- if (crypto_hash_setkey(tfm, x->aalg->alg_key,
+ ahp->ahash = ahash;
+ if (crypto_ahash_setkey(ahash, x->aalg->alg_key,
(x->aalg->alg_key_len + 7) / 8))
goto error;
@@ -461,9 +659,9 @@ static int ah6_init_state(struct xfrm_state *x)
BUG_ON(!aalg_desc);
if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
- crypto_hash_digestsize(tfm)) {
+ crypto_ahash_digestsize(ahash)) {
printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
- x->aalg->alg_name, crypto_hash_digestsize(tfm),
+ x->aalg->alg_name, crypto_ahash_digestsize(ahash),
aalg_desc->uinfo.auth.icv_fullbits/8);
goto error;
}
@@ -473,10 +671,6 @@ static int ah6_init_state(struct xfrm_state *x)
BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
- ahp->work_icv = kmalloc(ahp->icv_full_len, GFP_KERNEL);
- if (!ahp->work_icv)
- goto error;
-
x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) +
ahp->icv_trunc_len);
switch (x->props.mode) {
@@ -495,8 +689,7 @@ static int ah6_init_state(struct xfrm_state *x)
error:
if (ahp) {
- kfree(ahp->work_icv);
- crypto_free_hash(ahp->tfm);
+ crypto_free_ahash(ahp->ahash);
kfree(ahp);
}
return -EINVAL;
@@ -509,8 +702,7 @@ static void ah6_destroy(struct xfrm_state *x)
if (!ahp)
return;
- kfree(ahp->work_icv);
- crypto_free_hash(ahp->tfm);
+ crypto_free_ahash(ahp->ahash);
kfree(ahp);
}
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 1ae58bec1de..2f00ca83f04 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -404,13 +404,13 @@ int ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
if (dev)
return ipv6_chk_acast_dev(dev, addr);
- read_lock(&dev_base_lock);
- for_each_netdev(net, dev)
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev)
if (ipv6_chk_acast_dev(dev, addr)) {
found = 1;
break;
}
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
return found;
}
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index e2bdc6d83a4..e6f9cdf780f 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -98,17 +98,15 @@ ipv4_connected:
if (err)
goto out;
- ipv6_addr_set(&np->daddr, 0, 0, htonl(0x0000ffff), inet->daddr);
+ ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr);
- if (ipv6_addr_any(&np->saddr)) {
- ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000ffff),
- inet->saddr);
- }
+ if (ipv6_addr_any(&np->saddr))
+ ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
+
+ if (ipv6_addr_any(&np->rcv_saddr))
+ ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
+ &np->rcv_saddr);
- if (ipv6_addr_any(&np->rcv_saddr)) {
- ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000ffff),
- inet->rcv_saddr);
- }
goto out;
}
@@ -136,7 +134,7 @@ ipv4_connected:
ipv6_addr_copy(&np->daddr, daddr);
np->flow_label = fl.fl6_flowlabel;
- inet->dport = usin->sin6_port;
+ inet->inet_dport = usin->sin6_port;
/*
* Check for a route to destination an obtain the
@@ -147,8 +145,9 @@ ipv4_connected:
ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
ipv6_addr_copy(&fl.fl6_src, &np->saddr);
fl.oif = sk->sk_bound_dev_if;
- fl.fl_ip_dport = inet->dport;
- fl.fl_ip_sport = inet->sport;
+ fl.mark = sk->sk_mark;
+ fl.fl_ip_dport = inet->inet_dport;
+ fl.fl_ip_sport = inet->inet_sport;
if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
fl.oif = np->mcast_oif;
@@ -190,7 +189,7 @@ ipv4_connected:
if (ipv6_addr_any(&np->rcv_saddr)) {
ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src);
- inet->rcv_saddr = LOOPBACK4_IPV6;
+ inet->inet_rcv_saddr = LOOPBACK4_IPV6;
}
ip6_dst_store(sk, dst,
@@ -329,9 +328,8 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
sin->sin6_scope_id = IP6CB(skb)->iif;
} else {
- ipv6_addr_set(&sin->sin6_addr, 0, 0,
- htonl(0xffff),
- *(__be32 *)(nh + serr->addr_offset));
+ ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset),
+ &sin->sin6_addr);
}
}
@@ -351,8 +349,8 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
} else {
struct inet_sock *inet = inet_sk(sk);
- ipv6_addr_set(&sin->sin6_addr, 0, 0,
- htonl(0xffff), ip_hdr(skb)->saddr);
+ ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
+ &sin->sin6_addr);
if (inet->cmsg_flags)
ip_cmsg_recv(msg, skb);
}
@@ -539,12 +537,17 @@ int datagram_send_ctl(struct net *net,
addr_type = __ipv6_addr_type(&src_info->ipi6_addr);
+ rcu_read_lock();
if (fl->oif) {
- dev = dev_get_by_index(net, fl->oif);
- if (!dev)
+ dev = dev_get_by_index_rcu(net, fl->oif);
+ if (!dev) {
+ rcu_read_unlock();
return -ENODEV;
- } else if (addr_type & IPV6_ADDR_LINKLOCAL)
+ }
+ } else if (addr_type & IPV6_ADDR_LINKLOCAL) {
+ rcu_read_unlock();
return -EINVAL;
+ }
if (addr_type != IPV6_ADDR_ANY) {
int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
@@ -555,8 +558,7 @@ int datagram_send_ctl(struct net *net,
ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr);
}
- if (dev)
- dev_put(dev);
+ rcu_read_unlock();
if (err)
goto exit_f;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index cc4797dd832..3516e6fe2e5 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -132,7 +132,7 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
sin6->sin6_family = AF_INET6;
ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
- sin6->sin6_port = inet_sk(sk)->dport;
+ sin6->sin6_port = inet_sk(sk)->inet_dport;
/* We do not store received flowlabel for TCP */
sin6->sin6_flowinfo = 0;
sin6->sin6_scope_id = 0;
@@ -168,8 +168,7 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
if (dst) {
struct rt6_info *rt = (struct rt6_info *)dst;
if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) {
- sk->sk_dst_cache = NULL;
- dst_release(dst);
+ __sk_dst_reset(sk);
dst = NULL;
}
}
@@ -194,8 +193,9 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
fl.fl6_flowlabel = np->flow_label;
IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
fl.oif = sk->sk_bound_dev_if;
- fl.fl_ip_sport = inet->sport;
- fl.fl_ip_dport = inet->dport;
+ fl.mark = sk->sk_mark;
+ fl.fl_ip_sport = inet->inet_sport;
+ fl.fl_ip_dport = inet->inet_dport;
security_sk_classify_flow(sk, &fl);
if (np->opt && np->opt->srcrt) {
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 1bcc3431859..00c6a3e6cdd 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -73,7 +73,7 @@ struct sock *__inet6_lookup_established(struct net *net,
* have wildcards anyways.
*/
unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
- unsigned int slot = hash & (hashinfo->ehash_size - 1);
+ unsigned int slot = hash & hashinfo->ehash_mask;
struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
@@ -125,7 +125,7 @@ static int inline compute_score(struct sock *sk, struct net *net,
{
int score = -1;
- if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum &&
+ if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
sk->sk_family == PF_INET6) {
const struct ipv6_pinfo *np = inet6_sk(sk);
@@ -214,10 +214,10 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
const struct in6_addr *daddr = &np->rcv_saddr;
const struct in6_addr *saddr = &np->daddr;
const int dif = sk->sk_bound_dev_if;
- const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
+ const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
struct net *net = sock_net(sk);
const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
- inet->dport);
+ inet->inet_dport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
struct sock *sk2;
@@ -248,8 +248,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
unique:
/* Must record num and sport now. Otherwise we will see
* in hash table socket with a funny identity. */
- inet->num = lport;
- inet->sport = htons(lport);
+ inet->inet_num = lport;
+ inet->inet_sport = htons(lport);
WARN_ON(!sk_unhashed(sk));
__sk_nulls_add_node_rcu(sk, &head->chain);
sk->sk_hash = hash;
@@ -279,7 +279,7 @@ static inline u32 inet6_sk_port_offset(const struct sock *sk)
const struct ipv6_pinfo *np = inet6_sk(sk);
return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32,
np->daddr.s6_addr32,
- inet->dport);
+ inet->inet_dport);
}
int inet6_hash_connect(struct inet_timewait_death_row *death_row,
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index c595bbe1ed9..1d614113a4b 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -88,8 +88,10 @@ struct ip6_tnl_net {
struct ip6_tnl **tnls[2];
};
-/* lock for the tunnel lists */
-static DEFINE_RWLOCK(ip6_tnl_lock);
+/*
+ * Locking : hash tables are protected by RCU and a spinlock
+ */
+static DEFINE_SPINLOCK(ip6_tnl_lock);
static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
{
@@ -130,6 +132,9 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
* else %NULL
**/
+#define for_each_ip6_tunnel_rcu(start) \
+ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
+
static struct ip6_tnl *
ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
{
@@ -138,13 +143,14 @@ ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
struct ip6_tnl *t;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
- for (t = ip6n->tnls_r_l[h0 ^ h1]; t; t = t->next) {
+ for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[h0 ^ h1]) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
ipv6_addr_equal(remote, &t->parms.raddr) &&
(t->dev->flags & IFF_UP))
return t;
}
- if ((t = ip6n->tnls_wc[0]) != NULL && (t->dev->flags & IFF_UP))
+ t = rcu_dereference(ip6n->tnls_wc[0]);
+ if (t && (t->dev->flags & IFF_UP))
return t;
return NULL;
@@ -186,10 +192,10 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
{
struct ip6_tnl **tp = ip6_tnl_bucket(ip6n, &t->parms);
+ spin_lock_bh(&ip6_tnl_lock);
t->next = *tp;
- write_lock_bh(&ip6_tnl_lock);
- *tp = t;
- write_unlock_bh(&ip6_tnl_lock);
+ rcu_assign_pointer(*tp, t);
+ spin_unlock_bh(&ip6_tnl_lock);
}
/**
@@ -204,9 +210,9 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
for (tp = ip6_tnl_bucket(ip6n, &t->parms); *tp; tp = &(*tp)->next) {
if (t == *tp) {
- write_lock_bh(&ip6_tnl_lock);
+ spin_lock_bh(&ip6_tnl_lock);
*tp = t->next;
- write_unlock_bh(&ip6_tnl_lock);
+ spin_unlock_bh(&ip6_tnl_lock);
break;
}
}
@@ -313,9 +319,9 @@ ip6_tnl_dev_uninit(struct net_device *dev)
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
if (dev == ip6n->fb_tnl_dev) {
- write_lock_bh(&ip6_tnl_lock);
+ spin_lock_bh(&ip6_tnl_lock);
ip6n->tnls_wc[0] = NULL;
- write_unlock_bh(&ip6_tnl_lock);
+ spin_unlock_bh(&ip6_tnl_lock);
} else {
ip6_tnl_unlink(ip6n, t);
}
@@ -409,7 +415,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
in trouble since we might need the source address for further
processing of the error. */
- read_lock(&ip6_tnl_lock);
+ rcu_read_lock();
if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr,
&ipv6h->saddr)) == NULL)
goto out;
@@ -482,7 +488,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
*msg = rel_msg;
out:
- read_unlock(&ip6_tnl_lock);
+ rcu_read_unlock();
return err;
}
@@ -652,6 +658,7 @@ static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
IP6_ECN_set_ce(ipv6_hdr(skb));
}
+/* called with rcu_read_lock() */
static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
{
struct ip6_tnl_parm *p = &t->parms;
@@ -662,15 +669,13 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
struct net_device *ldev = NULL;
if (p->link)
- ldev = dev_get_by_index(net, p->link);
+ ldev = dev_get_by_index_rcu(net, p->link);
if ((ipv6_addr_is_multicast(&p->laddr) ||
likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) &&
likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0)))
ret = 1;
- if (ldev)
- dev_put(ldev);
}
return ret;
}
@@ -693,23 +698,23 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
struct ip6_tnl *t;
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- read_lock(&ip6_tnl_lock);
+ rcu_read_lock();
if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
&ipv6h->daddr)) != NULL) {
if (t->parms.proto != ipproto && t->parms.proto != 0) {
- read_unlock(&ip6_tnl_lock);
+ rcu_read_unlock();
goto discard;
}
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- read_unlock(&ip6_tnl_lock);
+ rcu_read_unlock();
goto discard;
}
if (!ip6_tnl_rcv_ctl(t)) {
t->dev->stats.rx_dropped++;
- read_unlock(&ip6_tnl_lock);
+ rcu_read_unlock();
goto discard;
}
secpath_reset(skb);
@@ -727,10 +732,10 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
t->dev->stats.rx_packets++;
t->dev->stats.rx_bytes += skb->len;
netif_rx(skb);
- read_unlock(&ip6_tnl_lock);
+ rcu_read_unlock();
return 0;
}
- read_unlock(&ip6_tnl_lock);
+ rcu_read_unlock();
return 1;
discard:
@@ -798,8 +803,9 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
if (p->flags & IP6_TNL_F_CAP_XMIT) {
struct net_device *ldev = NULL;
+ rcu_read_lock();
if (p->link)
- ldev = dev_get_by_index(net, p->link);
+ ldev = dev_get_by_index_rcu(net, p->link);
if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0)))
printk(KERN_WARNING
@@ -813,8 +819,7 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
p->name);
else
ret = 1;
- if (ldev)
- dev_put(ldev);
+ rcu_read_unlock();
}
return ret;
}
@@ -1387,14 +1392,19 @@ static void ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
{
int h;
struct ip6_tnl *t;
+ LIST_HEAD(list);
for (h = 0; h < HASH_SIZE; h++) {
- while ((t = ip6n->tnls_r_l[h]) != NULL)
- unregister_netdevice(t->dev);
+ t = ip6n->tnls_r_l[h];
+ while (t != NULL) {
+ unregister_netdevice_queue(t->dev, &list);
+ t = t->next;
+ }
}
t = ip6n->tnls_wc[0];
- unregister_netdevice(t->dev);
+ unregister_netdevice_queue(t->dev, &list);
+ unregister_netdevice_many(&list);
}
static int ip6_tnl_init_net(struct net *net)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 716153941fc..52e0f74fdfe 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -477,7 +477,7 @@ failure:
* Delete a VIF entry
*/
-static int mif6_delete(struct net *net, int vifi)
+static int mif6_delete(struct net *net, int vifi, struct list_head *head)
{
struct mif_device *v;
struct net_device *dev;
@@ -519,7 +519,7 @@ static int mif6_delete(struct net *net, int vifi)
in6_dev->cnf.mc_forwarding--;
if (v->flags & MIFF_REGISTER)
- unregister_netdevice(dev);
+ unregister_netdevice_queue(dev, head);
dev_put(dev);
return 0;
@@ -976,6 +976,7 @@ static int ip6mr_device_event(struct notifier_block *this,
struct net *net = dev_net(dev);
struct mif_device *v;
int ct;
+ LIST_HEAD(list);
if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
@@ -983,8 +984,10 @@ static int ip6mr_device_event(struct notifier_block *this,
v = &net->ipv6.vif6_table[0];
for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) {
if (v->dev == dev)
- mif6_delete(net, ct);
+ mif6_delete(net, ct, &list);
}
+ unregister_netdevice_many(&list);
+
return NOTIFY_DONE;
}
@@ -1188,14 +1191,16 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
static void mroute_clean_tables(struct net *net)
{
int i;
+ LIST_HEAD(list);
/*
* Shut down all active vif entries
*/
for (i = 0; i < net->ipv6.maxvif; i++) {
if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC))
- mif6_delete(net, i);
+ mif6_delete(net, i, &list);
}
+ unregister_netdevice_many(&list);
/*
* Wipe the cache
@@ -1297,7 +1302,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
switch (optname) {
case MRT6_INIT:
if (sk->sk_type != SOCK_RAW ||
- inet_sk(sk)->num != IPPROTO_ICMPV6)
+ inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
return -EOPNOTSUPP;
if (optlen < sizeof(int))
return -EINVAL;
@@ -1325,7 +1330,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
return -EFAULT;
rtnl_lock();
- ret = mif6_delete(net, mifi);
+ ret = mif6_delete(net, mifi, NULL);
rtnl_unlock();
return ret;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 4f7aaf6996a..430454ee5ea 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -64,7 +64,7 @@ int ip6_ra_control(struct sock *sk, int sel)
struct ip6_ra_chain *ra, *new_ra, **rap;
/* RA packet may be delivered ONLY to IPPROTO_RAW socket */
- if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_RAW)
+ if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_RAW)
return -ENOPROTOOPT;
new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
@@ -106,7 +106,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
if (inet_sk(sk)->is_icsk) {
if (opt &&
!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
- inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
+ inet_sk(sk)->inet_daddr != LOOPBACK4_IPV6) {
struct inet_connection_sock *icsk = inet_csk(sk);
icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
@@ -234,7 +234,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
case IPV6_V6ONLY:
if (optlen < sizeof(int) ||
- inet_sk(sk)->num)
+ inet_sk(sk)->inet_num)
goto e_inval;
np->ipv6only = valbool;
retv = 0;
@@ -424,6 +424,7 @@ sticky_done:
fl.fl6_flowlabel = 0;
fl.oif = sk->sk_bound_dev_if;
+ fl.mark = sk->sk_mark;
if (optlen == 0)
goto update;
@@ -665,7 +666,7 @@ done:
case IPV6_MTU_DISCOVER:
if (optlen < sizeof(int))
goto e_inval;
- if (val<0 || val>3)
+ if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
goto e_inval;
np->pmtudisc = val;
retv = 0;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f74e4e2cdd0..3507cfe1e7a 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -598,6 +598,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
icmp6h.icmp6_solicited = solicited;
icmp6h.icmp6_override = override;
+ inc_opt |= ifp->idev->cnf.force_tllao;
__ndisc_send(dev, neigh, daddr, src_addr,
&icmp6h, solicited_addr,
inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4f24570b086..926ce8eeffa 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -72,7 +72,7 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
int is_multicast = ipv6_addr_is_multicast(loc_addr);
sk_for_each_from(sk, node)
- if (inet_sk(sk)->num == num) {
+ if (inet_sk(sk)->inet_num == num) {
struct ipv6_pinfo *np = inet6_sk(sk);
if (!net_eq(sock_net(sk), net))
@@ -249,7 +249,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* Raw sockets are IPv6 only */
if (addr_type == IPV6_ADDR_MAPPED)
- return(-EADDRNOTAVAIL);
+ return -EADDRNOTAVAIL;
lock_sock(sk);
@@ -257,6 +257,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (sk->sk_state != TCP_CLOSE)
goto out;
+ rcu_read_lock();
/* Check if the address belongs to the host. */
if (addr_type != IPV6_ADDR_ANY) {
struct net_device *dev = NULL;
@@ -272,13 +273,13 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* Binding to link-local address requires an interface */
if (!sk->sk_bound_dev_if)
- goto out;
+ goto out_unlock;
- dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if);
- if (!dev) {
- err = -ENODEV;
- goto out;
- }
+ err = -ENODEV;
+ dev = dev_get_by_index_rcu(sock_net(sk),
+ sk->sk_bound_dev_if);
+ if (!dev)
+ goto out_unlock;
}
/* ipv4 addr of the socket is invalid. Only the
@@ -289,20 +290,18 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
err = -EADDRNOTAVAIL;
if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr,
dev, 0)) {
- if (dev)
- dev_put(dev);
- goto out;
+ goto out_unlock;
}
}
- if (dev)
- dev_put(dev);
}
- inet->rcv_saddr = inet->saddr = v4addr;
+ inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
if (!(addr_type & IPV6_ADDR_MULTICAST))
ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
err = 0;
+out_unlock:
+ rcu_read_unlock();
out:
release_sock(sk);
return err;
@@ -381,8 +380,7 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
}
/* Charge it to the socket. */
- if (sock_queue_rcv_skb(sk,skb)<0) {
- atomic_inc(&sk->sk_drops);
+ if (sock_queue_rcv_skb(sk, skb) < 0) {
kfree_skb(skb);
return NET_RX_DROP;
}
@@ -416,14 +414,14 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
skb_network_header_len(skb));
if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr,
- skb->len, inet->num, skb->csum))
+ skb->len, inet->inet_num, skb->csum))
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
if (!skb_csum_unnecessary(skb))
skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr,
skb->len,
- inet->num, 0));
+ inet->inet_num, 0));
if (inet->hdrincl) {
if (skb_checksum_complete(skb)) {
@@ -497,7 +495,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
sin6->sin6_scope_id = IP6CB(skb)->iif;
}
- sock_recv_timestamp(msg, sk, skb);
+ sock_recv_ts_and_drops(msg, sk, skb);
if (np->rxopt.all)
datagram_recv_ctl(sk, msg, skb);
@@ -518,7 +516,6 @@ csum_copy_err:
as some normal condition.
*/
err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
- atomic_inc(&sk->sk_drops);
goto out;
}
@@ -766,8 +763,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
proto = ntohs(sin6->sin6_port);
if (!proto)
- proto = inet->num;
- else if (proto != inet->num)
+ proto = inet->inet_num;
+ else if (proto != inet->inet_num)
return(-EINVAL);
if (proto > 255)
@@ -800,7 +797,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
- proto = inet->num;
+ proto = inet->inet_num;
daddr = &np->daddr;
fl.fl6_flowlabel = np->flow_label;
}
@@ -967,7 +964,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case IPV6_CHECKSUM:
- if (inet_sk(sk)->num == IPPROTO_ICMPV6 &&
+ if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 &&
level == IPPROTO_IPV6) {
/*
* RFC3542 tells that IPV6_CHECKSUM socket
@@ -1007,7 +1004,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
break;
case SOL_ICMPV6:
- if (inet_sk(sk)->num != IPPROTO_ICMPV6)
+ if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
return -EOPNOTSUPP;
return rawv6_seticmpfilter(sk, level, optname, optval,
optlen);
@@ -1030,7 +1027,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
case SOL_RAW:
break;
case SOL_ICMPV6:
- if (inet_sk(sk)->num != IPPROTO_ICMPV6)
+ if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
return -EOPNOTSUPP;
return rawv6_seticmpfilter(sk, level, optname, optval, optlen);
case SOL_IPV6:
@@ -1087,7 +1084,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
break;
case SOL_ICMPV6:
- if (inet_sk(sk)->num != IPPROTO_ICMPV6)
+ if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
return -EOPNOTSUPP;
return rawv6_geticmpfilter(sk, level, optname, optval,
optlen);
@@ -1110,7 +1107,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
case SOL_RAW:
break;
case SOL_ICMPV6:
- if (inet_sk(sk)->num != IPPROTO_ICMPV6)
+ if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
return -EOPNOTSUPP;
return rawv6_geticmpfilter(sk, level, optname, optval, optlen);
case SOL_IPV6:
@@ -1157,7 +1154,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
static void rawv6_close(struct sock *sk, long timeout)
{
- if (inet_sk(sk)->num == IPPROTO_RAW)
+ if (inet_sk(sk)->inet_num == IPPROTO_RAW)
ip6_ra_control(sk, -1);
ip6mr_sk_done(sk);
sk_common_release(sk);
@@ -1176,7 +1173,7 @@ static int rawv6_init_sk(struct sock *sk)
{
struct raw6_sock *rp = raw6_sk(sk);
- switch (inet_sk(sk)->num) {
+ switch (inet_sk(sk)->inet_num) {
case IPPROTO_ICMPV6:
rp->checksum = 1;
rp->offset = 2;
@@ -1226,7 +1223,7 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
dest = &np->daddr;
src = &np->rcv_saddr;
destp = 0;
- srcp = inet_sk(sp)->num;
+ srcp = inet_sk(sp)->inet_num;
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
"%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
@@ -1338,7 +1335,6 @@ static struct inet_protosw rawv6_protosw = {
.protocol = IPPROTO_IP, /* wild card */
.prot = &rawv6_prot,
.ops = &inet6_sockraw_ops,
- .capability = CAP_NET_RAW,
.no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_REUSE,
};
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index da5bd0ed83d..dce699fb267 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -208,18 +208,17 @@ static void ip6_frag_expire(unsigned long data)
fq_kill(fq);
net = container_of(fq->q.net, struct net, ipv6.frags);
- dev = dev_get_by_index(net, fq->iif);
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, fq->iif);
if (!dev)
- goto out;
+ goto out_rcu_unlock;
- rcu_read_lock();
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
- rcu_read_unlock();
/* Don't send error if the first segment did not arrive. */
if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments)
- goto out;
+ goto out_rcu_unlock;
/*
But use as source device on which LAST ARRIVED
@@ -228,9 +227,9 @@ static void ip6_frag_expire(unsigned long data)
*/
fq->q.fragments->dev = dev;
icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev);
+out_rcu_unlock:
+ rcu_read_unlock();
out:
- if (dev)
- dev_put(dev);
spin_unlock(&fq->q.lock);
fq_put(fq);
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d6fe7646a8f..df9432a46ff 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1471,9 +1471,10 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
},
},
},
- .gateway = *gateway,
};
+ ipv6_addr_copy(&rdfl.gateway, gateway);
+
if (rt6_need_strict(dest))
flags |= RT6_LOOKUP_F_IFACE;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index dbd19a78ca7..2362a3397e9 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -77,8 +77,17 @@ struct sit_net {
struct net_device *fb_tunnel_dev;
};
-static DEFINE_RWLOCK(ipip6_lock);
+/*
+ * Locking : hash tables are protected by RCU and a spinlock
+ */
+static DEFINE_SPINLOCK(ipip6_lock);
+#define for_each_ip_tunnel_rcu(start) \
+ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
+
+/*
+ * Must be invoked with rcu_read_lock
+ */
static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
struct net_device *dev, __be32 remote, __be32 local)
{
@@ -87,26 +96,26 @@ static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
struct ip_tunnel *t;
struct sit_net *sitn = net_generic(net, sit_net_id);
- for (t = sitn->tunnels_r_l[h0^h1]; t; t = t->next) {
+ for_each_ip_tunnel_rcu(sitn->tunnels_r_l[h0 ^ h1]) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
(!dev || !t->parms.link || dev->iflink == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
- for (t = sitn->tunnels_r[h0]; t; t = t->next) {
+ for_each_ip_tunnel_rcu(sitn->tunnels_r[h0]) {
if (remote == t->parms.iph.daddr &&
(!dev || !t->parms.link || dev->iflink == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
- for (t = sitn->tunnels_l[h1]; t; t = t->next) {
+ for_each_ip_tunnel_rcu(sitn->tunnels_l[h1]) {
if (local == t->parms.iph.saddr &&
(!dev || !t->parms.link || dev->iflink == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
- t = sitn->tunnels_wc[0];
+ t = rcu_dereference(sitn->tunnels_wc[0]);
if ((t != NULL) && (t->dev->flags & IFF_UP))
return t;
return NULL;
@@ -143,9 +152,9 @@ static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
for (tp = ipip6_bucket(sitn, t); *tp; tp = &(*tp)->next) {
if (t == *tp) {
- write_lock_bh(&ipip6_lock);
+ spin_lock_bh(&ipip6_lock);
*tp = t->next;
- write_unlock_bh(&ipip6_lock);
+ spin_unlock_bh(&ipip6_lock);
break;
}
}
@@ -155,10 +164,27 @@ static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
{
struct ip_tunnel **tp = ipip6_bucket(sitn, t);
+ spin_lock_bh(&ipip6_lock);
t->next = *tp;
- write_lock_bh(&ipip6_lock);
- *tp = t;
- write_unlock_bh(&ipip6_lock);
+ rcu_assign_pointer(*tp, t);
+ spin_unlock_bh(&ipip6_lock);
+}
+
+static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
+{
+#ifdef CONFIG_IPV6_SIT_6RD
+ struct ip_tunnel *t = netdev_priv(dev);
+
+ if (t->dev == sitn->fb_tunnel_dev) {
+ ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
+ t->ip6rd.relay_prefix = 0;
+ t->ip6rd.prefixlen = 16;
+ t->ip6rd.relay_prefixlen = 0;
+ } else {
+ struct ip_tunnel *t0 = netdev_priv(sitn->fb_tunnel_dev);
+ memcpy(&t->ip6rd, &t0->ip6rd, sizeof(t->ip6rd));
+ }
+#endif
}
static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
@@ -204,6 +230,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
nt->parms = *parms;
ipip6_tunnel_init(dev);
+ ipip6_tunnel_clone_6rd(dev, sitn);
if (parms->i_flags & SIT_ISATAP)
dev->priv_flags |= IFF_ISATAP;
@@ -222,15 +249,22 @@ failed:
return NULL;
}
+static DEFINE_SPINLOCK(ipip6_prl_lock);
+
+#define for_each_prl_rcu(start) \
+ for (prl = rcu_dereference(start); \
+ prl; \
+ prl = rcu_dereference(prl->next))
+
static struct ip_tunnel_prl_entry *
__ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
{
- struct ip_tunnel_prl_entry *p = (struct ip_tunnel_prl_entry *)NULL;
+ struct ip_tunnel_prl_entry *prl;
- for (p = t->prl; p; p = p->next)
- if (p->addr == addr)
+ for_each_prl_rcu(t->prl)
+ if (prl->addr == addr)
break;
- return p;
+ return prl;
}
@@ -255,7 +289,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
kcalloc(cmax, sizeof(*kp), GFP_KERNEL) :
NULL;
- read_lock(&ipip6_lock);
+ rcu_read_lock();
ca = t->prl_count < cmax ? t->prl_count : cmax;
@@ -273,7 +307,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
}
c = 0;
- for (prl = t->prl; prl; prl = prl->next) {
+ for_each_prl_rcu(t->prl) {
if (c >= cmax)
break;
if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr)
@@ -285,7 +319,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
break;
}
out:
- read_unlock(&ipip6_lock);
+ rcu_read_unlock();
len = sizeof(*kp) * c;
ret = 0;
@@ -306,12 +340,14 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
if (a->addr == htonl(INADDR_ANY))
return -EINVAL;
- write_lock(&ipip6_lock);
+ spin_lock(&ipip6_prl_lock);
for (p = t->prl; p; p = p->next) {
if (p->addr == a->addr) {
- if (chg)
- goto update;
+ if (chg) {
+ p->flags = a->flags;
+ goto out;
+ }
err = -EEXIST;
goto out;
}
@@ -328,46 +364,63 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
goto out;
}
+ INIT_RCU_HEAD(&p->rcu_head);
p->next = t->prl;
- t->prl = p;
- t->prl_count++;
-update:
p->addr = a->addr;
p->flags = a->flags;
+ t->prl_count++;
+ rcu_assign_pointer(t->prl, p);
out:
- write_unlock(&ipip6_lock);
+ spin_unlock(&ipip6_prl_lock);
return err;
}
+static void prl_entry_destroy_rcu(struct rcu_head *head)
+{
+ kfree(container_of(head, struct ip_tunnel_prl_entry, rcu_head));
+}
+
+static void prl_list_destroy_rcu(struct rcu_head *head)
+{
+ struct ip_tunnel_prl_entry *p, *n;
+
+ p = container_of(head, struct ip_tunnel_prl_entry, rcu_head);
+ do {
+ n = p->next;
+ kfree(p);
+ p = n;
+ } while (p);
+}
+
static int
ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
{
struct ip_tunnel_prl_entry *x, **p;
int err = 0;
- write_lock(&ipip6_lock);
+ spin_lock(&ipip6_prl_lock);
if (a && a->addr != htonl(INADDR_ANY)) {
for (p = &t->prl; *p; p = &(*p)->next) {
if ((*p)->addr == a->addr) {
x = *p;
*p = x->next;
- kfree(x);
+ call_rcu(&x->rcu_head, prl_entry_destroy_rcu);
t->prl_count--;
goto out;
}
}
err = -ENXIO;
} else {
- while (t->prl) {
+ if (t->prl) {
+ t->prl_count = 0;
x = t->prl;
- t->prl = t->prl->next;
- kfree(x);
- t->prl_count--;
+ call_rcu(&x->rcu_head, prl_list_destroy_rcu);
+ t->prl = NULL;
}
}
out:
- write_unlock(&ipip6_lock);
+ spin_unlock(&ipip6_prl_lock);
return err;
}
@@ -377,7 +430,7 @@ isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t)
struct ip_tunnel_prl_entry *p;
int ok = 1;
- read_lock(&ipip6_lock);
+ rcu_read_lock();
p = __ipip6_tunnel_locate_prl(t, iph->saddr);
if (p) {
if (p->flags & PRL_DEFAULT)
@@ -393,7 +446,7 @@ isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t)
else
ok = 0;
}
- read_unlock(&ipip6_lock);
+ rcu_read_unlock();
return ok;
}
@@ -403,9 +456,9 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
struct sit_net *sitn = net_generic(net, sit_net_id);
if (dev == sitn->fb_tunnel_dev) {
- write_lock_bh(&ipip6_lock);
+ spin_lock_bh(&ipip6_lock);
sitn->tunnels_wc[0] = NULL;
- write_unlock_bh(&ipip6_lock);
+ spin_unlock_bh(&ipip6_lock);
dev_put(dev);
} else {
ipip6_tunnel_unlink(sitn, netdev_priv(dev));
@@ -458,7 +511,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
err = -ENOENT;
- read_lock(&ipip6_lock);
+ rcu_read_lock();
t = ipip6_tunnel_lookup(dev_net(skb->dev),
skb->dev,
iph->daddr,
@@ -476,7 +529,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
t->err_count = 1;
t->err_time = jiffies;
out:
- read_unlock(&ipip6_lock);
+ rcu_read_unlock();
return err;
}
@@ -496,7 +549,7 @@ static int ipip6_rcv(struct sk_buff *skb)
iph = ip_hdr(skb);
- read_lock(&ipip6_lock);
+ rcu_read_lock();
tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
iph->saddr, iph->daddr);
if (tunnel != NULL) {
@@ -510,7 +563,7 @@ static int ipip6_rcv(struct sk_buff *skb)
if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
!isatap_chksrc(skb, iph, tunnel)) {
tunnel->dev->stats.rx_errors++;
- read_unlock(&ipip6_lock);
+ rcu_read_unlock();
kfree_skb(skb);
return 0;
}
@@ -521,28 +574,52 @@ static int ipip6_rcv(struct sk_buff *skb)
nf_reset(skb);
ipip6_ecn_decapsulate(iph, skb);
netif_rx(skb);
- read_unlock(&ipip6_lock);
+ rcu_read_unlock();
return 0;
}
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
- read_unlock(&ipip6_lock);
+ rcu_read_unlock();
out:
kfree_skb(skb);
return 0;
}
-/* Returns the embedded IPv4 address if the IPv6 address
- comes from 6to4 (RFC 3056) addr space */
-
-static inline __be32 try_6to4(struct in6_addr *v6dst)
+/*
+ * Returns the embedded IPv4 address if the IPv6 address
+ * comes from 6rd / 6to4 (RFC 3056) addr space.
+ */
+static inline
+__be32 try_6rd(struct in6_addr *v6dst, struct ip_tunnel *tunnel)
{
__be32 dst = 0;
+#ifdef CONFIG_IPV6_SIT_6RD
+ if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix,
+ tunnel->ip6rd.prefixlen)) {
+ unsigned pbw0, pbi0;
+ int pbi1;
+ u32 d;
+
+ pbw0 = tunnel->ip6rd.prefixlen >> 5;
+ pbi0 = tunnel->ip6rd.prefixlen & 0x1f;
+
+ d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >>
+ tunnel->ip6rd.relay_prefixlen;
+
+ pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen;
+ if (pbi1 > 0)
+ d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >>
+ (32 - pbi1);
+
+ dst = tunnel->ip6rd.relay_prefix | htonl(d);
+ }
+#else
if (v6dst->s6_addr16[0] == htons(0x2002)) {
/* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
memcpy(&dst, &v6dst->s6_addr16[1], 4);
}
+#endif
return dst;
}
@@ -555,7 +632,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct net_device_stats *stats = &tunnel->dev->stats;
+ struct net_device_stats *stats = &dev->stats;
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
struct iphdr *tiph = &tunnel->parms.iph;
struct ipv6hdr *iph6 = ipv6_hdr(skb);
u8 tos = tunnel->parms.iph.tos;
@@ -595,7 +673,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
}
if (!dst)
- dst = try_6to4(&iph6->daddr);
+ dst = try_6rd(&iph6->daddr, tunnel);
if (!dst) {
struct neighbour *neigh = NULL;
@@ -688,7 +766,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
if (!new_skb) {
ip_rt_put(rt);
- stats->tx_dropped++;
+ txq->tx_dropped++;
dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
@@ -785,9 +863,15 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
struct ip_tunnel *t;
struct net *net = dev_net(dev);
struct sit_net *sitn = net_generic(net, sit_net_id);
+#ifdef CONFIG_IPV6_SIT_6RD
+ struct ip_tunnel_6rd ip6rd;
+#endif
switch (cmd) {
case SIOCGETTUNNEL:
+#ifdef CONFIG_IPV6_SIT_6RD
+ case SIOCGET6RD:
+#endif
t = NULL;
if (dev == sitn->fb_tunnel_dev) {
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
@@ -798,9 +882,25 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
}
if (t == NULL)
t = netdev_priv(dev);
- memcpy(&p, &t->parms, sizeof(p));
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
- err = -EFAULT;
+
+ err = -EFAULT;
+ if (cmd == SIOCGETTUNNEL) {
+ memcpy(&p, &t->parms, sizeof(p));
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p,
+ sizeof(p)))
+ goto done;
+#ifdef CONFIG_IPV6_SIT_6RD
+ } else {
+ ipv6_addr_copy(&ip6rd.prefix, &t->ip6rd.prefix);
+ ip6rd.relay_prefix = t->ip6rd.relay_prefix;
+ ip6rd.prefixlen = t->ip6rd.prefixlen;
+ ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd,
+ sizeof(ip6rd)))
+ goto done;
+#endif
+ }
+ err = 0;
break;
case SIOCADDTUNNEL:
@@ -921,6 +1021,54 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
netdev_state_change(dev);
break;
+#ifdef CONFIG_IPV6_SIT_6RD
+ case SIOCADD6RD:
+ case SIOCCHG6RD:
+ case SIOCDEL6RD:
+ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+ goto done;
+
+ err = -EFAULT;
+ if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data,
+ sizeof(ip6rd)))
+ goto done;
+
+ t = netdev_priv(dev);
+
+ if (cmd != SIOCDEL6RD) {
+ struct in6_addr prefix;
+ __be32 relay_prefix;
+
+ err = -EINVAL;
+ if (ip6rd.relay_prefixlen > 32 ||
+ ip6rd.prefixlen + (32 - ip6rd.relay_prefixlen) > 64)
+ goto done;
+
+ ipv6_addr_prefix(&prefix, &ip6rd.prefix,
+ ip6rd.prefixlen);
+ if (!ipv6_addr_equal(&prefix, &ip6rd.prefix))
+ goto done;
+ if (ip6rd.relay_prefixlen)
+ relay_prefix = ip6rd.relay_prefix &
+ htonl(0xffffffffUL <<
+ (32 - ip6rd.relay_prefixlen));
+ else
+ relay_prefix = 0;
+ if (relay_prefix != ip6rd.relay_prefix)
+ goto done;
+
+ ipv6_addr_copy(&t->ip6rd.prefix, &prefix);
+ t->ip6rd.relay_prefix = relay_prefix;
+ t->ip6rd.prefixlen = ip6rd.prefixlen;
+ t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen;
+ } else
+ ipip6_tunnel_clone_6rd(dev, sitn);
+
+ err = 0;
+ break;
+#endif
+
default:
err = -EINVAL;
}
@@ -997,16 +1145,19 @@ static struct xfrm_tunnel sit_handler = {
.priority = 1,
};
-static void sit_destroy_tunnels(struct sit_net *sitn)
+static void sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head)
{
int prio;
for (prio = 1; prio < 4; prio++) {
int h;
for (h = 0; h < HASH_SIZE; h++) {
- struct ip_tunnel *t;
- while ((t = sitn->tunnels[prio][h]) != NULL)
- unregister_netdevice(t->dev);
+ struct ip_tunnel *t = sitn->tunnels[prio][h];
+
+ while (t != NULL) {
+ unregister_netdevice_queue(t->dev, head);
+ t = t->next;
+ }
}
}
}
@@ -1039,6 +1190,7 @@ static int sit_init_net(struct net *net)
dev_net_set(sitn->fb_tunnel_dev, net);
ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
+ ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
if ((err = register_netdev(sitn->fb_tunnel_dev)))
goto err_reg_dev;
@@ -1059,11 +1211,13 @@ err_alloc:
static void sit_exit_net(struct net *net)
{
struct sit_net *sitn;
+ LIST_HEAD(list);
sitn = net_generic(net, sit_net_id);
rtnl_lock();
- sit_destroy_tunnels(sitn);
- unregister_netdevice(sitn->fb_tunnel_dev);
+ sit_destroy_tunnels(sitn, &list);
+ unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
+ unregister_netdevice_many(&list);
rtnl_unlock();
kfree(sitn);
}
@@ -1078,6 +1232,7 @@ static void __exit sit_cleanup(void)
xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
unregister_pernet_gen_device(sit_net_id, &sit_net_ops);
+ rcu_barrier(); /* Wait for completion of call_rcu()'s */
}
static int __init sit_init(void)
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 6b6ae913b5d..612fc53e0bb 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -184,13 +184,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
- /* check for timestamp cookie support */
- memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(skb, &tcp_opt, 0);
-
- if (tcp_opt.saw_tstamp)
- cookie_check_timestamp(&tcp_opt);
-
ret = NULL;
req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
if (!req)
@@ -224,12 +217,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
req->expires = 0UL;
req->retrans = 0;
ireq->ecn_ok = 0;
- ireq->snd_wscale = tcp_opt.snd_wscale;
- ireq->rcv_wscale = tcp_opt.rcv_wscale;
- ireq->sack_ok = tcp_opt.sack_ok;
- ireq->wscale_ok = tcp_opt.wscale_ok;
- ireq->tstamp_ok = tcp_opt.saw_tstamp;
- req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
@@ -252,8 +239,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
}
ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
fl.oif = sk->sk_bound_dev_if;
+ fl.mark = sk->sk_mark;
fl.fl_ip_dport = inet_rsk(req)->rmt_port;
- fl.fl_ip_sport = inet_sk(sk)->sport;
+ fl.fl_ip_sport = inet_sk(sk)->inet_sport;
security_req_classify_flow(req, &fl);
if (ip6_dst_lookup(sk, &dst, &fl))
goto out_free;
@@ -264,6 +252,21 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out_free;
}
+ /* check for timestamp cookie support */
+ memset(&tcp_opt, 0, sizeof(tcp_opt));
+ tcp_parse_options(skb, &tcp_opt, 0, dst);
+
+ if (tcp_opt.saw_tstamp)
+ cookie_check_timestamp(&tcp_opt);
+
+ req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
+
+ ireq->snd_wscale = tcp_opt.snd_wscale;
+ ireq->rcv_wscale = tcp_opt.rcv_wscale;
+ ireq->sack_ok = tcp_opt.sack_ok;
+ ireq->wscale_ok = tcp_opt.wscale_ok;
+ ireq->tstamp_ok = tcp_opt.saw_tstamp;
+
req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
tcp_select_initial_window(tcp_full_space(sk), req->mss,
&req->rcv_wnd, &req->window_clamp,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 21d100b68b1..696a22f034e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -226,10 +226,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
#endif
goto failure;
} else {
- ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
- inet->saddr);
- ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
- inet->rcv_saddr);
+ ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
+ ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
+ &np->rcv_saddr);
}
return err;
@@ -243,8 +242,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
ipv6_addr_copy(&fl.fl6_src,
(saddr ? saddr : &np->saddr));
fl.oif = sk->sk_bound_dev_if;
+ fl.mark = sk->sk_mark;
fl.fl_ip_dport = usin->sin6_port;
- fl.fl_ip_sport = inet->sport;
+ fl.fl_ip_sport = inet->inet_sport;
if (np->opt && np->opt->srcrt) {
struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
@@ -276,7 +276,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
/* set the source address */
ipv6_addr_copy(&np->saddr, saddr);
- inet->rcv_saddr = LOOPBACK4_IPV6;
+ inet->inet_rcv_saddr = LOOPBACK4_IPV6;
sk->sk_gso_type = SKB_GSO_TCPV6;
__ip6_dst_store(sk, dst, NULL, NULL);
@@ -288,7 +288,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
- inet->dport = usin->sin6_port;
+ inet->inet_dport = usin->sin6_port;
tcp_set_state(sk, TCP_SYN_SENT);
err = inet6_hash_connect(&tcp_death_row, sk);
@@ -298,8 +298,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (!tp->write_seq)
tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
np->daddr.s6_addr32,
- inet->sport,
- inet->dport);
+ inet->inet_sport,
+ inet->inet_dport);
err = tcp_connect(sk);
if (err)
@@ -311,7 +311,7 @@ late_failure:
tcp_set_state(sk, TCP_CLOSE);
__sk_dst_reset(sk);
failure:
- inet->dport = 0;
+ inet->inet_dport = 0;
sk->sk_route_caps = 0;
return err;
}
@@ -383,8 +383,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
ipv6_addr_copy(&fl.fl6_src, &np->saddr);
fl.oif = sk->sk_bound_dev_if;
- fl.fl_ip_dport = inet->dport;
- fl.fl_ip_sport = inet->sport;
+ fl.mark = sk->sk_mark;
+ fl.fl_ip_dport = inet->inet_dport;
+ fl.fl_ip_sport = inet->inet_sport;
security_skb_classify_flow(skb, &fl);
if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
@@ -477,6 +478,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
fl.fl6_flowlabel = 0;
fl.oif = treq->iif;
+ fl.mark = sk->sk_mark;
fl.fl_ip_dport = inet_rsk(req)->rmt_port;
fl.fl_ip_sport = inet_rsk(req)->loc_port;
security_req_classify_flow(req, &fl);
@@ -1165,6 +1167,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
struct request_sock *req = NULL;
__u32 isn = TCP_SKB_CB(skb)->when;
+ struct dst_entry *dst = __sk_dst_get(sk);
#ifdef CONFIG_SYN_COOKIES
int want_cookie = 0;
#else
@@ -1203,7 +1206,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, 0);
+ tcp_parse_options(skb, &tmp_opt, 0, dst);
if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
@@ -1290,11 +1293,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
- ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
- newinet->daddr);
+ ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
- ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
- newinet->saddr);
+ ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
@@ -1345,6 +1346,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
}
ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
fl.oif = sk->sk_bound_dev_if;
+ fl.mark = sk->sk_mark;
fl.fl_ip_dport = inet_rsk(req)->rmt_port;
fl.fl_ip_sport = inet_rsk(req)->loc_port;
security_req_classify_flow(req, &fl);
@@ -1431,7 +1433,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
tcp_initialize_rcv_mss(newsk);
- newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
+ newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
+ newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
#ifdef CONFIG_TCP_MD5SIG
/* Copy over the MD5 key from the original socket */
@@ -1931,8 +1934,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
dest = &np->daddr;
src = &np->rcv_saddr;
- destp = ntohs(inet->dport);
- srcp = ntohs(inet->sport);
+ destp = ntohs(inet->inet_dport);
+ srcp = ntohs(inet->inet_sport);
if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
timer_active = 1;
@@ -2109,7 +2112,6 @@ static struct inet_protosw tcpv6_protosw = {
.protocol = IPPROTO_TCP,
.prot = &tcpv6_prot,
.ops = &inet6_stream_ops,
- .capability = -1,
.no_check = 0,
.flags = INET_PROTOSW_PERMANENT |
INET_PROTOSW_ICSK,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index cf538ed5ef6..2915e1dad72 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -53,7 +53,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
{
const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
+ __be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr;
__be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
int sk_ipv6only = ipv6_only_sock(sk);
int sk2_ipv6only = inet_v6_ipv6only(sk2);
@@ -63,8 +63,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
/* if both are mapped, treat as IPv4 */
if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
return (!sk2_ipv6only &&
- (!sk_rcv_saddr || !sk2_rcv_saddr ||
- sk_rcv_saddr == sk2_rcv_saddr));
+ (!sk1_rcv_saddr || !sk2_rcv_saddr ||
+ sk1_rcv_saddr == sk2_rcv_saddr));
if (addr_type2 == IPV6_ADDR_ANY &&
!(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
@@ -81,8 +81,30 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
return 0;
}
+static unsigned int udp6_portaddr_hash(struct net *net,
+ const struct in6_addr *addr6,
+ unsigned int port)
+{ /* builds the (address, port) hash value: jhash of addr6 seeded per-net, XORed with port */
+ unsigned int hash, mix = net_hash_mix(net); /* seed taken from the net namespace */
+
+ if (ipv6_addr_any(addr6))
+ hash = jhash_1word(0, mix); /* unspecified address: hash the single word 0 */
+ else if (ipv6_addr_type(addr6) == IPV6_ADDR_MAPPED)
+ hash = jhash_1word(addr6->s6_addr32[3], mix); /* v4-mapped: hash only the last 32-bit word */
+ else
+ hash = jhash2(addr6->s6_addr32, 4, mix); /* any other address: hash all four 32-bit words */
+
+ return hash ^ port; /* callers in this file pass 0 (udp_v6_get_port) or ntohs(dport) (__udp6_lib_lookup) */
+}
+
+
int udp_v6_get_port(struct sock *sk, unsigned short snum)
{
+ /* precompute partial secondary hash: rcv_saddr only, port given as 0 (presumably mixed in later by udp_lib_get_port - TODO confirm) */
+ udp_sk(sk)->udp_portaddr_hash =
+ udp6_portaddr_hash(sock_net(sk),
+ &inet6_sk(sk)->rcv_saddr,
+ 0);
return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal);
}
@@ -94,14 +116,14 @@ static inline int compute_score(struct sock *sk, struct net *net,
{
int score = -1;
- if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
+ if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
sk->sk_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
score = 0;
- if (inet->dport) {
- if (inet->dport != sport)
+ if (inet->inet_dport) {
+ if (inet->inet_dport != sport)
return -1;
score++;
}
@@ -124,6 +146,88 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
+#define SCORE2_MAX (1 + 1 + 1) /* one point each: peer port, peer address, bound device */
+static inline int compute_score2(struct sock *sk, struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr, unsigned short hnum,
+ int dif)
+{ /* scores sk against a packet; returns -1 on mismatch, else 0..SCORE2_MAX */
+ int score = -1; /* stays -1 if netns, port or family differ */
+
+ if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
+ sk->sk_family == PF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) /* local address must equal daddr exactly; no wildcard handling here */
+ return -1;
+ score = 0;
+ if (inet->inet_dport) { /* socket has a peer port set: it must match the packet's source port */
+ if (inet->inet_dport != sport)
+ return -1;
+ score++;
+ }
+ if (!ipv6_addr_any(&np->daddr)) { /* socket has a peer address set: it must match the packet's source */
+ if (!ipv6_addr_equal(&np->daddr, saddr))
+ return -1;
+ score++;
+ }
+ if (sk->sk_bound_dev_if) { /* socket bound to a device: it must be the one given in dif */
+ if (sk->sk_bound_dev_if != dif)
+ return -1;
+ score++;
+ }
+ }
+ return score;
+}
+
+#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \
+ hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node)
+
+/* called with rcu_read_lock(); returns the best-scoring socket on hslot2 with a reference taken, or NULL */
+static struct sock *udp6_lib_lookup2(struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr, unsigned int hnum, int dif,
+ struct udp_hslot *hslot2, unsigned int slot2)
+{
+ struct sock *sk, *result;
+ struct hlist_nulls_node *node;
+ int score, badness;
+
+begin:
+ result = NULL;
+ badness = -1; /* best score so far; any match (score >= 0) beats it */
+ udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
+ score = compute_score2(sk, net, saddr, sport,
+ daddr, hnum, dif);
+ if (score > badness) { /* strictly better: on ties the first socket found is kept */
+ result = sk;
+ badness = score;
+ if (score == SCORE2_MAX) /* cannot be beaten, stop scanning */
+ goto exact_match;
+ }
+ }
+ /*
+ * If the nulls value found at the end of the walk is not slot2,
+ * the walk finished on a different chain, so restart the lookup.
+ * We probably met an item that was moved to another chain.
+ */
+ if (get_nulls_value(node) != slot2)
+ goto begin;
+
+ if (result) {
+exact_match: /* also reached directly from the scan; result is non-NULL there */
+ if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) /* could not take a reference: report no match */
+ result = NULL;
+ else if (unlikely(compute_score2(result, net, saddr, sport,
+ daddr, hnum, dif) < badness)) { /* re-check with the reference held; score dropped, so retry */
+ sock_put(result);
+ goto begin;
+ }
+ }
+ return result;
+}
+
static struct sock *__udp6_lib_lookup(struct net *net,
struct in6_addr *saddr, __be16 sport,
struct in6_addr *daddr, __be16 dport,
@@ -132,11 +236,35 @@ static struct sock *__udp6_lib_lookup(struct net *net,
struct sock *sk, *result;
struct hlist_nulls_node *node;
unsigned short hnum = ntohs(dport);
- unsigned int hash = udp_hashfn(net, hnum);
- struct udp_hslot *hslot = &udptable->hash[hash];
+ unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
+ struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
int score, badness;
rcu_read_lock();
+ if (hslot->count > 10) { /* long primary (port) chain: try the (address, port) secondary hash */
+ hash2 = udp6_portaddr_hash(net, daddr, hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+ if (hslot->count < hslot2->count) /* secondary chain is longer: use the primary walk at begin */
+ goto begin;
+
+ result = udp6_lib_lookup2(net, saddr, sport,
+ daddr, hnum, dif,
+ hslot2, slot2);
+ if (!result) { /* nothing bound to daddr: retry against sockets bound to the wildcard address */
+ hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+ if (hslot->count < hslot2->count)
+ goto begin;
+
+ result = udp6_lib_lookup2(net, saddr, sport, /* peer address stays the packet's real source */
+ &in6addr_any, hnum, dif, /* local address is the wildcard, matching the slot hashed above */
+ hslot2, slot2);
+ }
+ rcu_read_unlock();
+ return result;
+ }
begin:
result = NULL;
badness = -1;
@@ -152,7 +280,7 @@ begin:
* not the expected one, we must restart lookup.
* We probably met an item that was moved to another chain.
*/
- if (get_nulls_value(node) != hash)
+ if (get_nulls_value(node) != slot)
goto begin;
if (result) {
@@ -252,7 +380,7 @@ try_again:
UDP_MIB_INDATAGRAMS, is_udplite);
}
- sock_recv_timestamp(msg, sk, skb);
+ sock_recv_ts_and_drops(msg, sk, skb);
/* Copy the address. */
if (msg->msg_name) {
@@ -265,8 +393,8 @@ try_again:
sin6->sin6_scope_id = 0;
if (is_udp4)
- ipv6_addr_set(&sin6->sin6_addr, 0, 0,
- htonl(0xffff), ip_hdr(skb)->saddr);
+ ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
+ &sin6->sin6_addr);
else {
ipv6_addr_copy(&sin6->sin6_addr,
&ipv6_hdr(skb)->saddr);
@@ -383,18 +511,18 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
goto drop;
}
- if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
+ if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
/* Note that an ENOMEM error is charged twice */
- if (rc == -ENOMEM) {
+ if (rc == -ENOMEM)
UDP6_INC_STATS_BH(sock_net(sk),
UDP_MIB_RCVBUFERRORS, is_udplite);
- atomic_inc(&sk->sk_drops);
- }
- goto drop;
+ goto drop_no_sk_drops_inc;
}
return 0;
drop:
+ atomic_inc(&sk->sk_drops);
+drop_no_sk_drops_inc:
UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
kfree_skb(skb);
return -1;
@@ -415,10 +543,11 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
if (!net_eq(sock_net(s), net))
continue;
- if (s->sk_hash == num && s->sk_family == PF_INET6) {
+ if (udp_sk(s)->udp_port_hash == num &&
+ s->sk_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(s);
- if (inet->dport) {
- if (inet->dport != rmt_port)
+ if (inet->inet_dport) {
+ if (inet->inet_dport != rmt_port)
continue;
}
if (!ipv6_addr_any(&np->daddr) &&
@@ -440,6 +569,33 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
return NULL;
}
+static void flush_stack(struct sock **stack, unsigned int count,
+ struct sk_buff *skb, unsigned int final)
+{ /* delivers skb to each of the count sockets in stack[] */
+ unsigned int i;
+ struct sock *sk;
+ struct sk_buff *skb1;
+
+ for (i = 0; i < count; i++) {
+ skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); /* index 'final' gets skb itself, all others a clone; the caller passes ~0 for clone-only */
+
+ sk = stack[i];
+ if (skb1) {
+ bh_lock_sock(sk);
+ if (!sock_owned_by_user(sk))
+ udpv6_queue_rcv_skb(sk, skb1);
+ else
+ sk_add_backlog(sk, skb1); /* socket currently owned by user: put the skb on its backlog */
+ bh_unlock_sock(sk);
+ } else { /* skb_clone() returned NULL: count a drop against this socket */
+ atomic_inc(&sk->sk_drops);
+ UDP6_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk));
+ UDP6_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_INERRORS, IS_UDPLITE(sk));
+ }
+ }
+}
/*
* Note: called only from the BH handler context,
* so we don't need to lock the hashes.
@@ -448,41 +604,43 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct in6_addr *saddr, struct in6_addr *daddr,
struct udp_table *udptable)
{
- struct sock *sk, *sk2;
+ struct sock *sk, *stack[256 / sizeof(struct sock *)];
const struct udphdr *uh = udp_hdr(skb);
- struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))];
+ struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
int dif;
+ unsigned int i, count = 0;
spin_lock(&hslot->lock);
sk = sk_nulls_head(&hslot->head);
dif = inet6_iif(skb);
sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
- if (!sk) {
- kfree_skb(skb);
- goto out;
- }
-
- sk2 = sk;
- while ((sk2 = udp_v6_mcast_next(net, sk_nulls_next(sk2), uh->dest, daddr,
- uh->source, saddr, dif))) {
- struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC);
- if (buff) {
- bh_lock_sock(sk2);
- if (!sock_owned_by_user(sk2))
- udpv6_queue_rcv_skb(sk2, buff);
- else
- sk_add_backlog(sk2, buff);
- bh_unlock_sock(sk2);
+ while (sk) {
+ stack[count++] = sk;
+ sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
+ uh->source, saddr, dif);
+ if (unlikely(count == ARRAY_SIZE(stack))) {
+ if (!sk)
+ break;
+ flush_stack(stack, count, skb, ~0);
+ count = 0;
}
}
- bh_lock_sock(sk);
- if (!sock_owned_by_user(sk))
- udpv6_queue_rcv_skb(sk, skb);
- else
- sk_add_backlog(sk, skb);
- bh_unlock_sock(sk);
-out:
+ /*
+ * before releasing the lock, we must take reference on sockets
+ */
+ for (i = 0; i < count; i++)
+ sock_hold(stack[i]);
+
spin_unlock(&hslot->lock);
+
+ if (count) {
+ flush_stack(stack, count, skb, count - 1);
+
+ for (i = 0; i < count; i++)
+ sock_put(stack[i]);
+ } else {
+ kfree_skb(skb);
+ }
return 0;
}
@@ -792,7 +950,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
if (ipv6_addr_v4mapped(daddr)) {
struct sockaddr_in sin;
sin.sin_family = AF_INET;
- sin.sin_port = sin6 ? sin6->sin6_port : inet->dport;
+ sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport;
sin.sin_addr.s_addr = daddr->s6_addr32[3];
msg->msg_name = &sin;
msg->msg_namelen = sizeof(sin);
@@ -865,7 +1023,7 @@ do_udp_sendmsg:
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
- fl.fl_ip_dport = inet->dport;
+ fl.fl_ip_dport = inet->inet_dport;
daddr = &np->daddr;
fl.fl6_flowlabel = np->flow_label;
connected = 1;
@@ -877,6 +1035,8 @@ do_udp_sendmsg:
if (!fl.oif)
fl.oif = np->sticky_pktinfo.ipi6_ifindex;
+ fl.mark = sk->sk_mark;
+
if (msg->msg_controllen) {
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
@@ -909,7 +1069,7 @@ do_udp_sendmsg:
fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
ipv6_addr_copy(&fl.fl6_src, &np->saddr);
- fl.fl_ip_sport = inet->sport;
+ fl.fl_ip_sport = inet->inet_sport;
/* merge ip6_build_xmit from ip6_output */
if (opt && opt->srcrt) {
@@ -1190,10 +1350,10 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket
dest = &np->daddr;
src = &np->rcv_saddr;
- destp = ntohs(inet->dport);
- srcp = ntohs(inet->sport);
+ destp = ntohs(inet->inet_dport);
+ srcp = ntohs(inet->inet_sport);
seq_printf(seq,
- "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+ "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
"%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
bucket,
src->s6_addr32[0], src->s6_addr32[1],
@@ -1282,7 +1442,6 @@ static struct inet_protosw udpv6_protosw = {
.protocol = IPPROTO_UDP,
.prot = &udpv6_prot,
.ops = &inet6_dgram_ops,
- .capability =-1,
.no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_PERMANENT,
};
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index d737a27ee01..6ea6938919e 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -62,7 +62,6 @@ static struct inet_protosw udplite6_protosw = {
.protocol = IPPROTO_UDPLITE,
.prot = &udplitev6_prot,
.ops = &inet6_dgram_ops,
- .capability = -1,
.no_check = 0,
.flags = INET_PROTOSW_PERMANENT,
};
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 81a95c00e50..438831d3359 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -23,7 +23,7 @@
*/
#include <linux/module.h>
#include <linux/xfrm.h>
-#include <linux/list.h>
+#include <linux/rculist.h>
#include <net/ip.h>
#include <net/xfrm.h>
#include <net/ipv6.h>
@@ -36,14 +36,15 @@
* per xfrm_address_t.
*/
struct xfrm6_tunnel_spi {
- struct hlist_node list_byaddr;
- struct hlist_node list_byspi;
- xfrm_address_t addr;
- u32 spi;
- atomic_t refcnt;
+ struct hlist_node list_byaddr;
+ struct hlist_node list_byspi;
+ xfrm_address_t addr;
+ u32 spi;
+ atomic_t refcnt;
+ struct rcu_head rcu_head; /* passed to call_rcu() in xfrm6_tunnel_free_spi() so the free is deferred */
};
-static DEFINE_RWLOCK(xfrm6_tunnel_spi_lock);
+static DEFINE_SPINLOCK(xfrm6_tunnel_spi_lock);
static u32 xfrm6_tunnel_spi;
@@ -107,6 +108,7 @@ static void xfrm6_tunnel_spi_fini(void)
if (!hlist_empty(&xfrm6_tunnel_spi_byspi[i]))
return;
}
+ rcu_barrier();
kmem_cache_destroy(xfrm6_tunnel_spi_kmem);
xfrm6_tunnel_spi_kmem = NULL;
}
@@ -116,7 +118,7 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
struct xfrm6_tunnel_spi *x6spi;
struct hlist_node *pos;
- hlist_for_each_entry(x6spi, pos,
+ hlist_for_each_entry_rcu(x6spi, pos,
&xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
list_byaddr) {
if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0)
@@ -131,10 +133,10 @@ __be32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
struct xfrm6_tunnel_spi *x6spi;
u32 spi;
- read_lock_bh(&xfrm6_tunnel_spi_lock);
+ rcu_read_lock_bh();
x6spi = __xfrm6_tunnel_spi_lookup(saddr);
spi = x6spi ? x6spi->spi : 0;
- read_unlock_bh(&xfrm6_tunnel_spi_lock);
+ rcu_read_unlock_bh();
return htonl(spi);
}
@@ -185,14 +187,15 @@ alloc_spi:
if (!x6spi)
goto out;
+ INIT_RCU_HEAD(&x6spi->rcu_head);
memcpy(&x6spi->addr, saddr, sizeof(x6spi->addr));
x6spi->spi = spi;
atomic_set(&x6spi->refcnt, 1);
- hlist_add_head(&x6spi->list_byspi, &xfrm6_tunnel_spi_byspi[index]);
+ hlist_add_head_rcu(&x6spi->list_byspi, &xfrm6_tunnel_spi_byspi[index]);
index = xfrm6_tunnel_spi_hash_byaddr(saddr);
- hlist_add_head(&x6spi->list_byaddr, &xfrm6_tunnel_spi_byaddr[index]);
+ hlist_add_head_rcu(&x6spi->list_byaddr, &xfrm6_tunnel_spi_byaddr[index]);
out:
return spi;
}
@@ -202,26 +205,32 @@ __be32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr)
struct xfrm6_tunnel_spi *x6spi;
u32 spi;
- write_lock_bh(&xfrm6_tunnel_spi_lock);
+ spin_lock_bh(&xfrm6_tunnel_spi_lock);
x6spi = __xfrm6_tunnel_spi_lookup(saddr);
if (x6spi) {
atomic_inc(&x6spi->refcnt);
spi = x6spi->spi;
} else
spi = __xfrm6_tunnel_alloc_spi(saddr);
- write_unlock_bh(&xfrm6_tunnel_spi_lock);
+ spin_unlock_bh(&xfrm6_tunnel_spi_lock);
return htonl(spi);
}
EXPORT_SYMBOL(xfrm6_tunnel_alloc_spi);
+static void x6spi_destroy_rcu(struct rcu_head *head)
+{ /* RCU callback registered by xfrm6_tunnel_free_spi() through call_rcu() */
+ kmem_cache_free(xfrm6_tunnel_spi_kmem, /* give the entry back to its kmem cache */
+ container_of(head, struct xfrm6_tunnel_spi, rcu_head)); /* head is the rcu_head member embedded in the entry */
+}
+
void xfrm6_tunnel_free_spi(xfrm_address_t *saddr)
{
struct xfrm6_tunnel_spi *x6spi;
struct hlist_node *pos, *n;
- write_lock_bh(&xfrm6_tunnel_spi_lock);
+ spin_lock_bh(&xfrm6_tunnel_spi_lock);
hlist_for_each_entry_safe(x6spi, pos, n,
&xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
@@ -229,14 +238,14 @@ void xfrm6_tunnel_free_spi(xfrm_address_t *saddr)
{
if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) {
if (atomic_dec_and_test(&x6spi->refcnt)) {
- hlist_del(&x6spi->list_byaddr);
- hlist_del(&x6spi->list_byspi);
- kmem_cache_free(xfrm6_tunnel_spi_kmem, x6spi);
+ hlist_del_rcu(&x6spi->list_byaddr);
+ hlist_del_rcu(&x6spi->list_byspi);
+ call_rcu(&x6spi->rcu_head, x6spi_destroy_rcu);
break;
}
}
}
- write_unlock_bh(&xfrm6_tunnel_spi_lock);
+ spin_unlock_bh(&xfrm6_tunnel_spi_lock);
}
EXPORT_SYMBOL(xfrm6_tunnel_free_spi);