diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/core/dev.c | 7 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 31 | ||||
-rw-r--r-- | net/ipv4/arp.c | 43 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 2 | ||||
-rw-r--r-- | net/ipv4/ip_fragment.c | 5 | ||||
-rw-r--r-- | net/ipv4/ip_input.c | 14 | ||||
-rw-r--r-- | net/ipv4/ip_options.c | 21 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 16 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 11 | ||||
-rw-r--r-- | net/ipv4/raw.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 19 | ||||
-rw-r--r-- | net/ipv4/udp.c | 23 | ||||
-rw-r--r-- | net/ipv4/udplite.c | 30 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 433 | ||||
-rw-r--r-- | net/ipv6/fib6_rules.c | 12 | ||||
-rw-r--r-- | net/ipv6/icmp.c | 8 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 20 | ||||
-rw-r--r-- | net/ipv6/ipv6_sockglue.c | 82 | ||||
-rw-r--r-- | net/ipv6/ndisc.c | 4 | ||||
-rw-r--r-- | net/ipv6/raw.c | 4 | ||||
-rw-r--r-- | net/ipv6/route.c | 29 | ||||
-rw-r--r-- | net/ipv6/udp.c | 4 | ||||
-rw-r--r-- | net/ipv6/udplite.c | 19 | ||||
-rw-r--r-- | net/ipv6/xfrm6_input.c | 55 | ||||
-rw-r--r-- | net/ipv6/xfrm6_policy.c | 2 | ||||
-rw-r--r-- | net/ipv6/xfrm6_state.c | 171 | ||||
-rw-r--r-- | net/ipv6/xfrm6_tunnel.c | 45 | ||||
-rw-r--r-- | net/netfilter/core.c | 8 | ||||
-rw-r--r-- | net/sctp/ipv6.c | 4 | ||||
-rw-r--r-- | net/xfrm/xfrm_policy.c | 49 |
30 files changed, 685 insertions, 490 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index f973e38b81a..aebd0860604 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2615,7 +2615,7 @@ static int ptype_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_puts(seq, "Type Device Function\n"); - else { + else if (pt->dev == NULL || pt->dev->nd_net == seq_file_net(seq)) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else @@ -2639,7 +2639,8 @@ static const struct seq_operations ptype_seq_ops = { static int ptype_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &ptype_seq_ops); + return seq_open_net(inode, file, &ptype_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations ptype_seq_fops = { @@ -2647,7 +2648,7 @@ static const struct file_operations ptype_seq_fops = { .open = ptype_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 44f5ce1fbfa..06cfb0bed63 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -243,6 +243,23 @@ void build_ehash_secret(void) } EXPORT_SYMBOL(build_ehash_secret); +static inline int inet_netns_ok(struct net *net, int protocol) +{ + int hash; + struct net_protocol *ipprot; + + if (net == &init_net) + return 1; + + hash = protocol & (MAX_INET_PROTOS - 1); + ipprot = rcu_dereference(inet_protos[hash]); + + if (ipprot == NULL) + /* raw IP is OK */ + return 1; + return ipprot->netns_ok; +} + /* * Create an inet socket. */ @@ -259,9 +276,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol) int try_loading_module = 0; int err; - if (net != &init_net) - return -EAFNOSUPPORT; - if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM && !inet_ehash_secret) @@ -320,6 +334,10 @@ lookup_protocol: if (answer->capability > 0 && !capable(answer->capability)) goto out_rcu_unlock; + err = -EAFNOSUPPORT; + if (!inet_netns_ok(net, protocol)) + goto out_rcu_unlock; + sock->ops = answer->ops; answer_prot = answer->prot; answer_no_check = answer->no_check; @@ -446,7 +464,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) goto out; - chk_addr_ret = inet_addr_type(&init_net, addr->sin_addr.s_addr); + chk_addr_ret = inet_addr_type(sk->sk_net, addr->sin_addr.s_addr); /* Not specified by any standard per-se, however it breaks too * many applications when removed. It is unfortunate since @@ -1114,7 +1132,7 @@ int inet_sk_rebuild_header(struct sock *sk) }; security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(&init_net, &rt, &fl, sk, 0); + err = ip_route_output_flow(sk->sk_net, &rt, &fl, sk, 0); } if (!err) sk_setup_caps(sk, &rt->u.dst); @@ -1284,17 +1302,20 @@ static struct net_protocol tcp_protocol = { .gso_send_check = tcp_v4_gso_send_check, .gso_segment = tcp_tso_segment, .no_policy = 1, + .netns_ok = 1, }; static struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, .no_policy = 1, + .netns_ok = 1, }; static struct net_protocol icmp_protocol = { .handler = icmp_rcv, .no_policy = 1, + .netns_ok = 1, }; static int __init init_ipv4_mibs(void) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index efe01df8fc0..832473e30b3 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -242,7 +242,7 @@ static int arp_constructor(struct neighbour *neigh) return -EINVAL; } - neigh->type = inet_addr_type(&init_net, addr); + neigh->type = inet_addr_type(dev->nd_net, addr); parms = in_dev->arp_parms; __neigh_parms_put(neigh->parms); @@ -341,14 +341,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { default: case 0: /* By default announce any local IP */ - if (skb && inet_addr_type(&init_net, ip_hdr(skb)->saddr) == RTN_LOCAL) + if (skb && inet_addr_type(dev->nd_net, ip_hdr(skb)->saddr) == RTN_LOCAL) saddr = ip_hdr(skb)->saddr; break; case 1: /* Restrict announcements of saddr in same subnet */ if (!skb) break; saddr = ip_hdr(skb)->saddr; - if (inet_addr_type(&init_net, saddr) == RTN_LOCAL) { + if (inet_addr_type(dev->nd_net, saddr) == RTN_LOCAL) { /* saddr should be known to target */ if (inet_addr_onlink(in_dev, target, saddr)) break; @@ -424,7 +424,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) int flag = 0; /*unsigned long now; */ - if (ip_route_output_key(&init_net, &rt, &fl) < 0) + if (ip_route_output_key(dev->nd_net, &rt, &fl) < 0) return 1; if (rt->u.dst.dev != dev) { NET_INC_STATS_BH(LINUX_MIB_ARPFILTER); @@ -477,7 +477,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) paddr = skb->rtable->rt_gateway; - if (arp_set_predefined(inet_addr_type(&init_net, paddr), haddr, paddr, dev)) + if (arp_set_predefined(inet_addr_type(dev->nd_net, paddr), haddr, paddr, dev)) return 0; n = __neigh_lookup(&arp_tbl, &paddr, dev, 1); @@ -709,6 +709,7 @@ static int arp_process(struct sk_buff *skb) u16 dev_type = dev->type; int addr_type; struct neighbour *n; + struct net *net = dev->nd_net; /* arp_rcv below verifies the ARP header and verifies the device * is ARP'able. @@ -804,7 +805,7 @@ static int arp_process(struct sk_buff *skb) /* Special case: IPv4 duplicate address detection packet (RFC2131) */ if (sip == 0) { if (arp->ar_op == htons(ARPOP_REQUEST) && - inet_addr_type(&init_net, tip) == RTN_LOCAL && + inet_addr_type(net, tip) == RTN_LOCAL && !arp_ignore(in_dev, sip, tip)) arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, dev->dev_addr, sha); @@ -834,7 +835,7 @@ static int arp_process(struct sk_buff *skb) goto out; } else if (IN_DEV_FORWARD(in_dev)) { if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && - (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &init_net, &tip, dev, 0))) { + (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) { n = neigh_event_ns(&arp_tbl, sha, &sip, dev); if (n) neigh_release(n); @@ -864,7 +865,7 @@ static int arp_process(struct sk_buff *skb) */ if (n == NULL && arp->ar_op == htons(ARPOP_REPLY) && - inet_addr_type(&init_net, sip) == RTN_UNICAST) + inet_addr_type(net, sip) == RTN_UNICAST) n = __neigh_lookup(&arp_tbl, &sip, dev, 1); } @@ -911,9 +912,6 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, { struct arphdr *arp; - if (dev->nd_net != &init_net) - goto freeskb; - /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ if (!pskb_may_pull(skb, arp_hdr_len(dev))) goto freeskb; @@ -1198,9 +1196,6 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo { struct net_device *dev = ptr; - if (dev->nd_net != &init_net) - return NOTIFY_DONE; - switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); @@ -1382,13 +1377,29 @@ static const struct file_operations arp_seq_fops = { .release = seq_release_net, }; -static int __init arp_proc_init(void) + +static int __net_init arp_net_init(struct net *net) { - if (!proc_net_fops_create(&init_net, "arp", S_IRUGO, &arp_seq_fops)) + if (!proc_net_fops_create(net, "arp", S_IRUGO, &arp_seq_fops)) return -ENOMEM; return 0; } +static void __net_exit arp_net_exit(struct net *net) +{ + proc_net_remove(net, "arp"); +} + +static struct pernet_operations arp_net_ops = { + .init = arp_net_init, + .exit = arp_net_exit, +}; + +static int __init arp_proc_init(void) +{ + return register_pernet_subsys(&arp_net_ops); +} + #else /* CONFIG_PROC_FS */ static int __init arp_proc_init(void) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f9c5c4def1b..d13c5f12bb3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -333,7 +333,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, .dport = ireq->rmt_port } } }; security_req_classify_flow(req, &fl); - if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0)) { + if (ip_route_output_flow(sk->sk_net, &rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 3b2e5adca83..8b448c4b908 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -204,8 +204,11 @@ static void ip_expire(unsigned long arg) if ((qp->q.last_in&FIRST_IN) && qp->q.fragments != NULL) { struct sk_buff *head = qp->q.fragments; + struct net *net; + + net = container_of(qp->q.net, struct net, ipv4.frags); /* Send an ICMP "Fragment Reassembly Timeout" message. */ - if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) { + if ((head->dev = dev_get_by_index(net, qp->iif)) != NULL) { icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); dev_put(head->dev); } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index e3a0c78fa7b..2aeea5d1542 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -160,6 +160,7 @@ int ip_call_ra_chain(struct sk_buff *skb) struct ip_ra_chain *ra; u8 protocol = ip_hdr(skb)->protocol; struct sock *last = NULL; + struct net_device *dev = skb->dev; read_lock(&ip_ra_lock); for (ra = ip_ra_chain; ra; ra = ra->next) { @@ -170,7 +171,8 @@ int ip_call_ra_chain(struct sk_buff *skb) */ if (sk && inet_sk(sk)->num == protocol && (!sk->sk_bound_dev_if || - sk->sk_bound_dev_if == skb->dev->ifindex)) { + sk->sk_bound_dev_if == dev->ifindex) && + sk->sk_net == dev->nd_net) { if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { read_unlock(&ip_ra_lock); @@ -197,6 +199,8 @@ int ip_call_ra_chain(struct sk_buff *skb) static int ip_local_deliver_finish(struct sk_buff *skb) { + struct net *net = skb->dev->nd_net; + __skb_pull(skb, ip_hdrlen(skb)); /* Point into the IP datagram, just past the header. */ @@ -212,7 +216,8 @@ static int ip_local_deliver_finish(struct sk_buff *skb) raw = raw_local_deliver(skb, protocol); hash = protocol & (MAX_INET_PROTOS - 1); - if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { + ipprot = rcu_dereference(inet_protos[hash]); + if (ipprot != NULL && (net == &init_net || ipprot->netns_ok)) { int ret; if (!ipprot->no_policy) { @@ -286,7 +291,7 @@ static inline int ip_rcv_options(struct sk_buff *skb) opt = &(IPCB(skb)->opt); opt->optlen = iph->ihl*4 - sizeof(struct iphdr); - if (ip_options_compile(opt, skb)) { + if (ip_options_compile(dev->nd_net, opt, skb)) { IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); goto drop; } @@ -373,9 +378,6 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct iphdr *iph; u32 len; - if (dev->nd_net != &init_net) - goto drop; - /* When the interface is in promisc. mode, drop all the crap * that it receives, do not try to analyse it. */ diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index aeed4e5858e..87cc1222c60 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -145,7 +145,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) __be32 addr; memcpy(&addr, sptr+soffset-1, 4); - if (inet_addr_type(&init_net, addr) != RTN_LOCAL) { + if (inet_addr_type(skb->dst->dev->nd_net, addr) != RTN_LOCAL) { dopt->ts_needtime = 1; soffset += 8; } @@ -248,7 +248,8 @@ void ip_options_fragment(struct sk_buff * skb) * If opt == NULL, then skb->data should point to IP header. */ -int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) +int ip_options_compile(struct net *net, + struct ip_options * opt, struct sk_buff * skb) { int l; unsigned char * iph; @@ -389,7 +390,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) { __be32 addr; memcpy(&addr, &optptr[optptr[2]-1], 4); - if (inet_addr_type(&init_net, addr) == RTN_UNICAST) + if (inet_addr_type(net, addr) == RTN_UNICAST) break; if (skb) timeptr = (__be32*)&optptr[optptr[2]+3]; @@ -506,13 +507,13 @@ static struct ip_options *ip_options_get_alloc(const int optlen) GFP_KERNEL); } -static int ip_options_get_finish(struct ip_options **optp, +static int ip_options_get_finish(struct net *net, struct ip_options **optp, struct ip_options *opt, int optlen) { while (optlen & 3) opt->__data[optlen++] = IPOPT_END; opt->optlen = optlen; - if (optlen && ip_options_compile(opt, NULL)) { + if (optlen && ip_options_compile(net, opt, NULL)) { kfree(opt); return -EINVAL; } @@ -521,7 +522,8 @@ static int ip_options_get_finish(struct ip_options **optp, return 0; } -int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *data, int optlen) +int ip_options_get_from_user(struct net *net, struct ip_options **optp, + unsigned char __user *data, int optlen) { struct ip_options *opt = ip_options_get_alloc(optlen); @@ -531,10 +533,11 @@ int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *dat kfree(opt); return -EFAULT; } - return ip_options_get_finish(optp, opt, optlen); + return ip_options_get_finish(net, optp, opt, optlen); } -int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen) +int ip_options_get(struct net *net, struct ip_options **optp, + unsigned char *data, int optlen) { struct ip_options *opt = ip_options_get_alloc(optlen); @@ -542,7 +545,7 @@ int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen) return -ENOMEM; if (optlen) memcpy(opt->__data, data, optlen); - return ip_options_get_finish(optp, opt, optlen); + return ip_options_get_finish(net, optp, opt, optlen); } void ip_forward_options(struct sk_buff *skb) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index dc494ea594a..913266cd990 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -351,7 +351,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) * itself out. */ security_sk_classify_flow(sk, &fl); - if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0)) + if (ip_route_output_flow(sk->sk_net, &rt, &fl, sk, 0)) goto no_route; } sk_setup_caps(sk, &rt->u.dst); @@ -825,7 +825,7 @@ int ip_append_data(struct sock *sk, inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); - inet->cork.rt = rt; + inet->cork.dst = &rt->u.dst; inet->cork.length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; @@ -834,7 +834,7 @@ int ip_append_data(struct sock *sk, transhdrlen += exthdrlen; } } else { - rt = inet->cork.rt; + rt = (struct rtable *)inet->cork.dst; if (inet->cork.flags & IPCORK_OPT) opt = inet->cork.opt; @@ -1083,7 +1083,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, if (skb_queue_empty(&sk->sk_write_queue)) return -EINVAL; - rt = inet->cork.rt; + rt = (struct rtable *)inet->cork.dst; if (inet->cork.flags & IPCORK_OPT) opt = inet->cork.opt; @@ -1208,10 +1208,8 @@ static void ip_cork_release(struct inet_sock *inet) inet->cork.flags &= ~IPCORK_OPT; kfree(inet->cork.opt); inet->cork.opt = NULL; - if (inet->cork.rt) { - ip_rt_put(inet->cork.rt); - inet->cork.rt = NULL; - } + dst_release(inet->cork.dst); + inet->cork.dst = NULL; } /* @@ -1224,7 +1222,7 @@ int ip_push_pending_frames(struct sock *sk) struct sk_buff **tail_skb; struct inet_sock *inet = inet_sk(sk); struct ip_options *opt = NULL; - struct rtable *rt = inet->cork.rt; + struct rtable *rt = (struct rtable *)inet->cork.dst; struct iphdr *iph; __be16 df = 0; __u8 ttl; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index bb3cbe5ec36..b854431047a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -163,7 +163,7 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) ip_cmsg_recv_security(msg, skb); } -int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc) +int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) { int err; struct cmsghdr *cmsg; @@ -176,7 +176,7 @@ int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc) switch (cmsg->cmsg_type) { case IP_RETOPTS: err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); - err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40); + err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40); if (err) return err; break; @@ -449,7 +449,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, struct ip_options * opt = NULL; if (optlen > 40 || optlen < 0) goto e_inval; - err = ip_options_get_from_user(&opt, optval, optlen); + err = ip_options_get_from_user(sk->sk_net, &opt, + optval, optlen); if (err) break; if (inet->is_icsk) { @@ -589,13 +590,13 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = 0; break; } - dev = ip_dev_find(&init_net, mreq.imr_address.s_addr); + dev = ip_dev_find(sk->sk_net, mreq.imr_address.s_addr); if (dev) { mreq.imr_ifindex = dev->ifindex; dev_put(dev); } } else - dev = __dev_get_by_index(&init_net, mreq.imr_ifindex); + dev = __dev_get_by_index(sk->sk_net, mreq.imr_ifindex); err = -EADDRNOTAVAIL; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index b433b485a88..3f68a937b60 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -499,7 +499,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { - err = ip_cmsg_send(msg, &ipc); + err = ip_cmsg_send(sk->sk_net, msg, &ipc); if (err) goto out; if (ipc.opt) @@ -553,7 +553,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); + err = ip_route_output_flow(sk->sk_net, &rt, &fl, sk, 1); } if (err) goto done; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1a47719a55a..649d00a50cb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2425,14 +2425,29 @@ static struct tcp_seq_afinfo tcp4_seq_afinfo = { .seq_fops = &tcp4_seq_fops, }; +static int tcp4_proc_init_net(struct net *net) +{ + return tcp_proc_register(net, &tcp4_seq_afinfo); +} + +static void tcp4_proc_exit_net(struct net *net) +{ + tcp_proc_unregister(net, &tcp4_seq_afinfo); +} + +static struct pernet_operations tcp4_net_ops = { + .init = tcp4_proc_init_net, + .exit = tcp4_proc_exit_net, +}; + int __init tcp4_proc_init(void) { - return tcp_proc_register(&init_net, &tcp4_seq_afinfo); + return register_pernet_subsys(&tcp4_net_ops); } void tcp4_proc_exit(void) { - tcp_proc_unregister(&init_net, &tcp4_seq_afinfo); + unregister_pernet_subsys(&tcp4_net_ops); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8c1f5eaafcd..b37581dfd02 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -607,7 +607,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { - err = ip_cmsg_send(msg, &ipc); + err = ip_cmsg_send(sk->sk_net, msg, &ipc); if (err) return err; if (ipc.opt) @@ -656,7 +656,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, { .sport = inet->sport, .dport = dport } } }; security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); + err = ip_route_output_flow(sk->sk_net, &rt, &fl, sk, 1); if (err) { if (err == -ENETUNREACH) IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); @@ -1701,14 +1701,29 @@ static struct udp_seq_afinfo udp4_seq_afinfo = { .seq_fops = &udp4_seq_fops, }; +static int udp4_proc_init_net(struct net *net) +{ + return udp_proc_register(net, &udp4_seq_afinfo); +} + +static void udp4_proc_exit_net(struct net *net) +{ + udp_proc_unregister(net, &udp4_seq_afinfo); +} + +static struct pernet_operations udp4_net_ops = { + .init = udp4_proc_init_net, + .exit = udp4_proc_exit_net, +}; + int __init udp4_proc_init(void) { - return udp_proc_register(&init_net, &udp4_seq_afinfo); + return register_pernet_subsys(&udp4_net_ops); } void udp4_proc_exit(void) { - udp_proc_unregister(&init_net, &udp4_seq_afinfo); + unregister_pernet_subsys(&udp4_net_ops); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 8d42e344b04..53f3ed49632 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -31,6 +31,7 @@ static struct net_protocol udplite_protocol = { .handler = udplite_rcv, .err_handler = udplite_err, .no_policy = 1, + .netns_ok = 1, }; DEFINE_PROTO_INUSE(udplite) @@ -82,6 +83,31 @@ static struct udp_seq_afinfo udplite4_seq_afinfo = { .seq_show = udp4_seq_show, .seq_fops = &udplite4_seq_fops, }; + +static int udplite4_proc_init_net(struct net *net) +{ + return udp_proc_register(net, &udplite4_seq_afinfo); +} + +static void udplite4_proc_exit_net(struct net *net) +{ + udp_proc_unregister(net, &udplite4_seq_afinfo); +} + +static struct pernet_operations udplite4_net_ops = { + .init = udplite4_proc_init_net, + .exit = udplite4_proc_exit_net, +}; + +static __init int udplite4_proc_init(void) +{ + return register_pernet_subsys(&udplite4_net_ops); +} +#else +static inline int udplite4_proc_init(void) +{ + return 0; +} #endif void __init udplite4_register(void) @@ -94,10 +120,8 @@ void __init udplite4_register(void) inet_register_protosw(&udplite4_protosw); -#ifdef CONFIG_PROC_FS - if (udp_proc_register(&init_net, &udplite4_seq_afinfo)) + if (udplite4_proc_init()) printk(KERN_ERR "%s: Cannot register /proc!\n", __func__); -#endif return; out_unregister_proto: diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4b86d388bf6..89954885dee 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -877,20 +877,40 @@ out: /* * Choose an appropriate source address (RFC3484) */ +enum { + IPV6_SADDR_RULE_INIT = 0, + IPV6_SADDR_RULE_LOCAL, + IPV6_SADDR_RULE_SCOPE, + IPV6_SADDR_RULE_PREFERRED, +#ifdef CONFIG_IPV6_MIP6 + IPV6_SADDR_RULE_HOA, +#endif + IPV6_SADDR_RULE_OIF, + IPV6_SADDR_RULE_LABEL, +#ifdef CONFIG_IPV6_PRIVACY + IPV6_SADDR_RULE_PRIVACY, +#endif + IPV6_SADDR_RULE_ORCHID, + IPV6_SADDR_RULE_PREFIX, + IPV6_SADDR_RULE_MAX +}; + struct ipv6_saddr_score { - int addr_type; - unsigned int attrs; - int matchlen; - int scope; - unsigned int rule; + int rule; + int addr_type; + struct inet6_ifaddr *ifa; + DECLARE_BITMAP(scorebits, IPV6_SADDR_RULE_MAX); + int scopedist; + int matchlen; }; -#define IPV6_SADDR_SCORE_LOCAL 0x0001 -#define IPV6_SADDR_SCORE_PREFERRED 0x0004 -#define IPV6_SADDR_SCORE_HOA 0x0008 -#define IPV6_SADDR_SCORE_OIF 0x0010 -#define IPV6_SADDR_SCORE_LABEL 0x0020 -#define IPV6_SADDR_SCORE_PRIVACY 0x0040 +struct ipv6_saddr_dst { + struct in6_addr *addr; + int ifindex; + int scope; + int label; + unsigned int prefs; +}; static inline int ipv6_saddr_preferred(int type) { @@ -900,28 +920,152 @@ static inline int ipv6_saddr_preferred(int type) return 0; } -int ipv6_dev_get_saddr(struct net_device *daddr_dev, - struct in6_addr *daddr, struct in6_addr *saddr) +static int ipv6_get_saddr_eval(struct ipv6_saddr_score *score, + struct ipv6_saddr_dst *dst, + int i) +{ + int ret; + + if (i <= score->rule) { + switch (i) { + case IPV6_SADDR_RULE_SCOPE: + ret = score->scopedist; + break; + case IPV6_SADDR_RULE_PREFIX: + ret = score->matchlen; + break; + default: + ret = !!test_bit(i, score->scorebits); + } + goto out; + } + + switch (i) { + case IPV6_SADDR_RULE_INIT: + /* Rule 0: remember if hiscore is not ready yet */ + ret = !!score->ifa; + break; + case IPV6_SADDR_RULE_LOCAL: + /* Rule 1: Prefer same address */ + ret = ipv6_addr_equal(&score->ifa->addr, dst->addr); + break; + case IPV6_SADDR_RULE_SCOPE: + /* Rule 2: Prefer appropriate scope + * + * ret + * ^ + * -1 | d 15 + * ---+--+-+---> scope + * | + * | d is scope of the destination. + * B-d | \ + * | \ <- smaller scope is better if + * B-15 | \ if scope is enough for destinaion. + * | ret = B - scope (-1 <= scope >= d <= 15). + * d-C-1 | / + * |/ <- greater is better + * -C / if scope is not enough for destination. + * /| ret = scope - C (-1 <= d < scope <= 15). + * + * d - C - 1 < B -15 (for all -1 <= d <= 15). + * C > d + 14 - B >= 15 + 14 - B = 29 - B. + * Assume B = 0 and we get C > 29. + */ + ret = __ipv6_addr_src_scope(score->addr_type); + if (ret >= dst->scope) + ret = -ret; + else + ret -= 128; /* 30 is enough */ + score->scopedist = ret; + break; + case IPV6_SADDR_RULE_PREFERRED: + /* Rule 3: Avoid deprecated and optimistic addresses */ + ret = ipv6_saddr_preferred(score->addr_type) || + !(score->ifa->flags & (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)); + break; +#ifdef CONFIG_IPV6_MIP6 + case IPV6_SADDR_RULE_HOA: + { + /* Rule 4: Prefer home address */ + int prefhome = !(dst->prefs & IPV6_PREFER_SRC_COA); + ret = !(score->ifa->flags & IFA_F_HOMEADDRESS) ^ prefhome; + break; + } +#endif + case IPV6_SADDR_RULE_OIF: + /* Rule 5: Prefer outgoing interface */ + ret = (!dst->ifindex || + dst->ifindex == score->ifa->idev->dev->ifindex); + break; + case IPV6_SADDR_RULE_LABEL: + /* Rule 6: Prefer matching label */ + ret = ipv6_addr_label(&score->ifa->addr, score->addr_type, + score->ifa->idev->dev->ifindex) == dst->label; + break; +#ifdef CONFIG_IPV6_PRIVACY + case IPV6_SADDR_RULE_PRIVACY: + { + /* Rule 7: Prefer public address + * Note: prefer temprary address if use_tempaddr >= 2 + */ + int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ? + !!(dst->prefs & IPV6_PREFER_SRC_TMP) : + score->ifa->idev->cnf.use_tempaddr >= 2; + ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp; + break; + } +#endif + case IPV6_SADDR_RULE_ORCHID: + /* Rule 8-: Prefer ORCHID vs ORCHID or + * non-ORCHID vs non-ORCHID + */ + ret = !(ipv6_addr_orchid(&score->ifa->addr) ^ + ipv6_addr_orchid(dst->addr)); + break; + case IPV6_SADDR_RULE_PREFIX: + /* Rule 8: Use longest matching prefix */ + score->matchlen = ret = ipv6_addr_diff(&score->ifa->addr, + dst->addr); + break; + default: + ret = 0; + } + + if (ret) + __set_bit(i, score->scorebits); + score->rule = i; +out: + return ret; +} + +int ipv6_dev_get_saddr(struct net_device *dst_dev, + struct in6_addr *daddr, unsigned int prefs, + struct in6_addr *saddr) { - struct ipv6_saddr_score hiscore; - struct inet6_ifaddr *ifa_result = NULL; - struct net *net = daddr_dev->nd_net; - int daddr_type = __ipv6_addr_type(daddr); - int daddr_scope = __ipv6_addr_src_scope(daddr_type); - int daddr_ifindex = daddr_dev ? daddr_dev->ifindex : 0; - u32 daddr_label = ipv6_addr_label(daddr, daddr_type, daddr_ifindex); + struct ipv6_saddr_score scores[2], + *score = &scores[0], *hiscore = &scores[1]; + struct net *net = dst_dev->nd_net; + struct ipv6_saddr_dst dst; struct net_device *dev; + int dst_type; + + dst_type = __ipv6_addr_type(daddr); + dst.addr = daddr; + dst.ifindex = dst_dev ? dst_dev->ifindex : 0; + dst.scope = __ipv6_addr_src_scope(dst_type); + dst.label = ipv6_addr_label(daddr, dst_type, dst.ifindex); + dst.prefs = prefs; - memset(&hiscore, 0, sizeof(hiscore)); + hiscore->rule = -1; + hiscore->ifa = NULL; read_lock(&dev_base_lock); rcu_read_lock(); for_each_netdev(net, dev) { struct inet6_dev *idev; - struct inet6_ifaddr *ifa; - /* Rule 0: Candidate Source Address (section 4) + /* Candidate Source Address (section 4) * - multicast and link-local destination address, * the set of candidate source address MUST only * include addresses assigned to interfaces @@ -933,9 +1077,9 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, * belonging to the same site as the outgoing * interface.) */ - if ((daddr_type & IPV6_ADDR_MULTICAST || - daddr_scope <= IPV6_ADDR_SCOPE_LINKLOCAL) && - daddr_dev && dev != daddr_dev) + if (((dst_type & IPV6_ADDR_MULTICAST) || + dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) && + dst.ifindex && dev->ifindex != dst.ifindex) continue; idev = __in6_dev_get(dev); @@ -943,12 +1087,10 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, continue; read_lock_bh(&idev->lock); - for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) { - struct ipv6_saddr_score score; + for (score->ifa = idev->addr_list; score->ifa; score->ifa = score->ifa->if_next) { + int i; - score.addr_type = __ipv6_addr_type(&ifa->addr); - - /* Rule 0: + /* * - Tentative Address (RFC2462 section 5.4) * - A tentative address is not considered * "assigned to an interface" in the traditional @@ -958,11 +1100,14 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, * addresses, and the unspecified address MUST * NOT be included in a candidate set. */ - if ((ifa->flags & IFA_F_TENTATIVE) && - (!(ifa->flags & IFA_F_OPTIMISTIC))) + if ((score->ifa->flags & IFA_F_TENTATIVE) && + (!(score->ifa->flags & IFA_F_OPTIMISTIC))) continue; - if (unlikely(score.addr_type == IPV6_ADDR_ANY || - score.addr_type & IPV6_ADDR_MULTICAST)) { + + score->addr_type = __ipv6_addr_type(&score->ifa->addr); + + if (unlikely(score->addr_type == IPV6_ADDR_ANY || + score->addr_type & IPV6_ADDR_MULTICAST)) { LIMIT_NETDEBUG(KERN_DEBUG "ADDRCONF: unspecified / multicast address " "assigned as unicast address on %s", @@ -970,201 +1115,59 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, continue; } - score.attrs = 0; - score.matchlen = 0; - score.scope = 0; - score.rule = 0; - - if (ifa_result == NULL) { - /* record it if the first available entry */ - goto record_it; - } - - /* Rule 1: Prefer same address */ - if (hiscore.rule < 1) { - if (ipv6_addr_equal(&ifa_result->addr, daddr)) - hiscore.attrs |= IPV6_SADDR_SCORE_LOCAL; - hiscore.rule++; - } - if (ipv6_addr_equal(&ifa->addr, daddr)) { - score.attrs |= IPV6_SADDR_SCORE_LOCAL; - if (!(hiscore.attrs & IPV6_SADDR_SCORE_LOCAL)) { - score.rule = 1; - goto record_it; - } - } else { - if (hiscore.attrs & IPV6_SADDR_SCORE_LOCAL) - continue; - } + score->rule = -1; + bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX); + + for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) { + int minihiscore, miniscore; + + minihiscore = ipv6_get_saddr_eval(hiscore, &dst, i); + miniscore = ipv6_get_saddr_eval(score, &dst, i); + + if (minihiscore > miniscore) { + if (i == IPV6_SADDR_RULE_SCOPE && + score->scopedist > 0) { + /* + * special case: + * each remaining entry + * has too small (not enough) + * scope, because ifa entries + * are sorted by their scope + * values. + */ + goto try_nextdev; + } + break; + } else if (minihiscore < miniscore) { + struct ipv6_saddr_score *tmp; - /* Rule 2: Prefer appropriate scope */ - if (hiscore.rule < 2) { - hiscore.scope = __ipv6_addr_src_scope(hiscore.addr_type); - hiscore.rule++; - } - score.scope = __ipv6_addr_src_scope(score.addr_type); - if (hiscore.scope < score.scope) { - if (hiscore.scope < daddr_scope) { - score.rule = 2; - goto record_it; - } else - continue; - } else if (score.scope < hiscore.scope) { - if (score.scope < daddr_scope) - break; /* addresses sorted by scope */ - else { - score.rule = 2; - goto record_it; - } - } + if (hiscore->ifa) + in6_ifa_put(hiscore->ifa); - /* Rule 3: Avoid deprecated and optimistic addresses */ - if (hiscore.rule < 3) { - if (ipv6_saddr_preferred(hiscore.addr_type) || - (((ifa_result->flags & - (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) - hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED; - hiscore.rule++; - } - if (ipv6_saddr_preferred(score.addr_type) || - (((ifa->flags & - (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) { - score.attrs |= IPV6_SADDR_SCORE_PREFERRED; - if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) { - score.rule = 3; - goto record_it; - } - } else { - if (hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED) - continue; - } + in6_ifa_hold(score->ifa); - /* Rule 4: Prefer home address */ -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) - if (hiscore.rule < 4) { - if (ifa_result->flags & IFA_F_HOMEADDRESS) - hiscore.attrs |= IPV6_SADDR_SCORE_HOA; - hiscore.rule++; - } - if (ifa->flags & IFA_F_HOMEADDRESS) { - score.attrs |= IPV6_SADDR_SCORE_HOA; - if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) { - score.rule = 4; - goto record_it; - } - } else { - if (hiscore.attrs & IPV6_SADDR_SCORE_HOA) - continue; - } -#else - if (hiscore.rule < 4) - hiscore.rule++; -#endif + tmp = hiscore; + hiscore = score; + score = tmp; - /* Rule 5: Prefer outgoing interface */ - if (hiscore.rule < 5) { - if (daddr_dev == NULL || - daddr_dev == ifa_result->idev->dev) - hiscore.attrs |= IPV6_SADDR_SCORE_OIF; - hiscore.rule++; - } - if (daddr_dev == NULL || - daddr_dev == ifa->idev->dev) { - score.attrs |= IPV6_SADDR_SCORE_OIF; - if (!(hiscore.attrs & IPV6_SADDR_SCORE_OIF)) { - score.rule = 5; - goto record_it; - } - } else { - if (hiscore.attrs & IPV6_SADDR_SCORE_OIF) - continue; - } - - /* Rule 6: Prefer matching label */ - if (hiscore.rule < 6) { - if (ipv6_addr_label(&ifa_result->addr, - hiscore.addr_type, - ifa_result->idev->dev->ifindex) == daddr_label) - hiscore.attrs |= IPV6_SADDR_SCORE_LABEL; - hiscore.rule++; - } - if (ipv6_addr_label(&ifa->addr, - score.addr_type, - ifa->idev->dev->ifindex) == daddr_label) { - score.attrs |= IPV6_SADDR_SCORE_LABEL; - if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) { - score.rule = 6; - goto record_it; - } - } else { - if (hiscore.attrs & IPV6_SADDR_SCORE_LABEL) - continue; - } + /* restore our iterator */ + score->ifa = hiscore->ifa; -#ifdef CONFIG_IPV6_PRIVACY - /* Rule 7: Prefer public address - * Note: prefer temprary address if use_tempaddr >= 2 - */ - if (hiscore.rule < 7) { - if ((!(ifa_result->flags & IFA_F_TEMPORARY)) ^ - (ifa_result->idev->cnf.use_tempaddr >= 2)) - hiscore.attrs |= IPV6_SADDR_SCORE_PRIVACY; - hiscore.rule++; - } - if ((!(ifa->flags & IFA_F_TEMPORARY)) ^ - (ifa->idev->cnf.use_tempaddr >= 2)) { - score.attrs |= IPV6_SADDR_SCORE_PRIVACY; - if (!(hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)) { - score.rule = 7; - goto record_it; + break; } - } else { - if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY) - continue; - } -#else - if (hiscore.rule < 7) - hiscore.rule++; -#endif - - /* Skip rule 8 for orchid -> non-orchid address pairs. */ - if (ipv6_addr_orchid(&ifa->addr) && !ipv6_addr_orchid(daddr)) - continue; - - /* Rule 8: Use longest matching prefix */ - if (hiscore.rule < 8) { - hiscore.matchlen = ipv6_addr_diff(&ifa_result->addr, daddr); - hiscore.rule++; - } - score.matchlen = ipv6_addr_diff(&ifa->addr, daddr); - if (score.matchlen > hiscore.matchlen) { - score.rule = 8; - goto record_it; } -#if 0 - else if (score.matchlen < hiscore.matchlen) - continue; -#endif - - /* Final Rule: choose first available one */ - continue; -record_it: - if (ifa_result) - in6_ifa_put(ifa_result); - in6_ifa_hold(ifa); - ifa_result = ifa; - hiscore = score; } +try_nextdev: read_unlock_bh(&idev->lock); } rcu_read_unlock(); read_unlock(&dev_base_lock); - if (!ifa_result) + if (!hiscore->ifa) return -EADDRNOTAVAIL; - ipv6_addr_copy(saddr, &ifa_result->addr); - in6_ifa_put(ifa_result); + ipv6_addr_copy(saddr, &hiscore->ifa->addr); + in6_ifa_put(hiscore->ifa); return 0; } diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 55137408f05..e7a7fe26ceb 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -84,8 +84,18 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, if ((rule->flags & FIB_RULE_FIND_SADDR) && r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) { struct in6_addr saddr; + unsigned int srcprefs = 0; + + if (flags & RT6_LOOKUP_F_SRCPREF_TMP) + srcprefs |= IPV6_PREFER_SRC_TMP; + if (flags & RT6_LOOKUP_F_SRCPREF_PUBLIC) + srcprefs |= IPV6_PREFER_SRC_PUBLIC; + if (flags & RT6_LOOKUP_F_SRCPREF_COA) + srcprefs |= IPV6_PREFER_SRC_COA; + if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev, - &flp->fl6_dst, &saddr)) + &flp->fl6_dst, srcprefs, + &saddr)) goto again; if (!ipv6_prefix_equal(&saddr, &r->src.addr, r->src.plen)) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6b5391ab834..86332417b40 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -464,9 +464,7 @@ route_done: else hlimit = np->hop_limit; if (hlimit < 0) - hlimit = dst_metric(dst, RTAX_HOPLIMIT); - if (hlimit < 0) - hlimit = ipv6_get_hoplimit(dst->dev); + hlimit = ip6_dst_hoplimit(dst); tclass = np->tclass; if (tclass < 0) @@ -560,9 +558,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) else hlimit = np->hop_limit; if (hlimit < 0) - hlimit = dst_metric(dst, RTAX_HOPLIMIT); - if (hlimit < 0) - hlimit = ipv6_get_hoplimit(dst->dev); + hlimit = ip6_dst_hoplimit(dst); tclass = np->tclass; if (tclass < 0) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 98762fde2b6..d34aa61353b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -237,9 +237,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (np) hlimit = np->hop_limit; if (hlimit < 0) - hlimit = dst_metric(dst, RTAX_HOPLIMIT); - if (hlimit < 0) - hlimit = ipv6_get_hoplimit(dst->dev); + hlimit = ip6_dst_hoplimit(dst); tclass = -1; if (np) @@ -922,7 +920,9 @@ static int ip6_dst_lookup_tail(struct sock *sk, if (ipv6_addr_any(&fl->fl6_src)) { err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev, - &fl->fl6_dst, &fl->fl6_src); + &fl->fl6_dst, + sk ? inet6_sk(sk)->srcprefs : 0, + &fl->fl6_src); if (err) goto out_err_release; } @@ -1115,7 +1115,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, /* need source address above miyazawa*/ } dst_hold(&rt->u.dst); - np->cork.rt = rt; + inet->cork.dst = &rt->u.dst; inet->cork.fl = *fl; np->cork.hop_limit = hlimit; np->cork.tclass = tclass; @@ -1136,7 +1136,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, length += exthdrlen; transhdrlen += exthdrlen; } else { - rt = np->cork.rt; + rt = (struct rt6_info *)inet->cork.dst; fl = &inet->cork.fl; if (inet->cork.flags & IPCORK_OPT) opt = np->cork.opt; @@ -1381,9 +1381,9 @@ static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) inet->cork.flags &= ~IPCORK_OPT; kfree(np->cork.opt); np->cork.opt = NULL; - if (np->cork.rt) { - dst_release(&np->cork.rt->u.dst); - np->cork.rt = NULL; + if (inet->cork.dst) { + dst_release(inet->cork.dst); + inet->cork.dst = NULL; inet->cork.flags &= ~IPCORK_ALLFRAG; } memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); @@ -1398,7 +1398,7 @@ int ip6_push_pending_frames(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *hdr; struct ipv6_txoptions *opt = np->cork.opt; - struct rt6_info *rt = np->cork.rt; + struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; struct flowi *fl = &inet->cork.fl; unsigned char proto = fl->proto; int err = 0; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index c11c76cab37..dc6695cc576 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -617,7 +617,67 @@ done: retv = xfrm_user_policy(sk, optname, optval, optlen); break; + case IPV6_ADDR_PREFERENCES: + { + unsigned int pref = 0; + unsigned int prefmask = ~0; + + retv = -EINVAL; + + /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */ + switch (val & (IPV6_PREFER_SRC_PUBLIC| + IPV6_PREFER_SRC_TMP| + IPV6_PREFER_SRC_PUBTMP_DEFAULT)) { + case IPV6_PREFER_SRC_PUBLIC: + pref |= IPV6_PREFER_SRC_PUBLIC; + break; + case IPV6_PREFER_SRC_TMP: + pref |= IPV6_PREFER_SRC_TMP; + break; + case IPV6_PREFER_SRC_PUBTMP_DEFAULT: + break; + case 0: + goto pref_skip_pubtmp; + default: + goto e_inval; + } + + prefmask &= ~(IPV6_PREFER_SRC_PUBLIC| + IPV6_PREFER_SRC_TMP); +pref_skip_pubtmp: + + /* check HOME/COA conflicts */ + switch (val & (IPV6_PREFER_SRC_HOME|IPV6_PREFER_SRC_COA)) { + case IPV6_PREFER_SRC_HOME: + break; + case IPV6_PREFER_SRC_COA: + pref |= IPV6_PREFER_SRC_COA; + case 0: + goto pref_skip_coa; + default: + goto e_inval; + } + + prefmask &= ~IPV6_PREFER_SRC_COA; +pref_skip_coa: + + /* check CGA/NONCGA conflicts */ + switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) { + case IPV6_PREFER_SRC_CGA: + case IPV6_PREFER_SRC_NONCGA: + case 0: + break; + default: + goto e_inval; + } + + np->srcprefs = (np->srcprefs & prefmask) | pref; + retv = 0; + + break; + } } + release_sock(sk); return retv; @@ -904,9 +964,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, dst = sk_dst_get(sk); if (dst) { if (val < 0) - val = dst_metric(dst, RTAX_HOPLIMIT); - if (val < 0) - val = ipv6_get_hoplimit(dst->dev); + val = ip6_dst_hoplimit(dst); dst_release(dst); } if (val < 0) @@ -934,6 +992,24 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->sndflow; break; + case IPV6_ADDR_PREFERENCES: + val = 0; + + if (np->srcprefs & IPV6_PREFER_SRC_TMP) + val |= IPV6_PREFER_SRC_TMP; + else if (np->srcprefs & IPV6_PREFER_SRC_PUBLIC) + val |= IPV6_PREFER_SRC_PUBLIC; + else { + /* XXX: should we return system default? */ + val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT; + } + + if (np->srcprefs & IPV6_PREFER_SRC_COA) + val |= IPV6_PREFER_SRC_COA; + else + val |= IPV6_PREFER_SRC_HOME; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index e7d8e74704c..3f68a6eae7b 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -546,7 +546,9 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, override = 0; in6_ifa_put(ifp); } else { - if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr)) + if (ipv6_dev_get_saddr(dev, daddr, + inet6_sk(dev->nd_net->ipv6.ndisc_sk)->srcprefs, + &tmpaddr)) return; src_addr = &tmpaddr; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a9e4235157a..548d0763f4d 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -885,9 +885,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, else hlimit = np->hop_limit; if (hlimit < 0) - hlimit = dst_metric(dst, RTAX_HOPLIMIT); - if (hlimit < 0) - hlimit = ipv6_get_hoplimit(dst->dev); + hlimit = ip6_dst_hoplimit(dst); } if (tclass < 0) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a4b5aee0f68..06faa46920e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -782,6 +782,15 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, if (!ipv6_addr_any(&fl->fl6_src)) flags |= RT6_LOOKUP_F_HAS_SADDR; + else if (sk) { + unsigned int prefs = inet6_sk(sk)->srcprefs; + if (prefs & IPV6_PREFER_SRC_TMP) + flags |= RT6_LOOKUP_F_SRCPREF_TMP; + if (prefs & IPV6_PREFER_SRC_PUBLIC) + flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC; + if (prefs & IPV6_PREFER_SRC_COA) + flags |= RT6_LOOKUP_F_SRCPREF_COA; + } return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); } @@ -1034,15 +1043,17 @@ static int ipv6_get_mtu(struct net_device *dev) return mtu; } -int ipv6_get_hoplimit(struct net_device *dev) +int ip6_dst_hoplimit(struct dst_entry *dst) { - int hoplimit = ipv6_devconf.hop_limit; - struct inet6_dev *idev; - - idev = in6_dev_get(dev); - if (idev) { - hoplimit = idev->cnf.hop_limit; - in6_dev_put(idev); + int hoplimit = dst_metric(dst, RTAX_HOPLIMIT); + if (hoplimit < 0) { + struct net_device *dev = dst->dev; + struct inet6_dev *idev = in6_dev_get(dev); + if (idev) { + hoplimit = idev->cnf.hop_limit; + in6_dev_put(idev); + } else + hoplimit = ipv6_devconf.hop_limit; } return hoplimit; } @@ -2160,7 +2171,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, else if (dst) { struct in6_addr saddr_buf; if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev, - dst, &saddr_buf) == 0) + dst, 0, &saddr_buf) == 0) NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5f5d1218c34..593d3efadaf 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -792,9 +792,7 @@ do_udp_sendmsg: else hlimit = np->hop_limit; if (hlimit < 0) - hlimit = dst_metric(dst, RTAX_HOPLIMIT); - if (hlimit < 0) - hlimit = ipv6_get_hoplimit(dst->dev); + hlimit = ip6_dst_hoplimit(dst); } if (tclass < 0) { diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 93e52e0d57f..706c5c375a0 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -109,13 +109,28 @@ static struct udp_seq_afinfo udplite6_seq_afinfo = { .seq_fops = &udplite6_seq_fops, }; +static int udplite6_proc_init_net(struct net *net) +{ + return udp_proc_register(net, &udplite6_seq_afinfo); +} + +static void udplite6_proc_exit_net(struct net *net) +{ + udp_proc_unregister(net, &udplite6_seq_afinfo); +} + +static struct pernet_operations udplite6_net_ops = { + .init = udplite6_proc_init_net, + .exit = udplite6_proc_exit_net, +}; + int __init udplite6_proc_init(void) { - return udp_proc_register(&init_net, &udplite6_seq_afinfo); + return register_pernet_subsys(&udplite6_net_ops); } void udplite6_proc_exit(void) { - udp_proc_unregister(&init_net, &udplite6_seq_afinfo); + unregister_pernet_subsys(&udplite6_net_ops); } #endif diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index a4714d76ae6..a71c7ddcb41 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -59,9 +59,6 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto) { struct xfrm_state *x = NULL; - int wildcard = 0; - xfrm_address_t *xany; - int nh = 0; int i = 0; /* Allocate new secpath or COW existing one. */ @@ -83,10 +80,9 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, goto drop; } - xany = (xfrm_address_t *)&in6addr_any; - for (i = 0; i < 3; i++) { xfrm_address_t *dst, *src; + switch (i) { case 0: dst = daddr; @@ -94,16 +90,13 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, break; case 1: /* lookup state with wild-card source address */ - wildcard = 1; dst = daddr; - src = xany; + src = (xfrm_address_t *)&in6addr_any; break; - case 2: default: /* lookup state with wild-card addresses */ - wildcard = 1; /* XXX */ - dst = xany; - src = xany; + dst = (xfrm_address_t *)&in6addr_any; + src = (xfrm_address_t *)&in6addr_any; break; } @@ -113,39 +106,19 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, spin_lock(&x->lock); - if (wildcard) { - if ((x->props.flags & XFRM_STATE_WILDRECV) == 0) { - spin_unlock(&x->lock); - xfrm_state_put(x); - x = NULL; - continue; - } - } - - if (unlikely(x->km.state != XFRM_STATE_VALID)) { + if ((!i || (x->props.flags & XFRM_STATE_WILDRECV)) && + likely(x->km.state == XFRM_STATE_VALID) && + !xfrm_state_check_expire(x)) { spin_unlock(&x->lock); - xfrm_state_put(x); - x = NULL; - continue; - } - if (xfrm_state_check_expire(x)) { + if (x->type->input(x, skb) > 0) { + /* found a valid state */ + break; + } + } else spin_unlock(&x->lock); - xfrm_state_put(x); - x = NULL; - continue; - } - - spin_unlock(&x->lock); - - nh = x->type->input(x, skb); - if (nh <= 0) { - xfrm_state_put(x); - x = NULL; - continue; - } - /* Found a state */ - break; + xfrm_state_put(x); + x = NULL; } if (!x) { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index e96dafdc703..d92d1fceb8c 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -58,7 +58,7 @@ static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) return -EHOSTUNREACH; ipv6_dev_get_saddr(ip6_dst_idev(dst)->dev, - (struct in6_addr *)&daddr->a6, + (struct in6_addr *)&daddr->a6, 0, (struct in6_addr *)&saddr->a6); dst_release(dst); return 0; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index dc817e035e2..5a46bb99c3a 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -49,125 +49,102 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.family = AF_INET6; } +/* distribution counting sort function for xfrm_state and xfrm_tmpl */ static int -__xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) +__xfrm6_sort(void **dst, void **src, int n, int (*cmp)(void *p), int maxclass) { int i; - int j = 0; + int class[XFRM_MAX_DEPTH]; + int count[maxclass]; - /* Rule 1: select IPsec transport except AH */ - for (i = 0; i < n; i++) { - if (src[i]->props.mode == XFRM_MODE_TRANSPORT && - src[i]->id.proto != IPPROTO_AH) { - dst[j++] = src[i]; - src[i] = NULL; - } - } - if (j == n) - goto end; + memset(count, 0, sizeof(count)); - /* Rule 2: select MIPv6 RO or inbound trigger */ -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) for (i = 0; i < n; i++) { - if (src[i] && - (src[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION || - src[i]->props.mode == XFRM_MODE_IN_TRIGGER)) { - dst[j++] = src[i]; - src[i] = NULL; - } + int c; + class[i] = c = cmp(src[i]); + count[c]++; } - if (j == n) - goto end; -#endif - /* Rule 3: select IPsec transport AH */ - for (i = 0; i < n; i++) { - if (src[i] && - src[i]->props.mode == XFRM_MODE_TRANSPORT && - src[i]->id.proto == IPPROTO_AH) { - dst[j++] = src[i]; - src[i] = NULL; - } - } - if (j == n) - goto end; + for (i = 2; i < maxclass; i++) + count[i] += count[i - 1]; - /* Rule 4: select IPsec tunnel */ for (i = 0; i < n; i++) { - if (src[i] && - (src[i]->props.mode == XFRM_MODE_TUNNEL || - src[i]->props.mode == XFRM_MODE_BEET)) { - dst[j++] = src[i]; - src[i] = NULL; - } + dst[count[class[i] - 1]++] = src[i]; + src[i] = 0; } - if (likely(j == n)) - goto end; - /* Final rule */ - for (i = 0; i < n; i++) { - if (src[i]) { - dst[j++] = src[i]; - src[i] = NULL; - } - } - - end: return 0; } -static int -__xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) +/* + * Rule for xfrm_state: + * + * rule 1: select IPsec transport except AH + * rule 2: select MIPv6 RO or inbound trigger + * rule 3: select IPsec transport AH + * rule 4: select IPsec tunnel + * rule 5: others + */ +static int __xfrm6_state_sort_cmp(void *p) { - int i; - int j = 0; - - /* Rule 1: select IPsec transport */ - for (i = 0; i < n; i++) { - if (src[i]->mode == XFRM_MODE_TRANSPORT) { - dst[j++] = src[i]; - src[i] = NULL; - } - } - if (j == n) - goto end; - - /* Rule 2: select MIPv6 RO or inbound trigger */ + struct xfrm_state *v = p; + + switch (v->props.mode) { + case XFRM_MODE_TRANSPORT: + if (v->id.proto != IPPROTO_AH) + return 1; + else + return 3; #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) - for (i = 0; i < n; i++) { - if (src[i] && - (src[i]->mode == XFRM_MODE_ROUTEOPTIMIZATION || - src[i]->mode == XFRM_MODE_IN_TRIGGER)) { - dst[j++] = src[i]; - src[i] = NULL; - } - } - if (j == n) - goto end; + case XFRM_MODE_ROUTEOPTIMIZATION: + case XFRM_MODE_IN_TRIGGER: + return 2; #endif - - /* Rule 3: select IPsec tunnel */ - for (i = 0; i < n; i++) { - if (src[i] && - (src[i]->mode == XFRM_MODE_TUNNEL || - src[i]->mode == XFRM_MODE_BEET)) { - dst[j++] = src[i]; - src[i] = NULL; - } + case XFRM_MODE_TUNNEL: + case XFRM_MODE_BEET: + return 4; } - if (likely(j == n)) - goto end; + return 5; +} - /* Final rule */ - for (i = 0; i < n; i++) { - if (src[i]) { - dst[j++] = src[i]; - src[i] = NULL; - } +static int +__xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) +{ + return __xfrm6_sort((void **)dst, (void **)src, n, + __xfrm6_state_sort_cmp, 6); +} + +/* + * Rule for xfrm_tmpl: + * + * rule 1: select IPsec transport + * rule 2: select MIPv6 RO or inbound trigger + * rule 3: select IPsec tunnel + * rule 4: others + */ +static int __xfrm6_tmpl_sort_cmp(void *p) +{ + struct xfrm_tmpl *v = p; + switch (v->mode) { + case XFRM_MODE_TRANSPORT: + return 1; +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + case XFRM_MODE_ROUTEOPTIMIZATION: + case XFRM_MODE_IN_TRIGGER: + return 2; +#endif + case XFRM_MODE_TUNNEL: + case XFRM_MODE_BEET: + return 3; } + return 4; +} - end: - return 0; +static int +__xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) +{ + return __xfrm6_sort((void **)dst, (void **)src, n, + __xfrm6_tmpl_sort_cmp, 5); } int xfrm6_extract_header(struct sk_buff *skb) diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 639fe8a6ff1..c2b27813860 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -140,12 +140,26 @@ __be32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) EXPORT_SYMBOL(xfrm6_tunnel_spi_lookup); +static int __xfrm6_tunnel_spi_check(u32 spi) +{ + struct xfrm6_tunnel_spi *x6spi; + int index = xfrm6_tunnel_spi_hash_byspi(spi); + struct hlist_node *pos; + + hlist_for_each_entry(x6spi, pos, + &xfrm6_tunnel_spi_byspi[index], + list_byspi) { + if (x6spi->spi == spi) + return -1; + } + return index; +} + static u32 __xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr) { u32 spi; struct xfrm6_tunnel_spi *x6spi; - struct hlist_node *pos; - unsigned index; + int index; if (xfrm6_tunnel_spi < XFRM6_TUNNEL_SPI_MIN || xfrm6_tunnel_spi >= XFRM6_TUNNEL_SPI_MAX) @@ -154,32 +168,19 @@ static u32 __xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr) xfrm6_tunnel_spi++; for (spi = xfrm6_tunnel_spi; spi <= XFRM6_TUNNEL_SPI_MAX; spi++) { - index = xfrm6_tunnel_spi_hash_byspi(spi); - hlist_for_each_entry(x6spi, pos, - &xfrm6_tunnel_spi_byspi[index], - list_byspi) { - if (x6spi->spi == spi) - goto try_next_1; - } - xfrm6_tunnel_spi = spi; - goto alloc_spi; -try_next_1:; + index = __xfrm6_tunnel_spi_check(spi); + if (index >= 0) + goto alloc_spi; } for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tunnel_spi; spi++) { - index = xfrm6_tunnel_spi_hash_byspi(spi); - hlist_for_each_entry(x6spi, pos, - &xfrm6_tunnel_spi_byspi[index], - list_byspi) { - if (x6spi->spi == spi) - goto try_next_2; - } - xfrm6_tunnel_spi = spi; - goto alloc_spi; -try_next_2:; + index = __xfrm6_tunnel_spi_check(spi); + if (index >= 0) + goto alloc_spi; } spi = 0; goto out; alloc_spi: + xfrm6_tunnel_spi = spi; x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, GFP_ATOMIC); if (!x6spi) goto out; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index c4065b8f9a9..ec05684c56d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -165,6 +165,14 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, unsigned int verdict; int ret = 0; +#ifdef CONFIG_NET_NS + struct net *net; + + net = indev == NULL ? outdev->nd_net : indev->nd_net; + if (net != &init_net) + return 1; +#endif + /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 46c5b3c5cb9..dc71d0d8375 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -316,7 +316,9 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc, if (!asoc) { ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, - &daddr->v6.sin6_addr, &saddr->v6.sin6_addr); + &daddr->v6.sin6_addr, + inet6_sk(asoc->base.sk)->srcprefs, + &saddr->v6.sin6_addr); SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: " NIP6_FMT "\n", NIP6(saddr->v6.sin6_addr)); return; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index bae94a8031a..8e588f20c60 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -97,25 +97,52 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, return 0; } +static inline struct dst_entry *__xfrm_dst_lookup(int tos, + xfrm_address_t *saddr, + xfrm_address_t *daddr, + int family) +{ + struct xfrm_policy_afinfo *afinfo; + struct dst_entry *dst; + + afinfo = xfrm_policy_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return ERR_PTR(-EAFNOSUPPORT); + + dst = afinfo->dst_lookup(tos, saddr, daddr); + + xfrm_policy_put_afinfo(afinfo); + + return dst; +} + static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, + xfrm_address_t *prev_saddr, + xfrm_address_t *prev_daddr, int family) { xfrm_address_t *saddr = &x->props.saddr; xfrm_address_t *daddr = &x->id.daddr; - struct xfrm_policy_afinfo *afinfo; struct dst_entry *dst; - if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) + if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) { saddr = x->coaddr; - if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) + daddr = prev_daddr; + } + if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) { + saddr = prev_saddr; daddr = x->coaddr; + } - afinfo = xfrm_policy_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return ERR_PTR(-EAFNOSUPPORT); + dst = __xfrm_dst_lookup(tos, saddr, daddr, family); + + if (!IS_ERR(dst)) { + if (prev_saddr != saddr) + memcpy(prev_saddr, saddr, sizeof(*prev_saddr)); + if (prev_daddr != daddr) + memcpy(prev_daddr, daddr, sizeof(*prev_daddr)); + } - dst = afinfo->dst_lookup(tos, saddr, daddr); - xfrm_policy_put_afinfo(afinfo); return dst; } @@ -1354,6 +1381,9 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, int trailer_len = 0; int tos; int family = policy->selector.family; + xfrm_address_t saddr, daddr; + + xfrm_flowi_addr_get(fl, &saddr, &daddr, family); tos = xfrm_get_tos(fl, family); err = tos; @@ -1384,7 +1414,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { family = xfrm[i]->props.family; - dst = xfrm_dst_lookup(xfrm[i], tos, family); + dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr, + family); err = PTR_ERR(dst); if (IS_ERR(dst)) goto put_states; |