diff options
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/Makefile | 4 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 203 | ||||
-rw-r--r-- | net/ipv6/addrlabel.c | 5 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 189 | ||||
-rw-r--r-- | net/ipv6/anycast.c | 2 | ||||
-rw-r--r-- | net/ipv6/fib6_rules.c | 103 | ||||
-rw-r--r-- | net/ipv6/icmp.c | 133 | ||||
-rw-r--r-- | net/ipv6/ip6_fib.c | 292 | ||||
-rw-r--r-- | net/ipv6/ip6_input.c | 5 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 8 | ||||
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 6 | ||||
-rw-r--r-- | net/ipv6/ipv6_sockglue.c | 128 | ||||
-rw-r--r-- | net/ipv6/mcast.c | 72 | ||||
-rw-r--r-- | net/ipv6/ndisc.c | 34 | ||||
-rw-r--r-- | net/ipv6/netfilter.c | 4 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6t_REJECT.c | 2 | ||||
-rw-r--r-- | net/ipv6/proc.c | 9 | ||||
-rw-r--r-- | net/ipv6/route.c | 551 | ||||
-rw-r--r-- | net/ipv6/sit.c | 6 | ||||
-rw-r--r-- | net/ipv6/syncookies.c | 267 | ||||
-rw-r--r-- | net/ipv6/sysctl_net_ipv6.c | 15 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 122 | ||||
-rw-r--r-- | net/ipv6/udp_ipv6.c (renamed from net/ipv6/udp.c) | 16 | ||||
-rw-r--r-- | net/ipv6/udplite_ipv6.c (renamed from net/ipv6/udplite.c) | 0 | ||||
-rw-r--r-- | net/ipv6/xfrm6_policy.c | 7 |
25 files changed, 1415 insertions, 768 deletions
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 24f3aa0f2a3..107051f7c22 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ - route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ + route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp_ipv6.o \ raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o @@ -16,6 +16,8 @@ ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ ipv6-$(CONFIG_NETFILTER) += netfilter.o ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o ipv6-$(CONFIG_PROC_FS) += proc.o +ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o +ipv6-$(CONFIG_IP_UDPLITE) += udplite_ipv6.o ipv6-objs += $(ipv6-y) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 101e0e70ba2..c878fb681ef 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -493,7 +493,7 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) dev_forward_change((struct inet6_dev *)table->extra1); if (*p) - rt6_purge_dflt_routers(); + rt6_purge_dflt_routers(net); } #endif @@ -561,7 +561,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, write_lock(&addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ - if (ipv6_chk_same_addr(&init_net, addr, idev->dev)) { + if (ipv6_chk_same_addr(idev->dev->nd_net, addr, idev->dev)) { ADBG(("ipv6_add_addr: already assigned\n")); err = -EEXIST; goto out; @@ -751,9 +751,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) { struct in6_addr prefix; struct rt6_info *rt; - + struct net *net = ifp->idev->dev->nd_net; ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); - rt = rt6_lookup(&prefix, NULL, ifp->idev->dev->ifindex, 1); + rt = rt6_lookup(net, &prefix, NULL, ifp->idev->dev->ifindex, 1); if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { if (onlink == 0) { @@ -905,6 +905,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, { struct ipv6_saddr_score hiscore; struct inet6_ifaddr *ifa_result = NULL; + struct net *net = daddr_dev->nd_net; int daddr_type = __ipv6_addr_type(daddr); int daddr_scope = __ipv6_addr_src_scope(daddr_type); int daddr_ifindex = daddr_dev ? daddr_dev->ifindex : 0; @@ -916,7 +917,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { struct inet6_dev *idev; struct inet6_ifaddr *ifa; @@ -1125,6 +1126,11 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, if (hiscore.rule < 7) hiscore.rule++; #endif + + /* Skip rule 8 for orchid -> non-orchid address pairs. */ + if (ipv6_addr_orchid(&ifa->addr) && !ipv6_addr_orchid(daddr)) + continue; + /* Rule 8: Use longest matching prefix */ if (hiscore.rule < 8) { hiscore.matchlen = ipv6_addr_diff(&ifa_result->addr, daddr); @@ -1162,14 +1168,7 @@ record_it: return 0; } - -int ipv6_get_saddr(struct dst_entry *dst, - struct in6_addr *daddr, struct in6_addr *saddr) -{ - return ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, daddr, saddr); -} - -EXPORT_SYMBOL(ipv6_get_saddr); +EXPORT_SYMBOL(ipv6_dev_get_saddr); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, unsigned char banned_flags) @@ -1557,7 +1556,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, .fc_expires = expires, .fc_dst_len = plen, .fc_flags = RTF_UP | flags, - .fc_nlinfo.nl_net = &init_net, + .fc_nlinfo.nl_net = dev->nd_net, }; ipv6_addr_copy(&cfg.fc_dst, pfx); @@ -1584,7 +1583,7 @@ static void addrconf_add_mroute(struct net_device *dev) .fc_ifindex = dev->ifindex, .fc_dst_len = 8, .fc_flags = RTF_UP, - .fc_nlinfo.nl_net = &init_net, + .fc_nlinfo.nl_net = dev->nd_net, }; ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); @@ -1601,7 +1600,7 @@ static void sit_route_add(struct net_device *dev) .fc_ifindex = dev->ifindex, .fc_dst_len = 96, .fc_flags = RTF_UP | RTF_NONEXTHOP, - .fc_nlinfo.nl_net = &init_net, + .fc_nlinfo.nl_net = dev->nd_net, }; /* prefix length - 96 bits "::d.d.d.d" */ @@ -1702,7 +1701,8 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) if (pinfo->onlink) { struct rt6_info *rt; - rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1); + rt = rt6_lookup(dev->nd_net, &pinfo->prefix, NULL, + dev->ifindex, 1); if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { if (rt->rt6i_flags&RTF_EXPIRES) { @@ -1745,7 +1745,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) ok: - ifp = ipv6_get_ifaddr(&init_net, &addr, dev, 1); + ifp = ipv6_get_ifaddr(dev->nd_net, &addr, dev, 1); if (ifp == NULL && valid_lft) { int max_addresses = in6_dev->cnf.max_addresses; @@ -1868,7 +1868,7 @@ ok: * Special case for SIT interfaces where we create a new "virtual" * device. */ -int addrconf_set_dstaddr(void __user *arg) +int addrconf_set_dstaddr(struct net *net, void __user *arg) { struct in6_ifreq ireq; struct net_device *dev; @@ -1880,7 +1880,7 @@ int addrconf_set_dstaddr(void __user *arg) if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) goto err_exit; - dev = __dev_get_by_index(&init_net, ireq.ifr6_ifindex); + dev = __dev_get_by_index(net, ireq.ifr6_ifindex); err = -ENODEV; if (dev == NULL) @@ -1911,7 +1911,8 @@ int addrconf_set_dstaddr(void __user *arg) if (err == 0) { err = -ENOBUFS; - if ((dev = __dev_get_by_name(&init_net, p.name)) == NULL) + dev = __dev_get_by_name(net, p.name); + if (!dev) goto err_exit; err = dev_open(dev); } @@ -1926,8 +1927,9 @@ err_exit: /* * Manual configuration of address on an interface */ -static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, - __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) +static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, + int plen, __u8 ifa_flags, __u32 prefered_lft, + __u32 valid_lft) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; @@ -1941,7 +1943,8 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, if (!valid_lft || prefered_lft > valid_lft) return -EINVAL; - if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL) + dev = __dev_get_by_index(net, ifindex); + if (!dev) return -ENODEV; if ((idev = addrconf_add_dev(dev)) == NULL) @@ -1986,13 +1989,15 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, return PTR_ERR(ifp); } -static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen) +static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx, + int plen) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; - if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL) + dev = __dev_get_by_index(net, ifindex); + if (!dev) return -ENODEV; if ((idev = __in6_dev_get(dev)) == NULL) @@ -2020,7 +2025,7 @@ static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen) } -int addrconf_add_ifaddr(void __user *arg) +int addrconf_add_ifaddr(struct net *net, void __user *arg) { struct in6_ifreq ireq; int err; @@ -2032,13 +2037,14 @@ int addrconf_add_ifaddr(void __user *arg) return -EFAULT; rtnl_lock(); - err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen, - IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); + err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, + ireq.ifr6_prefixlen, IFA_F_PERMANENT, + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); rtnl_unlock(); return err; } -int addrconf_del_ifaddr(void __user *arg) +int addrconf_del_ifaddr(struct net *net, void __user *arg) { struct in6_ifreq ireq; int err; @@ -2050,7 +2056,8 @@ int addrconf_del_ifaddr(void __user *arg) return -EFAULT; rtnl_lock(); - err = inet6_addr_del(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen); + err = inet6_addr_del(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, + ireq.ifr6_prefixlen); rtnl_unlock(); return err; } @@ -2061,6 +2068,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) struct inet6_ifaddr * ifp; struct in6_addr addr; struct net_device *dev; + struct net *net = idev->dev->nd_net; int scope; ASSERT_RTNL(); @@ -2087,7 +2095,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) return; } - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { struct in_device * in_dev = __in_dev_get_rtnl(dev); if (in_dev && (dev->flags & IFF_UP)) { struct in_ifaddr * ifa; @@ -2250,15 +2258,16 @@ ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev) static void ip6_tnl_add_linklocal(struct inet6_dev *idev) { struct net_device *link_dev; + struct net *net = idev->dev->nd_net; /* first try to inherit the link-local address from the link device */ if (idev->dev->iflink && - (link_dev = __dev_get_by_index(&init_net, idev->dev->iflink))) { + (link_dev = __dev_get_by_index(net, idev->dev->iflink))) { if (!ipv6_inherit_linklocal(idev, link_dev)) return; } /* then try to inherit it from any device */ - for_each_netdev(&init_net, link_dev) { + for_each_netdev(net, link_dev) { if (!ipv6_inherit_linklocal(idev, link_dev)) return; } @@ -2291,9 +2300,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, int run_pending = 0; int err; - if (dev->nd_net != &init_net) - return NOTIFY_DONE; - switch(event) { case NETDEV_REGISTER: if (!idev && dev->mtu >= IPV6_MIN_MTU) { @@ -2433,6 +2439,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) { struct inet6_dev *idev; struct inet6_ifaddr *ifa, **bifa; + struct net *net = dev->nd_net; int i; ASSERT_RTNL(); @@ -2440,7 +2447,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (dev == init_net.loopback_dev && how == 1) how = 0; - rt6_ifdown(dev); + rt6_ifdown(net, dev); neigh_ifdown(&nd_tbl, dev); idev = __in6_dev_get(dev); @@ -3050,9 +3057,6 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct in6_addr *pfx; int err; - if (net != &init_net) - return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) return err; @@ -3062,7 +3066,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (pfx == NULL) return -EINVAL; - return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen); + return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen); } static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, @@ -3115,9 +3119,6 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) u8 ifa_flags; int err; - if (net != &init_net) - return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) return err; @@ -3138,7 +3139,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) valid_lft = INFINITY_LIFE_TIME; } - dev = __dev_get_by_index(&init_net, ifm->ifa_index); + dev = __dev_get_by_index(net, ifm->ifa_index); if (dev == NULL) return -ENODEV; @@ -3151,8 +3152,9 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) * It would be best to check for !NLM_F_CREATE here but * userspace alreay relies on not having to provide this. */ - return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, - ifa_flags, preferred_lft, valid_lft); + return inet6_addr_add(net, ifm->ifa_index, pfx, + ifm->ifa_prefixlen, ifa_flags, + preferred_lft, valid_lft); } if (nlh->nlmsg_flags & NLM_F_EXCL || @@ -3317,12 +3319,13 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, struct inet6_ifaddr *ifa; struct ifmcaddr6 *ifmca; struct ifacaddr6 *ifaca; + struct net *net = skb->sk->sk_net; s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; idx = 0; - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -3389,35 +3392,23 @@ cont: static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; enum addr_type_t type = UNICAST_ADDR; - if (net != &init_net) - return 0; - return inet6_dump_addr(skb, cb, type); } static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; enum addr_type_t type = MULTICAST_ADDR; - if (net != &init_net) - return 0; - return inet6_dump_addr(skb, cb, type); } static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; enum addr_type_t type = ANYCAST_ADDR; - if (net != &init_net) - return 0; - return inet6_dump_addr(skb, cb, type); } @@ -3433,9 +3424,6 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, struct sk_buff *skb; int err; - if (net != &init_net) - return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) goto errout; @@ -3448,7 +3436,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, ifm = nlmsg_data(nlh); if (ifm->ifa_index) - dev = __dev_get_by_index(&init_net, ifm->ifa_index); + dev = __dev_get_by_index(net, ifm->ifa_index); if ((ifa = ipv6_get_ifaddr(net, addr, dev, 1)) == NULL) { err = -EADDRNOTAVAIL; @@ -3468,7 +3456,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, kfree_skb(skb); goto errout_ifa; } - err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); errout_ifa: in6_ifa_put(ifa); errout: @@ -3478,6 +3466,7 @@ errout: static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) { struct sk_buff *skb; + struct net *net = ifa->idev->dev->nd_net; int err = -ENOBUFS; skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); @@ -3491,10 +3480,10 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err); + rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); } static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, @@ -3659,12 +3648,9 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; struct inet6_dev *idev; - if (net != &init_net) - return 0; - read_lock(&dev_base_lock); idx = 0; - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { if (idx < s_idx) goto cont; if ((idev = in6_dev_get(dev)) == NULL) @@ -3686,6 +3672,7 @@ cont: void inet6_ifinfo_notify(int event, struct inet6_dev *idev) { struct sk_buff *skb; + struct net *net = idev->dev->nd_net; int err = -ENOBUFS; skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC); @@ -3699,10 +3686,10 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err); + rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); } static inline size_t inet6_prefix_nlmsg_size(void) @@ -3755,6 +3742,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, struct prefix_info *pinfo) { struct sk_buff *skb; + struct net *net = idev->dev->nd_net; int err = -ENOBUFS; skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC); @@ -3768,10 +3756,10 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_PREFIX, err); + rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err); } static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) @@ -4261,6 +4249,41 @@ int unregister_inet6addr_notifier(struct notifier_block *nb) EXPORT_SYMBOL(unregister_inet6addr_notifier); + +static int addrconf_net_init(struct net *net) +{ + return 0; +} + +static void addrconf_net_exit(struct net *net) +{ + struct net_device *dev; + + /* + * Remove loopback references from default routing entries + */ +/* in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev); */ +/* #ifdef CONFIG_IPV6_MULTIPLE_TABLES */ +/* in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev); */ +/* in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev); */ +/* #endif */ + + rtnl_lock(); + /* clean dev list */ + for_each_netdev(net, dev) { + if (__in6_dev_get(dev) == NULL) + continue; + addrconf_ifdown(dev, 1); + } + addrconf_ifdown(net->loopback_dev, 2); + rtnl_unlock(); +} + +static struct pernet_operations addrconf_net_ops = { + .init = addrconf_net_init, + .exit = addrconf_net_exit, +}; + /* * Init / cleanup code */ @@ -4302,14 +4325,9 @@ int __init addrconf_init(void) if (err) goto errlo; - ip6_null_entry.u.dst.dev = init_net.loopback_dev; - ip6_null_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev); -#ifdef CONFIG_IPV6_MULTIPLE_TABLES - ip6_prohibit_entry.u.dst.dev = init_net.loopback_dev; - ip6_prohibit_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev); - ip6_blk_hole_entry.u.dst.dev = init_net.loopback_dev; - ip6_blk_hole_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev); -#endif + err = register_pernet_device(&addrconf_net_ops); + if (err) + return err; register_netdevice_notifier(&ipv6_dev_notf); @@ -4339,31 +4357,19 @@ errlo: void addrconf_cleanup(void) { - struct net_device *dev; struct inet6_ifaddr *ifa; int i; unregister_netdevice_notifier(&ipv6_dev_notf); + unregister_pernet_device(&addrconf_net_ops); unregister_pernet_subsys(&addrconf_ops); rtnl_lock(); /* - * clean dev list. - */ - - for_each_netdev(&init_net, dev) { - if (__in6_dev_get(dev) == NULL) - continue; - addrconf_ifdown(dev, 1); - } - addrconf_ifdown(init_net.loopback_dev, 2); - - /* * Check hash table. */ - write_lock_bh(&addrconf_hash_lock); for (i=0; i < IN6_ADDR_HSIZE; i++) { for (ifa=inet6_addr_lst[i]; ifa; ) { @@ -4380,6 +4386,7 @@ void addrconf_cleanup(void) write_unlock_bh(&addrconf_hash_lock); del_timer(&addr_chk_timer); - rtnl_unlock(); + + unregister_pernet_subsys(&addrconf_net_ops); } diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index a3c5a72218f..3a8b3f52da3 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -58,6 +58,7 @@ static struct ip6addrlbl_table * ::ffff:0:0/96 V4MAPPED 4 * fc00::/7 N/A 5 ULA (RFC 4193) * 2001::/32 N/A 6 Teredo (RFC 4380) + * 2001:10::/28 N/A 7 ORCHID (RFC 4843) * * Note: 0xffffffff is used if we do not have any policies. */ @@ -85,6 +86,10 @@ static const __initdata struct ip6addrlbl_init_table .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, .prefixlen = 32, .label = 6, + },{ /* 2001:10::/28 */ + .prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}}, + .prefixlen = 28, + .label = 7, },{ /* ::ffff:0:0 */ .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}}, .prefixlen = 96, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index f0aa9773874..afe9276d042 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -92,9 +92,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol) int try_loading_module = 0; int err; - if (net != &init_net) - return -EAFNOSUPPORT; - if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM && !inet_ehash_secret) @@ -248,6 +245,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sk->sk_net; __be32 v4addr = 0; unsigned short snum; int addr_type = 0; @@ -278,7 +276,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Check if the address belongs to the host. */ if (addr_type == IPV6_ADDR_MAPPED) { v4addr = addr->sin6_addr.s6_addr32[3]; - if (inet_addr_type(&init_net, v4addr) != RTN_LOCAL) { + if (inet_addr_type(net, v4addr) != RTN_LOCAL) { err = -EADDRNOTAVAIL; goto out; } @@ -300,7 +298,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = -EINVAL; goto out; } - dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); + dev = dev_get_by_index(net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out; @@ -312,7 +310,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) */ v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST)) { - if (!ipv6_chk_addr(&init_net, &addr->sin6_addr, + if (!ipv6_chk_addr(net, &addr->sin6_addr, dev, 0)) { if (dev) dev_put(dev); @@ -440,6 +438,7 @@ EXPORT_SYMBOL(inet6_getname); int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; switch(cmd) { @@ -452,14 +451,14 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCADDRT: case SIOCDELRT: - return(ipv6_route_ioctl(cmd,(void __user *)arg)); + return(ipv6_route_ioctl(net, cmd, (void __user *)arg)); case SIOCSIFADDR: - return addrconf_add_ifaddr((void __user *) arg); + return addrconf_add_ifaddr(net, (void __user *) arg); case SIOCDIFADDR: - return addrconf_del_ifaddr((void __user *) arg); + return addrconf_del_ifaddr(net, (void __user *) arg); case SIOCSIFDSTADDR: - return addrconf_set_dstaddr((void __user *) arg); + return addrconf_set_dstaddr(net, (void __user *) arg); default: if (!sk->sk_prot->ioctl) return -ENOIOCTLCMD; @@ -678,6 +677,129 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) EXPORT_SYMBOL_GPL(ipv6_opt_accepted); +static struct inet6_protocol *ipv6_gso_pull_exthdrs(struct sk_buff *skb, + int proto) +{ + struct inet6_protocol *ops = NULL; + + for (;;) { + struct ipv6_opt_hdr *opth; + int len; + + if (proto != NEXTHDR_HOP) { + ops = rcu_dereference(inet6_protos[proto]); + + if (unlikely(!ops)) + break; + + if (!(ops->flags & INET6_PROTO_GSO_EXTHDR)) + break; + } + + if (unlikely(!pskb_may_pull(skb, 8))) + break; + + opth = (void *)skb->data; + len = ipv6_optlen(opth); + + if (unlikely(!pskb_may_pull(skb, len))) + break; + + proto = opth->nexthdr; + __skb_pull(skb, len); + } + + return ops; +} + +static int ipv6_gso_send_check(struct sk_buff *skb) +{ + struct ipv6hdr *ipv6h; + struct inet6_protocol *ops; + int err = -EINVAL; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + goto out; + + ipv6h = ipv6_hdr(skb); + __skb_pull(skb, sizeof(*ipv6h)); + err = -EPROTONOSUPPORT; + + rcu_read_lock(); + ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + if (likely(ops && ops->gso_send_check)) { + skb_reset_transport_header(skb); + err = ops->gso_send_check(skb); + } + rcu_read_unlock(); + +out: + return err; +} + +static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct ipv6hdr *ipv6h; + struct inet6_protocol *ops; + + if (!(features & NETIF_F_V6_CSUM)) + features &= ~NETIF_F_SG; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_TCPV6 | + 0))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + goto out; + + ipv6h = ipv6_hdr(skb); + __skb_pull(skb, sizeof(*ipv6h)); + segs = ERR_PTR(-EPROTONOSUPPORT); + + rcu_read_lock(); + ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + if (likely(ops && ops->gso_segment)) { + skb_reset_transport_header(skb); + segs = ops->gso_segment(skb, features); + } + rcu_read_unlock(); + + if (unlikely(IS_ERR(segs))) + goto out; + + for (skb = segs; skb; skb = skb->next) { + ipv6h = ipv6_hdr(skb); + ipv6h->payload_len = htons(skb->len - skb->mac_len - + sizeof(*ipv6h)); + } + +out: + return segs; +} + +static struct packet_type ipv6_packet_type = { + .type = __constant_htons(ETH_P_IPV6), + .func = ipv6_rcv, + .gso_send_check = ipv6_gso_send_check, + .gso_segment = ipv6_gso_segment, +}; + +static int __init ipv6_packet_init(void) +{ + dev_add_pack(&ipv6_packet_type); + return 0; +} + +static void ipv6_packet_cleanup(void) +{ + dev_remove_pack(&ipv6_packet_type); +} + static int __init init_ipv6_mibs(void) { if (snmp_mib_init((void **)ipv6_statistics, @@ -691,12 +813,16 @@ static int __init init_ipv6_mibs(void) goto err_icmpmsg_mib; if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib)) < 0) goto err_udp_mib; +#ifdef CONFIG_IP_UDPLITE if (snmp_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib)) < 0) goto err_udplite_mib; +#endif return 0; +#ifdef CONFIG_IP_UDPLITE err_udplite_mib: +#endif snmp_mib_free((void **)udp_stats_in6); err_udp_mib: snmp_mib_free((void **)icmpv6msg_statistics); @@ -715,7 +841,9 @@ static void cleanup_ipv6_mibs(void) snmp_mib_free((void **)icmpv6_statistics); snmp_mib_free((void **)icmpv6msg_statistics); snmp_mib_free((void **)udp_stats_in6); +#ifdef CONFIG_IP_UDPLITE snmp_mib_free((void **)udplite_stats_in6); +#endif } static int inet6_net_init(struct net *net) @@ -760,9 +888,11 @@ static int __init inet6_init(void) if (err) goto out_unregister_tcp_proto; +#ifdef CONFIG_IP_UDPLITE err = proto_register(&udplitev6_prot, 1); if (err) goto out_unregister_udp_proto; +#endif err = proto_register(&rawv6_prot, 1); if (err) @@ -802,19 +932,13 @@ static int __init inet6_init(void) err = register_pernet_subsys(&inet6_net_ops); if (err) goto register_pernet_fail; - -#ifdef CONFIG_SYSCTL - err = ipv6_sysctl_register(); - if (err) - goto sysctl_fail; -#endif - err = icmpv6_init(&inet6_family_ops); + err = icmpv6_init(); if (err) goto icmp_fail; - err = ndisc_init(&inet6_family_ops); + err = ndisc_init(); if (err) goto ndisc_fail; - err = igmp6_init(&inet6_family_ops); + err = igmp6_init(); if (err) goto igmp_fail; err = ipv6_netfilter_init(); @@ -874,9 +998,19 @@ static int __init inet6_init(void) err = ipv6_packet_init(); if (err) goto ipv6_packet_fail; + +#ifdef CONFIG_SYSCTL + err = ipv6_sysctl_register(); + if (err) + goto sysctl_fail; +#endif out: return err; +#ifdef CONFIG_SYSCTL +sysctl_fail: + ipv6_packet_cleanup(); +#endif ipv6_packet_fail: tcpv6_exit(); tcpv6_fail: @@ -918,10 +1052,6 @@ igmp_fail: ndisc_fail: icmpv6_cleanup(); icmp_fail: -#ifdef CONFIG_SYSCTL - ipv6_sysctl_unregister(); -sysctl_fail: -#endif unregister_pernet_subsys(&inet6_net_ops); register_pernet_fail: cleanup_ipv6_mibs(); @@ -933,8 +1063,10 @@ out_sock_register_fail: out_unregister_raw_proto: proto_unregister(&rawv6_prot); out_unregister_udplite_proto: +#ifdef CONFIG_IP_UDPLITE proto_unregister(&udplitev6_prot); out_unregister_udp_proto: +#endif proto_unregister(&udpv6_prot); out_unregister_tcp_proto: proto_unregister(&tcpv6_prot); @@ -949,8 +1081,13 @@ static void __exit inet6_exit(void) /* Disallow any further netlink messages */ rtnl_unregister_all(PF_INET6); +#ifdef CONFIG_SYSCTL + ipv6_sysctl_unregister(); +#endif udpv6_exit(); +#ifdef CONFIG_IP_UDPLITE udplitev6_exit(); +#endif tcpv6_exit(); /* Cleanup code parts. */ @@ -976,13 +1113,13 @@ static void __exit inet6_exit(void) ndisc_cleanup(); icmpv6_cleanup(); rawv6_exit(); -#ifdef CONFIG_SYSCTL - ipv6_sysctl_unregister(); -#endif + unregister_pernet_subsys(&inet6_net_ops); cleanup_ipv6_mibs(); proto_unregister(&rawv6_prot); +#ifdef CONFIG_IP_UDPLITE proto_unregister(&udplitev6_prot); +#endif proto_unregister(&udpv6_prot); proto_unregister(&tcpv6_prot); } diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 9c7f83fbc3a..96868b994b3 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -101,7 +101,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) if (ifindex == 0) { struct rt6_info *rt; - rt = rt6_lookup(addr, NULL, 0, 0); + rt = rt6_lookup(&init_net, addr, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; dev_hold(dev); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 695c0ca8a41..55137408f05 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -29,24 +29,22 @@ struct fib6_rule u8 tclass; }; -static struct fib_rules_ops fib6_rules_ops; - -struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, - pol_lookup_t lookup) +struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, + int flags, pol_lookup_t lookup) { struct fib_lookup_arg arg = { .lookup_ptr = lookup, }; - fib_rules_lookup(&fib6_rules_ops, fl, flags, &arg); + fib_rules_lookup(net->ipv6.fib6_rules_ops, fl, flags, &arg); if (arg.rule) fib_rule_put(arg.rule); if (arg.result) return arg.result; - dst_hold(&ip6_null_entry.u.dst); - return &ip6_null_entry.u.dst; + dst_hold(&net->ipv6.ip6_null_entry->u.dst); + return &net->ipv6.ip6_null_entry->u.dst; } static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, @@ -54,28 +52,29 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, { struct rt6_info *rt = NULL; struct fib6_table *table; + struct net *net = rule->fr_net; pol_lookup_t lookup = arg->lookup_ptr; switch (rule->action) { case FR_ACT_TO_TBL: break; case FR_ACT_UNREACHABLE: - rt = &ip6_null_entry; + rt = net->ipv6.ip6_null_entry; goto discard_pkt; default: case FR_ACT_BLACKHOLE: - rt = &ip6_blk_hole_entry; + rt = net->ipv6.ip6_blk_hole_entry; goto discard_pkt; case FR_ACT_PROHIBIT: - rt = &ip6_prohibit_entry; + rt = net->ipv6.ip6_prohibit_entry; goto discard_pkt; } - table = fib6_get_table(rule->table); + table = fib6_get_table(net, rule->table); if (table) - rt = lookup(table, flp, flags); + rt = lookup(net, table, flp, flags); - if (rt != &ip6_null_entry) { + if (rt != net->ipv6.ip6_null_entry) { struct fib6_rule *r = (struct fib6_rule *)rule; /* @@ -85,8 +84,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, if ((rule->flags & FIB_RULE_FIND_SADDR) && r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) { struct in6_addr saddr; - if (ipv6_get_saddr(&rt->u.dst, &flp->fl6_dst, - &saddr)) + if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev, + &flp->fl6_dst, &saddr)) goto again; if (!ipv6_prefix_equal(&saddr, &r->src.addr, r->src.plen)) @@ -145,13 +144,14 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct nlattr **tb) { int err = -EINVAL; + struct net *net = skb->sk->sk_net; struct fib6_rule *rule6 = (struct fib6_rule *) rule; if (rule->action == FR_ACT_TO_TBL) { if (rule->table == RT6_TABLE_UNSPEC) goto errout; - if (fib6_new_table(rule->table) == NULL) { + if (fib6_new_table(net, rule->table) == NULL) { err = -ENOBUFS; goto errout; } @@ -234,7 +234,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) + nla_total_size(16); /* src */ } -static struct fib_rules_ops fib6_rules_ops = { +static struct fib_rules_ops fib6_rules_ops_template = { .family = AF_INET6, .rule_size = sizeof(struct fib6_rule), .addr_size = sizeof(struct in6_addr), @@ -247,45 +247,64 @@ static struct fib_rules_ops fib6_rules_ops = { .nlmsg_payload = fib6_rule_nlmsg_payload, .nlgroup = RTNLGRP_IPV6_RULE, .policy = fib6_rule_policy, - .rules_list = LIST_HEAD_INIT(fib6_rules_ops.rules_list), .owner = THIS_MODULE, .fro_net = &init_net, }; -static int __init fib6_default_rules_init(void) +static int fib6_rules_net_init(struct net *net) { - int err; + int err = -ENOMEM; - err = fib_default_rule_add(&fib6_rules_ops, 0, - RT6_TABLE_LOCAL, FIB_RULE_PERMANENT); - if (err < 0) - return err; - err = fib_default_rule_add(&fib6_rules_ops, 0x7FFE, RT6_TABLE_MAIN, 0); - if (err < 0) - return err; - return 0; -} + net->ipv6.fib6_rules_ops = kmemdup(&fib6_rules_ops_template, + sizeof(*net->ipv6.fib6_rules_ops), + GFP_KERNEL); + if (!net->ipv6.fib6_rules_ops) + goto out; -int __init fib6_rules_init(void) -{ - int ret; + net->ipv6.fib6_rules_ops->fro_net = net; + INIT_LIST_HEAD(&net->ipv6.fib6_rules_ops->rules_list); - ret = fib6_default_rules_init(); - if (ret) - goto out; + err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0, + RT6_TABLE_LOCAL, FIB_RULE_PERMANENT); + if (err) + goto out_fib6_rules_ops; - ret = fib_rules_register(&fib6_rules_ops); - if (ret) - goto out_default_rules_init; + err = fib_default_rule_add(net->ipv6.fib6_rules_ops, + 0x7FFE, RT6_TABLE_MAIN, 0); + if (err) + goto out_fib6_default_rule_add; + + err = fib_rules_register(net->ipv6.fib6_rules_ops); + if (err) + goto out_fib6_default_rule_add; out: - return ret; + return err; -out_default_rules_init: - fib_rules_cleanup_ops(&fib6_rules_ops); +out_fib6_default_rule_add: + fib_rules_cleanup_ops(net->ipv6.fib6_rules_ops); +out_fib6_rules_ops: + kfree(net->ipv6.fib6_rules_ops); goto out; } +static void fib6_rules_net_exit(struct net *net) +{ + fib_rules_unregister(net->ipv6.fib6_rules_ops); + kfree(net->ipv6.fib6_rules_ops); +} + +static struct pernet_operations fib6_rules_net_ops = { + .init = fib6_rules_net_init, + .exit = fib6_rules_net_exit, +}; + +int __init fib6_rules_init(void) +{ + return register_pernet_subsys(&fib6_rules_net_ops); +} + + void fib6_rules_cleanup(void) { - fib_rules_unregister(&fib6_rules_ops); + return unregister_pernet_subsys(&fib6_rules_net_ops); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 121d517bf91..6b5391ab834 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -80,8 +80,10 @@ EXPORT_SYMBOL(icmpv6msg_statistics); * * On SMP we have one ICMP socket per-cpu. */ -static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL; -#define icmpv6_socket __get_cpu_var(__icmpv6_socket) +static inline struct sock *icmpv6_sk(struct net *net) +{ + return net->ipv6.icmp_sk[smp_processor_id()]; +} static int icmpv6_rcv(struct sk_buff *skb); @@ -90,11 +92,11 @@ static struct inet6_protocol icmpv6_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -static __inline__ int icmpv6_xmit_lock(void) +static __inline__ int icmpv6_xmit_lock(struct sock *sk) { local_bh_disable(); - if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) { + if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path (f.e. SIT or * ip6ip6 tunnel) signals dst_link_failure() for an * outgoing ICMP6 packet. @@ -105,9 +107,9 @@ static __inline__ int icmpv6_xmit_lock(void) return 0; } -static __inline__ void icmpv6_xmit_unlock(void) +static __inline__ void icmpv6_xmit_unlock(struct sock *sk) { - spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock); + spin_unlock_bh(&sk->sk_lock.slock); } /* @@ -161,6 +163,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type, struct flowi *fl) { struct dst_entry *dst; + struct net *net = sk->sk_net; int res = 0; /* Informational messages are not limited. */ @@ -176,7 +179,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type, * XXX: perhaps the expire for routing entries cloned by * this lookup should be more aggressive (not longer than timeout). */ - dst = ip6_route_output(sk, fl); + dst = ip6_route_output(net, sk, fl); if (dst->error) { IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); @@ -184,7 +187,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type, res = 1; } else { struct rt6_info *rt = (struct rt6_info *)dst; - int tmo = init_net.ipv6.sysctl.icmpv6_time; + int tmo = net->ipv6.sysctl.icmpv6_time; /* Give more bandwidth to wider prefixes. */ if (rt->rt6i_dst.plen < 128) @@ -303,6 +306,7 @@ static inline void mip6_addr_swap(struct sk_buff *skb) {} void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, struct net_device *dev) { + struct net *net = skb->dev->nd_net; struct inet6_dev *idev = NULL; struct ipv6hdr *hdr = ipv6_hdr(skb); struct sock *sk; @@ -332,7 +336,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, */ addr_type = ipv6_addr_type(&hdr->daddr); - if (ipv6_chk_addr(&init_net, &hdr->daddr, skb->dev, 0)) + if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0)) saddr = &hdr->daddr; /* @@ -389,12 +393,12 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, fl.fl_icmp_code = code; security_skb_classify_flow(skb, &fl); - if (icmpv6_xmit_lock()) - return; - - sk = icmpv6_socket->sk; + sk = icmpv6_sk(net); np = inet6_sk(sk); + if (icmpv6_xmit_lock(sk)) + return; + if (!icmpv6_xrlim_allow(sk, type, &fl)) goto out; @@ -498,13 +502,14 @@ out_put: out_dst_release: dst_release(dst); out: - icmpv6_xmit_unlock(); + icmpv6_xmit_unlock(sk); } EXPORT_SYMBOL(icmpv6_send); static void icmpv6_echo_reply(struct sk_buff *skb) { + struct net *net = skb->dev->nd_net; struct sock *sk; struct inet6_dev *idev; struct ipv6_pinfo *np; @@ -535,12 +540,12 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl.fl_icmp_type = ICMPV6_ECHO_REPLY; security_skb_classify_flow(skb, &fl); - if (icmpv6_xmit_lock()) - return; - - sk = icmpv6_socket->sk; + sk = icmpv6_sk(net); np = inet6_sk(sk); + if (icmpv6_xmit_lock(sk)) + return; + if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; @@ -584,7 +589,7 @@ out_put: in6_dev_put(idev); dst_release(dst); out: - icmpv6_xmit_unlock(); + icmpv6_xmit_unlock(sk); } static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) @@ -775,19 +780,41 @@ drop_no_count: return 0; } +void icmpv6_flow_init(struct sock *sk, struct flowi *fl, + u8 type, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + int oif) +{ + memset(fl, 0, sizeof(*fl)); + ipv6_addr_copy(&fl->fl6_src, saddr); + ipv6_addr_copy(&fl->fl6_dst, daddr); + fl->proto = IPPROTO_ICMPV6; + fl->fl_icmp_type = type; + fl->fl_icmp_code = 0; + fl->oif = oif; + security_sk_classify_flow(sk, fl); +} + /* - * Special lock-class for __icmpv6_socket: + * Special lock-class for __icmpv6_sk: */ static struct lock_class_key icmpv6_socket_sk_dst_lock_key; -int __init icmpv6_init(struct net_proto_family *ops) +static int __net_init icmpv6_sk_init(struct net *net) { struct sock *sk; int err, i, j; + net->ipv6.icmp_sk = + kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL); + if (net->ipv6.icmp_sk == NULL) + return -ENOMEM; + for_each_possible_cpu(i) { + struct socket *sock; err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, - &per_cpu(__icmpv6_socket, i)); + &sock); if (err < 0) { printk(KERN_ERR "Failed to initialize the ICMP6 control socket " @@ -796,12 +823,14 @@ int __init icmpv6_init(struct net_proto_family *ops) goto fail; } - sk = per_cpu(__icmpv6_socket, i)->sk; + net->ipv6.icmp_sk[i] = sk = sock->sk; + sk_change_net(sk, net); + sk->sk_allocation = GFP_ATOMIC; /* * Split off their lock-class, because sk->sk_dst_lock * gets used from softirqs, which is safe for - * __icmpv6_socket (because those never get directly used + * __icmpv6_sk (because those never get directly used * via userspace syscalls), but unsafe for normal sockets. */ lockdep_set_class(&sk->sk_dst_lock, @@ -815,36 +844,56 @@ int __init icmpv6_init(struct net_proto_family *ops) sk->sk_prot->unhash(sk); } - - - if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) { - printk(KERN_ERR "Failed to register ICMP6 protocol\n"); - err = -EAGAIN; - goto fail; - } - return 0; fail: - for (j = 0; j < i; j++) { - if (!cpu_possible(j)) - continue; - sock_release(per_cpu(__icmpv6_socket, j)); - } - + for (j = 0; j < i; j++) + sk_release_kernel(net->ipv6.icmp_sk[j]); + kfree(net->ipv6.icmp_sk); return err; } -void icmpv6_cleanup(void) +static void __net_exit icmpv6_sk_exit(struct net *net) { int i; for_each_possible_cpu(i) { - sock_release(per_cpu(__icmpv6_socket, i)); + sk_release_kernel(net->ipv6.icmp_sk[i]); } + kfree(net->ipv6.icmp_sk); +} + +static struct pernet_operations icmpv6_sk_ops = { + .init = icmpv6_sk_init, + .exit = icmpv6_sk_exit, +}; + +int __init icmpv6_init(void) +{ + int err; + + err = register_pernet_subsys(&icmpv6_sk_ops); + if (err < 0) + return err; + + err = -EAGAIN; + if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) + goto fail; + return 0; + +fail: + printk(KERN_ERR "Failed to register ICMP6 protocol\n"); + unregister_pernet_subsys(&icmpv6_sk_ops); + return err; +} + +void icmpv6_cleanup(void) +{ + unregister_pernet_subsys(&icmpv6_sk_ops); inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); } + static const struct icmp6_err { int err; int fatal; @@ -925,6 +974,10 @@ struct ctl_table *ipv6_icmp_sysctl_init(struct net *net) table = kmemdup(ipv6_icmp_table_template, sizeof(ipv6_icmp_table_template), GFP_KERNEL); + + if (table) + table[0].data = &net->ipv6.sysctl.icmpv6_time; + return table; } #endif diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index bab72b6f144..b0814b0082e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -48,8 +48,6 @@ #define RT6_TRACE(x...) do { ; } while (0) #endif -struct rt6_statistics rt6_stats; - static struct kmem_cache * fib6_node_kmem __read_mostly; enum fib_walk_state_t @@ -66,6 +64,7 @@ enum fib_walk_state_t struct fib6_cleaner_t { struct fib6_walker_t w; + struct net *net; int (*func)(struct rt6_info *, void *arg); void *arg; }; @@ -78,9 +77,10 @@ static DEFINE_RWLOCK(fib6_walker_lock); #define FWS_INIT FWS_L #endif -static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt); -static struct rt6_info * fib6_find_prefix(struct fib6_node *fn); -static struct fib6_node * fib6_repair_tree(struct fib6_node *fn); +static void fib6_prune_clones(struct net *net, struct fib6_node *fn, + struct rt6_info *rt); +static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); +static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); static int fib6_walk(struct fib6_walker_t *w); static int fib6_walk_continue(struct fib6_walker_t *w); @@ -93,7 +93,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w); static __u32 rt_sernum; -static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0); +static void fib6_gc_timer_cb(unsigned long arg); static struct fib6_walker_t fib6_walker_list = { .prev = &fib6_walker_list, @@ -166,22 +166,13 @@ static __inline__ void rt6_release(struct rt6_info *rt) dst_free(&rt->u.dst); } -static struct fib6_table fib6_main_tbl = { - .tb6_id = RT6_TABLE_MAIN, - .tb6_root = { - .leaf = &ip6_null_entry, - .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, - }, -}; - #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB_TABLE_HASHSZ 256 #else #define FIB_TABLE_HASHSZ 1 #endif -static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; -static void fib6_link_table(struct fib6_table *tb) +static void fib6_link_table(struct net *net, struct fib6_table *tb) { unsigned int h; @@ -197,52 +188,46 @@ static void fib6_link_table(struct fib6_table *tb) * No protection necessary, this is the only list mutatation * operation, tables never disappear once they exist. */ - hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]); + hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]); } #ifdef CONFIG_IPV6_MULTIPLE_TABLES -static struct fib6_table fib6_local_tbl = { - .tb6_id = RT6_TABLE_LOCAL, - .tb6_root = { - .leaf = &ip6_null_entry, - .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, - }, -}; -static struct fib6_table *fib6_alloc_table(u32 id) +static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) { struct fib6_table *table; table = kzalloc(sizeof(*table), GFP_ATOMIC); if (table != NULL) { table->tb6_id = id; - table->tb6_root.leaf = &ip6_null_entry; + table->tb6_root.leaf = net->ipv6.ip6_null_entry; table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; } return table; } -struct fib6_table *fib6_new_table(u32 id) +struct fib6_table *fib6_new_table(struct net *net, u32 id) { struct fib6_table *tb; if (id == 0) id = RT6_TABLE_MAIN; - tb = fib6_get_table(id); + tb = fib6_get_table(net, id); if (tb) return tb; - tb = fib6_alloc_table(id); + tb = fib6_alloc_table(net, id); if (tb != NULL) - fib6_link_table(tb); + fib6_link_table(net, tb); return tb; } -struct fib6_table *fib6_get_table(u32 id) +struct fib6_table *fib6_get_table(struct net *net, u32 id) { struct fib6_table *tb; + struct hlist_head *head; struct hlist_node *node; unsigned int h; @@ -250,7 +235,8 @@ struct fib6_table *fib6_get_table(u32 id) id = RT6_TABLE_MAIN; h = id & (FIB_TABLE_HASHSZ - 1); rcu_read_lock(); - hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb6_hlist) { + head = &net->ipv6.fib_table_hash[h]; + hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) { if (tb->tb6_id == id) { rcu_read_unlock(); return tb; @@ -261,33 +247,32 @@ struct fib6_table *fib6_get_table(u32 id) return NULL; } -static void __init fib6_tables_init(void) +static void fib6_tables_init(struct net *net) { - fib6_link_table(&fib6_main_tbl); - fib6_link_table(&fib6_local_tbl); + fib6_link_table(net, net->ipv6.fib6_main_tbl); + fib6_link_table(net, net->ipv6.fib6_local_tbl); } - #else -struct fib6_table *fib6_new_table(u32 id) +struct fib6_table *fib6_new_table(struct net *net, u32 id) { - return fib6_get_table(id); + return fib6_get_table(net, id); } -struct fib6_table *fib6_get_table(u32 id) +struct fib6_table *fib6_get_table(struct net *net, u32 id) { - return &fib6_main_tbl; + return net->ipv6.fib6_main_tbl; } -struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, - pol_lookup_t lookup) +struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, + int flags, pol_lookup_t lookup) { - return (struct dst_entry *) lookup(&fib6_main_tbl, fl, flags); + return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl, flags); } -static void __init fib6_tables_init(void) +static void fib6_tables_init(struct net *net) { - fib6_link_table(&fib6_main_tbl); + fib6_link_table(net, net->ipv6.fib6_main_tbl); } #endif @@ -368,11 +353,9 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) struct fib6_walker_t *w; struct fib6_table *tb; struct hlist_node *node; + struct hlist_head *head; int res = 0; - if (net != &init_net) - return 0; - s_h = cb->args[0]; s_e = cb->args[1]; @@ -401,7 +384,8 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { e = 0; - hlist_for_each_entry(tb, node, &fib_table_hash[h], tb6_hlist) { + head = &net->ipv6.fib_table_hash[h]; + hlist_for_each_entry(tb, node, head, tb6_hlist) { if (e < s_e) goto next; res = fib6_dump_table(tb, skb, cb); @@ -667,29 +651,29 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); inet6_rt_notify(RTM_NEWROUTE, rt, info); - rt6_stats.fib_rt_entries++; + info->nl_net->ipv6.rt6_stats->fib_rt_entries++; if ((fn->fn_flags & RTN_RTINFO) == 0) { - rt6_stats.fib_route_nodes++; + info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; } return 0; } -static __inline__ void fib6_start_gc(struct rt6_info *rt) +static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt) { - if (ip6_fib_timer.expires == 0 && + if (net->ipv6.ip6_fib_timer->expires == 0 && (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE))) - mod_timer(&ip6_fib_timer, jiffies + - init_net.ipv6.sysctl.ip6_rt_gc_interval); + mod_timer(net->ipv6.ip6_fib_timer, jiffies + + net->ipv6.sysctl.ip6_rt_gc_interval); } -void fib6_force_start_gc(void) +void fib6_force_start_gc(struct net *net) { - if (ip6_fib_timer.expires == 0) - mod_timer(&ip6_fib_timer, jiffies + - init_net.ipv6.sysctl.ip6_rt_gc_interval); + if (net->ipv6.ip6_fib_timer->expires == 0) + mod_timer(net->ipv6.ip6_fib_timer, jiffies + + net->ipv6.sysctl.ip6_rt_gc_interval); } /* @@ -733,8 +717,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) if (sfn == NULL) goto st_failure; - sfn->leaf = &ip6_null_entry; - atomic_inc(&ip6_null_entry.rt6i_ref); + sfn->leaf = info->nl_net->ipv6.ip6_null_entry; + atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); sfn->fn_flags = RTN_ROOT; sfn->fn_sernum = fib6_new_sernum(); @@ -776,9 +760,9 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) err = fib6_add_rt2node(fn, rt, info); if (err == 0) { - fib6_start_gc(rt); + fib6_start_gc(info->nl_net, rt); if (!(rt->rt6i_flags&RTF_CACHE)) - fib6_prune_clones(pn, rt); + fib6_prune_clones(info->nl_net, pn, rt); } out: @@ -789,11 +773,11 @@ out: * super-tree leaf node we have to find a new one for it. */ if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) { - pn->leaf = fib6_find_prefix(pn); + pn->leaf = fib6_find_prefix(info->nl_net, pn); #if RT6_DEBUG >= 2 if (!pn->leaf) { BUG_TRAP(pn->leaf != NULL); - pn->leaf = &ip6_null_entry; + pn->leaf = info->nl_net->ipv6.ip6_null_entry; } #endif atomic_inc(&pn->leaf->rt6i_ref); @@ -809,7 +793,7 @@ out: */ st_failure: if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) - fib6_repair_tree(fn); + fib6_repair_tree(info->nl_net, fn); dst_free(&rt->u.dst); return err; #endif @@ -975,10 +959,10 @@ struct fib6_node * fib6_locate(struct fib6_node *root, * */ -static struct rt6_info * fib6_find_prefix(struct fib6_node *fn) +static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn) { if (fn->fn_flags&RTN_ROOT) - return &ip6_null_entry; + return net->ipv6.ip6_null_entry; while(fn) { if(fn->left) @@ -997,7 +981,8 @@ static struct rt6_info * fib6_find_prefix(struct fib6_node *fn) * is the node we want to try and remove. */ -static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) +static struct fib6_node *fib6_repair_tree(struct net *net, + struct fib6_node *fn) { int children; int nstate; @@ -1024,11 +1009,11 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) || (children && fn->fn_flags&RTN_ROOT) #endif ) { - fn->leaf = fib6_find_prefix(fn); + fn->leaf = fib6_find_prefix(net, fn); #if RT6_DEBUG >= 2 if (fn->leaf==NULL) { BUG_TRAP(fn->leaf); - fn->leaf = &ip6_null_entry; + fn->leaf = net->ipv6.ip6_null_entry; } #endif atomic_inc(&fn->leaf->rt6i_ref); @@ -1101,14 +1086,15 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, { struct fib6_walker_t *w; struct rt6_info *rt = *rtp; + struct net *net = info->nl_net; RT6_TRACE("fib6_del_route\n"); /* Unlink it */ *rtp = rt->u.dst.rt6_next; rt->rt6i_node = NULL; - rt6_stats.fib_rt_entries--; - rt6_stats.fib_discarded_routes++; + net->ipv6.rt6_stats->fib_rt_entries--; + net->ipv6.rt6_stats->fib_discarded_routes++; /* Reset round-robin state, if necessary */ if (fn->rr_ptr == rt) @@ -1131,8 +1117,8 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, /* If it was last route, expunge its radix tree node */ if (fn->leaf == NULL) { fn->fn_flags &= ~RTN_RTINFO; - rt6_stats.fib_route_nodes--; - fn = fib6_repair_tree(fn); + net->ipv6.rt6_stats->fib_route_nodes--; + fn = fib6_repair_tree(net, fn); } if (atomic_read(&rt->rt6i_ref) != 1) { @@ -1144,7 +1130,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, */ while (fn) { if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) { - fn->leaf = fib6_find_prefix(fn); + fn->leaf = fib6_find_prefix(net, fn); atomic_inc(&fn->leaf->rt6i_ref); rt6_release(rt); } @@ -1160,6 +1146,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, int fib6_del(struct rt6_info *rt, struct nl_info *info) { + struct net *net = info->nl_net; struct fib6_node *fn = rt->rt6i_node; struct rt6_info **rtp; @@ -1169,7 +1156,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) return -ENOENT; } #endif - if (fn == NULL || rt == &ip6_null_entry) + if (fn == NULL || rt == net->ipv6.ip6_null_entry) return -ENOENT; BUG_TRAP(fn->fn_flags&RTN_RTINFO); @@ -1184,7 +1171,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) pn = pn->parent; } #endif - fib6_prune_clones(pn, rt); + fib6_prune_clones(info->nl_net, pn, rt); } /* @@ -1314,12 +1301,12 @@ static int fib6_walk(struct fib6_walker_t *w) static int fib6_clean_node(struct fib6_walker_t *w) { - struct nl_info info = { - .nl_net = &init_net, - }; int res; struct rt6_info *rt; struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w); + struct nl_info info = { + .nl_net = c->net, + }; for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) { res = c->func(rt, c->arg); @@ -1351,7 +1338,7 @@ static int fib6_clean_node(struct fib6_walker_t *w) * ignoring pure split nodes) will be scanned. */ -static void fib6_clean_tree(struct fib6_node *root, +static void fib6_clean_tree(struct net *net, struct fib6_node *root, int (*func)(struct rt6_info *, void *arg), int prune, void *arg) { @@ -1362,23 +1349,26 @@ static void fib6_clean_tree(struct fib6_node *root, c.w.prune = prune; c.func = func; c.arg = arg; + c.net = net; fib6_walk(&c.w); } -void fib6_clean_all(int (*func)(struct rt6_info *, void *arg), +void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), int prune, void *arg) { struct fib6_table *table; struct hlist_node *node; + struct hlist_head *head; unsigned int h; rcu_read_lock(); for (h = 0; h < FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry_rcu(table, node, &fib_table_hash[h], - tb6_hlist) { + head = &net->ipv6.fib_table_hash[h]; + hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { write_lock_bh(&table->tb6_lock); - fib6_clean_tree(&table->tb6_root, func, prune, arg); + fib6_clean_tree(net, &table->tb6_root, + func, prune, arg); write_unlock_bh(&table->tb6_lock); } } @@ -1395,9 +1385,10 @@ static int fib6_prune_clone(struct rt6_info *rt, void *arg) return 0; } -static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt) +static void fib6_prune_clones(struct net *net, struct fib6_node *fn, + struct rt6_info *rt) { - fib6_clean_tree(fn, fib6_prune_clone, 1, rt); + fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt); } /* @@ -1447,54 +1438,145 @@ static int fib6_age(struct rt6_info *rt, void *arg) static DEFINE_SPINLOCK(fib6_gc_lock); -void fib6_run_gc(unsigned long dummy) +void fib6_run_gc(unsigned long expires, struct net *net) { - if (dummy != ~0UL) { + if (expires != ~0UL) { spin_lock_bh(&fib6_gc_lock); - gc_args.timeout = dummy ? (int)dummy : - init_net.ipv6.sysctl.ip6_rt_gc_interval; + gc_args.timeout = expires ? (int)expires : + net->ipv6.sysctl.ip6_rt_gc_interval; } else { local_bh_disable(); if (!spin_trylock(&fib6_gc_lock)) { - mod_timer(&ip6_fib_timer, jiffies + HZ); + mod_timer(net->ipv6.ip6_fib_timer, jiffies + HZ); local_bh_enable(); return; } - gc_args.timeout = init_net.ipv6.sysctl.ip6_rt_gc_interval; + gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval; } gc_args.more = 0; - ndisc_dst_gc(&gc_args.more); - fib6_clean_all(fib6_age, 0, NULL); + icmp6_dst_gc(&gc_args.more); + + fib6_clean_all(net, fib6_age, 0, NULL); if (gc_args.more) - mod_timer(&ip6_fib_timer, jiffies + - init_net.ipv6.sysctl.ip6_rt_gc_interval); + mod_timer(net->ipv6.ip6_fib_timer, jiffies + + net->ipv6.sysctl.ip6_rt_gc_interval); else { - del_timer(&ip6_fib_timer); - ip6_fib_timer.expires = 0; + del_timer(net->ipv6.ip6_fib_timer); + net->ipv6.ip6_fib_timer->expires = 0; } spin_unlock_bh(&fib6_gc_lock); } -int __init fib6_init(void) +static void fib6_gc_timer_cb(unsigned long arg) +{ + fib6_run_gc(0, (struct net *)arg); +} + +static int fib6_net_init(struct net *net) { int ret; + struct timer_list *timer; + + ret = -ENOMEM; + timer = kzalloc(sizeof(*timer), GFP_KERNEL); + if (!timer) + goto out; + + setup_timer(timer, fib6_gc_timer_cb, (unsigned long)net); + net->ipv6.ip6_fib_timer = timer; + + net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); + if (!net->ipv6.rt6_stats) + goto out_timer; + + net->ipv6.fib_table_hash = + kzalloc(sizeof(*net->ipv6.fib_table_hash)*FIB_TABLE_HASHSZ, + GFP_KERNEL); + if (!net->ipv6.fib_table_hash) + goto out_rt6_stats; + + net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl), + GFP_KERNEL); + if (!net->ipv6.fib6_main_tbl) + goto out_fib_table_hash; + + net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN; + net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; + net->ipv6.fib6_main_tbl->tb6_root.fn_flags = + RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; + +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl), + GFP_KERNEL); + if (!net->ipv6.fib6_local_tbl) + goto out_fib6_main_tbl; + net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL; + net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; + net->ipv6.fib6_local_tbl->tb6_root.fn_flags = + RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; +#endif + fib6_tables_init(net); + + ret = 0; +out: + return ret; + +#ifdef CONFIG_IPV6_MULTIPLE_TABLES +out_fib6_main_tbl: + kfree(net->ipv6.fib6_main_tbl); +#endif +out_fib_table_hash: + kfree(net->ipv6.fib_table_hash); +out_rt6_stats: + kfree(net->ipv6.rt6_stats); +out_timer: + kfree(timer); + goto out; + } + +static void fib6_net_exit(struct net *net) +{ + rt6_ifdown(net, NULL); + del_timer(net->ipv6.ip6_fib_timer); + kfree(net->ipv6.ip6_fib_timer); +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + kfree(net->ipv6.fib6_local_tbl); +#endif + kfree(net->ipv6.fib6_main_tbl); + kfree(net->ipv6.fib_table_hash); + kfree(net->ipv6.rt6_stats); +} + +static struct pernet_operations fib6_net_ops = { + .init = fib6_net_init, + .exit = fib6_net_exit, +}; + +int __init fib6_init(void) +{ + int ret = -ENOMEM; + fib6_node_kmem = kmem_cache_create("fib6_nodes", sizeof(struct fib6_node), 0, SLAB_HWCACHE_ALIGN, NULL); if (!fib6_node_kmem) - return -ENOMEM; + goto out; - fib6_tables_init(); + ret = register_pernet_subsys(&fib6_net_ops); + if (ret) + goto out_kmem_cache_create; ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib); if (ret) - goto out_kmem_cache_create; + goto out_unregister_subsys; out: return ret; +out_unregister_subsys: + unregister_pernet_subsys(&fib6_net_ops); out_kmem_cache_create: kmem_cache_destroy(fib6_node_kmem); goto out; @@ -1502,6 +1584,6 @@ out_kmem_cache_create: void fib6_gc_cleanup(void) { - del_timer(&ip6_fib_timer); + unregister_pernet_subsys(&fib6_net_ops); kmem_cache_destroy(fib6_node_kmem); } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 178aebc0427..7e36269826b 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -61,11 +61,6 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt u32 pkt_len; struct inet6_dev *idev; - if (dev->nd_net != &init_net) { - kfree_skb(skb); - return 0; - } - if (skb->pkt_type == PACKET_OTHERHOST) { kfree_skb(skb); return 0; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8b67ca07467..937018529d1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -596,7 +596,6 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) return offset; } -EXPORT_SYMBOL_GPL(ip6_find_1stfragopt); static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { @@ -914,13 +913,14 @@ static int ip6_dst_lookup_tail(struct sock *sk, int err; if (*dst == NULL) - *dst = ip6_route_output(sk, fl); + *dst = ip6_route_output(sk->sk_net, sk, fl); if ((err = (*dst)->error)) goto out_err_release; if (ipv6_addr_any(&fl->fl6_src)) { - err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src); + err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev, + &fl->fl6_dst, &fl->fl6_src); if (err) goto out_err_release; } @@ -954,7 +954,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, dst_release(*dst); memcpy(&fl_gw, fl, sizeof(struct flowi)); memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); - *dst = ip6_route_output(sk, &fl_gw); + *dst = ip6_route_output(sk->sk_net, sk, &fl_gw); if ((err = (*dst)->error)) goto out_err_release; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 78f43888092..1e1ad1ed87e 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -602,7 +602,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, skb_reset_network_header(skb2); /* Try to guess incoming interface */ - rt = rt6_lookup(&ipv6_hdr(skb2)->saddr, NULL, 0, 0); + rt = rt6_lookup(&init_net, &ipv6_hdr(skb2)->saddr, NULL, 0, 0); if (rt && rt->rt6i_dev) skb2->dev = rt->rt6i_dev; @@ -847,7 +847,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if ((dst = ip6_tnl_dst_check(t)) != NULL) dst_hold(dst); else { - dst = ip6_route_output(NULL, fl); + dst = ip6_route_output(&init_net, NULL, fl); if (dst->error || xfrm_lookup(&dst, fl, NULL, 0) < 0) goto tx_err_link_failure; @@ -1112,7 +1112,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) int strict = (ipv6_addr_type(&p->raddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); - struct rt6_info *rt = rt6_lookup(&p->raddr, &p->laddr, + struct rt6_info *rt = rt6_lookup(&init_net, &p->raddr, &p->laddr, p->link, strict); if (rt == NULL) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index bf2a686aa13..3bbfdff698d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -57,118 +57,6 @@ DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly; -static struct inet6_protocol *ipv6_gso_pull_exthdrs(struct sk_buff *skb, - int proto) -{ - struct inet6_protocol *ops = NULL; - - for (;;) { - struct ipv6_opt_hdr *opth; - int len; - - if (proto != NEXTHDR_HOP) { - ops = rcu_dereference(inet6_protos[proto]); - - if (unlikely(!ops)) - break; - - if (!(ops->flags & INET6_PROTO_GSO_EXTHDR)) - break; - } - - if (unlikely(!pskb_may_pull(skb, 8))) - break; - - opth = (void *)skb->data; - len = opth->hdrlen * 8 + 8; - - if (unlikely(!pskb_may_pull(skb, len))) - break; - - proto = opth->nexthdr; - __skb_pull(skb, len); - } - - return ops; -} - -static int ipv6_gso_send_check(struct sk_buff *skb) -{ - struct ipv6hdr *ipv6h; - struct inet6_protocol *ops; - int err = -EINVAL; - - if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) - goto out; - - ipv6h = ipv6_hdr(skb); - __skb_pull(skb, sizeof(*ipv6h)); - err = -EPROTONOSUPPORT; - - rcu_read_lock(); - ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); - if (likely(ops && ops->gso_send_check)) { - skb_reset_transport_header(skb); - err = ops->gso_send_check(skb); - } - rcu_read_unlock(); - -out: - return err; -} - -static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - struct ipv6hdr *ipv6h; - struct inet6_protocol *ops; - - if (!(features & NETIF_F_V6_CSUM)) - features &= ~NETIF_F_SG; - - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCPV6 | - 0))) - goto out; - - if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) - goto out; - - ipv6h = ipv6_hdr(skb); - __skb_pull(skb, sizeof(*ipv6h)); - segs = ERR_PTR(-EPROTONOSUPPORT); - - rcu_read_lock(); - ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); - if (likely(ops && ops->gso_segment)) { - skb_reset_transport_header(skb); - segs = ops->gso_segment(skb, features); - } - rcu_read_unlock(); - - if (unlikely(IS_ERR(segs))) - goto out; - - for (skb = segs; skb; skb = skb->next) { - ipv6h = ipv6_hdr(skb); - ipv6h->payload_len = htons(skb->len - skb->mac_len - - sizeof(*ipv6h)); - } - -out: - return segs; -} - -static struct packet_type ipv6_packet_type = { - .type = __constant_htons(ETH_P_IPV6), - .func = ipv6_rcv, - .gso_send_check = ipv6_gso_send_check, - .gso_segment = ipv6_gso_segment, -}; - struct ip6_ra_chain *ip6_ra_chain; DEFINE_RWLOCK(ip6_ra_lock); @@ -239,7 +127,9 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, struct sk_buff *pktopt; if (sk->sk_protocol != IPPROTO_UDP && +#ifdef CONFIG_IP_UDPLITE sk->sk_protocol != IPPROTO_UDPLITE && +#endif sk->sk_protocol != IPPROTO_TCP) break; @@ -279,7 +169,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } else { struct proto *prot = &udp_prot; - if (sk->sk_protocol == IPPROTO_UDPLITE) + if (IS_PROTO_UDPLITE(sk->sk_protocol)) prot = &udplite_prot; local_bh_disable(); sock_prot_inuse_add(sk->sk_prot, -1); @@ -844,7 +734,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case IPV6_ADDRFORM: if (sk->sk_protocol != IPPROTO_UDP && +#ifdef CONFIG_IP_UDPLITE sk->sk_protocol != IPPROTO_UDPLITE && +#endif sk->sk_protocol != IPPROTO_TCP) return -EINVAL; if (sk->sk_state != TCP_ESTABLISHED) @@ -1128,13 +1020,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_ipv6_getsockopt); #endif -int __init ipv6_packet_init(void) -{ - dev_add_pack(&ipv6_packet_type); - return 0; -} - -void ipv6_packet_cleanup(void) -{ - dev_remove_pack(&ipv6_packet_type); -} diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index ab228d1ea11..197ca390a15 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -208,7 +208,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr) if (ifindex == 0) { struct rt6_info *rt; - rt = rt6_lookup(addr, NULL, 0, 0); + rt = rt6_lookup(&init_net, addr, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; dev_hold(dev); @@ -294,7 +294,7 @@ static struct inet6_dev *ip6_mc_find_dev(struct in6_addr *group, int ifindex) if (ifindex == 0) { struct rt6_info *rt; - rt = rt6_lookup(group, NULL, 0, 0); + rt = rt6_lookup(&init_net, group, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; dev_hold(dev); @@ -1433,25 +1433,6 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) return skb; } -static inline int mld_dev_queue_xmit2(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - unsigned char ha[MAX_ADDR_LEN]; - - ndisc_mc_map(&ipv6_hdr(skb)->daddr, ha, dev, 1); - if (dev_hard_header(skb, dev, ETH_P_IPV6, ha, NULL, skb->len) < 0) { - kfree_skb(skb); - return -EINVAL; - } - return dev_queue_xmit(skb); -} - -static inline int mld_dev_queue_xmit(struct sk_buff *skb) -{ - return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, - mld_dev_queue_xmit2); -} - static void mld_sendpack(struct sk_buff *skb) { struct ipv6hdr *pip6 = ipv6_hdr(skb); @@ -1460,6 +1441,7 @@ static void mld_sendpack(struct sk_buff *skb) int payload_len, mldlen; struct inet6_dev *idev = in6_dev_get(skb->dev); int err; + struct flowi fl; IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); @@ -1469,8 +1451,25 @@ static void mld_sendpack(struct sk_buff *skb) pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb), mldlen, 0)); + + skb->dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); + + if (!skb->dst) { + err = -ENOMEM; + goto err_out; + } + + icmpv6_flow_init(igmp6_socket->sk, &fl, ICMPV6_MLD2_REPORT, + &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, + skb->dev->ifindex); + + err = xfrm_lookup(&skb->dst, &fl, NULL, 0); + if (err) + goto err_out; + err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, - mld_dev_queue_xmit); + dst_output); +out: if (!err) { ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT); ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); @@ -1480,6 +1479,11 @@ static void mld_sendpack(struct sk_buff *skb) if (likely(idev != NULL)) in6_dev_put(idev); + return; + +err_out: + kfree_skb(skb); + goto out; } static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel) @@ -1761,6 +1765,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; + struct flowi fl; rcu_read_lock(); IP6_INC_STATS(__in6_dev_get(dev), @@ -1813,8 +1818,23 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) idev = in6_dev_get(skb->dev); + skb->dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); + if (!skb->dst) { + err = -ENOMEM; + goto err_out; + } + + icmpv6_flow_init(igmp6_socket->sk, &fl, type, + &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, + skb->dev->ifindex); + + err = xfrm_lookup(&skb->dst, &fl, NULL, 0); + if (err) + goto err_out; + err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, - mld_dev_queue_xmit); + dst_output); +out: if (!err) { ICMP6MSGOUT_INC_STATS(idev, type); ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); @@ -1825,6 +1845,10 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) if (likely(idev != NULL)) in6_dev_put(idev); return; + +err_out: + kfree_skb(skb); + goto out; } static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, @@ -2597,7 +2621,7 @@ static const struct file_operations igmp6_mcf_seq_fops = { }; #endif -int __init igmp6_init(struct net_proto_family *ops) +int __init igmp6_init(void) { struct ipv6_pinfo *np; struct sock *sk; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0d33a7d3212..b5b4fd173e9 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -441,21 +441,6 @@ static void pndisc_destructor(struct pneigh_entry *n) /* * Send a Neighbour Advertisement */ - -static inline void ndisc_flow_init(struct flowi *fl, u8 type, - struct in6_addr *saddr, struct in6_addr *daddr, - int oif) -{ - memset(fl, 0, sizeof(*fl)); - ipv6_addr_copy(&fl->fl6_src, saddr); - ipv6_addr_copy(&fl->fl6_dst, daddr); - fl->proto = IPPROTO_ICMPV6; - fl->fl_icmp_type = type; - fl->fl_icmp_code = 0; - fl->oif = oif; - security_sk_classify_flow(ndisc_socket->sk, fl); -} - static void __ndisc_send(struct net_device *dev, struct neighbour *neigh, struct in6_addr *daddr, struct in6_addr *saddr, @@ -474,10 +459,10 @@ static void __ndisc_send(struct net_device *dev, type = icmp6h->icmp6_type; - ndisc_flow_init(&fl, type, saddr, daddr, - dev->ifindex); + icmpv6_flow_init(ndisc_socket->sk, &fl, type, + saddr, daddr, dev->ifindex); - dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output); + dst = icmp6_dst_alloc(dev, neigh, daddr); if (!dst) return; @@ -1439,10 +1424,10 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, return; } - ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &ipv6_hdr(skb)->saddr, - dev->ifindex); + icmpv6_flow_init(ndisc_socket->sk, &fl, NDISC_REDIRECT, + &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); - dst = ip6_route_output(NULL, &fl); + dst = ip6_route_output(&init_net, NULL, &fl); if (dst == NULL) return; @@ -1613,6 +1598,7 @@ int ndisc_rcv(struct sk_buff *skb) static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; + struct net *net = dev->nd_net; if (dev->nd_net != &init_net) return NOTIFY_DONE; @@ -1620,11 +1606,11 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); - fib6_run_gc(~0UL); + fib6_run_gc(~0UL, net); break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); - fib6_run_gc(~0UL); + fib6_run_gc(~0UL, net); break; default: break; @@ -1733,7 +1719,7 @@ static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, #endif -int __init ndisc_init(struct net_proto_family *ops) +int __init ndisc_init(void) { struct ipv6_pinfo *np; struct sock *sk; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 2e06724dc34..aed51bcc66b 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -23,7 +23,7 @@ int ip6_route_me_harder(struct sk_buff *skb) .saddr = iph->saddr, } }, }; - dst = ip6_route_output(skb->sk, &fl); + dst = ip6_route_output(&init_net, skb->sk, &fl); #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && @@ -86,7 +86,7 @@ static int nf_ip6_reroute(struct sk_buff *skb, static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl) { - *dst = ip6_route_output(NULL, fl); + *dst = ip6_route_output(&init_net, NULL, fl); return (*dst)->error; } diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index b23baa635fe..831708aeab3 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -93,7 +93,7 @@ static void send_reset(struct sk_buff *oldskb) fl.fl_ip_sport = otcph.dest; fl.fl_ip_dport = otcph.source; security_skb_classify_flow(oldskb, &fl); - dst = ip6_route_output(NULL, &fl); + dst = ip6_route_output(&init_net, NULL, &fl); if (dst == NULL) return; if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0)) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 199ef379e50..2453f2229ef 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -39,8 +39,10 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) sock_prot_inuse_get(&tcpv6_prot)); seq_printf(seq, "UDP6: inuse %d\n", sock_prot_inuse_get(&udpv6_prot)); +#ifdef CONFIG_IP_UDPLITE seq_printf(seq, "UDPLITE6: inuse %d\n", sock_prot_inuse_get(&udplitev6_prot)); +#endif seq_printf(seq, "RAW6: inuse %d\n", sock_prot_inuse_get(&rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", @@ -111,6 +113,7 @@ static struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_SENTINEL }; +#ifdef CONFIG_IP_UDPLITE static struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_ITEM("UdpLite6InDatagrams", UDP_MIB_INDATAGRAMS), SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS), @@ -118,6 +121,7 @@ static struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS), SNMP_MIB_SENTINEL }; +#endif static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) { @@ -176,7 +180,9 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list); snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics); snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list); +#ifdef CONFIG_IP_UDPLITE snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list); +#endif } return 0; } @@ -214,6 +220,9 @@ int snmp6_register_dev(struct inet6_dev *idev) if (!idev || !idev->dev) return -EINVAL; + if (idev->dev->nd_net != &init_net) + return 0; + if (!proc_net_devsnmp6) return -ENOENT; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e8b241cb60b..f31d7dc11e7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -40,6 +40,7 @@ #include <linux/if_arp.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/nsproxy.h> #include <net/net_namespace.h> #include <net/snmp.h> #include <net/ipv6.h> @@ -87,14 +88,16 @@ static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen, +static struct rt6_info *rt6_add_route_info(struct net *net, + struct in6_addr *prefix, int prefixlen, struct in6_addr *gwaddr, int ifindex, unsigned pref); -static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen, +static struct rt6_info *rt6_get_route_info(struct net *net, + struct in6_addr *prefix, int prefixlen, struct in6_addr *gwaddr, int ifindex); #endif -static struct dst_ops ip6_dst_ops = { +static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, .protocol = __constant_htons(ETH_P_IPV6), .gc = ip6_dst_gc, @@ -124,7 +127,7 @@ static struct dst_ops ip6_dst_blackhole_ops = { .entries = ATOMIC_INIT(0), }; -struct rt6_info ip6_null_entry = { +static struct rt6_info ip6_null_entry_template = { .u = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -134,8 +137,6 @@ struct rt6_info ip6_null_entry = { .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = ip6_pkt_discard, .output = ip6_pkt_discard_out, - .ops = &ip6_dst_ops, - .path = (struct dst_entry*)&ip6_null_entry, } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), @@ -148,7 +149,7 @@ struct rt6_info ip6_null_entry = { static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct sk_buff *skb); -struct rt6_info ip6_prohibit_entry = { +struct rt6_info ip6_prohibit_entry_template = { .u = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -158,8 +159,6 @@ struct rt6_info ip6_prohibit_entry = { .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = ip6_pkt_prohibit, .output = ip6_pkt_prohibit_out, - .ops = &ip6_dst_ops, - .path = (struct dst_entry*)&ip6_prohibit_entry, } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), @@ -167,7 +166,7 @@ struct rt6_info ip6_prohibit_entry = { .rt6i_ref = ATOMIC_INIT(1), }; -struct rt6_info ip6_blk_hole_entry = { +static struct rt6_info ip6_blk_hole_entry_template = { .u = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -177,8 +176,6 @@ struct rt6_info ip6_blk_hole_entry = { .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = dst_discard, .output = dst_discard, - .ops = &ip6_dst_ops, - .path = (struct dst_entry*)&ip6_blk_hole_entry, } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), @@ -189,9 +186,9 @@ struct rt6_info ip6_blk_hole_entry = { #endif /* allocate dst with ip6_dst_ops */ -static __inline__ struct rt6_info *ip6_dst_alloc(void) +static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops) { - return (struct rt6_info *)dst_alloc(&ip6_dst_ops); + return (struct rt6_info *)dst_alloc(ops); } static void ip6_dst_destroy(struct dst_entry *dst) @@ -239,7 +236,8 @@ static inline int rt6_need_strict(struct in6_addr *daddr) * Route lookup. Any table->tb6_lock is implied. */ -static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, +static inline struct rt6_info *rt6_device_match(struct net *net, + struct rt6_info *rt, int oif, int strict) { @@ -268,7 +266,7 @@ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, return local; if (strict) - return &ip6_null_entry; + return net->ipv6.ip6_null_entry; } return rt; } @@ -409,6 +407,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) { struct rt6_info *match, *rt0; + struct net *net; RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n", __FUNCTION__, fn->leaf, oif); @@ -434,13 +433,15 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) RT6_TRACE("%s() => %p\n", __FUNCTION__, match); - return (match ? match : &ip6_null_entry); + net = rt0->rt6i_dev->nd_net; + return (match ? match : net->ipv6.ip6_null_entry); } #ifdef CONFIG_IPV6_ROUTE_INFO int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, struct in6_addr *gwaddr) { + struct net *net = dev->nd_net; struct route_info *rinfo = (struct route_info *) opt; struct in6_addr prefix_buf, *prefix; unsigned int pref; @@ -488,7 +489,8 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, prefix = &prefix_buf; } - rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex); + rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, + dev->ifindex); if (rt && !lifetime) { ip6_del_rt(rt); @@ -496,7 +498,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } if (!rt && lifetime) - rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex, + rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, pref); else if (rt) rt->rt6i_flags = RTF_ROUTEINFO | @@ -515,9 +517,9 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } #endif -#define BACKTRACK(saddr) \ +#define BACKTRACK(__net, saddr) \ do { \ - if (rt == &ip6_null_entry) { \ + if (rt == __net->ipv6.ip6_null_entry) { \ struct fib6_node *pn; \ while (1) { \ if (fn->fn_flags & RTN_TL_ROOT) \ @@ -533,7 +535,8 @@ do { \ } \ } while(0) -static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, +static struct rt6_info *ip6_pol_route_lookup(struct net *net, + struct fib6_table *table, struct flowi *fl, int flags) { struct fib6_node *fn; @@ -543,8 +546,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: rt = fn->leaf; - rt = rt6_device_match(rt, fl->oif, flags); - BACKTRACK(&fl->fl6_src); + rt = rt6_device_match(net, rt, fl->oif, flags); + BACKTRACK(net, &fl->fl6_src); out: dst_use(&rt->u.dst, jiffies); read_unlock_bh(&table->tb6_lock); @@ -552,8 +555,8 @@ out: } -struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, - int oif, int strict) +struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr, + struct in6_addr *saddr, int oif, int strict) { struct flowi fl = { .oif = oif, @@ -571,7 +574,7 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, flags |= RT6_LOOKUP_F_HAS_SADDR; } - dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup); + dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup); if (dst->error == 0) return (struct rt6_info *) dst; @@ -604,7 +607,7 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) int ip6_ins_rt(struct rt6_info *rt) { struct nl_info info = { - .nl_net = &init_net, + .nl_net = rt->rt6i_dev->nd_net, }; return __ip6_ins_rt(rt, &info); } @@ -660,8 +663,8 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d return rt; } -static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif, - struct flowi *fl, int flags) +static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, + struct flowi *fl, int flags) { struct fib6_node *fn; struct rt6_info *rt, *nrt; @@ -680,8 +683,9 @@ restart_2: restart: rt = rt6_select(fn, oif, strict | reachable); - BACKTRACK(&fl->fl6_src); - if (rt == &ip6_null_entry || + + BACKTRACK(net, &fl->fl6_src); + if (rt == net->ipv6.ip6_null_entry || rt->rt6i_flags & RTF_CACHE) goto out; @@ -699,7 +703,7 @@ restart: } dst_release(&rt->u.dst); - rt = nrt ? : &ip6_null_entry; + rt = nrt ? : net->ipv6.ip6_null_entry; dst_hold(&rt->u.dst); if (nrt) { @@ -732,15 +736,16 @@ out2: return rt; } -static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, +static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, struct flowi *fl, int flags) { - return ip6_pol_route(table, fl->iif, fl, flags); + return ip6_pol_route(net, table, fl->iif, fl, flags); } void ip6_route_input(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); + struct net *net = skb->dev->nd_net; int flags = RT6_LOOKUP_F_HAS_SADDR; struct flowi fl = { .iif = skb->dev->ifindex, @@ -758,16 +763,17 @@ void ip6_route_input(struct sk_buff *skb) if (rt6_need_strict(&iph->daddr)) flags |= RT6_LOOKUP_F_IFACE; - skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input); + skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input); } -static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, +static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, struct flowi *fl, int flags) { - return ip6_pol_route(table, fl->oif, fl, flags); + return ip6_pol_route(net, table, fl->oif, fl, flags); } -struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) +struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, + struct flowi *fl) { int flags = 0; @@ -777,7 +783,7 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) if (!ipv6_addr_any(&fl->fl6_src)) flags |= RT6_LOOKUP_F_HAS_SADDR; - return fib6_rule_lookup(fl, flags, ip6_pol_route_output); + return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); } EXPORT_SYMBOL(ip6_route_output); @@ -886,12 +892,12 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) static int ipv6_get_mtu(struct net_device *dev); -static inline unsigned int ipv6_advmss(unsigned int mtu) +static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu) { mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); - if (mtu < init_net.ipv6.sysctl.ip6_rt_min_advmss) - mtu = init_net.ipv6.sysctl.ip6_rt_min_advmss; + if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) + mtu = net->ipv6.sysctl.ip6_rt_min_advmss; /* * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and @@ -904,21 +910,21 @@ static inline unsigned int ipv6_advmss(unsigned int mtu) return mtu; } -static struct dst_entry *ndisc_dst_gc_list; -static DEFINE_SPINLOCK(ndisc_lock); +static struct dst_entry *icmp6_dst_gc_list; +static DEFINE_SPINLOCK(icmp6_dst_lock); -struct dst_entry *ndisc_dst_alloc(struct net_device *dev, +struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct neighbour *neigh, - struct in6_addr *addr, - int (*output)(struct sk_buff *)) + struct in6_addr *addr) { struct rt6_info *rt; struct inet6_dev *idev = in6_dev_get(dev); + struct net *net = dev->nd_net; if (unlikely(idev == NULL)) return NULL; - rt = ip6_dst_alloc(); + rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); if (unlikely(rt == NULL)) { in6_dev_put(idev); goto out; @@ -936,8 +942,8 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev, atomic_set(&rt->u.dst.__refcnt, 1); rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); - rt->u.dst.output = output; + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); + rt->u.dst.output = ip6_output; #if 0 /* there's no chance to use these for ndisc */ rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST @@ -947,18 +953,18 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev, rt->rt6i_dst.plen = 128; #endif - spin_lock_bh(&ndisc_lock); - rt->u.dst.next = ndisc_dst_gc_list; - ndisc_dst_gc_list = &rt->u.dst; - spin_unlock_bh(&ndisc_lock); + spin_lock_bh(&icmp6_dst_lock); + rt->u.dst.next = icmp6_dst_gc_list; + icmp6_dst_gc_list = &rt->u.dst; + spin_unlock_bh(&icmp6_dst_lock); - fib6_force_start_gc(); + fib6_force_start_gc(net); out: return &rt->u.dst; } -int ndisc_dst_gc(int *more) +int icmp6_dst_gc(int *more) { struct dst_entry *dst, *next, **pprev; int freed; @@ -966,8 +972,8 @@ int ndisc_dst_gc(int *more) next = NULL; freed = 0; - spin_lock_bh(&ndisc_lock); - pprev = &ndisc_dst_gc_list; + spin_lock_bh(&icmp6_dst_lock); + pprev = &icmp6_dst_gc_list; while ((dst = *pprev) != NULL) { if (!atomic_read(&dst->__refcnt)) { @@ -980,30 +986,33 @@ int ndisc_dst_gc(int *more) } } - spin_unlock_bh(&ndisc_lock); + spin_unlock_bh(&icmp6_dst_lock); return freed; } static int ip6_dst_gc(struct dst_ops *ops) { - static unsigned expire = 30*HZ; - static unsigned long last_gc; unsigned long now = jiffies; - - if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) && - atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size) + struct net *net = ops->dst_net; + int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; + int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; + int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; + int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; + unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; + + if (time_after(rt_last_gc + rt_min_interval, now) && + atomic_read(&ops->entries) <= rt_max_size) goto out; - expire++; - fib6_run_gc(expire); - last_gc = now; - if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh) - expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1; - + net->ipv6.ip6_rt_gc_expire++; + fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); + net->ipv6.ip6_rt_last_gc = now; + if (atomic_read(&ops->entries) < ops->gc_thresh) + net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; out: - expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity; - return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size); + net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; + return (atomic_read(&ops->entries) > rt_max_size); } /* Clean host part of a prefix. Not necessary in radix tree, @@ -1045,6 +1054,7 @@ int ipv6_get_hoplimit(struct net_device *dev) int ip6_route_add(struct fib6_config *cfg) { int err; + struct net *net = cfg->fc_nlinfo.nl_net; struct rt6_info *rt = NULL; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; @@ -1059,7 +1069,7 @@ int ip6_route_add(struct fib6_config *cfg) #endif if (cfg->fc_ifindex) { err = -ENODEV; - dev = dev_get_by_index(&init_net, cfg->fc_ifindex); + dev = dev_get_by_index(net, cfg->fc_ifindex); if (!dev) goto out; idev = in6_dev_get(dev); @@ -1070,13 +1080,13 @@ int ip6_route_add(struct fib6_config *cfg) if (cfg->fc_metric == 0) cfg->fc_metric = IP6_RT_PRIO_USER; - table = fib6_new_table(cfg->fc_table); + table = fib6_new_table(net, cfg->fc_table); if (table == NULL) { err = -ENOBUFS; goto out; } - rt = ip6_dst_alloc(); + rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); if (rt == NULL) { err = -ENOMEM; @@ -1117,12 +1127,12 @@ int ip6_route_add(struct fib6_config *cfg) if ((cfg->fc_flags & RTF_REJECT) || (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { /* hold loopback dev/idev if we haven't done so. */ - if (dev != init_net.loopback_dev) { + if (dev != net->loopback_dev) { if (dev) { dev_put(dev); in6_dev_put(idev); } - dev = init_net.loopback_dev; + dev = net->loopback_dev; dev_hold(dev); idev = in6_dev_get(dev); if (!idev) { @@ -1159,7 +1169,7 @@ int ip6_route_add(struct fib6_config *cfg) if (!(gwa_type&IPV6_ADDR_UNICAST)) goto out; - grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1); + grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); err = -EHOSTUNREACH; if (grt == NULL) @@ -1226,10 +1236,13 @@ install_route: if (!rt->u.dst.metrics[RTAX_MTU-1]) rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); if (!rt->u.dst.metrics[RTAX_ADVMSS-1]) - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); rt->u.dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; + + cfg->fc_nlinfo.nl_net = dev->nd_net; + return __ip6_ins_rt(rt, &cfg->fc_nlinfo); out: @@ -1246,8 +1259,9 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) { int err; struct fib6_table *table; + struct net *net = rt->rt6i_dev->nd_net; - if (rt == &ip6_null_entry) + if (rt == net->ipv6.ip6_null_entry) return -ENOENT; table = rt->rt6i_table; @@ -1264,7 +1278,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) int ip6_del_rt(struct rt6_info *rt) { struct nl_info info = { - .nl_net = &init_net, + .nl_net = rt->rt6i_dev->nd_net, }; return __ip6_del_rt(rt, &info); } @@ -1276,7 +1290,7 @@ static int ip6_route_del(struct fib6_config *cfg) struct rt6_info *rt; int err = -ESRCH; - table = fib6_get_table(cfg->fc_table); + table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); if (table == NULL) return err; @@ -1316,7 +1330,8 @@ struct ip6rd_flowi { struct in6_addr gateway; }; -static struct rt6_info *__ip6_route_redirect(struct fib6_table *table, +static struct rt6_info *__ip6_route_redirect(struct net *net, + struct fib6_table *table, struct flowi *fl, int flags) { @@ -1359,8 +1374,8 @@ restart: } if (!rt) - rt = &ip6_null_entry; - BACKTRACK(&fl->fl6_src); + rt = net->ipv6.ip6_null_entry; + BACKTRACK(net, &fl->fl6_src); out: dst_hold(&rt->u.dst); @@ -1375,6 +1390,7 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, struct net_device *dev) { int flags = RT6_LOOKUP_F_HAS_SADDR; + struct net *net = dev->nd_net; struct ip6rd_flowi rdfl = { .fl = { .oif = dev->ifindex, @@ -1391,7 +1407,8 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, if (rt6_need_strict(dest)) flags |= RT6_LOOKUP_F_IFACE; - return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect); + return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl, + flags, __ip6_route_redirect); } void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, @@ -1400,10 +1417,11 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, { struct rt6_info *rt, *nrt = NULL; struct netevent_redirect netevent; + struct net *net = neigh->dev->nd_net; rt = ip6_route_redirect(dest, src, saddr, neigh->dev); - if (rt == &ip6_null_entry) { + if (rt == net->ipv6.ip6_null_entry) { if (net_ratelimit()) printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " "for redirect target\n"); @@ -1448,7 +1466,8 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, nrt->rt6i_nexthop = neigh_clone(neigh); /* Reset pmtu, it may be better */ nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); - nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst)); + nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(neigh->dev->nd_net, + dst_mtu(&nrt->u.dst)); if (ip6_ins_rt(nrt)) goto out; @@ -1476,9 +1495,10 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, struct net_device *dev, u32 pmtu) { struct rt6_info *rt, *nrt; + struct net *net = dev->nd_net; int allfrag = 0; - rt = rt6_lookup(daddr, saddr, dev->ifindex, 0); + rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0); if (rt == NULL) return; @@ -1511,7 +1531,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, rt->u.dst.metrics[RTAX_MTU-1] = pmtu; if (allfrag) rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; - dst_set_expires(&rt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires); + dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; goto out; } @@ -1537,7 +1557,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, * which is 10 mins. After 10 mins the decreased pmtu is expired * and detecting PMTU increase will be automatically happened. */ - dst_set_expires(&nrt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires); + dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; ip6_ins_rt(nrt); @@ -1552,7 +1572,8 @@ out: static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) { - struct rt6_info *rt = ip6_dst_alloc(); + struct net *net = ort->rt6i_dev->nd_net; + struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); if (rt) { rt->u.dst.input = ort->u.dst.input; @@ -1583,14 +1604,15 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) } #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen, +static struct rt6_info *rt6_get_route_info(struct net *net, + struct in6_addr *prefix, int prefixlen, struct in6_addr *gwaddr, int ifindex) { struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; - table = fib6_get_table(RT6_TABLE_INFO); + table = fib6_get_table(net, RT6_TABLE_INFO); if (table == NULL) return NULL; @@ -1614,7 +1636,8 @@ out: return rt; } -static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen, +static struct rt6_info *rt6_add_route_info(struct net *net, + struct in6_addr *prefix, int prefixlen, struct in6_addr *gwaddr, int ifindex, unsigned pref) { @@ -1625,6 +1648,9 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), + .fc_nlinfo.pid = 0, + .fc_nlinfo.nlh = NULL, + .fc_nlinfo.nl_net = net, }; ipv6_addr_copy(&cfg.fc_dst, prefix); @@ -1636,7 +1662,7 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle ip6_route_add(&cfg); - return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex); + return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); } #endif @@ -1645,7 +1671,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d struct rt6_info *rt; struct fib6_table *table; - table = fib6_get_table(RT6_TABLE_DFLT); + table = fib6_get_table(dev->nd_net, RT6_TABLE_DFLT); if (table == NULL) return NULL; @@ -1674,6 +1700,9 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), + .fc_nlinfo.pid = 0, + .fc_nlinfo.nlh = NULL, + .fc_nlinfo.nl_net = dev->nd_net, }; ipv6_addr_copy(&cfg.fc_gateway, gwaddr); @@ -1683,13 +1712,13 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, return rt6_get_dflt_router(gwaddr, dev); } -void rt6_purge_dflt_routers(void) +void rt6_purge_dflt_routers(struct net *net) { struct rt6_info *rt; struct fib6_table *table; /* NOTE: Keep consistent with rt6_get_dflt_router */ - table = fib6_get_table(RT6_TABLE_DFLT); + table = fib6_get_table(net, RT6_TABLE_DFLT); if (table == NULL) return; @@ -1706,7 +1735,8 @@ restart: read_unlock_bh(&table->tb6_lock); } -static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg, +static void rtmsg_to_fib6_config(struct net *net, + struct in6_rtmsg *rtmsg, struct fib6_config *cfg) { memset(cfg, 0, sizeof(*cfg)); @@ -1719,14 +1749,14 @@ static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg, cfg->fc_src_len = rtmsg->rtmsg_src_len; cfg->fc_flags = rtmsg->rtmsg_flags; - cfg->fc_nlinfo.nl_net = &init_net; + cfg->fc_nlinfo.nl_net = net; ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); } -int ipv6_route_ioctl(unsigned int cmd, void __user *arg) +int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct fib6_config cfg; struct in6_rtmsg rtmsg; @@ -1742,7 +1772,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) if (err) return -EFAULT; - rtmsg_to_fib6_config(&rtmsg, &cfg); + rtmsg_to_fib6_config(net, &rtmsg, &cfg); rtnl_lock(); switch (cmd) { @@ -1821,21 +1851,22 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, const struct in6_addr *addr, int anycast) { - struct rt6_info *rt = ip6_dst_alloc(); + struct net *net = idev->dev->nd_net; + struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); if (rt == NULL) return ERR_PTR(-ENOMEM); - dev_hold(init_net.loopback_dev); + dev_hold(net->loopback_dev); in6_dev_hold(idev); rt->u.dst.flags = DST_HOST; rt->u.dst.input = ip6_input; rt->u.dst.output = ip6_output; - rt->rt6i_dev = init_net.loopback_dev; + rt->rt6i_dev = net->loopback_dev; rt->rt6i_idev = idev; rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; rt->u.dst.obsolete = -1; @@ -1852,26 +1883,39 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; - rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL); + rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); atomic_set(&rt->u.dst.__refcnt, 1); return rt; } +struct arg_dev_net { + struct net_device *dev; + struct net *net; +}; + static int fib6_ifdown(struct rt6_info *rt, void *arg) { - if (((void*)rt->rt6i_dev == arg || arg == NULL) && - rt != &ip6_null_entry) { + struct net_device *dev = ((struct arg_dev_net *)arg)->dev; + struct net *net = ((struct arg_dev_net *)arg)->net; + + if (((void *)rt->rt6i_dev == dev || dev == NULL) && + rt != net->ipv6.ip6_null_entry) { RT6_TRACE("deleted by ifdown %p\n", rt); return -1; } return 0; } -void rt6_ifdown(struct net_device *dev) +void rt6_ifdown(struct net *net, struct net_device *dev) { - fib6_clean_all(fib6_ifdown, 0, dev); + struct arg_dev_net adn = { + .dev = dev, + .net = net, + }; + + fib6_clean_all(net, fib6_ifdown, 0, &adn); } struct rt6_mtu_change_arg @@ -1884,6 +1928,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) { struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; struct inet6_dev *idev; + struct net *net = arg->dev->nd_net; /* In IPv6 pmtu discovery is not optional, so that RTAX_MTU lock cannot disable it. @@ -1915,7 +1960,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) (dst_mtu(&rt->u.dst) < arg->mtu && dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) { rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu); + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); } return 0; } @@ -1927,7 +1972,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned mtu) .mtu = mtu, }; - fib6_clean_all(rt6_mtu_change_route, 0, &arg); + fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg); } static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { @@ -2010,13 +2055,9 @@ errout: static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = skb->sk->sk_net; struct fib6_config cfg; int err; - if (net != &init_net) - return -EINVAL; - err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; @@ -2026,13 +2067,9 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = skb->sk->sk_net; struct fib6_config cfg; int err; - if (net != &init_net) - return -EINVAL; - err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; @@ -2122,7 +2159,8 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, NLA_PUT_U32(skb, RTA_IIF, iif); else if (dst) { struct in6_addr saddr_buf; - if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0) + if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev, + dst, &saddr_buf) == 0) NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } @@ -2175,9 +2213,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void struct flowi fl; int err, iif = 0; - if (net != &init_net) - return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); if (err < 0) goto errout; @@ -2207,7 +2242,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (iif) { struct net_device *dev; - dev = __dev_get_by_index(&init_net, iif); + dev = __dev_get_by_index(net, iif); if (!dev) { err = -ENODEV; goto errout; @@ -2226,7 +2261,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_reset_mac_header(skb); skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); - rt = (struct rt6_info*) ip6_route_output(NULL, &fl); + rt = (struct rt6_info*) ip6_route_output(&init_net, NULL, &fl); skb->dst = &rt->u.dst; err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, @@ -2237,7 +2272,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void goto errout; } - err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); errout: return err; } @@ -2245,6 +2280,7 @@ errout: void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) { struct sk_buff *skb; + struct net *net = info->nl_net; u32 seq; int err; @@ -2263,11 +2299,31 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, &init_net, info->pid, - RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any()); + err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, + info->nlh, gfp_any()); errout: if (err < 0) - rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err); + rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); +} + +static int ip6_route_dev_notify(struct notifier_block *this, + unsigned long event, void *data) +{ + struct net_device *dev = (struct net_device *)data; + struct net *net = dev->nd_net; + + if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { + net->ipv6.ip6_null_entry->u.dst.dev = dev; + net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + net->ipv6.ip6_prohibit_entry->u.dst.dev = dev; + net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); + net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev; + net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); +#endif + } + + return NOTIFY_OK; } /* @@ -2316,13 +2372,25 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) static int ipv6_route_show(struct seq_file *m, void *v) { - fib6_clean_all(rt6_info_route, 0, m); + struct net *net = (struct net *)m->private; + fib6_clean_all(net, rt6_info_route, 0, m); return 0; } static int ipv6_route_open(struct inode *inode, struct file *file) { - return single_open(file, ipv6_route_show, NULL); + struct net *net = get_proc_net(inode); + if (!net) + return -ENXIO; + return single_open(file, ipv6_route_show, net); +} + +static int ipv6_route_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = seq->private; + put_net(net); + return single_release(inode, file); } static const struct file_operations ipv6_route_proc_fops = { @@ -2330,24 +2398,36 @@ static const struct file_operations ipv6_route_proc_fops = { .open = ipv6_route_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = ipv6_route_release, }; static int rt6_stats_seq_show(struct seq_file *seq, void *v) { + struct net *net = (struct net *)seq->private; seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", - rt6_stats.fib_nodes, rt6_stats.fib_route_nodes, - rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries, - rt6_stats.fib_rt_cache, - atomic_read(&ip6_dst_ops.entries), - rt6_stats.fib_discarded_routes); + net->ipv6.rt6_stats->fib_nodes, + net->ipv6.rt6_stats->fib_route_nodes, + net->ipv6.rt6_stats->fib_rt_alloc, + net->ipv6.rt6_stats->fib_rt_entries, + net->ipv6.rt6_stats->fib_rt_cache, + atomic_read(&net->ipv6.ip6_dst_ops->entries), + net->ipv6.rt6_stats->fib_discarded_routes); return 0; } static int rt6_stats_seq_open(struct inode *inode, struct file *file) { - return single_open(file, rt6_stats_seq_show, NULL); + struct net *net = get_proc_net(inode); + return single_open(file, rt6_stats_seq_show, net); +} + +static int rt6_stats_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = (struct net *)seq->private; + put_net(net); + return single_release(inode, file); } static const struct file_operations rt6_stats_seq_fops = { @@ -2355,42 +2435,8 @@ static const struct file_operations rt6_stats_seq_fops = { .open = rt6_stats_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = rt6_stats_seq_release, }; - -static int ipv6_route_proc_init(struct net *net) -{ - int ret = -ENOMEM; - if (!proc_net_fops_create(net, "ipv6_route", - 0, &ipv6_route_proc_fops)) - goto out; - - if (!proc_net_fops_create(net, "rt6_stats", - S_IRUGO, &rt6_stats_seq_fops)) - goto out_ipv6_route; - - ret = 0; -out: - return ret; -out_ipv6_route: - proc_net_remove(net, "ipv6_route"); - goto out; -} - -static void ipv6_route_proc_fini(struct net *net) -{ - proc_net_remove(net, "ipv6_route"); - proc_net_remove(net, "rt6_stats"); -} -#else -static inline int ipv6_route_proc_init(struct net *net) -{ - return 0; -} -static inline void ipv6_route_proc_fini(struct net *net) -{ - return ; -} #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_SYSCTL @@ -2399,10 +2445,11 @@ static int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) { - int delay = init_net.ipv6.sysctl.flush_delay; + struct net *net = current->nsproxy->net_ns; + int delay = net->ipv6.sysctl.flush_delay; if (write) { proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay); + fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); return 0; } else return -EINVAL; @@ -2419,7 +2466,7 @@ ctl_table ipv6_route_table_template[] = { { .ctl_name = NET_IPV6_ROUTE_GC_THRESH, .procname = "gc_thresh", - .data = &ip6_dst_ops.gc_thresh, + .data = &ip6_dst_ops_template.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -2505,33 +2552,141 @@ struct ctl_table *ipv6_route_sysctl_init(struct net *net) table = kmemdup(ipv6_route_table_template, sizeof(ipv6_route_table_template), GFP_KERNEL); + + if (table) { + table[0].data = &net->ipv6.sysctl.flush_delay; + table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh; + table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; + table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; + table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; + table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; + table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; + table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; + table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; + } + return table; } #endif +static int ip6_route_net_init(struct net *net) +{ + int ret = 0; + + ret = -ENOMEM; + net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template, + sizeof(*net->ipv6.ip6_dst_ops), + GFP_KERNEL); + if (!net->ipv6.ip6_dst_ops) + goto out; + net->ipv6.ip6_dst_ops->dst_net = net; + + net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, + sizeof(*net->ipv6.ip6_null_entry), + GFP_KERNEL); + if (!net->ipv6.ip6_null_entry) + goto out_ip6_dst_ops; + net->ipv6.ip6_null_entry->u.dst.path = + (struct dst_entry *)net->ipv6.ip6_null_entry; + net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops; + +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, + sizeof(*net->ipv6.ip6_prohibit_entry), + GFP_KERNEL); + if (!net->ipv6.ip6_prohibit_entry) { + kfree(net->ipv6.ip6_null_entry); + goto out; + } + net->ipv6.ip6_prohibit_entry->u.dst.path = + (struct dst_entry *)net->ipv6.ip6_prohibit_entry; + net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops; + + net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, + sizeof(*net->ipv6.ip6_blk_hole_entry), + GFP_KERNEL); + if (!net->ipv6.ip6_blk_hole_entry) { + kfree(net->ipv6.ip6_null_entry); + kfree(net->ipv6.ip6_prohibit_entry); + goto out; + } + net->ipv6.ip6_blk_hole_entry->u.dst.path = + (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; + net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops; +#endif + +#ifdef CONFIG_PROC_FS + proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); + proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); +#endif + net->ipv6.ip6_rt_gc_expire = 30*HZ; + + ret = 0; +out: + return ret; + +out_ip6_dst_ops: + kfree(net->ipv6.ip6_dst_ops); + goto out; +} + +static void ip6_route_net_exit(struct net *net) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove(net, "ipv6_route"); + proc_net_remove(net, "rt6_stats"); +#endif + kfree(net->ipv6.ip6_null_entry); +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + kfree(net->ipv6.ip6_prohibit_entry); + kfree(net->ipv6.ip6_blk_hole_entry); +#endif + kfree(net->ipv6.ip6_dst_ops); +} + +static struct pernet_operations ip6_route_net_ops = { + .init = ip6_route_net_init, + .exit = ip6_route_net_exit, +}; + +static struct notifier_block ip6_route_dev_notifier = { + .notifier_call = ip6_route_dev_notify, + .priority = 0, +}; + int __init ip6_route_init(void) { int ret; - ip6_dst_ops.kmem_cachep = + ret = -ENOMEM; + ip6_dst_ops_template.kmem_cachep = kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, SLAB_HWCACHE_ALIGN, NULL); - if (!ip6_dst_ops.kmem_cachep) - return -ENOMEM; + if (!ip6_dst_ops_template.kmem_cachep) + goto out;; - ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep; - - ret = fib6_init(); + ret = register_pernet_subsys(&ip6_route_net_ops); if (ret) goto out_kmem_cache; - ret = ipv6_route_proc_init(&init_net); + /* Registering of the loopback is done before this portion of code, + * the loopback reference in rt6_info will not be taken, do it + * manually for init_net */ + init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES + init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + #endif + ret = fib6_init(); if (ret) - goto out_fib6_init; + goto out_register_subsys; ret = xfrm6_init(); if (ret) - goto out_proc_init; + goto out_fib6_init; ret = fib6_rules_init(); if (ret) @@ -2543,7 +2698,10 @@ int __init ip6_route_init(void) __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL)) goto fib6_rules_init; - ret = 0; + ret = register_netdevice_notifier(&ip6_route_dev_notifier); + if (ret) + goto fib6_rules_init; + out: return ret; @@ -2551,22 +2709,21 @@ fib6_rules_init: fib6_rules_cleanup(); xfrm6_init: xfrm6_fini(); -out_proc_init: - ipv6_route_proc_fini(&init_net); out_fib6_init: - rt6_ifdown(NULL); fib6_gc_cleanup(); +out_register_subsys: + unregister_pernet_subsys(&ip6_route_net_ops); out_kmem_cache: - kmem_cache_destroy(ip6_dst_ops.kmem_cachep); + kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); goto out; } void ip6_route_cleanup(void) { + unregister_netdevice_notifier(&ip6_route_dev_notifier); fib6_rules_cleanup(); - ipv6_route_proc_fini(&init_net); xfrm6_fini(); - rt6_ifdown(NULL); fib6_gc_cleanup(); - kmem_cache_destroy(ip6_dst_ops.kmem_cachep); + unregister_pernet_subsys(&ip6_route_net_ops); + kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1656c003b98..1b8196c8d14 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -339,11 +339,11 @@ out: skb_reset_network_header(skb2); /* Try to guess incoming interface */ - rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0); + rt6i = rt6_lookup(&init_net, &iph6->saddr, NULL, NULL, 0); if (rt6i && rt6i->rt6i_dev) { skb2->dev = rt6i->rt6i_dev; - rt6i = rt6_lookup(&iph6->daddr, &iph6->saddr, NULL, 0); + rt6i = rt6_lookup(&init_net, &iph6->daddr, &iph6->saddr, NULL, 0); if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) { struct ip_tunnel *t = netdev_priv(rt6i->rt6i_dev); @@ -393,7 +393,7 @@ isatap_srcok(struct sk_buff *skb, struct iphdr *iph, struct net_device *dev) fl.oif = dev->ifindex; security_skb_classify_flow(skb, &fl); - dst = ip6_route_output(NULL, &fl); + dst = ip6_route_output(&init_net, NULL, &fl); if (!dst->error && (dst->dev == dev) && (neigh = dst->neighbour)) { addr6 = (struct in6_addr*)&neigh->primary_key; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c new file mode 100644 index 00000000000..827c5aa7524 --- /dev/null +++ b/net/ipv6/syncookies.c @@ -0,0 +1,267 @@ +/* + * IPv6 Syncookies implementation for the Linux kernel + * + * Authors: + * Glenn Griffin <ggriffin.kernel@gmail.com> + * + * Based on IPv4 implementation by Andi Kleen + * linux/net/ipv4/syncookies.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/tcp.h> +#include <linux/random.h> +#include <linux/cryptohash.h> +#include <linux/kernel.h> +#include <net/ipv6.h> +#include <net/tcp.h> + +extern int sysctl_tcp_syncookies; +extern __u32 syncookie_secret[2][16-3+SHA_DIGEST_WORDS]; + +#define COOKIEBITS 24 /* Upper bits store count */ +#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) + +/* + * This table has to be sorted and terminated with (__u16)-1. + * XXX generate a better table. + * Unresolved Issues: HIPPI with a 64k MSS is not well supported. + * + * Taken directly from ipv4 implementation. + * Should this list be modified for ipv6 use or is it close enough? + * rfc 2460 8.3 suggests mss values 20 bytes less than ipv4 counterpart + */ +static __u16 const msstab[] = { + 64 - 1, + 256 - 1, + 512 - 1, + 536 - 1, + 1024 - 1, + 1440 - 1, + 1460 - 1, + 4312 - 1, + (__u16)-1 +}; +/* The number doesn't include the -1 terminator */ +#define NUM_MSS (ARRAY_SIZE(msstab) - 1) + +/* + * This (misnamed) value is the age of syncookie which is permitted. + * Its ideal value should be dependent on TCP_TIMEOUT_INIT and + * sysctl_tcp_retries1. It's a rather complicated formula (exponential + * backoff) to compute at runtime so it's currently hardcoded here. + */ +#define COUNTER_TRIES 4 + +static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct sock *child; + + child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); + if (child) + inet_csk_reqsk_queue_add(sk, req, child); + else + reqsk_free(req); + + return child; +} + +static DEFINE_PER_CPU(__u32, cookie_scratch)[16 + 5 + SHA_WORKSPACE_WORDS]; + +static u32 cookie_hash(struct in6_addr *saddr, struct in6_addr *daddr, + __be16 sport, __be16 dport, u32 count, int c) +{ + __u32 *tmp = __get_cpu_var(cookie_scratch); + + /* + * we have 320 bits of information to hash, copy in the remaining + * 192 bits required for sha_transform, from the syncookie_secret + * and overwrite the digest with the secret + */ + memcpy(tmp + 10, syncookie_secret[c], 44); + memcpy(tmp, saddr, 16); + memcpy(tmp + 4, daddr, 16); + tmp[8] = ((__force u32)sport << 16) + (__force u32)dport; + tmp[9] = count; + sha_transform(tmp + 16, (__u8 *)tmp, tmp + 16 + 5); + + return tmp[17]; +} + +static __u32 secure_tcp_syn_cookie(struct in6_addr *saddr, struct in6_addr *daddr, + __be16 sport, __be16 dport, __u32 sseq, + __u32 count, __u32 data) +{ + return (cookie_hash(saddr, daddr, sport, dport, 0, 0) + + sseq + (count << COOKIEBITS) + + ((cookie_hash(saddr, daddr, sport, dport, count, 1) + data) + & COOKIEMASK)); +} + +static __u32 check_tcp_syn_cookie(__u32 cookie, struct in6_addr *saddr, + struct in6_addr *daddr, __be16 sport, + __be16 dport, __u32 sseq, __u32 count, + __u32 maxdiff) +{ + __u32 diff; + + cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq; + + diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS); + if (diff >= maxdiff) + return (__u32)-1; + + return (cookie - + cookie_hash(saddr, daddr, sport, dport, count - diff, 1)) + & COOKIEMASK; +} + +__u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) +{ + struct ipv6hdr *iph = ipv6_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); + int mssind; + const __u16 mss = *mssp; + + tcp_sk(sk)->last_synq_overflow = jiffies; + + for (mssind = 0; mss > msstab[mssind + 1]; mssind++) + ; + *mssp = msstab[mssind] + 1; + + NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT); + + return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source, + th->dest, ntohl(th->seq), + jiffies / (HZ * 60), mssind); +} + +static inline int cookie_check(struct sk_buff *skb, __u32 cookie) +{ + struct ipv6hdr *iph = ipv6_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); + __u32 seq = ntohl(th->seq) - 1; + __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr, + th->source, th->dest, seq, + jiffies / (HZ * 60), COUNTER_TRIES); + + return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; +} + +struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) +{ + struct inet_request_sock *ireq; + struct inet6_request_sock *ireq6; + struct tcp_request_sock *treq; + struct ipv6_pinfo *np = inet6_sk(sk); + struct tcp_sock *tp = tcp_sk(sk); + const struct tcphdr *th = tcp_hdr(skb); + __u32 cookie = ntohl(th->ack_seq) - 1; + struct sock *ret = sk; + struct request_sock *req; + int mss; + struct dst_entry *dst; + __u8 rcv_wscale; + + if (!sysctl_tcp_syncookies || !th->ack) + goto out; + + if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) || + (mss = cookie_check(skb, cookie)) == 0) { + NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESFAILED); + goto out; + } + + NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV); + + ret = NULL; + req = inet6_reqsk_alloc(&tcp6_request_sock_ops); + if (!req) + goto out; + + ireq = inet_rsk(req); + ireq6 = inet6_rsk(req); + treq = tcp_rsk(req); + ireq6->pktopts = NULL; + + if (security_inet_conn_request(sk, skb, req)) { + reqsk_free(req); + goto out; + } + + req->mss = mss; + ireq->rmt_port = th->source; + ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr); + ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr); + if (ipv6_opt_accepted(sk, skb) || + np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || + np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { + atomic_inc(&skb->users); + ireq6->pktopts = skb; + } + + ireq6->iif = sk->sk_bound_dev_if; + /* So that link locals have meaning */ + if (!sk->sk_bound_dev_if && + ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL) + ireq6->iif = inet6_iif(skb); + + req->expires = 0UL; + req->retrans = 0; + ireq->snd_wscale = ireq->rcv_wscale = ireq->tstamp_ok = 0; + ireq->wscale_ok = ireq->sack_ok = 0; + treq->rcv_isn = ntohl(th->seq) - 1; + treq->snt_isn = cookie; + + /* + * We need to lookup the dst_entry to get the correct window size. + * This is taken from tcp_v6_syn_recv_sock. Somebody please enlighten + * me if there is a preferred way. + */ + { + struct in6_addr *final_p = NULL, final; + struct flowi fl; + memset(&fl, 0, sizeof(fl)); + fl.proto = IPPROTO_TCP; + ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); + if (np->opt && np->opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; + } + ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); + fl.oif = sk->sk_bound_dev_if; + fl.fl_ip_dport = inet_rsk(req)->rmt_port; + fl.fl_ip_sport = inet_sk(sk)->sport; + security_req_classify_flow(req, &fl); + if (ip6_dst_lookup(sk, &dst, &fl)) { + reqsk_free(req); + goto out; + } + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + goto out; + } + + req->window_clamp = dst_metric(dst, RTAX_WINDOW); + tcp_select_initial_window(tcp_full_space(sk), req->mss, + &req->rcv_wnd, &req->window_clamp, + 0, &rcv_wscale); + + ireq->rcv_wscale = rcv_wscale; + + ret = get_cookie_sock(sk, skb, req, dst); + +out: return ret; +} + diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index d6d3e68086f..3804dcbbfab 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -71,24 +71,11 @@ static int ipv6_sysctl_net_init(struct net *net) ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) goto out_ipv6_table; + ipv6_table[0].child = ipv6_route_table; ipv6_icmp_table = ipv6_icmp_sysctl_init(net); if (!ipv6_icmp_table) goto out_ipv6_route_table; - - ipv6_route_table[0].data = &net->ipv6.sysctl.flush_delay; - /* ipv6_route_table[1].data will be handled when we have - routes per namespace */ - ipv6_route_table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; - ipv6_route_table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; - ipv6_route_table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; - ipv6_route_table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; - ipv6_route_table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; - ipv6_route_table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; - ipv6_route_table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; - ipv6_table[0].child = ipv6_route_table; - - ipv6_icmp_table[0].data = &net->ipv6.sysctl.icmpv6_time; ipv6_table[1].child = ipv6_icmp_table; ipv6_table[2].data = &net->ipv6.sysctl.bindv6only; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 12750f2b05a..aacbb7688bf 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -455,8 +455,7 @@ out: } -static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, - struct dst_entry *dst) +static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) { struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -464,6 +463,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, struct ipv6_txoptions *opt = NULL; struct in6_addr * final_p = NULL, final; struct flowi fl; + struct dst_entry *dst; int err = -1; memset(&fl, 0, sizeof(fl)); @@ -476,24 +476,22 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, fl.fl_ip_sport = inet_sk(sk)->sport; security_req_classify_flow(req, &fl); - if (dst == NULL) { - opt = np->opt; - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } - - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) - goto done; - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) - goto done; + opt = np->opt; + if (opt && opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; } + err = ip6_dst_lookup(sk, &dst, &fl); + if (err) + goto done; + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + goto done; + skb = tcp_make_synack(sk, dst, req); if (skb) { struct tcphdr *th = tcp_hdr(skb); @@ -514,6 +512,20 @@ done: return err; } +static inline void syn_flood_warning(struct sk_buff *skb) +{ +#ifdef CONFIG_SYN_COOKIES + if (sysctl_tcp_syncookies) + printk(KERN_INFO + "TCPv6: Possible SYN flooding on port %d. " + "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest)); + else +#endif + printk(KERN_INFO + "TCPv6: Possible SYN flooding on port %d. " + "Dropping request.\n", ntohs(tcp_hdr(skb)->dest)); +} + static void tcp_v6_reqsk_destructor(struct request_sock *req) { if (inet6_rsk(req)->pktopts) @@ -917,7 +929,7 @@ done_opts: } #endif -static struct request_sock_ops tcp6_request_sock_ops __read_mostly = { +struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), .rtx_syn_ack = tcp_v6_send_synack, @@ -1059,8 +1071,11 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) fl.fl_ip_sport = t1->source; security_skb_classify_flow(skb, &fl); - /* sk = NULL, but it is safe for now. RST socket required. */ - if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { + /* Pass a socket to ip6_dst_lookup either it is for RST + * Underlying function will use this to retrieve the network + * namespace + */ + if (!ip6_dst_lookup(tcp6_socket->sk, &buff->dst, &fl)) { if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0); @@ -1160,7 +1175,7 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw, fl.fl_ip_sport = t1->source; security_skb_classify_flow(skb, &fl); - if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { + if (!ip6_dst_lookup(tcp6_socket->sk, &buff->dst, &fl)) { if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0); TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); @@ -1215,9 +1230,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) return NULL; } -#if 0 /*def CONFIG_SYN_COOKIES*/ +#ifdef CONFIG_SYN_COOKIES if (!th->rst && !th->syn && th->ack) - sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt)); + sk = cookie_v6_check(sk, skb); #endif return sk; } @@ -1233,6 +1248,11 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); struct request_sock *req = NULL; __u32 isn = TCP_SKB_CB(skb)->when; +#ifdef CONFIG_SYN_COOKIES + int want_cookie = 0; +#else +#define want_cookie 0 +#endif if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); @@ -1240,12 +1260,14 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!ipv6_unicast_destination(skb)) goto drop; - /* - * There are no SYN attacks on IPv6, yet... - */ if (inet_csk_reqsk_queue_is_full(sk) && !isn) { if (net_ratelimit()) - printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n"); + syn_flood_warning(skb); +#ifdef CONFIG_SYN_COOKIES + if (sysctl_tcp_syncookies) + want_cookie = 1; + else +#endif goto drop; } @@ -1266,39 +1288,51 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_parse_options(skb, &tmp_opt, 0); + if (want_cookie) { + tcp_clear_options(&tmp_opt); + tmp_opt.saw_tstamp = 0; + } + tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb); treq = inet6_rsk(req); ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr); - TCP_ECN_create_request(req, tcp_hdr(skb)); treq->pktopts = NULL; - if (ipv6_opt_accepted(sk, skb) || - np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || - np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { - atomic_inc(&skb->users); - treq->pktopts = skb; - } - treq->iif = sk->sk_bound_dev_if; + if (!want_cookie) + TCP_ECN_create_request(req, tcp_hdr(skb)); + + if (want_cookie) { + isn = cookie_v6_init_sequence(sk, skb, &req->mss); + } else if (!isn) { + if (ipv6_opt_accepted(sk, skb) || + np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || + np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { + atomic_inc(&skb->users); + treq->pktopts = skb; + } + treq->iif = sk->sk_bound_dev_if; - /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && - ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) - treq->iif = inet6_iif(skb); + /* So that link locals have meaning */ + if (!sk->sk_bound_dev_if && + ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) + treq->iif = inet6_iif(skb); - if (isn == 0) isn = tcp_v6_init_sequence(skb); + } tcp_rsk(req)->snt_isn = isn; security_inet_conn_request(sk, skb, req); - if (tcp_v6_send_synack(sk, req, NULL)) + if (tcp_v6_send_synack(sk, req)) goto drop; - inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - return 0; + if (!want_cookie) { + inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + return 0; + } drop: if (req) diff --git a/net/ipv6/udp.c b/net/ipv6/udp_ipv6.c index 53739de829d..55feac7ba71 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp_ipv6.c @@ -400,7 +400,7 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, UDP_SKB_CB(skb)->partial_cov = 0; UDP_SKB_CB(skb)->cscov = skb->len; - if (proto == IPPROTO_UDPLITE) { + if (IS_PROTO_UDPLITE(proto)) { err = udplite_checksum_init(skb, uh); if (err) return err; @@ -489,7 +489,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (udp_lib_checksum_complete(skb)) goto discard; - UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, IS_PROTO_UDPLITE(proto)); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev); @@ -510,11 +510,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], short_packet: LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n", - proto == IPPROTO_UDPLITE ? "-Lite" : "", + IS_PROTO_UDPLITE(proto) ? "-Lite" : "", ulen, skb->len); discard: - UDP6_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + UDP6_INC_STATS_BH(UDP_MIB_INERRORS, IS_PROTO_UDPLITE(proto)); kfree_skb(skb); return 0; } @@ -890,7 +890,7 @@ int udpv6_destroy_sock(struct sock *sk) int udpv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (IS_SOL_UDPFAMILY(level)) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_v6_push_pending_frames); return ipv6_setsockopt(sk, level, optname, optval, optlen); @@ -900,7 +900,7 @@ int udpv6_setsockopt(struct sock *sk, int level, int optname, int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (IS_SOL_UDPFAMILY(level)) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_v6_push_pending_frames); return compat_ipv6_setsockopt(sk, level, optname, optval, optlen); @@ -910,7 +910,7 @@ int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, int udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (IS_SOL_UDPFAMILY(level)) return udp_lib_getsockopt(sk, level, optname, optval, optlen); return ipv6_getsockopt(sk, level, optname, optval, optlen); } @@ -919,7 +919,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (IS_SOL_UDPFAMILY(level)) return udp_lib_getsockopt(sk, level, optname, optval, optlen); return compat_ipv6_getsockopt(sk, level, optname, optval, optlen); } diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite_ipv6.c index 87d4202522e..87d4202522e 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite_ipv6.c diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 7d20199ee1f..e96dafdc703 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -38,7 +38,7 @@ static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr, if (saddr) memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src)); - dst = ip6_route_output(NULL, &fl); + dst = ip6_route_output(&init_net, NULL, &fl); err = dst->error; if (dst->error) { @@ -57,8 +57,9 @@ static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) if (IS_ERR(dst)) return -EHOSTUNREACH; - ipv6_get_saddr(dst, (struct in6_addr *)&daddr->a6, - (struct in6_addr *)&saddr->a6); + ipv6_dev_get_saddr(ip6_dst_idev(dst)->dev, + (struct in6_addr *)&daddr->a6, + (struct in6_addr *)&saddr->a6); dst_release(dst); return 0; } |