From f945fa7ad9c12a3356a3de7fb2143ccc2f2c3bca Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Jan 2008 22:39:26 -0800 Subject: [INET]: Fix truesize setting in ip_append_data As it is ip_append_data only counts page fragments to the skb that allocated it. As such it means that the first skb gets hit with a 4K charge even though it might have only used a fraction of it while all subsequent skb's that use the same page gets away with no charge at all. This bug was exposed by the UDP accounting patch. [ The wmem_alloc bumping needs to be moved with the truesize, noticed by Takahiro Yasui. -DaveM ] Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6338a9c1aa1..3bef30e4a23 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1316,8 +1316,6 @@ alloc_new_skb: skb_fill_page_desc(skb, i, page, 0, 0); frag = &skb_shinfo(skb)->frags[i]; - skb->truesize += PAGE_SIZE; - atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); } else { err = -EMSGSIZE; goto error; @@ -1330,6 +1328,8 @@ alloc_new_skb: frag->size += copy; skb->len += copy; skb->data_len += copy; + skb->truesize += copy; + atomic_add(copy, &sk->sk_wmem_alloc); } offset += copy; length -= copy; -- cgit v1.2.3 From a9e527e3f9f4510e9f3450ca3bc51bc3ef2854fd Mon Sep 17 00:00:00 2001 From: Rolf Manderscheid Date: Mon, 10 Dec 2007 13:38:41 -0700 Subject: IPoIB: improve IPv4/IPv6 to IB mcast mapping functions An IPoIB subnet on an IB fabric that spans multiple IB subnets can't use link-local scope in multicast GIDs. The existing routines that map IP/IPv6 multicast addresses into IB link-level addresses hard-code the scope to link-local, and they also leave the partition key field uninitialised. 
This patch adds a parameter (the link-level broadcast address) to the mapping routines, allowing them to initialise both the scope and the P_Key appropriately, and fixes up the call sites. The next step will be to add a way to configure the scope for an IPoIB interface. Signed-off-by: Rolf Manderscheid Signed-off-by: Roland Dreier --- net/ipv6/ndisc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 777ed733b2d..85947eae5bf 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -337,7 +337,7 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d ipv6_arcnet_mc_map(addr, buf); return 0; case ARPHRD_INFINIBAND: - ipv6_ib_mc_map(addr, buf); + ipv6_ib_mc_map(addr, dev->broadcast, buf); return 0; default: if (dir) { -- cgit v1.2.3 From a0974dd3da87667e26ef5d3b32989a43319866f2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 6 Nov 2007 23:31:58 -0800 Subject: [TCP] splice: add tcp_splice_read() to IPV6 Thanks to YOSHIFUJI Hideaki for the hint! Signed-off-by: Jens Axboe Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index ecbd38894fd..85178f71b21 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -491,6 +491,7 @@ const struct proto_ops inet6_stream_ops = { .recvmsg = sock_common_recvmsg, /* ok */ .mmap = sock_no_mmap, .sendpage = tcp_sendpage, + .splice_read = tcp_splice_read, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, -- cgit v1.2.3 From a92aa318b4b369091fd80433c80e62838db8bc1c Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Tue, 13 Nov 2007 20:31:14 -0800 Subject: [IPV6]: Add raw6 drops counter. Add raw drops counter for IPv6 in /proc/net/raw6 . Signed-off-by: Wang Chen Signed-off-by: David S. 
Miller --- net/ipv6/raw.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 807260d0358..ae314f3fea4 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -354,14 +354,14 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) { if ((raw6_sk(sk)->checksum || sk->sk_filter) && skb_checksum_complete(skb)) { - /* FIXME: increment a raw6 drops counter here */ + atomic_inc(&sk->sk_drops); kfree_skb(skb); return 0; } /* Charge it to the socket. */ if (sock_queue_rcv_skb(sk,skb)<0) { - /* FIXME: increment a raw6 drops counter here */ + atomic_inc(&sk->sk_drops); kfree_skb(skb); return 0; } @@ -382,6 +382,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) struct raw6_sock *rp = raw6_sk(sk); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { + atomic_inc(&sk->sk_drops); kfree_skb(skb); return NET_RX_DROP; } @@ -405,7 +406,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) if (inet->hdrincl) { if (skb_checksum_complete(skb)) { - /* FIXME: increment a raw6 drops counter here */ + atomic_inc(&sk->sk_drops); kfree_skb(skb); return 0; } @@ -496,7 +497,7 @@ csum_copy_err: as some normal condition. */ err = (flags&MSG_DONTWAIT) ? 
-EAGAIN : -EHOSTUNREACH; - /* FIXME: increment a raw6 drops counter here */ + atomic_inc(&sk->sk_drops); goto out; } @@ -1254,7 +1255,7 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) srcp = inet_sk(sp)->num; seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p\n", + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, @@ -1266,7 +1267,7 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp); + atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } static int raw6_seq_show(struct seq_file *seq, void *v) @@ -1277,7 +1278,7 @@ static int raw6_seq_show(struct seq_file *seq, void *v) "local_address " "remote_address " "st tx_queue rx_queue tr tm->when retrnsmt" - " uid timeout inode\n"); + " uid timeout inode drops\n"); else raw6_sock_seq_show(seq, v, raw6_seq_private(seq)->bucket); return 0; -- cgit v1.2.3 From b24b8a247ff65c01b252025926fe564209fae4fc Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 23 Jan 2008 21:20:07 -0800 Subject: [NET]: Convert init_timer into setup_timer Many-many code in the kernel initialized the timer->function and timer->data together with calling init_timer(timer). There is already a helper for this. Use it for networking code. The patch is HUGE, but makes the code 130 lines shorter (98 insertions(+), 228 deletions(-)). Signed-off-by: Pavel Emelyanov Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 4 +--- net/ipv6/mcast.c | 14 +++++--------- 2 files changed, 6 insertions(+), 12 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e8c347579da..c0720e4659b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -366,9 +366,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) in6_dev_hold(ndev); #ifdef CONFIG_IPV6_PRIVACY - init_timer(&ndev->regen_timer); - ndev->regen_timer.function = ipv6_regen_rndid; - ndev->regen_timer.data = (unsigned long) ndev; + setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev); if ((dev->flags&IFF_LOOPBACK) || dev->type == ARPHRD_TUNNEL || #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 331d728c203..17d7318ff7b 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -903,9 +903,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, struct in6_addr *addr) return -ENOMEM; } - init_timer(&mc->mca_timer); - mc->mca_timer.function = igmp6_timer_handler; - mc->mca_timer.data = (unsigned long) mc; + setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc); ipv6_addr_copy(&mc->mca_addr, addr); mc->idev = idev; @@ -2259,14 +2257,12 @@ void ipv6_mc_init_dev(struct inet6_dev *idev) write_lock_bh(&idev->lock); rwlock_init(&idev->mc_lock); idev->mc_gq_running = 0; - init_timer(&idev->mc_gq_timer); - idev->mc_gq_timer.data = (unsigned long) idev; - idev->mc_gq_timer.function = &mld_gq_timer_expire; + setup_timer(&idev->mc_gq_timer, mld_gq_timer_expire, + (unsigned long)idev); idev->mc_tomb = NULL; idev->mc_ifc_count = 0; - init_timer(&idev->mc_ifc_timer); - idev->mc_ifc_timer.data = (unsigned long) idev; - idev->mc_ifc_timer.function = &mld_ifc_timer_expire; + setup_timer(&idev->mc_ifc_timer, mld_ifc_timer_expire, + (unsigned long)idev); idev->mc_qrv = MLD_QRV_DEFAULT; idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL; idev->mc_v1_seen = 0; -- cgit v1.2.3 From 
0148894223740da4818d7f4e6f92cbb5481a25b8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Nov 2007 21:32:26 -0800 Subject: [IPV6]: Only set nfheader_len for top xfrm dst We only need to set nfheader_len in the top xfrm dst. This is because we only ever read the nfheader_len from the top xfrm dst. It is also easier to count nfheader_len as part of header_len which then lets us remove the ugly wrapper functions for incrementing and decrementing header lengths in xfrm6_policy.c. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 3 ++- net/ipv6/xfrm6_policy.c | 26 ++++---------------------- 2 files changed, 6 insertions(+), 23 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3bef30e4a23..150615758fd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1098,7 +1098,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, inet->cork.length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; - exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0); + exthdrlen = rt->u.dst.header_len + (opt ? 
opt->opt_flen : 0) - + rt->u.dst.nfheader_len; length += exthdrlen; transhdrlen += exthdrlen; } else { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index b8e9eb445d7..3cad3e8dc4d 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -102,24 +102,6 @@ __xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr) (struct in6_addr*)&x->props.saddr; } -static inline void -__xfrm6_bundle_len_inc(int *len, int *nflen, struct xfrm_state *x) -{ - if (x->type->flags & XFRM_TYPE_NON_FRAGMENT) - *nflen += x->props.header_len; - else - *len += x->props.header_len; -} - -static inline void -__xfrm6_bundle_len_dec(int *len, int *nflen, struct xfrm_state *x) -{ - if (x->type->flags & XFRM_TYPE_NON_FRAGMENT) - *nflen -= x->props.header_len; - else - *len -= x->props.header_len; -} - /* Allocate chain of dst_entry's, attach known xfrm's, calculate * all the metrics... Shortly, bundle a bundle. */ @@ -142,7 +124,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int int i; int err = 0; int header_len = 0; - int nfheader_len = 0; int trailer_len = 0; dst = dst_prev = NULL; @@ -175,7 +156,9 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst1->next = dst_prev; dst_prev = dst1; - __xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]); + if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT) + dst->nfheader_len += xfrm[i]->props.header_len; + header_len += xfrm[i]->props.header_len; trailer_len += xfrm[i]->props.trailer_len; if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { @@ -223,7 +206,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst_prev->flags |= DST_HOST; dst_prev->lastuse = jiffies; dst_prev->header_len = header_len; - dst_prev->nfheader_len = nfheader_len; dst_prev->trailer_len = trailer_len; memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); @@ -242,7 +224,7 @@ __xfrm6_bundle_create(struct xfrm_policy 
*policy, struct xfrm_state **xfrm, int x->u.rt6.rt6i_src = rt0->rt6i_src; x->u.rt6.rt6i_idev = rt0->rt6i_idev; in6_dev_hold(rt0->rt6i_idev); - __xfrm6_bundle_len_dec(&header_len, &nfheader_len, x->u.dst.xfrm); + header_len -= x->u.dst.xfrm->props.header_len; trailer_len -= x->u.dst.xfrm->props.trailer_len; } -- cgit v1.2.3 From b4ce92775c2e7ff9cf79cca4e0a19c8c5fd6287b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Nov 2007 21:33:32 -0800 Subject: [IPV6]: Move nfheader_len into rt6_info The dst member nfheader_len is only used by IPv6. It's also currently creating a rather ugly alignment hole in struct dst. Therefore this patch moves it from there into struct rt6_info. It also reorders the fields in rt6_info to minimize holes. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 5 +++-- net/ipv6/xfrm6_policy.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 150615758fd..387030d2483 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1099,7 +1099,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) - - rt->u.dst.nfheader_len; + rt->nfheader_len; length += exthdrlen; transhdrlen += exthdrlen; } else { @@ -1114,7 +1114,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); - fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0); + fragheaderlen = sizeof(struct ipv6hdr) + rt->nfheader_len + + (opt ? 
opt->opt_nflen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 3cad3e8dc4d..5b02f0efd38 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -157,7 +157,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst_prev = dst1; if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT) - dst->nfheader_len += xfrm[i]->props.header_len; + ((struct rt6_info *)dst)->nfheader_len += + xfrm[i]->props.header_len; header_len += xfrm[i]->props.header_len; trailer_len += xfrm[i]->props.trailer_len; -- cgit v1.2.3 From 352e512c32b634768303a43768245a0363cebbe7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Nov 2007 21:34:06 -0800 Subject: [NET]: Eliminate duplicate copies of dst_discard We have a number of copies of dst_discard scattered around the place which all do the same thing, namely free a packet on the input or output paths. This patch deletes all of them except dst_discard and points all the users to it. The only non-trivial bit is decnet where it returns an error. However, conceptually this is identical to the blackhole functions used in IPv4 and IPv6 which do not return errors. So they should either all return errors or all return zero. For now I've stuck with the majority and picked zero as the return value. It doesn't really matter in practice since few if any driver would react differently depending on a zero return value or NET_RX_DROP. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/exthdrs.c | 13 ++----------- net/ipv6/route.c | 21 ++++----------------- 2 files changed, 6 insertions(+), 28 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 1e89efd38a0..cee06b1655c 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -318,18 +319,8 @@ void __init ipv6_destopt_init(void) printk(KERN_ERR "ipv6_destopt_init: Could not register protocol\n"); } -/******************************** - NONE header. No data in packet. - ********************************/ - -static int ipv6_nodata_rcv(struct sk_buff *skb) -{ - kfree_skb(skb); - return 0; -} - static struct inet6_protocol nodata_protocol = { - .handler = ipv6_nodata_rcv, + .handler = dst_discard, .flags = INET6_PROTO_NOPOLICY, }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 20083e0d399..ac70e2d3b10 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -152,7 +152,6 @@ struct rt6_info ip6_null_entry = { static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct sk_buff *skb); -static int ip6_pkt_blk_hole(struct sk_buff *skb); struct rt6_info ip6_prohibit_entry = { .u = { @@ -181,8 +180,8 @@ struct rt6_info ip6_blk_hole_entry = { .obsolete = -1, .error = -EINVAL, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, - .input = ip6_pkt_blk_hole, - .output = ip6_pkt_blk_hole, + .input = dst_discard, + .output = dst_discard, .ops = &ip6_dst_ops, .path = (struct dst_entry*)&ip6_blk_hole_entry, } @@ -782,12 +781,6 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) EXPORT_SYMBOL(ip6_route_output); -static int ip6_blackhole_output(struct sk_buff *skb) -{ - kfree_skb(skb); - return 0; -} - int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl) { struct rt6_info *ort = (struct rt6_info *) *dstp; @@ -800,8 +793,8 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl 
atomic_set(&new->__refcnt, 1); new->__use = 1; - new->input = ip6_blackhole_output; - new->output = ip6_blackhole_output; + new->input = dst_discard; + new->output = dst_discard; memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); new->dev = ort->u.dst.dev; @@ -1811,12 +1804,6 @@ static int ip6_pkt_prohibit_out(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); } -static int ip6_pkt_blk_hole(struct sk_buff *skb) -{ - kfree_skb(skb); - return 0; -} - #endif /* -- cgit v1.2.3 From 8ce68ceb55fb62d2c8e9a3e94c4ef6ff3e3064ce Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Nov 2007 21:35:01 -0800 Subject: [IPSEC]: Only set neighbour on top xfrm dst The neighbour field is only used by dst_confirm which only ever happens on the top-most xfrm dst. So it's a waste to duplicate for every other xfrm dst. This patch moves its setting out of the loop so that only the top one gets set. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 5b02f0efd38..bc508d0a87d 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -188,6 +188,10 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst_prev->child = &rt->u.dst; dst->path = &rt->u.dst; + + /* Copy neighbour for reachability confirmation */ + dst->neighbour = neigh_clone(rt->u.dst.neighbour); + if (rt->rt6i_node) ((struct xfrm_dst *)dst)->path_cookie = rt->rt6i_node->fn_sernum; @@ -210,8 +214,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst_prev->trailer_len = trailer_len; memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); - /* Copy neighbour for reachability confirmation */ - dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; 
dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output; /* Sheit... I remember I did this right. Apparently, -- cgit v1.2.3 From 45ff5a3f9a3d0b1b4f063b5285ab39b7fac59471 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Nov 2007 21:35:32 -0800 Subject: [IPSEC]: Set dst->input to dst_discard The input function should never be invoked on IPsec dst objects. This is because we don't apply IPsec on input until after we've made the routing decision. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index bc508d0a87d..89432279d3a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -214,7 +215,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst_prev->trailer_len = trailer_len; memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); - dst_prev->input = rt->u.dst.input; + dst_prev->input = dst_discard; dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output; /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ -- cgit v1.2.3 From fff693888012806370c98c601fbaa141fb12dfca Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Nov 2007 21:36:07 -0800 Subject: [IPSEC]: Make sure idev is consistent with dev in xfrm_dst Previously we took the device from the bottom route and idev from the top route. This is bad because idev may well point to a different device. This patch changes it so that we get the idev from the device directly. It also makes it an error if either dev or idev is NULL. This is consistent with the rest of the routing code which also treats these cases as errors. 
I've removed the err initialisation in xfrm6_policy.c because it achieves no purpose and hid a bug when an initial version of this patch neglected to set err to -ENODEV (fortunately the IPv4 version warned about it). Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 89432279d3a..77dc3651437 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -123,7 +123,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int } }; int i; - int err = 0; + int err; int header_len = 0; int trailer_len = 0; @@ -201,13 +201,20 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst_prev = *dst_p; i = 0; + err = -ENODEV; for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; dst_prev->xfrm = xfrm[i++]; dst_prev->dev = rt->u.dst.dev; - if (rt->u.dst.dev) - dev_hold(rt->u.dst.dev); + if (!rt->u.dst.dev) + goto error; + dev_hold(rt->u.dst.dev); + + x->u.rt6.rt6i_idev = in6_dev_get(rt->u.dst.dev); + if (!x->u.rt6.rt6i_idev) + goto error; + dst_prev->obsolete = -1; dst_prev->flags |= DST_HOST; dst_prev->lastuse = jiffies; @@ -226,8 +233,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int memcpy(&x->u.rt6.rt6i_gateway, &rt0->rt6i_gateway, sizeof(x->u.rt6.rt6i_gateway)); x->u.rt6.rt6i_dst = rt0->rt6i_dst; x->u.rt6.rt6i_src = rt0->rt6i_src; - x->u.rt6.rt6i_idev = rt0->rt6i_idev; - in6_dev_hold(rt0->rt6i_idev); header_len -= x->u.dst.xfrm->props.header_len; trailer_len -= x->u.dst.xfrm->props.trailer_len; } -- cgit v1.2.3 From f04e7e8d7f175c05bbde3ae748bf2541da53721d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Nov 2007 21:36:51 -0800 Subject: [IPSEC]: Replace x->type->{local,remote}_addr with flags The functions local_addr and 
remote_addr are more than what they're needed for. The same thing can be done easily with flags on the type object. This patch does that and simplifies the wrapper functions in xfrm6_policy accordingly. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/mip6.c | 11 ++--------- net/ipv6/xfrm6_policy.c | 20 ++++++++------------ 2 files changed, 10 insertions(+), 21 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 7fd841d4101..edfd9cdd721 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -34,11 +34,6 @@ #include #include -static xfrm_address_t *mip6_xfrm_addr(struct xfrm_state *x, xfrm_address_t *addr) -{ - return x->coaddr; -} - static inline unsigned int calc_padlen(unsigned int len, unsigned int n) { return (n - len + 16) & 0x7; @@ -337,14 +332,13 @@ static struct xfrm_type mip6_destopt_type = .description = "MIP6DESTOPT", .owner = THIS_MODULE, .proto = IPPROTO_DSTOPTS, - .flags = XFRM_TYPE_NON_FRAGMENT, + .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR, .init_state = mip6_destopt_init_state, .destructor = mip6_destopt_destroy, .input = mip6_destopt_input, .output = mip6_destopt_output, .reject = mip6_destopt_reject, .hdr_offset = mip6_destopt_offset, - .local_addr = mip6_xfrm_addr, }; static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) @@ -467,13 +461,12 @@ static struct xfrm_type mip6_rthdr_type = .description = "MIP6RT", .owner = THIS_MODULE, .proto = IPPROTO_ROUTING, - .flags = XFRM_TYPE_NON_FRAGMENT, + .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR, .init_state = mip6_rthdr_init_state, .destructor = mip6_rthdr_destroy, .input = mip6_rthdr_input, .output = mip6_rthdr_output, .hdr_offset = mip6_rthdr_offset, - .remote_addr = mip6_xfrm_addr, }; static int __init mip6_init(void) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 77dc3651437..3b38e493d15 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -87,20 +87,16 @@ 
__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) return dst; } -static inline struct in6_addr* -__xfrm6_bundle_addr_remote(struct xfrm_state *x, struct in6_addr *addr) +static inline xfrm_address_t *__xfrm6_bundle_addr_remote(struct xfrm_state *x) { - return (x->type->remote_addr) ? - (struct in6_addr*)x->type->remote_addr(x, (xfrm_address_t *)addr) : - (struct in6_addr*)&x->id.daddr; + return (x->type->flags & XFRM_TYPE_REMOTE_COADDR) ? x->coaddr : + &x->id.daddr; } -static inline struct in6_addr* -__xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr) +static inline xfrm_address_t *__xfrm6_bundle_addr_local(struct xfrm_state *x) { - return (x->type->local_addr) ? - (struct in6_addr*)x->type->local_addr(x, (xfrm_address_t *)addr) : - (struct in6_addr*)&x->props.saddr; + return (x->type->flags & XFRM_TYPE_LOCAL_COADDR) ? x->coaddr : + &x->props.saddr; } /* Allocate chain of dst_entry's, attach known xfrm's, calculate @@ -171,9 +167,9 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4; break; case AF_INET6: - ipv6_addr_copy(&fl_tunnel.fl6_dst, __xfrm6_bundle_addr_remote(xfrm[i], &fl->fl6_dst)); + ipv6_addr_copy(&fl_tunnel.fl6_dst, (struct in6_addr *)__xfrm6_bundle_addr_remote(xfrm[i])); - ipv6_addr_copy(&fl_tunnel.fl6_src, __xfrm6_bundle_addr_local(xfrm[i], &fl->fl6_src)); + ipv6_addr_copy(&fl_tunnel.fl6_src, (struct in6_addr *)__xfrm6_bundle_addr_local(xfrm[i])); break; default: BUG_ON(1); -- cgit v1.2.3 From 66cdb3ca27323a92712d289fc5edc7841d74a139 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Nov 2007 21:37:28 -0800 Subject: [IPSEC]: Move flow construction into xfrm_dst_lookup This patch moves the flow construction from the callers of xfrm_dst_lookup into that function. It also changes xfrm_dst_lookup so that it takes an xfrm state as its argument instead of explicit addresses. 
This removes any address-specific logic from the callers of xfrm_dst_lookup which is needed to correctly support inter-family transforms. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 97 ++++++++++++++++++------------------------------- 1 file changed, 35 insertions(+), 62 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 3b38e493d15..8e78530865a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -11,7 +11,8 @@ * */ -#include +#include +#include #include #include #include @@ -26,35 +27,40 @@ static struct dst_ops xfrm6_dst_ops; static struct xfrm_policy_afinfo xfrm6_policy_afinfo; -static int xfrm6_dst_lookup(struct xfrm_dst **xdst, struct flowi *fl) +static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr, + xfrm_address_t *daddr) { - struct dst_entry *dst = ip6_route_output(NULL, fl); - int err = dst->error; - if (!err) - *xdst = (struct xfrm_dst *) dst; - else + struct flowi fl = {}; + struct dst_entry *dst; + int err; + + memcpy(&fl.fl6_dst, daddr, sizeof(fl.fl6_dst)); + if (saddr) + memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src)); + + dst = ip6_route_output(NULL, &fl); + + err = dst->error; + if (dst->error) { dst_release(dst); - return err; + dst = ERR_PTR(err); + } + + return dst; } static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) { - struct rt6_info *rt; - struct flowi fl_tunnel = { - .nl_u = { - .ip6_u = { - .daddr = *(struct in6_addr *)&daddr->a6, - }, - }, - }; - - if (!xfrm6_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { - ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)&daddr->a6, - (struct in6_addr *)&saddr->a6); - dst_release(&rt->u.dst); - return 0; - } - return -EHOSTUNREACH; + struct dst_entry *dst; + + dst = xfrm6_dst_lookup(0, NULL, daddr); + if (IS_ERR(dst)) + return -EHOSTUNREACH; + + ipv6_get_saddr(dst, (struct in6_addr *)&daddr->a6, + (struct in6_addr *)&saddr->a6); + dst_release(dst); 
+ return 0; } static struct dst_entry * @@ -87,18 +93,6 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) return dst; } -static inline xfrm_address_t *__xfrm6_bundle_addr_remote(struct xfrm_state *x) -{ - return (x->type->flags & XFRM_TYPE_REMOTE_COADDR) ? x->coaddr : - &x->id.daddr; -} - -static inline xfrm_address_t *__xfrm6_bundle_addr_local(struct xfrm_state *x) -{ - return (x->type->flags & XFRM_TYPE_LOCAL_COADDR) ? x->coaddr : - &x->props.saddr; -} - /* Allocate chain of dst_entry's, attach known xfrm's, calculate * all the metrics... Shortly, bundle a bundle. */ @@ -110,14 +104,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int struct dst_entry *dst, *dst_prev; struct rt6_info *rt0 = (struct rt6_info*)(*dst_p); struct rt6_info *rt = rt0; - struct flowi fl_tunnel = { - .nl_u = { - .ip6_u = { - .saddr = fl->fl6_src, - .daddr = fl->fl6_dst, - } - } - }; int i; int err; int header_len = 0; @@ -160,25 +146,12 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int trailer_len += xfrm[i]->props.trailer_len; if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { - unsigned short encap_family = xfrm[i]->props.family; - switch(encap_family) { - case AF_INET: - fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4; - fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4; - break; - case AF_INET6: - ipv6_addr_copy(&fl_tunnel.fl6_dst, (struct in6_addr *)__xfrm6_bundle_addr_remote(xfrm[i])); - - ipv6_addr_copy(&fl_tunnel.fl6_src, (struct in6_addr *)__xfrm6_bundle_addr_local(xfrm[i])); - break; - default: - BUG_ON(1); - } - - err = xfrm_dst_lookup((struct xfrm_dst **) &rt, - &fl_tunnel, encap_family); - if (err) + dst1 = xfrm_dst_lookup(xfrm[i], 0); + err = PTR_ERR(dst1); + if (IS_ERR(dst1)) goto error; + + rt = (struct rt6_info *)dst1; } else dst_hold(&rt->u.dst); } -- cgit v1.2.3 From 25ee3286dcbc830a833354bb1d15567956844813 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 11 Dec 2007 09:32:34 -0800 Subject: 
[IPSEC]: Merge common code into xfrm_bundle_create Half of the code in xfrm4_bundle_create and xfrm6_bundle_create is common. This patch extracts that logic and puts it into xfrm_bundle_create. The rest of it is then accessed through afinfo. As a result this fixes the problem with inter-family transforms where we treat every xfrm dst in the bundle as if it belongs to the top family. This patch also fixes a long-standing error-path bug where we may free the xfrm states twice. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 136 ++++++++---------------------------------------- 1 file changed, 22 insertions(+), 114 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 8e78530865a..63932c5fd3c 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -93,126 +93,33 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) return dst; } -/* Allocate chain of dst_entry's, attach known xfrm's, calculate - * all the metrics... Shortly, bundle a bundle. 
- */ - -static int -__xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, - struct flowi *fl, struct dst_entry **dst_p) +static int xfrm6_get_tos(struct flowi *fl) { - struct dst_entry *dst, *dst_prev; - struct rt6_info *rt0 = (struct rt6_info*)(*dst_p); - struct rt6_info *rt = rt0; - int i; - int err; - int header_len = 0; - int trailer_len = 0; - - dst = dst_prev = NULL; - dst_hold(&rt->u.dst); - - for (i = 0; i < nx; i++) { - struct dst_entry *dst1 = dst_alloc(&xfrm6_dst_ops); - struct xfrm_dst *xdst; + return 0; +} - if (unlikely(dst1 == NULL)) { - err = -ENOBUFS; - dst_release(&rt->u.dst); - goto error; - } +static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +{ + struct rt6_info *rt = (struct rt6_info*)xdst->route; - if (!dst) - dst = dst1; - else { - dst_prev->child = dst1; - dst1->flags |= DST_NOHASH; - dst_clone(dst1); - } + xdst->u.dst.dev = dev; + dev_hold(dev); - xdst = (struct xfrm_dst *)dst1; - xdst->route = &rt->u.dst; - xdst->genid = xfrm[i]->genid; - if (rt->rt6i_node) - xdst->route_cookie = rt->rt6i_node->fn_sernum; - - dst1->next = dst_prev; - dst_prev = dst1; - - if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT) - ((struct rt6_info *)dst)->nfheader_len += - xfrm[i]->props.header_len; - header_len += xfrm[i]->props.header_len; - trailer_len += xfrm[i]->props.trailer_len; - - if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { - dst1 = xfrm_dst_lookup(xfrm[i], 0); - err = PTR_ERR(dst1); - if (IS_ERR(dst1)) - goto error; - - rt = (struct rt6_info *)dst1; - } else - dst_hold(&rt->u.dst); - } + xdst->u.rt6.rt6i_idev = in6_dev_get(rt->u.dst.dev); + if (!xdst->u.rt6.rt6i_idev) + return -ENODEV; - dst_prev->child = &rt->u.dst; - dst->path = &rt->u.dst; - - /* Copy neighbour for reachability confirmation */ - dst->neighbour = neigh_clone(rt->u.dst.neighbour); - - if (rt->rt6i_node) - ((struct xfrm_dst *)dst)->path_cookie = rt->rt6i_node->fn_sernum; - - *dst_p = dst; - dst = dst_prev; - - dst_prev = 
*dst_p; - i = 0; - err = -ENODEV; - for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { - struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; - - dst_prev->xfrm = xfrm[i++]; - dst_prev->dev = rt->u.dst.dev; - if (!rt->u.dst.dev) - goto error; - dev_hold(rt->u.dst.dev); - - x->u.rt6.rt6i_idev = in6_dev_get(rt->u.dst.dev); - if (!x->u.rt6.rt6i_idev) - goto error; - - dst_prev->obsolete = -1; - dst_prev->flags |= DST_HOST; - dst_prev->lastuse = jiffies; - dst_prev->header_len = header_len; - dst_prev->trailer_len = trailer_len; - memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); - - dst_prev->input = dst_discard; - dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output; - /* Sheit... I remember I did this right. Apparently, - * it was magically lost, so this code needs audit */ - x->u.rt6.rt6i_flags = rt0->rt6i_flags&(RTF_ANYCAST|RTF_LOCAL); - x->u.rt6.rt6i_metric = rt0->rt6i_metric; - x->u.rt6.rt6i_node = rt0->rt6i_node; - x->u.rt6.rt6i_gateway = rt0->rt6i_gateway; - memcpy(&x->u.rt6.rt6i_gateway, &rt0->rt6i_gateway, sizeof(x->u.rt6.rt6i_gateway)); - x->u.rt6.rt6i_dst = rt0->rt6i_dst; - x->u.rt6.rt6i_src = rt0->rt6i_src; - header_len -= x->u.dst.xfrm->props.header_len; - trailer_len -= x->u.dst.xfrm->props.trailer_len; - } + /* Sheit... I remember I did this right. 
Apparently, + * it was magically lost, so this code needs audit */ + xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | + RTF_LOCAL); + xdst->u.rt6.rt6i_metric = rt->rt6i_metric; + xdst->u.rt6.rt6i_node = rt->rt6i_node; + xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; + xdst->u.rt6.rt6i_dst = rt->rt6i_dst; + xdst->u.rt6.rt6i_src = rt->rt6i_src; - xfrm_init_pmtu(dst); return 0; - -error: - if (dst) - dst_free(dst); - return err; } static inline void @@ -355,8 +262,9 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .dst_lookup = xfrm6_dst_lookup, .get_saddr = xfrm6_get_saddr, .find_bundle = __xfrm6_find_bundle, - .bundle_create = __xfrm6_bundle_create, .decode_session = _decode_session6, + .get_tos = xfrm6_get_tos, + .fill_dst = xfrm6_fill_dst, }; static void __init xfrm6_policy_init(void) -- cgit v1.2.3 From e40b3286158565909692e5914ea4a11bdbcc68c8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Nov 2007 21:39:08 -0800 Subject: [IPSEC]: Forbid BEET + ipcomp for now While BEET can theoretically work with IPComp the current code can't do that because it tries to construct a BEET mode tunnel type which doesn't (and cannot) exist. In fact as it is it won't even attach a tunnel object at all for BEET which is bogus. To support this fully we'd also need to change the policy checks on input to recognise a plain tunnel as a legal variant of an optional BEET transform. This patch simply fails such constructions for now. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/ipcomp6.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 0cd4056f912..b276d04d6db 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -190,7 +190,6 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) { struct xfrm_state *t = NULL; - u8 mode = XFRM_MODE_TUNNEL; t = xfrm_state_alloc(); if (!t) @@ -204,9 +203,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET6; - if (x->props.mode == XFRM_MODE_BEET) - mode = x->props.mode; - t->props.mode = mode; + t->props.mode = x->props.mode; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); if (xfrm_init_state(t)) @@ -405,22 +402,22 @@ static int ipcomp6_init_state(struct xfrm_state *x) if (x->encap) goto out; - err = -ENOMEM; - ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); - if (!ipcd) - goto out; - x->props.header_len = 0; switch (x->props.mode) { - case XFRM_MODE_BEET: case XFRM_MODE_TRANSPORT: break; case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); + break; default: - goto error; + goto out; } + err = -ENOMEM; + ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); + if (!ipcd) + goto out; + mutex_lock(&ipcomp6_resource_mutex); if (!ipcomp6_alloc_scratches()) goto error; -- cgit v1.2.3 From a2deb6d26f16ed7bf787dbd6a58c5d7be47d8db3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Nov 2007 21:39:38 -0800 Subject: [IPSEC]: Move x->outer_mode->output out of locked section RO mode is the only one that requires a locked output function. So it's easier to move the lock into that function rather than requiring everyone else to run under the lock. 
In particular, this allows us to move the size check into the output function without causing a potential dead-lock should the ICMP error somehow hit the same SA on transmission. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_mode_ro.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c index a7bc8c62317..4a01cb3c370 100644 --- a/net/ipv6/xfrm6_mode_ro.c +++ b/net/ipv6/xfrm6_mode_ro.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -53,7 +54,9 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) __skb_pull(skb, hdr_len); memmove(ipv6_hdr(skb), iph, hdr_len); + spin_lock_bh(&x->lock); x->lastused = get_seconds(); + spin_unlock_bh(&x->lock); return 0; } -- cgit v1.2.3 From 29bb43b4ec4e625b0659186fc8a7c8f8b7c81982 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Nov 2007 21:40:13 -0800 Subject: [INET]: Give outer DSCP directly to ip*_copy_dscp This patch changes the prototype of ipv4_copy_dscp and ipv6_copy_dscp so that they directly take the outer DSCP rather than the outer IP header. This will help us to unify the code for inter-family tunnels. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/ip6_tunnel.c | 2 +- net/ipv6/xfrm6_mode_tunnel.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5383b33db8c..a4051afaf77 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -635,7 +635,7 @@ static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t, struct sk_buff *skb) { if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) - ipv6_copy_dscp(ipv6h, ipv6_hdr(skb)); + ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb)); if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h))) IP6_ECN_set_ce(ipv6_hdr(skb)); diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index fd84e221727..9a43ea72248 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -95,7 +95,8 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) nh = skb_network_header(skb); if (nh[IP6CB(skb)->nhoff] == IPPROTO_IPV6) { if (x->props.flags & XFRM_STATE_DECAP_DSCP) - ipv6_copy_dscp(ipv6_hdr(skb), ipipv6_hdr(skb)); + ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)), + ipipv6_hdr(skb)); if (!(x->props.flags & XFRM_STATE_NOECN)) ipip6_ecn_decapsulate(skb); } else { -- cgit v1.2.3 From 36cf9acf93e8561d9faec24849e57688a81eb9c5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Nov 2007 21:40:52 -0800 Subject: [IPSEC]: Separate inner/outer mode processing on output With inter-family transforms the inner mode differs from the outer mode. Attempting to handle both sides from the same function means that it needs to handle both IPv4 and IPv6 which creates duplication and confusion. This patch separates the two parts on the output path so that each function deals with one family only. In particular, the functions xfrm4_extract_output/xfrm6_extract_output moves the pertinent fields from the IPv4/IPv6 IP headers into a neutral format stored in skb->cb. This is then used by the outer mode output functions to write the outer IP header. 
In this way the output function no longer has to know about the inner address family. Since the extract functions are only called by tunnel modes (the only modes that can support inter-family transforms), I've also moved the xfrm*_tunnel_check_size calls into them. This allows the correct ICMP message to be sent as opposed to now where you might call icmp_send with an IPv6 packet and vice versa. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_mode_beet.c | 28 ++++++++++++++-------------- net/ipv6/xfrm6_mode_tunnel.c | 31 ++++++++++--------------------- net/ipv6/xfrm6_output.c | 39 ++++++++++++++++++++++++++++++--------- net/ipv6/xfrm6_state.c | 18 ++++++++++++++++++ 4 files changed, 72 insertions(+), 44 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 2bfb4f05c14..4988ed9c76c 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -25,25 +25,24 @@ */ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) { - struct ipv6hdr *iph, *top_iph; - u8 *prevhdr; - int hdr_len; + struct ipv6hdr *top_iph; - iph = ipv6_hdr(skb); - - hdr_len = ip6_find_1stfragopt(skb, &prevhdr); - - skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data); skb_set_network_header(skb, -x->props.header_len); - skb->transport_header = skb->network_header + hdr_len; - __skb_pull(skb, hdr_len); - + skb->mac_header = skb->network_header + + offsetof(struct ipv6hdr, nexthdr); + skb->transport_header = skb->network_header + sizeof(*top_iph); top_iph = ipv6_hdr(skb); - memmove(top_iph, iph, hdr_len); + top_iph->version = 6; + + memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, + sizeof(top_iph->flow_lbl)); + top_iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol; + + ipv6_change_dsfield(top_iph, 0, XFRM_MODE_SKB_CB(skb)->tos); + top_iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl; ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); 
ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); - return 0; } @@ -76,7 +75,8 @@ out: static struct xfrm_mode xfrm6_beet_mode = { .input = xfrm6_beet_input, - .output = xfrm6_beet_output, + .output2 = xfrm6_beet_output, + .output = xfrm6_prepare_output, .owner = THIS_MODULE, .encap = XFRM_MODE_BEET, .flags = XFRM_MODE_FLAG_TUNNEL, diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 9a43ea72248..d45ce5d4419 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -38,33 +38,22 @@ static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb) static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { struct dst_entry *dst = skb->dst; - struct xfrm_dst *xdst = (struct xfrm_dst*)dst; - struct ipv6hdr *iph, *top_iph; + struct ipv6hdr *top_iph; int dsfield; - iph = ipv6_hdr(skb); - skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + offsetof(struct ipv6hdr, nexthdr); - skb->transport_header = skb->network_header + sizeof(*iph); + skb->transport_header = skb->network_header + sizeof(*top_iph); top_iph = ipv6_hdr(skb); top_iph->version = 6; - if (xdst->route->ops->family == AF_INET6) { - top_iph->priority = iph->priority; - top_iph->flow_lbl[0] = iph->flow_lbl[0]; - top_iph->flow_lbl[1] = iph->flow_lbl[1]; - top_iph->flow_lbl[2] = iph->flow_lbl[2]; - top_iph->nexthdr = IPPROTO_IPV6; - } else { - top_iph->priority = 0; - top_iph->flow_lbl[0] = 0; - top_iph->flow_lbl[1] = 0; - top_iph->flow_lbl[2] = 0; - top_iph->nexthdr = IPPROTO_IPIP; - } - dsfield = ipv6_get_dsfield(top_iph); + + memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, + sizeof(top_iph->flow_lbl)); + top_iph->nexthdr = x->inner_mode->afinfo->proto; + + dsfield = XFRM_MODE_SKB_CB(skb)->tos; dsfield = INET_ECN_encapsulate(dsfield, dsfield); if (x->props.flags & XFRM_STATE_NOECN) dsfield &= ~INET_ECN_MASK; @@ -72,7 +61,6 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct 
sk_buff *skb) top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT); ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); - skb->protocol = htons(ETH_P_IPV6); return 0; } @@ -116,7 +104,8 @@ out: static struct xfrm_mode xfrm6_tunnel_mode = { .input = xfrm6_tunnel_input, - .output = xfrm6_tunnel_output, + .output2 = xfrm6_tunnel_output, + .output = xfrm6_prepare_output, .owner = THIS_MODULE, .encap = XFRM_MODE_TUNNEL, .flags = XFRM_MODE_FLAG_TUNNEL, diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 656976760ad..bc2e80e3b0b 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -10,10 +10,12 @@ */ #include -#include +#include +#include #include #include #include +#include #include #include @@ -43,19 +45,38 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) return ret; } +int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + + err = xfrm6_tunnel_check_size(skb); + if (err) + return err; + + return xfrm6_extract_header(skb); +} + +int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + + err = x->inner_mode->afinfo->extract_output(x, skb); + if (err) + return err; + + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + + skb->protocol = htons(ETH_P_IPV6); + + return x->outer_mode->output2(x, skb); +} +EXPORT_SYMBOL(xfrm6_prepare_output); + static inline int xfrm6_output_one(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; - struct xfrm_state *x = dst->xfrm; struct ipv6hdr *iph; int err; - if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { - err = xfrm6_tunnel_check_size(skb); - if (err) - goto error_nolock; - } - err = xfrm_output(skb); if (err) goto error_nolock; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index b392bee396f..98b05f47232 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -14,6 +14,7 @@ #include #include #include +#include #include 
#include @@ -168,13 +169,30 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) return 0; } +int xfrm6_extract_header(struct sk_buff *skb) +{ + struct ipv6hdr *iph = ipv6_hdr(skb); + + XFRM_MODE_SKB_CB(skb)->id = 0; + XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); + XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); + XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; + XFRM_MODE_SKB_CB(skb)->protocol = iph->nexthdr; + memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, + sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); + + return 0; +} + static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, + .proto = IPPROTO_IPV6, .owner = THIS_MODULE, .init_tempsel = __xfrm6_init_tempsel, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, .output = xfrm6_output, + .extract_output = xfrm6_extract_output, }; void __init xfrm6_state_init(void) -- cgit v1.2.3 From 227620e295090629fcb2c46ad3828222ab65438d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Nov 2007 21:41:28 -0800 Subject: [IPSEC]: Separate inner/outer mode processing on input With inter-family transforms the inner mode differs from the outer mode. Attempting to handle both sides from the same function means that it needs to handle both IPv4 and IPv6 which creates duplication and confusion. This patch separates the two parts on the input path so that each function deals with one family only. In particular, the functions xfrm4_extract_input/xfrm6_extract_input moves the pertinent fields from the IPv4/IPv6 IP headers into a neutral format stored in skb->cb. This is then used by the inner mode input functions to modify the inner IP header. In this way the input function no longer has to know about the outer address family. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/xfrm6_input.c | 7 ++++++- net/ipv6/xfrm6_mode_beet.c | 36 ++++++++++++++++++++++++------------ net/ipv6/xfrm6_mode_tunnel.c | 31 +++++++++---------------------- net/ipv6/xfrm6_output.c | 1 + net/ipv6/xfrm6_state.c | 5 ++++- 5 files changed, 44 insertions(+), 36 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 515783707e8..c458d0a2e68 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -16,6 +16,11 @@ #include #include +int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb) +{ + return xfrm6_extract_header(skb); +} + int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) { int err; @@ -68,7 +73,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) xfrm_vec[xfrm_nr++] = x; - if (x->outer_mode->input(x, skb)) + if (x->inner_mode->input(x, skb)) goto drop; if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 4988ed9c76c..0527d11c1ae 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -19,6 +19,20 @@ #include #include +static void xfrm6_beet_make_header(struct sk_buff *skb) +{ + struct ipv6hdr *iph = ipv6_hdr(skb); + + iph->version = 6; + + memcpy(iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, + sizeof(iph->flow_lbl)); + iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol; + + ipv6_change_dsfield(iph, 0, XFRM_MODE_SKB_CB(skb)->tos); + iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl; +} + /* Add encapsulation header. * * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. 
@@ -31,16 +45,11 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) skb->mac_header = skb->network_header + offsetof(struct ipv6hdr, nexthdr); skb->transport_header = skb->network_header + sizeof(*top_iph); - top_iph = ipv6_hdr(skb); - top_iph->version = 6; + xfrm6_beet_make_header(skb); - memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, - sizeof(top_iph->flow_lbl)); - top_iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol; + top_iph = ipv6_hdr(skb); - ipv6_change_dsfield(top_iph, 0, XFRM_MODE_SKB_CB(skb)->tos); - top_iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl; ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); return 0; @@ -51,19 +60,21 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) struct ipv6hdr *ip6h; const unsigned char *old_mac; int size = sizeof(struct ipv6hdr); - int err = -EINVAL; + int err; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + err = skb_cow_head(skb, size + skb->mac_len); + if (err) goto out; - skb_push(skb, size); - memmove(skb->data, skb_network_header(skb), size); + __skb_push(skb, size); skb_reset_network_header(skb); old_mac = skb_mac_header(skb); skb_set_mac_header(skb, -skb->mac_len); memmove(skb_mac_header(skb), old_mac, skb->mac_len); + xfrm6_beet_make_header(skb); + ip6h = ipv6_hdr(skb); ip6h->payload_len = htons(skb->len - size); ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6); @@ -74,7 +85,8 @@ out: } static struct xfrm_mode xfrm6_beet_mode = { - .input = xfrm6_beet_input, + .input2 = xfrm6_beet_input, + .input = xfrm_prepare_input, .output2 = xfrm6_beet_output, .output = xfrm6_prepare_output, .owner = THIS_MODULE, diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index d45ce5d4419..f7d0d661265 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -25,12 +25,6 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff 
*skb) IP6_ECN_set_ce(inner_iph); } -static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb) -{ - if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6_hdr(skb)))) - IP_ECN_set_ce(ipip_hdr(skb)); -} - /* Add encapsulation header. * * The top IP header will be constructed per RFC 2401. @@ -68,10 +62,8 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; const unsigned char *old_mac; - const unsigned char *nh = skb_network_header(skb); - if (nh[IP6CB(skb)->nhoff] != IPPROTO_IPV6 && - nh[IP6CB(skb)->nhoff] != IPPROTO_IPIP) + if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) goto out; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; @@ -80,18 +72,12 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) goto out; - nh = skb_network_header(skb); - if (nh[IP6CB(skb)->nhoff] == IPPROTO_IPV6) { - if (x->props.flags & XFRM_STATE_DECAP_DSCP) - ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)), - ipipv6_hdr(skb)); - if (!(x->props.flags & XFRM_STATE_NOECN)) - ipip6_ecn_decapsulate(skb); - } else { - if (!(x->props.flags & XFRM_STATE_NOECN)) - ip6ip_ecn_decapsulate(skb); - skb->protocol = htons(ETH_P_IP); - } + if (x->props.flags & XFRM_STATE_DECAP_DSCP) + ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)), + ipipv6_hdr(skb)); + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip6_ecn_decapsulate(skb); + old_mac = skb_mac_header(skb); skb_set_mac_header(skb, -skb->mac_len); memmove(skb_mac_header(skb), old_mac, skb->mac_len); @@ -103,7 +89,8 @@ out: } static struct xfrm_mode xfrm6_tunnel_mode = { - .input = xfrm6_tunnel_input, + .input2 = xfrm6_tunnel_input, + .input = xfrm_prepare_input, .output2 = xfrm6_tunnel_output, .output = xfrm6_prepare_output, .owner = THIS_MODULE, diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index bc2e80e3b0b..c45050cfe72 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -53,6 +53,7 @@ int 
xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) if (err) return err; + IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); return xfrm6_extract_header(skb); } diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 98b05f47232..90fef0a4726 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -177,7 +177,8 @@ int xfrm6_extract_header(struct sk_buff *skb) XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; - XFRM_MODE_SKB_CB(skb)->protocol = iph->nexthdr; + XFRM_MODE_SKB_CB(skb)->protocol = + skb_network_header(skb)[IP6CB(skb)->nhoff]; memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); @@ -187,11 +188,13 @@ int xfrm6_extract_header(struct sk_buff *skb) static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .proto = IPPROTO_IPV6, + .eth_proto = htons(ETH_P_IPV6), .owner = THIS_MODULE, .init_tempsel = __xfrm6_init_tempsel, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, .output = xfrm6_output, + .extract_input = xfrm6_extract_input, .extract_output = xfrm6_extract_output, }; -- cgit v1.2.3 From ef76bc23ef2acf20c8f7f841a542d8ab74c827c6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 11 Jan 2008 19:15:08 -0800 Subject: [IPV6]: Add ip6_local_out Most callers of the LOCAL_OUT chain will set the IP packet length before doing so. They also share the same output function dst_output. This patch creates a new function called ip6_local_out which does all of that and converts the appropriate users over to it. Apart from removing duplicate code, it will also help in merging the IPsec output path. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/ip6_output.c | 33 +++++++++++++++++++++++++++------ net/ipv6/ip6_tunnel.c | 4 +--- net/ipv6/netfilter/ip6t_REJECT.c | 4 +--- net/ipv6/xfrm6_output.c | 7 +------ 4 files changed, 30 insertions(+), 18 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 387030d2483..bd121f9ae0a 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -29,7 +29,7 @@ */ #include -#include +#include #include #include #include @@ -70,6 +70,31 @@ static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *f spin_unlock_bh(&ip6_id_lock); } +int __ip6_local_out(struct sk_buff *skb) +{ + int len; + + len = skb->len - sizeof(struct ipv6hdr); + if (len > IPV6_MAXPLEN) + len = 0; + ipv6_hdr(skb)->payload_len = htons(len); + + return nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, + dst_output); +} + +int ip6_local_out(struct sk_buff *skb) +{ + int err; + + err = __ip6_local_out(skb); + if (likely(err == 1)) + err = dst_output(skb); + + return err; +} +EXPORT_SYMBOL_GPL(ip6_local_out); + static int ip6_output_finish(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; @@ -1403,10 +1428,6 @@ int ip6_push_pending_frames(struct sock *sk) *(__be32*)hdr = fl->fl6_flowlabel | htonl(0x60000000 | ((int)np->cork.tclass << 20)); - if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) - hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); - else - hdr->payload_len = 0; hdr->hop_limit = np->cork.hop_limit; hdr->nexthdr = proto; ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); @@ -1423,7 +1444,7 @@ int ip6_push_pending_frames(struct sock *sk) ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); } - err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); + err = ip6_local_out(skb); if (err) { if (err > 0) err = np->recverr ? 
net_xmit_errno(err) : 0; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a4051afaf77..29b5321e39c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -910,15 +910,13 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000); dsfield = INET_ECN_encapsulate(0, dsfield); ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); - ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src); ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst); nf_reset(skb); pkt_len = skb->len; - err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, - skb->dst->dev, dst_output); + err = ip6_local_out(skb); if (net_xmit_eval(err) == 0) { stats->tx_bytes += pkt_len; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 1a7d2917545..c1c66348283 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -121,7 +121,6 @@ static void send_reset(struct sk_buff *oldskb) ip6h->version = 6; ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT); ip6h->nexthdr = IPPROTO_TCP; - ip6h->payload_len = htons(sizeof(struct tcphdr)); ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr); ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr); @@ -159,8 +158,7 @@ static void send_reset(struct sk_buff *oldskb) nf_ct_attach(nskb, oldskb); - NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, nskb, NULL, nskb->dst->dev, - dst_output); + ip6_local_out(nskb); } static inline void diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index c45050cfe72..0f0ff51f6db 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -75,16 +75,12 @@ EXPORT_SYMBOL(xfrm6_prepare_output); static inline int xfrm6_output_one(struct sk_buff *skb) { - struct ipv6hdr *iph; int err; err = xfrm_output(skb); if (err) goto error_nolock; - iph = ipv6_hdr(skb); - iph->payload_len = htons(skb->len - sizeof(*iph)); - 
IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; err = 0; @@ -102,8 +98,7 @@ static int xfrm6_output_finish2(struct sk_buff *skb) while (likely((err = xfrm6_output_one(skb)) == 0)) { nf_reset(skb); - err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, - skb->dst->dev, dst_output); + err = __ip6_local_out(skb); if (unlikely(err != 1)) break; -- cgit v1.2.3 From 862b82c6f960cc61274d370aa78ce1112f92a83e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Nov 2007 21:43:11 -0800 Subject: [IPSEC]: Merge most of the output path As part of the work on asynchronous cryptographic operations, we need to be able to resume from the spot where they occur. As such, it helps if we isolate them to one spot. This patch moves most of the remaining family-specific processing into the common output code. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/route.c | 1 + net/ipv6/xfrm6_output.c | 77 +++++-------------------------------------------- net/ipv6/xfrm6_policy.c | 1 + net/ipv6/xfrm6_state.c | 2 ++ 4 files changed, 11 insertions(+), 70 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ac70e2d3b10..4ef2cfaa346 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -113,6 +113,7 @@ static struct dst_ops ip6_dst_ops = { .negative_advice = ip6_negative_advice, .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, + .local_out = ip6_local_out, .entry_size = sizeof(struct rt6_info), }; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 0f0ff51f6db..a0a924991c4 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -66,6 +66,9 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) return err; memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); +#ifdef CONFIG_NETFILTER + IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; +#endif skb->protocol = htons(ETH_P_IPV6); @@ -73,80 +76,14 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) } 
EXPORT_SYMBOL(xfrm6_prepare_output); -static inline int xfrm6_output_one(struct sk_buff *skb) -{ - int err; - - err = xfrm_output(skb); - if (err) - goto error_nolock; - - IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; - err = 0; - -out_exit: - return err; -error_nolock: - kfree_skb(skb); - goto out_exit; -} - -static int xfrm6_output_finish2(struct sk_buff *skb) -{ - int err; - - while (likely((err = xfrm6_output_one(skb)) == 0)) { - nf_reset(skb); - - err = __ip6_local_out(skb); - if (unlikely(err != 1)) - break; - - if (!skb->dst->xfrm) - return dst_output(skb); - - err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, - skb->dst->dev, xfrm6_output_finish2); - if (unlikely(err != 1)) - break; - } - - return err; -} - static int xfrm6_output_finish(struct sk_buff *skb) { - struct sk_buff *segs; - - if (!skb_is_gso(skb)) - return xfrm6_output_finish2(skb); +#ifdef CONFIG_NETFILTER + IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; +#endif skb->protocol = htons(ETH_P_IPV6); - segs = skb_gso_segment(skb, 0); - kfree_skb(skb); - if (unlikely(IS_ERR(segs))) - return PTR_ERR(segs); - - do { - struct sk_buff *nskb = segs->next; - int err; - - segs->next = NULL; - err = xfrm6_output_finish2(segs); - - if (unlikely(err)) { - while ((segs = nskb)) { - nskb = segs->next; - segs->next = NULL; - kfree_skb(segs); - } - return err; - } - - segs = nskb; - } while (segs); - - return 0; + return xfrm_output(skb); } int xfrm6_output(struct sk_buff *skb) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 63932c5fd3c..a31dd531e19 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -252,6 +252,7 @@ static struct dst_ops xfrm6_dst_ops = { .update_pmtu = xfrm6_update_pmtu, .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, + .local_out = __ip6_local_out, .gc_thresh = 1024, .entry_size = sizeof(struct xfrm_dst), }; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 90fef0a4726..bb09e85a336 100644 --- a/net/ipv6/xfrm6_state.c +++ 
b/net/ipv6/xfrm6_state.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -189,6 +190,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .proto = IPPROTO_IPV6, .eth_proto = htons(ETH_P_IPV6), + .nf_post_routing = NF_IP6_POST_ROUTING, .owner = THIS_MODULE, .init_tempsel = __xfrm6_init_tempsel, .tmpl_sort = __xfrm6_tmpl_sort, -- cgit v1.2.3 From 716062fd4c2f88a33ab409f62a1e7397ad0a7e33 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Nov 2007 21:44:23 -0800 Subject: [IPSEC]: Merge most of the input path As part of the work on asynchronous cryptographic operations, we need to be able to resume from the spot where they occur. As such, it helps if we isolate them to one spot. This patch moves most of the remaining family-specific processing into the common input code. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_input.c | 118 ++++++------------------------------------------- net/ipv6/xfrm6_state.c | 1 + 2 files changed, 14 insertions(+), 105 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index c458d0a2e68..3b9eedf5b24 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -23,118 +23,26 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb) int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) { - int err; - __be32 seq; - struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH]; - struct xfrm_state *x; - int xfrm_nr = 0; - int decaps = 0; - unsigned int nhoff; - - nhoff = IP6CB(skb)->nhoff; - - seq = 0; - if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) - goto drop; - - do { - struct ipv6hdr *iph = ipv6_hdr(skb); - - if (xfrm_nr == XFRM_MAX_DEPTH) - goto drop; - - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, - nexthdr, AF_INET6); - if (x == NULL) - goto drop; - spin_lock(&x->lock); - if (unlikely(x->km.state != XFRM_STATE_VALID)) - goto drop_unlock; - - if 
(x->props.replay_window && xfrm_replay_check(x, seq)) - goto drop_unlock; - - if (xfrm_state_check_expire(x)) - goto drop_unlock; - - nexthdr = x->type->input(x, skb); - if (nexthdr <= 0) - goto drop_unlock; - - skb_network_header(skb)[nhoff] = nexthdr; - - if (x->props.replay_window) - xfrm_replay_advance(x, seq); - - x->curlft.bytes += skb->len; - x->curlft.packets++; - - spin_unlock(&x->lock); - - xfrm_vec[xfrm_nr++] = x; - - if (x->inner_mode->input(x, skb)) - goto drop; - - if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { - decaps = 1; - break; - } - - if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) < 0) - goto drop; - } while (!err); - - /* Allocate new secpath or COW existing one. */ - if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { - struct sec_path *sp; - sp = secpath_dup(skb->sp); - if (!sp) - goto drop; - if (skb->sp) - secpath_put(skb->sp); - skb->sp = sp; - } - - if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH) - goto drop; - - memcpy(skb->sp->xvec + skb->sp->len, xfrm_vec, - xfrm_nr * sizeof(xfrm_vec[0])); - skb->sp->len += xfrm_nr; - - nf_reset(skb); + XFRM_SPI_SKB_CB(skb)->nhoff = IP6CB(skb)->nhoff; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); + return xfrm_input(skb, nexthdr, spi, 0); +} +EXPORT_SYMBOL(xfrm6_rcv_spi); - if (decaps) { - dst_release(skb->dst); - skb->dst = NULL; - netif_rx(skb); - return -1; - } else { +int xfrm6_transport_finish(struct sk_buff *skb, int async) +{ #ifdef CONFIG_NETFILTER - ipv6_hdr(skb)->payload_len = htons(skb->len); - __skb_push(skb, skb->data - skb_network_header(skb)); + ipv6_hdr(skb)->payload_len = htons(skb->len); + __skb_push(skb, skb->data - skb_network_header(skb)); - NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL, - ip6_rcv_finish); - return -1; + NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL, + ip6_rcv_finish); + return -1; #else - return 1; + return 1; #endif - } - -drop_unlock: - spin_unlock(&x->lock); - xfrm_state_put(x); -drop: - while (--xfrm_nr 
>= 0) - xfrm_state_put(xfrm_vec[xfrm_nr]); - kfree_skb(skb); - return -1; } -EXPORT_SYMBOL(xfrm6_rcv_spi); - int xfrm6_rcv(struct sk_buff *skb) { return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index bb09e85a336..00360b514e9 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -198,6 +198,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .output = xfrm6_output, .extract_input = xfrm6_extract_input, .extract_output = xfrm6_extract_output, + .transport_finish = xfrm6_transport_finish, }; void __init xfrm6_state_init(void) -- cgit v1.2.3 From 668dc8af3150f837f7f0461001bbbc0ce25d7bdf Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 16 Dec 2007 15:55:02 -0800 Subject: [IPSEC]: Move integrity stat collection into xfrm_input Similar to the moving out of the replay processing on the output, this patch moves the integrity stat collection from x->type->input into xfrm_input. This would eventually allow transforms such as AH/ESP to be lockless. The error value EBADMSG (currently unused in the crypto layer) is used to indicate a failed integrity check. In future this error can be directly returned by the crypto layer once we switch to aead algorithms. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/ah6.c | 3 +-- net/ipv6/esp6.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 4eaf55072b1..d4b59ecb0b5 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -379,10 +379,9 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) err = ah_mac_digest(ahp, skb, ah->auth_data); if (err) goto free_out; - err = -EINVAL; if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) { LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n"); - x->stats.integrity_failed++; + err = -EBADMSG; goto free_out; } } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 44405325467..096974ba642 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -177,8 +177,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) BUG(); if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) { - x->stats.integrity_failed++; - ret = -EINVAL; + ret = -EBADMSG; goto out; } } -- cgit v1.2.3 From 0ebea8ef3559b545c37b016f44e84c3b33e47c39 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Nov 2007 21:45:58 -0800 Subject: [IPSEC]: Move state lock into x->type->input This patch releases the lock on the state before calling x->type->input. It also adds the lock to the spots where they're currently needed. Most of those places (all except mip6) are expected to disappear with async crypto. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/ah6.c | 9 +++++++-- net/ipv6/esp6.c | 37 +++++++++++++++++++++++-------------- net/ipv6/mip6.c | 14 ++++++++++---- 3 files changed, 40 insertions(+), 20 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index d4b59ecb0b5..1b51d1eedbd 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -370,6 +370,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) ip6h->flow_lbl[2] = 0; ip6h->hop_limit = 0; + spin_lock(&x->lock); { u8 auth_data[MAX_AH_AUTH_LEN]; @@ -378,13 +379,17 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) skb_push(skb, hdr_len); err = ah_mac_digest(ahp, skb, ah->auth_data); if (err) - goto free_out; + goto unlock; if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) { LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n"); err = -EBADMSG; - goto free_out; } } +unlock: + spin_unlock(&x->lock); + + if (err) + goto free_out; skb->network_header += ah_hlen; memcpy(skb_network_header(skb), tmp_hdr, hdr_len); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 096974ba642..5bd5292ad9f 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -165,30 +165,32 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) goto out; } + if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) { + ret = -EINVAL; + goto out; + } + + skb->ip_summed = CHECKSUM_NONE; + + spin_lock(&x->lock); + /* If integrity check is required, do this. 
*/ if (esp->auth.icv_full_len) { u8 sum[alen]; ret = esp_mac_digest(esp, skb, 0, skb->len - alen); if (ret) - goto out; + goto unlock; if (skb_copy_bits(skb, skb->len - alen, sum, alen)) BUG(); if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) { ret = -EBADMSG; - goto out; + goto unlock; } } - if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) { - ret = -EINVAL; - goto out; - } - - skb->ip_summed = CHECKSUM_NONE; - esph = (struct ip_esp_hdr *)skb->data; iph = ipv6_hdr(skb); @@ -197,15 +199,13 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen); { - u8 nexthdr[2]; struct scatterlist *sg = &esp->sgbuf[0]; - u8 padlen; if (unlikely(nfrags > ESP_NUM_FAST_SG)) { sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC); if (!sg) { ret = -ENOMEM; - goto out; + goto unlock; } } sg_init_table(sg, nfrags); @@ -215,8 +215,17 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) ret = crypto_blkcipher_decrypt(&desc, sg, sg, elen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); - if (unlikely(ret)) - goto out; + } + +unlock: + spin_unlock(&x->lock); + + if (unlikely(ret)) + goto out; + + { + u8 nexthdr[2]; + u8 padlen; if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2)) BUG(); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index edfd9cdd721..49d396620ea 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -128,12 +128,15 @@ static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data; + int err = destopt->nexthdr; + spin_lock(&x->lock); if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) && !ipv6_addr_any((struct in6_addr *)x->coaddr)) - return -ENOENT; + err = -ENOENT; + spin_unlock(&x->lock); - return destopt->nexthdr; + return err; } /* Destination Option Header is inserted. 
@@ -344,12 +347,15 @@ static struct xfrm_type mip6_destopt_type = static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) { struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data; + int err = rt2->rt_hdr.nexthdr; + spin_lock(&x->lock); if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) && !ipv6_addr_any((struct in6_addr *)x->coaddr)) - return -ENOENT; + err = -ENOENT; + spin_unlock(&x->lock); - return rt2->rt_hdr.nexthdr; + return err; } /* Routing Header type 2 is inserted. -- cgit v1.2.3 From d26f398400311982d2433debae85746c348b7d58 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Nov 2007 21:47:08 -0800 Subject: [IPSEC]: Make x->lastused an unsigned long Currently x->lastused is u64 which means that it cannot be read/written atomically on all architectures. David Miller observed that the value stored in it is only an unsigned long which is always atomic. So based on his suggestion this patch changes the internal representation from u64 to unsigned long while the user-interface still refers to it as u64. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_mode_ro.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c index 4a01cb3c370..63d5d493098 100644 --- a/net/ipv6/xfrm6_mode_ro.c +++ b/net/ipv6/xfrm6_mode_ro.c @@ -54,9 +54,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) __skb_pull(skb, hdr_len); memmove(ipv6_hdr(skb), iph, hdr_len); - spin_lock_bh(&x->lock); x->lastused = get_seconds(); - spin_unlock_bh(&x->lock); return 0; } -- cgit v1.2.3 From 60d5fcfb19d8a958fc563e52240cd05ec23f36c9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 19 Nov 2007 18:47:58 -0800 Subject: [IPSEC]: Remove nhoff from xfrm_input The nhoff field isn't actually necessary in xfrm_input. For tunnel mode transforms we now throw away the output IP header so it makes no sense to fill in the nexthdr field. 
For transport mode we can now let the function transport_finish do the setting and it knows where the nexthdr field is. The only other thing that needs the nexthdr field to be set is the header extraction code. However, we can simply move the protocol extraction out of the generic header extraction. We want to minimise the amount of info we have to carry around between transforms as this simplifies the resumption process for async crypto. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_input.c | 4 +++- net/ipv6/xfrm6_output.c | 3 ++- net/ipv6/xfrm6_state.c | 2 -- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 3b9eedf5b24..5c006c84594 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -23,7 +23,6 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb) int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) { - XFRM_SPI_SKB_CB(skb)->nhoff = IP6CB(skb)->nhoff; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); return xfrm_input(skb, nexthdr, spi, 0); } @@ -31,6 +30,9 @@ EXPORT_SYMBOL(xfrm6_rcv_spi); int xfrm6_transport_finish(struct sk_buff *skb, int async) { + skb_network_header(skb)[IP6CB(skb)->nhoff] = + XFRM_MODE_SKB_CB(skb)->protocol; + #ifdef CONFIG_NETFILTER ipv6_hdr(skb)->payload_len = htons(skb->len); __skb_push(skb, skb->data - skb_network_header(skb)); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index a0a924991c4..318669a9cb4 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -53,7 +53,8 @@ int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) if (err) return err; - IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr; + return xfrm6_extract_header(skb); } diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 00360b514e9..df7e98d914f 100644 --- 
a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -178,8 +178,6 @@ int xfrm6_extract_header(struct sk_buff *skb) XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; - XFRM_MODE_SKB_CB(skb)->protocol = - skb_network_header(skb)[IP6CB(skb)->nhoff]; memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); -- cgit v1.2.3 From 1bf06cd2e338fd6fc29169d30eaf0df982338285 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 19 Nov 2007 18:50:17 -0800 Subject: [IPSEC]: Add async resume support on input This patch adds support for async resumptions on input. To do so, the transform would return -EINPROGRESS and subsequently invoke the function xfrm_input_resume to resume processing. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_input.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 5c006c84594..e317d085546 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -41,6 +41,9 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) ip6_rcv_finish); return -1; #else + if (async) + return ip6_rcv_finish(skb); + return 1; #endif } -- cgit v1.2.3 From 6e23ae2a48750bda407a4a58f52a4865d7308bf5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 19 Nov 2007 18:53:30 -0800 Subject: [NETFILTER]: Introduce NF_INET_ hook values The IPv4 and IPv6 hook values are identical, yet some code tries to figure out the "correct" value by looking at the address family. Introduce NF_INET_* values for both IPv4 and IPv6. The old values are kept in a #ifndef __KERNEL__ section for userspace compatibility. Signed-off-by: Patrick McHardy Acked-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/ip6_input.c | 6 ++-- net/ipv6/ip6_output.c | 14 +++++---- net/ipv6/mcast.c | 6 ++-- net/ipv6/ndisc.c | 6 ++-- net/ipv6/netfilter.c | 6 ++-- net/ipv6/netfilter/ip6_tables.c | 26 ++++++++--------- net/ipv6/netfilter/ip6t_REJECT.c | 6 ++-- net/ipv6/netfilter/ip6t_eui64.c | 4 +-- net/ipv6/netfilter/ip6t_owner.c | 3 +- net/ipv6/netfilter/ip6table_filter.c | 22 +++++++------- net/ipv6/netfilter/ip6table_mangle.c | 40 +++++++++++++------------- net/ipv6/netfilter/ip6table_raw.c | 14 ++++----- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 12 ++++---- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/xfrm6_input.c | 2 +- net/ipv6/xfrm6_output.c | 2 +- net/ipv6/xfrm6_state.c | 2 +- 18 files changed, 92 insertions(+), 83 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index fac6f7f9dd7..79610b4bad3 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -134,7 +134,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt rcu_read_unlock(); - return NF_HOOK(PF_INET6,NF_IP6_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish); + return NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, dev, NULL, + ip6_rcv_finish); err: IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); drop: @@ -229,7 +230,8 @@ discard: int ip6_input(struct sk_buff *skb) { - return NF_HOOK(PF_INET6,NF_IP6_LOCAL_IN, skb, skb->dev, NULL, ip6_input_finish); + return NF_HOOK(PF_INET6, NF_INET_LOCAL_IN, skb, skb->dev, NULL, + ip6_input_finish); } int ip6_mc_input(struct sk_buff *skb) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index bd121f9ae0a..d54da616e3a 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -79,7 +79,7 @@ int __ip6_local_out(struct sk_buff *skb) len = 0; ipv6_hdr(skb)->payload_len = htons(len); - return nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, + return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); 
} @@ -145,8 +145,8 @@ static int ip6_output2(struct sk_buff *skb) is not supported in any case. */ if (newskb) - NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL, - newskb->dev, + NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb, + NULL, newskb->dev, ip6_dev_loopback_xmit); if (ipv6_hdr(skb)->hop_limit == 0) { @@ -159,7 +159,8 @@ static int ip6_output2(struct sk_buff *skb) IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); } - return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish); + return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, + ip6_output_finish); } static inline int ip6_skb_dst_mtu(struct sk_buff *skb) @@ -261,7 +262,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) { IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTREQUESTS); - return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, + return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, dst_output); } @@ -525,7 +526,8 @@ int ip6_forward(struct sk_buff *skb) hdr->hop_limit--; IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); - return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish); + return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev, + ip6_forward_finish); error: IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 17d7318ff7b..82b12940c2a 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1448,7 +1448,7 @@ static inline int mld_dev_queue_xmit2(struct sk_buff *skb) static inline int mld_dev_queue_xmit(struct sk_buff *skb) { - return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev, + return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, mld_dev_queue_xmit2); } @@ -1469,7 +1469,7 @@ static void mld_sendpack(struct sk_buff *skb) pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, IPPROTO_ICMPV6, 
csum_partial(skb_transport_header(skb), mldlen, 0)); - err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev, + err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, mld_dev_queue_xmit); if (!err) { ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT); @@ -1813,7 +1813,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) idev = in6_dev_get(skb->dev); - err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev, + err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, mld_dev_queue_xmit); if (!err) { ICMP6MSGOUT_INC_STATS(idev, type); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 85947eae5bf..b2531f80317 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -533,7 +533,8 @@ static void __ndisc_send(struct net_device *dev, idev = in6_dev_get(dst->dev); IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); - err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); + err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, + dst_output); if (!err) { ICMP6MSGOUT_INC_STATS(idev, type); ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); @@ -1538,7 +1539,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, buff->dst = dst; idev = in6_dev_get(dst->dev); IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); - err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, buff, NULL, dst->dev, dst_output); + err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, + dst_output); if (!err) { ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT); ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index b1326c2bf8a..175e19f8025 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -60,7 +60,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info) { struct ip6_rt_info *rt_info = nf_info_reroute(info); - if (info->hook == NF_IP6_LOCAL_OUT) { + if (info->hook == NF_INET_LOCAL_OUT) { struct ipv6hdr *iph = ipv6_hdr(skb); 
rt_info->daddr = iph->daddr; @@ -72,7 +72,7 @@ static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info) { struct ip6_rt_info *rt_info = nf_info_reroute(info); - if (info->hook == NF_IP6_LOCAL_OUT) { + if (info->hook == NF_INET_LOCAL_OUT) { struct ipv6hdr *iph = ipv6_hdr(skb); if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr)) @@ -89,7 +89,7 @@ __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, switch (skb->ip_summed) { case CHECKSUM_COMPLETE: - if (hook != NF_IP6_PRE_ROUTING && hook != NF_IP6_LOCAL_IN) + if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN) break; if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb->len - dataoff, protocol, diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index acaba153793..e1e87eff468 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -258,11 +258,11 @@ unconditional(const struct ip6t_ip6 *ipv6) defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) /* This cries for unification! 
*/ static const char *hooknames[] = { - [NF_IP6_PRE_ROUTING] = "PREROUTING", - [NF_IP6_LOCAL_IN] = "INPUT", - [NF_IP6_FORWARD] = "FORWARD", - [NF_IP6_LOCAL_OUT] = "OUTPUT", - [NF_IP6_POST_ROUTING] = "POSTROUTING", + [NF_INET_PRE_ROUTING] = "PREROUTING", + [NF_INET_LOCAL_IN] = "INPUT", + [NF_INET_FORWARD] = "FORWARD", + [NF_INET_LOCAL_OUT] = "OUTPUT", + [NF_INET_POST_ROUTING] = "POSTROUTING", }; enum nf_ip_trace_comments { @@ -502,7 +502,7 @@ mark_source_chains(struct xt_table_info *newinfo, /* No recursion; use packet counter to save back ptrs (reset to 0 as we leave), and comefrom to save source hook bitmask */ - for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) { + for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct ip6t_entry *e = (struct ip6t_entry *)(entry0 + pos); @@ -518,13 +518,13 @@ mark_source_chains(struct xt_table_info *newinfo, struct ip6t_standard_target *t = (void *)ip6t_get_target(e); - if (e->comefrom & (1 << NF_IP6_NUMHOOKS)) { + if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { printk("iptables: loop hook %u pos %u %08X.\n", hook, pos, e->comefrom); return 0; } e->comefrom - |= ((1 << hook) | (1 << NF_IP6_NUMHOOKS)); + |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ if ((e->target_offset == sizeof(struct ip6t_entry) @@ -544,10 +544,10 @@ mark_source_chains(struct xt_table_info *newinfo, /* Return: backtrack through the last big jump. 
*/ do { - e->comefrom ^= (1<comefrom ^= (1<comefrom - & (1 << NF_IP6_NUMHOOKS)) { + & (1 << NF_INET_NUMHOOKS)) { duprintf("Back unset " "on hook %u " "rule %u\n", @@ -746,7 +746,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e, } /* Check hooks & underflows */ - for (h = 0; h < NF_IP6_NUMHOOKS; h++) { + for (h = 0; h < NF_INET_NUMHOOKS; h++) { if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) @@ -800,7 +800,7 @@ translate_table(const char *name, newinfo->number = number; /* Init all hooks to impossible value. */ - for (i = 0; i < NF_IP6_NUMHOOKS; i++) { + for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = 0xFFFFFFFF; newinfo->underflow[i] = 0xFFFFFFFF; } @@ -824,7 +824,7 @@ translate_table(const char *name, } /* Check hooks all assigned */ - for (i = 0; i < NF_IP6_NUMHOOKS; i++) { + for (i = 0; i < NF_INET_NUMHOOKS; i++) { /* Only hooks which are valid */ if (!(valid_hooks & (1 << i))) continue; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index c1c66348283..960ba1780a9 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -164,7 +164,7 @@ static void send_reset(struct sk_buff *oldskb) static inline void send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) { - if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL) + if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) skb_in->dev = init_net.loopback_dev; icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); @@ -243,8 +243,8 @@ static struct xt_target ip6t_reject_reg __read_mostly = { .target = reject6_target, .targetsize = sizeof(struct ip6t_reject_info), .table = "filter", - .hooks = (1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | - (1 << NF_IP6_LOCAL_OUT), + .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT), .checkentry = check, .me = THIS_MODULE }; diff --git 
a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index 41df9a578c7..ff71269579d 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -67,8 +67,8 @@ static struct xt_match eui64_match __read_mostly = { .family = AF_INET6, .match = match, .matchsize = sizeof(int), - .hooks = (1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN) | - (1 << NF_IP6_FORWARD), + .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD), .me = THIS_MODULE, }; diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index 6036613aef3..1e0dc4a972c 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -73,7 +73,8 @@ static struct xt_match owner_match __read_mostly = { .family = AF_INET6, .match = match, .matchsize = sizeof(struct ip6t_owner_info), - .hooks = (1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING), + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING), .checkentry = checkentry, .me = THIS_MODULE, }; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 1d26b202bf3..0ae072dd692 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -17,7 +17,9 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("ip6tables filter table"); -#define FILTER_VALID_HOOKS ((1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | (1 << NF_IP6_LOCAL_OUT)) +#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT)) static struct { @@ -31,14 +33,14 @@ static struct .num_entries = 4, .size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error), .hook_entry = { - [NF_IP6_LOCAL_IN] = 0, - [NF_IP6_FORWARD] = sizeof(struct ip6t_standard), - [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 + [NF_INET_LOCAL_IN] = 0, + [NF_INET_FORWARD] = sizeof(struct ip6t_standard), + [NF_INET_LOCAL_OUT] = 
sizeof(struct ip6t_standard) * 2 }, .underflow = { - [NF_IP6_LOCAL_IN] = 0, - [NF_IP6_FORWARD] = sizeof(struct ip6t_standard), - [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 + [NF_INET_LOCAL_IN] = 0, + [NF_INET_FORWARD] = sizeof(struct ip6t_standard), + [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 }, }, .entries = { @@ -93,21 +95,21 @@ static struct nf_hook_ops ip6t_ops[] = { .hook = ip6t_hook, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_IN, + .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_FILTER, }, { .hook = ip6t_hook, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_FORWARD, + .hooknum = NF_INET_FORWARD, .priority = NF_IP6_PRI_FILTER, }, { .hook = ip6t_local_out_hook, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_OUT, + .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_FILTER, }, }; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index a0b6381f1e8..8e62b231682 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -15,11 +15,11 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("ip6tables mangle table"); -#define MANGLE_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | \ - (1 << NF_IP6_LOCAL_IN) | \ - (1 << NF_IP6_FORWARD) | \ - (1 << NF_IP6_LOCAL_OUT) | \ - (1 << NF_IP6_POST_ROUTING)) +#define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ + (1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT) | \ + (1 << NF_INET_POST_ROUTING)) static struct { @@ -33,18 +33,18 @@ static struct .num_entries = 6, .size = sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error), .hook_entry = { - [NF_IP6_PRE_ROUTING] = 0, - [NF_IP6_LOCAL_IN] = sizeof(struct ip6t_standard), - [NF_IP6_FORWARD] = sizeof(struct ip6t_standard) * 2, - [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, - [NF_IP6_POST_ROUTING] = sizeof(struct ip6t_standard) * 4, + [NF_INET_PRE_ROUTING] 
= 0, + [NF_INET_LOCAL_IN] = sizeof(struct ip6t_standard), + [NF_INET_FORWARD] = sizeof(struct ip6t_standard) * 2, + [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, + [NF_INET_POST_ROUTING] = sizeof(struct ip6t_standard) * 4, }, .underflow = { - [NF_IP6_PRE_ROUTING] = 0, - [NF_IP6_LOCAL_IN] = sizeof(struct ip6t_standard), - [NF_IP6_FORWARD] = sizeof(struct ip6t_standard) * 2, - [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, - [NF_IP6_POST_ROUTING] = sizeof(struct ip6t_standard) * 4, + [NF_INET_PRE_ROUTING] = 0, + [NF_INET_LOCAL_IN] = sizeof(struct ip6t_standard), + [NF_INET_FORWARD] = sizeof(struct ip6t_standard) * 2, + [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, + [NF_INET_POST_ROUTING] = sizeof(struct ip6t_standard) * 4, }, }, .entries = { @@ -125,35 +125,35 @@ static struct nf_hook_ops ip6t_ops[] = { .hook = ip6t_route_hook, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_PRE_ROUTING, + .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_MANGLE, }, { .hook = ip6t_local_hook, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_IN, + .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_MANGLE, }, { .hook = ip6t_route_hook, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_FORWARD, + .hooknum = NF_INET_FORWARD, .priority = NF_IP6_PRI_MANGLE, }, { .hook = ip6t_local_hook, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_OUT, + .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_MANGLE, }, { .hook = ip6t_route_hook, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_POST_ROUTING, + .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_MANGLE, }, }; diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 8f7109f991e..4fecd8de8cc 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -6,7 +6,7 @@ #include #include -#define RAW_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_OUT)) +#define RAW_VALID_HOOKS ((1 
<< NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) static struct { @@ -20,12 +20,12 @@ static struct .num_entries = 3, .size = sizeof(struct ip6t_standard) * 2 + sizeof(struct ip6t_error), .hook_entry = { - [NF_IP6_PRE_ROUTING] = 0, - [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) + [NF_INET_PRE_ROUTING] = 0, + [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) }, .underflow = { - [NF_IP6_PRE_ROUTING] = 0, - [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) + [NF_INET_PRE_ROUTING] = 0, + [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) }, }, .entries = { @@ -58,14 +58,14 @@ static struct nf_hook_ops ip6t_ops[] = { { .hook = ip6t_hook, .pf = PF_INET6, - .hooknum = NF_IP6_PRE_ROUTING, + .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_FIRST, .owner = THIS_MODULE, }, { .hook = ip6t_hook, .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_OUT, + .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_FIRST, .owner = THIS_MODULE, }, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index ad74bab0504..50f46787fda 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -263,42 +263,42 @@ static struct nf_hook_ops ipv6_conntrack_ops[] = { .hook = ipv6_defrag, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_PRE_ROUTING, + .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, }, { .hook = ipv6_conntrack_in, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_PRE_ROUTING, + .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_CONNTRACK, }, { .hook = ipv6_conntrack_local, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_OUT, + .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK, }, { .hook = ipv6_defrag, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_OUT, + .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, }, { .hook = ipv6_confirm, .owner = THIS_MODULE, .pf = 
PF_INET6, - .hooknum = NF_IP6_POST_ROUTING, + .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_LAST, }, { .hook = ipv6_confirm, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_IN, + .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_LAST-1, }, }; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index fd9123f3dc0..e99384f9764 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -192,7 +192,7 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff, return -NF_ACCEPT; } - if (nf_conntrack_checksum && hooknum == NF_IP6_PRE_ROUTING && + if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: ICMPv6 checksum failed\n"); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ae314f3fea4..ad622cc11bd 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -619,7 +619,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, goto error_fault; IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); - err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, rt->u.dst.dev, + err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); if (err > 0) err = np->recverr ? 
net_xmit_errno(err) : 0; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index e317d085546..e2c3efd2579 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -37,7 +37,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) ipv6_hdr(skb)->payload_len = htons(skb->len); __skb_push(skb, skb->data - skb_network_header(skb)); - NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, ip6_rcv_finish); return -1; #else diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 318669a9cb4..b34c58c6565 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -89,6 +89,6 @@ static int xfrm6_output_finish(struct sk_buff *skb) int xfrm6_output(struct sk_buff *skb) { - return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev, + return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dst->dev, xfrm6_output_finish); } diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index df7e98d914f..29e0d25b9e1 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -188,7 +188,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .proto = IPPROTO_IPV6, .eth_proto = htons(ETH_P_IPV6), - .nf_post_routing = NF_IP6_POST_ROUTING, + .nf_post_routing = NF_INET_POST_ROUTING, .owner = THIS_MODULE, .init_tempsel = __xfrm6_init_tempsel, .tmpl_sort = __xfrm6_tmpl_sort, -- cgit v1.2.3 From 294b4baf292197e13d1df1d253efa7ac84ffee3f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 14 Nov 2007 01:57:47 -0800 Subject: [IPSEC]: Kill afinfo->nf_post_routing After changeset: [NETFILTER]: Introduce NF_INET_ hook values It always evaluates to NF_INET_POST_ROUTING. Signed-off-by: David S. 
Miller --- net/ipv6/xfrm6_state.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 29e0d25b9e1..a7a7e8fd6a3 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -188,7 +188,6 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .proto = IPPROTO_IPV6, .eth_proto = htons(ETH_P_IPV6), - .nf_post_routing = NF_INET_POST_ROUTING, .owner = THIS_MODULE, .init_tempsel = __xfrm6_init_tempsel, .tmpl_sort = __xfrm6_tmpl_sort, -- cgit v1.2.3 From c1ee656ccb3b03304d38f852debccdd1567702e6 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 14 Nov 2007 15:55:29 +0900 Subject: [IPV6] ADDRCONF: Rename ipv6_saddr_label() to ipv6_addr_label(). This patch renames ipv6_saddr_label() to ipv6_addr_label() because address label is used for both of source address and destination address. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c0720e4659b..733374b68d2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -875,7 +875,7 @@ static inline int ipv6_saddr_preferred(int type) } /* static matching label */ -static inline int ipv6_saddr_label(const struct in6_addr *addr, int type) +static inline int ipv6_addr_label(const struct in6_addr *addr, int type) { /* * prefix (longest match) label @@ -910,7 +910,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, struct inet6_ifaddr *ifa_result = NULL; int daddr_type = __ipv6_addr_type(daddr); int daddr_scope = __ipv6_addr_src_scope(daddr_type); - u32 daddr_label = ipv6_saddr_label(daddr, daddr_type); + u32 daddr_label = ipv6_addr_label(daddr, daddr_type); struct net_device *dev; memset(&hiscore, 0, sizeof(hiscore)); @@ -1083,11 +1083,13 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, /* Rule 6: Prefer matching label */ 
if (hiscore.rule < 6) { - if (ipv6_saddr_label(&ifa_result->addr, hiscore.addr_type) == daddr_label) + if (ipv6_addr_label(&ifa_result->addr, + hiscore.addr_type) == daddr_label) hiscore.attrs |= IPV6_SADDR_SCORE_LABEL; hiscore.rule++; } - if (ipv6_saddr_label(&ifa->addr, score.addr_type) == daddr_label) { + if (ipv6_addr_label(&ifa->addr, + score.addr_type) == daddr_label) { score.attrs |= IPV6_SADDR_SCORE_LABEL; if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) { score.rule = 6; -- cgit v1.2.3 From 303065a8545bf7524550bd9564afb48e8a685a2d Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 14 Nov 2007 15:56:15 +0900 Subject: [IPV6] ADDRCONF: Allow address selection policy with ifindex. This patch allows ifindex to be a key for address selection policy table. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 733374b68d2..e1e591bfbdc 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -875,7 +875,8 @@ static inline int ipv6_saddr_preferred(int type) } /* static matching label */ -static inline int ipv6_addr_label(const struct in6_addr *addr, int type) +static inline int ipv6_addr_label(const struct in6_addr *addr, int type, + int ifindex) { /* * prefix (longest match) label @@ -910,7 +911,8 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, struct inet6_ifaddr *ifa_result = NULL; int daddr_type = __ipv6_addr_type(daddr); int daddr_scope = __ipv6_addr_src_scope(daddr_type); - u32 daddr_label = ipv6_addr_label(daddr, daddr_type); + int daddr_ifindex = daddr_dev ? 
daddr_dev->ifindex : 0; + u32 daddr_label = ipv6_addr_label(daddr, daddr_type, daddr_ifindex); struct net_device *dev; memset(&hiscore, 0, sizeof(hiscore)); @@ -1084,12 +1086,14 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, /* Rule 6: Prefer matching label */ if (hiscore.rule < 6) { if (ipv6_addr_label(&ifa_result->addr, - hiscore.addr_type) == daddr_label) + hiscore.addr_type, + ifa_result->idev->dev->ifindex) == daddr_label) hiscore.attrs |= IPV6_SADDR_SCORE_LABEL; hiscore.rule++; } if (ipv6_addr_label(&ifa->addr, - score.addr_type) == daddr_label) { + score.addr_type, + ifa->idev->dev->ifindex) == daddr_label) { score.attrs |= IPV6_SADDR_SCORE_LABEL; if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) { score.rule = 6; -- cgit v1.2.3 From 2a8cc6c89039e0530a3335954253b76ed0f9339a Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 14 Nov 2007 15:56:23 +0900 Subject: [IPV6] ADDRCONF: Support RFC3484 configurable address selection policy table. Policy table is implemented as an RCU linear list since we do not expect large list nor frequent updates. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- net/ipv6/Makefile | 1 + net/ipv6/addrconf.c | 40 +--- net/ipv6/addrlabel.c | 551 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 561 insertions(+), 31 deletions(-) create mode 100644 net/ipv6/addrlabel.c (limited to 'net/ipv6') diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 87c23a73d28..5ffa9800305 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ + addrlabel.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o sysctl_net_ipv6.o datagram.o \ diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e1e591bfbdc..a70cecf8fc8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -874,36 +874,6 @@ static inline int ipv6_saddr_preferred(int type) return 0; } -/* static matching label */ -static inline int ipv6_addr_label(const struct in6_addr *addr, int type, - int ifindex) -{ - /* - * prefix (longest match) label - * ----------------------------- - * ::1/128 0 - * ::/0 1 - * 2002::/16 2 - * ::/96 3 - * ::ffff:0:0/96 4 - * fc00::/7 5 - * 2001::/32 6 - */ - if (type & IPV6_ADDR_LOOPBACK) - return 0; - else if (type & IPV6_ADDR_COMPATv4) - return 3; - else if (type & IPV6_ADDR_MAPPED) - return 4; - else if (addr->s6_addr32[0] == htonl(0x20010000)) - return 6; - else if (addr->s6_addr16[0] == htons(0x2002)) - return 2; - else if ((addr->s6_addr[0] & 0xfe) == 0xfc) - return 5; - return 1; -} - int ipv6_dev_get_saddr(struct net_device *daddr_dev, struct in6_addr *daddr, struct in6_addr *saddr) { @@ -4189,7 +4159,13 @@ EXPORT_SYMBOL(unregister_inet6addr_notifier); int __init addrconf_init(void) { - int err = 0; + int err; + + if ((err = ipv6_addr_label_init()) < 0) { + printk(KERN_CRIT "IPv6 Addrconf: cannot initialize default policy table: %d.\n", + err); + return err; + } /* The addrconf netdev notifier requires that 
loopback_dev * has it's ipv6 private information allocated and setup @@ -4240,6 +4216,8 @@ int __init addrconf_init(void) __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr); __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr); + ipv6_addr_label_rtnl_register(); + #ifdef CONFIG_SYSCTL addrconf_sysctl.sysctl_header = register_sysctl_table(addrconf_sysctl.addrconf_root_dir); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c new file mode 100644 index 00000000000..204d4d66834 --- /dev/null +++ b/net/ipv6/addrlabel.c @@ -0,0 +1,551 @@ +/* + * IPv6 Address Label subsystem + * for the IPv6 "Default" Source Address Selection + * + * Copyright (C)2007 USAGI/WIDE Project + */ +/* + * Author: + * YOSHIFUJI Hideaki @ USAGI/WIDE Project + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#if 0 +#define ADDRLABEL(x...) printk(x) +#else +#define ADDRLABEL(x...) do { ; } while(0) +#endif + +/* + * Policy Table + */ +struct ip6addrlbl_entry +{ + struct in6_addr prefix; + int prefixlen; + int ifindex; + int addrtype; + u32 label; + struct hlist_node list; + atomic_t refcnt; + struct rcu_head rcu; +}; + +static struct ip6addrlbl_table +{ + struct hlist_head head; + spinlock_t lock; + u32 seq; +} ip6addrlbl_table; + +/* + * Default policy table (RFC3484 + extensions) + * + * prefix addr_type label + * ------------------------------------------------------------------------- + * ::1/128 LOOPBACK 0 + * ::/0 N/A 1 + * 2002::/16 N/A 2 + * ::/96 COMPATv4 3 + * ::ffff:0:0/96 V4MAPPED 4 + * fc00::/7 N/A 5 ULA (RFC 4193) + * 2001::/32 N/A 6 Teredo (RFC 4380) + * + * Note: 0xffffffff is used if we do not have any policies. 
+ */ + +#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL + +static const __initdata struct ip6addrlbl_init_table +{ + const struct in6_addr *prefix; + int prefixlen; + u32 label; +} ip6addrlbl_init_table[] = { + { /* ::/0 */ + .prefix = &in6addr_any, + .label = 1, + },{ /* fc00::/7 */ + .prefix = &(struct in6_addr){{{ 0xfc }}}, + .prefixlen = 7, + .label = 5, + },{ /* 2002::/16 */ + .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}}, + .prefixlen = 16, + .label = 2, + },{ /* 2001::/32 */ + .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, + .prefixlen = 32, + .label = 6, + },{ /* ::ffff:0:0 */ + .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}}, + .prefixlen = 96, + .label = 4, + },{ /* ::/96 */ + .prefix = &in6addr_any, + .prefixlen = 96, + .label = 3, + },{ /* ::1/128 */ + .prefix = &in6addr_loopback, + .prefixlen = 128, + .label = 0, + } +}; + +/* Object management */ +static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) +{ + kfree(p); +} + +static inline int ip6addrlbl_hold(struct ip6addrlbl_entry *p) +{ + return atomic_inc_not_zero(&p->refcnt); +} + +static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) +{ + if (atomic_dec_and_test(&p->refcnt)) + ip6addrlbl_free(p); +} + +static void ip6addrlbl_free_rcu(struct rcu_head *h) +{ + ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); +} + +/* Find label */ +static int __ip6addrlbl_match(struct ip6addrlbl_entry *p, + const struct in6_addr *addr, + int addrtype, int ifindex) +{ + if (p->ifindex && p->ifindex != ifindex) + return 0; + if (p->addrtype && p->addrtype != addrtype) + return 0; + if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen)) + return 0; + return 1; +} + +static struct ip6addrlbl_entry *__ipv6_addr_label(const struct in6_addr *addr, + int type, int ifindex) +{ + struct hlist_node *pos; + struct ip6addrlbl_entry *p; + hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { + if (__ip6addrlbl_match(p, addr, type, ifindex)) + return p; + } + return NULL; +} + 
+u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex) +{ + u32 label; + struct ip6addrlbl_entry *p; + + type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK; + + rcu_read_lock(); + p = __ipv6_addr_label(addr, type, ifindex); + label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; + rcu_read_unlock(); + + ADDRLABEL(KERN_DEBUG "%s(addr=" NIP6_FMT ", type=%d, ifindex=%d) => %08x\n", + __FUNCTION__, + NIP6(*addr), type, ifindex, + label); + + return label; +} + +/* allocate one entry */ +struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, + int prefixlen, int ifindex, + u32 label) +{ + struct ip6addrlbl_entry *newp; + int addrtype; + + ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u)\n", + __FUNCTION__, + NIP6(*prefix), prefixlen, + ifindex, + (unsigned int)label); + + addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); + + switch (addrtype) { + case IPV6_ADDR_MAPPED: + if (prefixlen > 96) + return ERR_PTR(-EINVAL); + if (prefixlen < 96) + addrtype = 0; + break; + case IPV6_ADDR_COMPATv4: + if (prefixlen != 96) + addrtype = 0; + break; + case IPV6_ADDR_LOOPBACK: + if (prefixlen != 128) + addrtype = 0; + break; + } + + newp = kmalloc(sizeof(*newp), GFP_KERNEL); + if (!newp) + return ERR_PTR(-ENOMEM); + + ipv6_addr_prefix(&newp->prefix, prefix, prefixlen); + newp->prefixlen = prefixlen; + newp->ifindex = ifindex; + newp->addrtype = addrtype; + newp->label = label; + INIT_HLIST_NODE(&newp->list); + atomic_set(&newp->refcnt, 1); + return newp; +} + +/* add a label */ +int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) +{ + int ret = 0; + + ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", + __FUNCTION__, + newp, replace); + + if (hlist_empty(&ip6addrlbl_table.head)) { + hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); + } else { + struct hlist_node *pos, *n; + struct ip6addrlbl_entry *p = NULL; + hlist_for_each_entry_safe(p, 
pos, n, + &ip6addrlbl_table.head, list) { + if (p->prefixlen == newp->prefixlen && + p->ifindex == newp->ifindex && + ipv6_addr_equal(&p->prefix, &newp->prefix)) { + if (!replace) { + ret = -EEXIST; + goto out; + } + hlist_replace_rcu(&p->list, &newp->list); + ip6addrlbl_put(p); + call_rcu(&p->rcu, ip6addrlbl_free_rcu); + goto out; + } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || + (p->prefixlen < newp->prefixlen)) { + hlist_add_before_rcu(&newp->list, &p->list); + goto out; + } + } + hlist_add_after_rcu(&p->list, &newp->list); + } +out: + if (!ret) + ip6addrlbl_table.seq++; + return ret; +} + +/* add a label */ +int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen, + int ifindex, u32 label, int replace) +{ + struct ip6addrlbl_entry *newp; + int ret = 0; + + ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", + __FUNCTION__, + NIP6(*prefix), prefixlen, + ifindex, + (unsigned int)label, + replace); + + newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label); + if (IS_ERR(newp)) + return PTR_ERR(newp); + spin_lock(&ip6addrlbl_table.lock); + ret = __ip6addrlbl_add(newp, replace); + spin_unlock(&ip6addrlbl_table.lock); + if (ret) + ip6addrlbl_free(newp); + return ret; +} + +/* remove a label */ +int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, + int ifindex) +{ + struct ip6addrlbl_entry *p = NULL; + struct hlist_node *pos, *n; + int ret = -ESRCH; + + ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n", + __FUNCTION__, + NIP6(*prefix), prefixlen, + ifindex); + + hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { + if (p->prefixlen == prefixlen && + p->ifindex == ifindex && + ipv6_addr_equal(&p->prefix, prefix)) { + hlist_del_rcu(&p->list); + ip6addrlbl_put(p); + call_rcu(&p->rcu, ip6addrlbl_free_rcu); + ret = 0; + break; + } + } + return ret; +} + +int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, + int ifindex) +{ + 
struct in6_addr prefix_buf; + int ret; + + ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n", + __FUNCTION__, + NIP6(*prefix), prefixlen, + ifindex); + + ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); + spin_lock(&ip6addrlbl_table.lock); + ret = __ip6addrlbl_del(&prefix_buf, prefixlen, ifindex); + spin_unlock(&ip6addrlbl_table.lock); + return ret; +} + +/* add default label */ +static __init int ip6addrlbl_init(void) +{ + int err = 0; + int i; + + ADDRLABEL(KERN_DEBUG "%s()\n", __FUNCTION__); + + for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { + int ret = ip6addrlbl_add(ip6addrlbl_init_table[i].prefix, + ip6addrlbl_init_table[i].prefixlen, + 0, + ip6addrlbl_init_table[i].label, 0); + /* XXX: should we free all rules when we catch an error? */ + if (ret && (!err || err != -ENOMEM)) + err = ret; + } + return err; +} + +int __init ipv6_addr_label_init(void) +{ + spin_lock_init(&ip6addrlbl_table.lock); + + return ip6addrlbl_init(); +} + +static const struct nla_policy ifal_policy[IFAL_MAX+1] = { + [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), }, + [IFAL_LABEL] = { .len = sizeof(u32), }, +}; + +static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, + void *arg) +{ + struct ifaddrlblmsg *ifal; + struct nlattr *tb[IFAL_MAX+1]; + struct in6_addr *pfx; + u32 label; + int err = 0; + + err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); + if (err < 0) + return err; + + ifal = nlmsg_data(nlh); + + if (ifal->ifal_family != AF_INET6 || + ifal->ifal_prefixlen > 128) + return -EINVAL; + + if (ifal->ifal_index && + !__dev_get_by_index(&init_net, ifal->ifal_index)) + return -EINVAL; + + if (!tb[IFAL_ADDRESS]) + return -EINVAL; + + pfx = nla_data(tb[IFAL_ADDRESS]); + if (!pfx) + return -EINVAL; + + if (!tb[IFAL_LABEL]) + return -EINVAL; + label = nla_get_u32(tb[IFAL_LABEL]); + if (label == IPV6_ADDR_LABEL_DEFAULT) + return -EINVAL; + + switch(nlh->nlmsg_type) { + case RTM_NEWADDRLABEL: + err = 
ip6addrlbl_add(pfx, ifal->ifal_prefixlen, + ifal->ifal_index, label, + nlh->nlmsg_flags & NLM_F_REPLACE); + break; + case RTM_DELADDRLABEL: + err = ip6addrlbl_del(pfx, ifal->ifal_prefixlen, + ifal->ifal_index); + break; + default: + err = -EOPNOTSUPP; + } + return err; +} + +static inline void ip6addrlbl_putmsg(struct nlmsghdr *nlh, + int prefixlen, int ifindex, u32 lseq) +{ + struct ifaddrlblmsg *ifal = nlmsg_data(nlh); + ifal->ifal_family = AF_INET6; + ifal->ifal_prefixlen = prefixlen; + ifal->ifal_flags = 0; + ifal->ifal_index = ifindex; + ifal->ifal_seq = lseq; +}; + +static int ip6addrlbl_fill(struct sk_buff *skb, + struct ip6addrlbl_entry *p, + u32 lseq, + u32 pid, u32 seq, int event, + unsigned int flags) +{ + struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event, + sizeof(struct ifaddrlblmsg), flags); + if (!nlh) + return -EMSGSIZE; + + ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq); + + if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 || + nla_put_u32(skb, IFAL_LABEL, p->label) < 0) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } + + return nlmsg_end(skb, nlh); +} + +static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ip6addrlbl_entry *p; + struct hlist_node *pos; + int idx = 0, s_idx = cb->args[0]; + int err; + + rcu_read_lock(); + hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { + if (idx >= s_idx) { + if ((err = ip6addrlbl_fill(skb, p, + ip6addrlbl_table.seq, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_NEWADDRLABEL, + NLM_F_MULTI)) <= 0) + break; + } + idx++; + } + rcu_read_unlock(); + cb->args[0] = idx; + return skb->len; +} + +static inline int ip6addrlbl_msgsize(void) +{ + return (NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) + + nla_total_size(16) /* IFAL_ADDRESS */ + + nla_total_size(4) /* IFAL_LABEL */ + ); +} + +static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, + void *arg) +{ + struct ifaddrlblmsg *ifal; + struct nlattr *tb[IFAL_MAX+1]; + struct 
in6_addr *addr; + u32 lseq; + int err = 0; + struct ip6addrlbl_entry *p; + struct sk_buff *skb; + + err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); + if (err < 0) + return err; + + ifal = nlmsg_data(nlh); + + if (ifal->ifal_family != AF_INET6 || + ifal->ifal_prefixlen != 128) + return -EINVAL; + + if (ifal->ifal_index && + !__dev_get_by_index(&init_net, ifal->ifal_index)) + return -EINVAL; + + if (!tb[IFAL_ADDRESS]) + return -EINVAL; + + addr = nla_data(tb[IFAL_ADDRESS]); + if (!addr) + return -EINVAL; + + rcu_read_lock(); + p = __ipv6_addr_label(addr, ipv6_addr_type(addr), ifal->ifal_index); + if (p && ip6addrlbl_hold(p)) + p = NULL; + lseq = ip6addrlbl_table.seq; + rcu_read_unlock(); + + if (!p) { + err = -ESRCH; + goto out; + } + + if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) { + ip6addrlbl_put(p); + return -ENOBUFS; + } + + err = ip6addrlbl_fill(skb, p, lseq, + NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + RTM_NEWADDRLABEL, 0); + + ip6addrlbl_put(p); + + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto out; + } + + err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); +out: + return err; +} + +void __init ipv6_addr_label_rtnl_register(void) +{ + __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, NULL); + __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, NULL); + __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, ip6addrlbl_dump); +} + -- cgit v1.2.3 From b854272b3c732316676e9128f7b9e6f1e1ff88b0 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Sat, 1 Dec 2007 00:21:31 +1100 Subject: [NET]: Modify all rtnetlink methods to only work in the initial namespace (v2) Before I can enable rtnetlink to work in all network namespaces I need to be certain that something won't break. So this patch deliberately disables all of the rtnetlink methods in everything except the initial network namespace. After the methods have been audited this extra check can be disabled.
Changes from v1: - added IPv6 addrlabel protection Signed-off-by: Denis V. Lunev Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller Signed-off-by: Herbert Xu --- net/ipv6/addrconf.c | 31 +++++++++++++++++++++++++++++++ net/ipv6/addrlabel.c | 12 ++++++++++++ net/ipv6/ip6_fib.c | 4 ++++ net/ipv6/route.c | 12 ++++++++++++ 4 files changed, 59 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a70cecf8fc8..26de8ee5095 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2971,11 +2971,15 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { static int inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; int err; + if (net != &init_net) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) return err; @@ -3028,6 +3032,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, static int inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; @@ -3037,6 +3042,9 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) u8 ifa_flags; int err; + if (net != &init_net) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) return err; @@ -3310,26 +3318,42 @@ done: static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; enum addr_type_t type = UNICAST_ADDR; + + if (net != &init_net) + return 0; + return inet6_dump_addr(skb, cb, type); } static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; enum addr_type_t type = MULTICAST_ADDR; + + if (net != &init_net) + return 0; + return 
inet6_dump_addr(skb, cb, type); } static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; enum addr_type_t type = ANYCAST_ADDR; + + if (net != &init_net) + return 0; + return inet6_dump_addr(skb, cb, type); } static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = in_skb->sk->sk_net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *addr = NULL; @@ -3338,6 +3362,9 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, struct sk_buff *skb; int err; + if (net != &init_net) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) goto errout; @@ -3555,11 +3582,15 @@ nla_put_failure: static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int idx, err; int s_idx = cb->args[0]; struct net_device *dev; struct inet6_dev *idev; + if (net != &init_net) + return 0; + read_lock(&dev_base_lock); idx = 0; for_each_netdev(&init_net, dev) { diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 204d4d66834..b9b5d570714 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -361,12 +361,16 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = { static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifaddrlblmsg *ifal; struct nlattr *tb[IFAL_MAX+1]; struct in6_addr *pfx; u32 label; int err = 0; + if (net != &init_net) + return 0; + err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); if (err < 0) return err; @@ -445,11 +449,15 @@ static int ip6addrlbl_fill(struct sk_buff *skb, static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; struct ip6addrlbl_entry *p; struct hlist_node *pos; int idx = 0, s_idx = cb->args[0]; int err; + if (net != &init_net) + 
return 0; + rcu_read_lock(); hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { if (idx >= s_idx) { @@ -479,6 +487,7 @@ static inline int ip6addrlbl_msgsize(void) static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = in_skb->sk->sk_net; struct ifaddrlblmsg *ifal; struct nlattr *tb[IFAL_MAX+1]; struct in6_addr *addr; @@ -487,6 +496,9 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, struct ip6addrlbl_entry *p; struct sk_buff *skb; + if (net != &init_net) + return 0; + err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); if (err < 0) return err; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 946cf389ab9..31b60a02512 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -361,6 +361,7 @@ end: static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; unsigned int h, s_h; unsigned int e = 0, s_e; struct rt6_rtnl_dump_arg arg; @@ -369,6 +370,9 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) struct hlist_node *node; int res = 0; + if (net != &init_net) + return 0; + s_h = cb->args[0]; s_e = cb->args[1]; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4ef2cfaa346..5e1c5796761 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2003,9 +2003,13 @@ errout: static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib6_config cfg; int err; + if (net != &init_net) + return -EINVAL; + err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; @@ -2015,9 +2019,13 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib6_config cfg; int err; + if (net != &init_net) + return -EINVAL; + err = rtm_to_fib6_config(skb, nlh, &cfg); if 
(err < 0) return err; @@ -2152,6 +2160,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = in_skb->sk->sk_net; struct nlattr *tb[RTA_MAX+1]; struct rt6_info *rt; struct sk_buff *skb; @@ -2159,6 +2168,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void struct flowi fl; int err, iif = 0; + if (net != &init_net) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); if (err < 0) goto errout; -- cgit v1.2.3 From 97c53cacf00d1f5aa04adabfebcc806ca8b22b10 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 19 Nov 2007 22:26:51 -0800 Subject: [NET]: Make rtnetlink infrastructure network namespace aware (v3) After this patch none of the netlink callback support anything except the initial network namespace but the rtnetlink infrastructure now handles multiple network namespaces. Changes from v2: - IPv6 addrlabel processing Changes from v1: - no need for special rtnl_unlock handling - fixed IPv6 ndisc Signed-off-by: Denis V. Lunev Signed-off-by: Eric W. Biederman Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 14 +++++++------- net/ipv6/addrlabel.c | 2 +- net/ipv6/ndisc.c | 5 +++-- net/ipv6/route.c | 6 +++--- 4 files changed, 14 insertions(+), 13 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 26de8ee5095..6c8b193474b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3397,7 +3397,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, kfree_skb(skb); goto errout_ifa; } - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); errout_ifa: in6_ifa_put(ifa); errout: @@ -3420,10 +3420,10 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); + rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err); } static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, @@ -3628,10 +3628,10 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); + rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err); } static inline size_t inet6_prefix_nlmsg_size(void) @@ -3697,10 +3697,10 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err); + rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_PREFIX, err); } static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) diff 
--git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index b9b5d570714..6f1ca607edd 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -549,7 +549,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, goto out; } - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); out: return err; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b2531f80317..b87f9d245e2 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1049,7 +1049,8 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) &ipv6_hdr(ra)->saddr); nlmsg_end(skb, nlh); - err = rtnl_notify(skb, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, &init_net, 0, RTNLGRP_ND_USEROPT, NULL, + GFP_ATOMIC); if (err < 0) goto errout; @@ -1059,7 +1060,7 @@ nla_put_failure: nlmsg_free(skb); err = -EMSGSIZE; errout: - rtnl_set_sk_err(RTNLGRP_ND_USEROPT, err); + rtnl_set_sk_err(&init_net, RTNLGRP_ND_USEROPT, err); } static void ndisc_router_discovery(struct sk_buff *skb) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5e1c5796761..d7ec4c9ffc4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2230,7 +2230,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void goto errout; } - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); errout: return err; } @@ -2260,10 +2260,10 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); + err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err); + rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err); } /* -- cgit v1.2.3 From 69d6da0b0faa70249a243a14e6066c013e9294e5 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 19 Nov 2007 22:35:57 -0800 
Subject: [IPv6] RAW: Compact the API for the kernel Same as in the previous patch for ipv4, compact the API and hide hash table and rwlock inside the raw.c file. Plus fix some "bad" places from checkpatch.pl point of view (assignments inside if()). Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 15 +-------------- net/ipv6/ip6_input.c | 9 +++------ net/ipv6/raw.c | 52 ++++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 48 insertions(+), 28 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index f1240688dc5..93c96cfd5ee 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -555,9 +555,7 @@ out: static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) { - struct in6_addr *saddr, *daddr; struct inet6_protocol *ipprot; - struct sock *sk; int inner_offset; int hash; u8 nexthdr; @@ -579,9 +577,6 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) if (!pskb_may_pull(skb, inner_offset+8)) return; - saddr = &ipv6_hdr(skb)->saddr; - daddr = &ipv6_hdr(skb)->daddr; - /* BUGGG_FUTURE: we should try to parse exthdrs in this packet. Without this we will not able f.e. to make source routed pmtu discovery. 
@@ -597,15 +592,7 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) ipprot->err_handler(skb, NULL, type, code, inner_offset, info); rcu_read_unlock(); - read_lock(&raw_v6_lock); - if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) { - while ((sk = __raw_v6_lookup(sk, nexthdr, saddr, daddr, - IP6CB(skb)->iif))) { - rawv6_err(sk, skb, NULL, type, code, inner_offset, info); - sk = sk_next(sk); - } - } - read_unlock(&raw_v6_lock); + raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info); } /* diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 79610b4bad3..178aebc0427 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -153,9 +153,8 @@ out: static int ip6_input_finish(struct sk_buff *skb) { struct inet6_protocol *ipprot; - struct sock *raw_sk; unsigned int nhoff; - int nexthdr; + int nexthdr, raw; u8 hash; struct inet6_dev *idev; @@ -171,9 +170,7 @@ resubmit: nhoff = IP6CB(skb)->nhoff; nexthdr = skb_network_header(skb)[nhoff]; - raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); - if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) - raw_sk = NULL; + raw = raw6_local_deliver(skb, nexthdr); hash = nexthdr & (MAX_INET_PROTOS - 1); if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) { @@ -206,7 +203,7 @@ resubmit: else if (ret == 0) IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS); } else { - if (!raw_sk) { + if (!raw) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { IP6_INC_STATS_BH(idev, IPSTATS_MIB_INUNKNOWNPROTOS); icmpv6_send(skb, ICMPV6_PARAMPROB, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ad622cc11bd..53f01b4982c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -60,8 +60,10 @@ #include #include -struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; -DEFINE_RWLOCK(raw_v6_lock); +#define RAWV6_HTABLE_SIZE MAX_INET_PROTOS + +static struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; +static DEFINE_RWLOCK(raw_v6_lock); static void raw_v6_hash(struct sock *sk) { @@ -83,10 +85,8 
@@ static void raw_v6_unhash(struct sock *sk) } -/* Grumble... icmp and ip_input want to get at this... */ -struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, - struct in6_addr *loc_addr, struct in6_addr *rmt_addr, - int dif) +static struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, + struct in6_addr *loc_addr, struct in6_addr *rmt_addr, int dif) { struct hlist_node *node; int is_multicast = ipv6_addr_is_multicast(loc_addr); @@ -167,7 +167,7 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister); * * Caller owns SKB so we must make clones. */ -int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) +static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) { struct in6_addr *saddr; struct in6_addr *daddr; @@ -242,6 +242,17 @@ out: return delivered; } +int raw6_local_deliver(struct sk_buff *skb, int nexthdr) +{ + struct sock *raw_sk; + + raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); + if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) + raw_sk = NULL; + + return raw_sk != NULL; +} + /* This cleans up af_inet6 a bit. 
-DaveM */ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -316,7 +327,7 @@ out: return err; } -void rawv6_err(struct sock *sk, struct sk_buff *skb, +static void rawv6_err(struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { @@ -350,6 +361,31 @@ void rawv6_err(struct sock *sk, struct sk_buff *skb, } } +void raw6_icmp_error(struct sk_buff *skb, int nexthdr, + int type, int code, int inner_offset, __be32 info) +{ + struct sock *sk; + int hash; + struct in6_addr *saddr, *daddr; + + hash = nexthdr & (RAWV6_HTABLE_SIZE - 1); + + read_lock(&raw_v6_lock); + sk = sk_head(&raw_v6_htable[hash]); + if (sk != NULL) { + saddr = &ipv6_hdr(skb)->saddr; + daddr = &ipv6_hdr(skb)->daddr; + + while ((sk = __raw_v6_lookup(sk, nexthdr, saddr, daddr, + IP6CB(skb)->iif))) { + rawv6_err(sk, skb, NULL, type, code, + inner_offset, info); + sk = sk_next(sk); + } + } + read_unlock(&raw_v6_lock); +} + static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) { if ((raw6_sk(sk)->checksum || sk->sk_filter) && -- cgit v1.2.3 From b673e4dfc8f29e5bfe4d342029b793e9d504f6dd Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 19 Nov 2007 22:36:45 -0800 Subject: [RAW]: Introduce raw_hashinfo structure The ipv4/raw.c and ipv6/raw.c contain many common code (most of which is proc interface) which can be consolidated. Most of the places to consolidate deal with the raw sockets hashtable, so introduce a struct raw_hashinfo which describes the raw sockets hash. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/raw.c | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 53f01b4982c..15c72a6365a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -54,34 +54,34 @@ #include #endif +#include #include #include #include #include -#define RAWV6_HTABLE_SIZE MAX_INET_PROTOS - -static struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; -static DEFINE_RWLOCK(raw_v6_lock); +static struct raw_hashinfo raw_v6_hashinfo = { + .lock = __RW_LOCK_UNLOCKED(), +}; static void raw_v6_hash(struct sock *sk) { - struct hlist_head *list = &raw_v6_htable[inet_sk(sk)->num & - (RAWV6_HTABLE_SIZE - 1)]; + struct hlist_head *list = &raw_v6_hashinfo.ht[inet_sk(sk)->num & + (RAW_HTABLE_SIZE - 1)]; - write_lock_bh(&raw_v6_lock); + write_lock_bh(&raw_v6_hashinfo.lock); sk_add_node(sk, list); sock_prot_inc_use(sk->sk_prot); - write_unlock_bh(&raw_v6_lock); + write_unlock_bh(&raw_v6_hashinfo.lock); } static void raw_v6_unhash(struct sock *sk) { - write_lock_bh(&raw_v6_lock); + write_lock_bh(&raw_v6_hashinfo.lock); if (sk_del_node_init(sk)) sock_prot_dec_use(sk->sk_prot); - write_unlock_bh(&raw_v6_lock); + write_unlock_bh(&raw_v6_hashinfo.lock); } @@ -180,8 +180,8 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) hash = nexthdr & (MAX_INET_PROTOS - 1); - read_lock(&raw_v6_lock); - sk = sk_head(&raw_v6_htable[hash]); + read_lock(&raw_v6_hashinfo.lock); + sk = sk_head(&raw_v6_hashinfo.ht[hash]); /* * The first socket found will be delivered after @@ -238,7 +238,7 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) IP6CB(skb)->iif); } out: - read_unlock(&raw_v6_lock); + read_unlock(&raw_v6_hashinfo.lock); return delivered; } @@ -246,7 +246,7 @@ int raw6_local_deliver(struct sk_buff *skb, int nexthdr) { struct sock *raw_sk; - raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); + raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & 
(MAX_INET_PROTOS - 1)]); if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) raw_sk = NULL; @@ -368,10 +368,10 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, int hash; struct in6_addr *saddr, *daddr; - hash = nexthdr & (RAWV6_HTABLE_SIZE - 1); + hash = nexthdr & (RAW_HTABLE_SIZE - 1); - read_lock(&raw_v6_lock); - sk = sk_head(&raw_v6_htable[hash]); + read_lock(&raw_v6_hashinfo.lock); + sk = sk_head(&raw_v6_hashinfo.ht[hash]); if (sk != NULL) { saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; @@ -383,7 +383,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, sk = sk_next(sk); } } - read_unlock(&raw_v6_lock); + read_unlock(&raw_v6_hashinfo.lock); } static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) @@ -1221,8 +1221,9 @@ static struct sock *raw6_get_first(struct seq_file *seq) struct hlist_node *node; struct raw6_iter_state* state = raw6_seq_private(seq); - for (state->bucket = 0; state->bucket < RAWV6_HTABLE_SIZE; ++state->bucket) - sk_for_each(sk, node, &raw_v6_htable[state->bucket]) + for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE; + ++state->bucket) + sk_for_each(sk, node, &raw_v6_hashinfo.ht[state->bucket]) if (sk->sk_family == PF_INET6) goto out; sk = NULL; @@ -1240,8 +1241,8 @@ try_again: ; } while (sk && sk->sk_family != PF_INET6); - if (!sk && ++state->bucket < RAWV6_HTABLE_SIZE) { - sk = sk_head(&raw_v6_htable[state->bucket]); + if (!sk && ++state->bucket < RAW_HTABLE_SIZE) { + sk = sk_head(&raw_v6_hashinfo.ht[state->bucket]); goto try_again; } return sk; @@ -1258,7 +1259,7 @@ static struct sock *raw6_get_idx(struct seq_file *seq, loff_t pos) static void *raw6_seq_start(struct seq_file *seq, loff_t *pos) { - read_lock(&raw_v6_lock); + read_lock(&raw_v6_hashinfo.lock); return *pos ? 
raw6_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -1276,7 +1277,7 @@ static void *raw6_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void raw6_seq_stop(struct seq_file *seq, void *v) { - read_unlock(&raw_v6_lock); + read_unlock(&raw_v6_hashinfo.lock); } static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) -- cgit v1.2.3 From 65b4c50b47c4ac3d2b5a82e5553b8e5613fb9585 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 19 Nov 2007 22:37:24 -0800 Subject: [RAW]: Consolidate proto->hash callback Having the raw_hashinfo it's easy to consolidate the raw[46]_hash functions. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/raw.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 15c72a6365a..70db6f49453 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -67,13 +67,7 @@ static struct raw_hashinfo raw_v6_hashinfo = { static void raw_v6_hash(struct sock *sk) { - struct hlist_head *list = &raw_v6_hashinfo.ht[inet_sk(sk)->num & - (RAW_HTABLE_SIZE - 1)]; - - write_lock_bh(&raw_v6_hashinfo.lock); - sk_add_node(sk, list); - sock_prot_inc_use(sk->sk_prot); - write_unlock_bh(&raw_v6_hashinfo.lock); + raw_hash_sk(sk, &raw_v6_hashinfo); } static void raw_v6_unhash(struct sock *sk) -- cgit v1.2.3 From ab70768ec78c6784958bab3b58fbe3f4150006df Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 19 Nov 2007 22:37:58 -0800 Subject: [RAW]: Consolidate proto->unhash callback Same as the ->hash one, this is easily consolidated. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/raw.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 70db6f49453..422d27cfbe1 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -72,10 +72,7 @@ static void raw_v6_hash(struct sock *sk) static void raw_v6_unhash(struct sock *sk) { - write_lock_bh(&raw_v6_hashinfo.lock); - if (sk_del_node_init(sk)) - sock_prot_dec_use(sk->sk_prot); - write_unlock_bh(&raw_v6_hashinfo.lock); + raw_unhash_sk(sk, &raw_v6_hashinfo); } -- cgit v1.2.3 From 42a73808ed4f30b739eb52bcbb33a02fe62ceef5 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 19 Nov 2007 22:38:33 -0800 Subject: [RAW]: Consolidate proc interface. Both ipv6/raw.c and ipv4/raw.c use the seq files to walk through the raw sockets hash and show them. The "walking" code is rather huge, but is identical in both cases. The difference is the hash table to walk over and the protocol family to check (this was not in the first version of the patch, which was noticed by YOSHIFUJI). Make the ->open store the needed hash table and the family on the allocated raw_iter_state and make the start/next/stop callbacks work with it. This removes most of the code. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/raw.c | 82 ++++------------------------------------------------------ 1 file changed, 5 insertions(+), 77 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 422d27cfbe1..b34631e1b01 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1200,77 +1200,6 @@ struct proto rawv6_prot = { }; #ifdef CONFIG_PROC_FS -struct raw6_iter_state { - int bucket; -}; - -#define raw6_seq_private(seq) ((struct raw6_iter_state *)(seq)->private) - -static struct sock *raw6_get_first(struct seq_file *seq) -{ - struct sock *sk; - struct hlist_node *node; - struct raw6_iter_state* state = raw6_seq_private(seq); - - for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE; - ++state->bucket) - sk_for_each(sk, node, &raw_v6_hashinfo.ht[state->bucket]) - if (sk->sk_family == PF_INET6) - goto out; - sk = NULL; -out: - return sk; -} - -static struct sock *raw6_get_next(struct seq_file *seq, struct sock *sk) -{ - struct raw6_iter_state* state = raw6_seq_private(seq); - - do { - sk = sk_next(sk); -try_again: - ; - } while (sk && sk->sk_family != PF_INET6); - - if (!sk && ++state->bucket < RAW_HTABLE_SIZE) { - sk = sk_head(&raw_v6_hashinfo.ht[state->bucket]); - goto try_again; - } - return sk; -} - -static struct sock *raw6_get_idx(struct seq_file *seq, loff_t pos) -{ - struct sock *sk = raw6_get_first(seq); - if (sk) - while (pos && (sk = raw6_get_next(seq, sk)) != NULL) - --pos; - return pos ? NULL : sk; -} - -static void *raw6_seq_start(struct seq_file *seq, loff_t *pos) -{ - read_lock(&raw_v6_hashinfo.lock); - return *pos ? 
raw6_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; -} - -static void *raw6_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct sock *sk; - - if (v == SEQ_START_TOKEN) - sk = raw6_get_first(seq); - else - sk = raw6_get_next(seq, v); - ++*pos; - return sk; -} - -static void raw6_seq_stop(struct seq_file *seq, void *v) -{ - read_unlock(&raw_v6_hashinfo.lock); -} - static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) { struct ipv6_pinfo *np = inet6_sk(sp); @@ -1308,21 +1237,20 @@ static int raw6_seq_show(struct seq_file *seq, void *v) "st tx_queue rx_queue tr tm->when retrnsmt" " uid timeout inode drops\n"); else - raw6_sock_seq_show(seq, v, raw6_seq_private(seq)->bucket); + raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); return 0; } static const struct seq_operations raw6_seq_ops = { - .start = raw6_seq_start, - .next = raw6_seq_next, - .stop = raw6_seq_stop, + .start = raw_seq_start, + .next = raw_seq_next, + .stop = raw_seq_stop, .show = raw6_seq_show, }; static int raw6_seq_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &raw6_seq_ops, - sizeof(struct raw6_iter_state)); + return raw_seq_open(file, &raw_v6_hashinfo, PF_INET6); } static const struct file_operations raw6_seq_fops = { -- cgit v1.2.3 From f1267347353b586e1a89a7d02c142be975bae699 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 23 Nov 2007 21:28:44 +0800 Subject: [IPV6]: Correct the comment concerning inetsw6 table It seems that net/ipv6/af_inet6.c was copied from net/ipv4/af_inet.c, but one comment was not fixed. Signed-off-by: Pavel Emelyanov Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/af_inet6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 85178f71b21..64135e2a309 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -68,7 +68,7 @@ MODULE_LICENSE("GPL"); int sysctl_ipv6_bindv6only __read_mostly; -/* The inetsw table contains everything that inet_create needs to +/* The inetsw6 table contains everything that inet6_create needs to * build a new socket. */ static struct list_head inetsw6[SOCK_MAX]; -- cgit v1.2.3 From c7dc89c0ac8e7c3796bff91becf58ccdbcaf9f18 Mon Sep 17 00:00:00 2001 From: "Fred L. Templin" Date: Thu, 29 Nov 2007 22:11:40 +1100 Subject: [IPV6]: Add RFC4214 support This patch includes support for the Intra-Site Automatic Tunnel Addressing Protocol (ISATAP) per RFC4214. It uses the SIT module, and is configured using extensions to the "iproute2" utility. The diffs are specific to the Linux 2.6.24-rc2 kernel distribution. This version includes the diff for ./include/linux/if.h which was missing in the v2.4 submission and is needed to make the patch compile. The patch has been installed, compiled and tested in a clean 2.6.24-rc2 kernel build area. Signed-off-by: Fred L. Templin Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 22 ++++++++++++++- net/ipv6/route.c | 2 ++ net/ipv6/sit.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6c8b193474b..f177424c186 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -377,6 +377,13 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) "%s: Disabled Privacy Extensions\n", dev->name); ndev->cnf.use_tempaddr = -1; + + if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) { + printk(KERN_INFO + "%s: Disabled Multicast RS\n", + dev->name); + ndev->cnf.rtr_solicits = 0; + } } else { in6_dev_hold(ndev); ipv6_regen_rndid((unsigned long) ndev); @@ -1409,6 +1416,9 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) return addrconf_ifid_arcnet(eui, dev); case ARPHRD_INFINIBAND: return addrconf_ifid_infiniband(eui, dev); + case ARPHRD_SIT: + if (dev->priv_flags & IFF_ISATAP) + return ipv6_isatap_eui64(eui, *(__be32 *)dev->dev_addr); } return -1; } @@ -1444,7 +1454,7 @@ regen: * * - Reserved subnet anycast (RFC 2526) * 11111101 11....11 1xxxxxxx - * - ISATAP (draft-ietf-ngtrans-isatap-13.txt) 5.1 + * - ISATAP (RFC4214) 6.1 * 00-00-5E-FE-xx-xx-xx-xx * - value 0 * - XXX: already assigned to an address on the device @@ -2175,6 +2185,16 @@ static void addrconf_sit_config(struct net_device *dev) return; } + if (dev->priv_flags & IFF_ISATAP) { + struct in6_addr addr; + + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); + addrconf_prefix_route(&addr, 64, dev, 0, 0); + if (!ipv6_generate_eui64(addr.s6_addr + 8, dev)) + addrconf_add_linklocal(idev, &addr); + return; + } + sit_add_v4_addrs(idev); if (dev->flags&IFF_POINTOPOINT) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d7ec4c9ffc4..e2c980dbe52 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1659,6 +1659,8 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct 
net_device *d return rt; } +EXPORT_SYMBOL(rt6_get_dflt_router); + struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 71433d29d88..b3b8513e9cb 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -16,6 +16,7 @@ * Changes: * Roger Venning : 6to4 support * Nate Thompson : 6to4 support + * Fred L. Templin : isatap support */ #include @@ -182,6 +183,9 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int dev->init = ipip6_tunnel_init; nt->parms = *parms; + if (parms->i_flags & SIT_ISATAP) + dev->priv_flags |= IFF_ISATAP; + if (register_netdevice(dev) < 0) { free_netdev(dev); goto failed; @@ -364,6 +368,48 @@ static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) IP6_ECN_set_ce(ipv6_hdr(skb)); } +/* ISATAP (RFC4214) - check source address */ +static int +isatap_srcok(struct sk_buff *skb, struct iphdr *iph, struct net_device *dev) +{ + struct neighbour *neigh; + struct dst_entry *dst; + struct rt6_info *rt; + struct flowi fl; + struct in6_addr *addr6; + struct in6_addr rtr; + struct ipv6hdr *iph6; + int ok = 0; + + /* from onlink default router */ + ipv6_addr_set(&rtr, htonl(0xFE800000), 0, 0, 0); + ipv6_isatap_eui64(rtr.s6_addr + 8, iph->saddr); + if ((rt = rt6_get_dflt_router(&rtr, dev))) { + dst_release(&rt->u.dst); + return 1; + } + + iph6 = ipv6_hdr(skb); + memset(&fl, 0, sizeof(fl)); + fl.proto = iph6->nexthdr; + ipv6_addr_copy(&fl.fl6_dst, &iph6->saddr); + fl.oif = dev->ifindex; + security_skb_classify_flow(skb, &fl); + + dst = ip6_route_output(NULL, &fl); + if (!dst->error && (dst->dev == dev) && (neigh = dst->neighbour)) { + + addr6 = (struct in6_addr*)&neigh->primary_key; + + /* from correct previous hop */ + if (ipv6_addr_is_isatap(addr6) && + (addr6->s6_addr32[3] == iph->saddr)) + ok = 1; + } + dst_release(dst); + return ok; +} + static int ipip6_rcv(struct sk_buff *skb) { struct iphdr *iph; @@ -382,6 
+428,14 @@ static int ipip6_rcv(struct sk_buff *skb) IPCB(skb)->flags = 0; skb->protocol = htons(ETH_P_IPV6); skb->pkt_type = PACKET_HOST; + + if ((tunnel->dev->priv_flags & IFF_ISATAP) && + !isatap_srcok(skb, iph, tunnel->dev)) { + tunnel->stat.rx_errors++; + read_unlock(&ipip6_lock); + kfree_skb(skb); + return 0; + } tunnel->stat.rx_packets++; tunnel->stat.rx_bytes += skb->len; skb->dev = tunnel->dev; @@ -444,6 +498,29 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (skb->protocol != htons(ETH_P_IPV6)) goto tx_error; + /* ISATAP (RFC4214) - must come before 6to4 */ + if (dev->priv_flags & IFF_ISATAP) { + struct neighbour *neigh = NULL; + + if (skb->dst) + neigh = skb->dst->neighbour; + + if (neigh == NULL) { + if (net_ratelimit()) + printk(KERN_DEBUG "sit: nexthop == NULL\n"); + goto tx_error; + } + + addr6 = (struct in6_addr*)&neigh->primary_key; + addr_type = ipv6_addr_type(addr6); + + if ((addr_type & IPV6_ADDR_UNICAST) && + ipv6_addr_is_isatap(addr6)) + dst = addr6->s6_addr32[3]; + else + goto tx_error; + } + if (!dst) dst = try_6to4(&iph6->daddr); -- cgit v1.2.3 From f68635e627f9b21db05102e2d8fcd2894493d6bc Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sun, 2 Dec 2007 00:21:52 +1100 Subject: [IPV6]: Cleanup the addconf_sysctl_register This only includes fixing the space-indented lines and removing one unneeded else after the goto. Signed-off-by: Pavel Emelyanov Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f177424c186..2d2886a0b66 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4127,7 +4127,8 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); if (t == NULL) - return; + goto out; + for (i=0; t->addrconf_vars[i].data; i++) { t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ @@ -4147,7 +4148,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf */ dev_name = kstrdup(dev_name, GFP_KERNEL); if (!dev_name) - goto free; + goto free; t->addrconf_dev[0].procname = dev_name; @@ -4159,16 +4160,15 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf t->sysctl_header = register_sysctl_table(t->addrconf_root_dir); if (t->sysctl_header == NULL) goto free_procname; - else - p->sysctl = t; + + p->sysctl = t; return; - /* error path */ - free_procname: +free_procname: kfree(dev_name); - free: +free: kfree(t); - +out: return; } -- cgit v1.2.3 From f52295a9c55ccb4d9b3580ce889f958ac740a44b Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sun, 2 Dec 2007 00:58:37 +1100 Subject: [IPV6]: Unify and cleanup calls to addrconf_sysctl_register Currently this call is (ab)used similar to devinet one - it registers sysctls for devices and for the "default" confs, while the "all" sysctls are registered separately. But unlike its devinet brother, the passed inet6_device is needed. The fix is to make a __addrconf_sysctl_register(), which registers sysctls for all "devices" we need, including "default" and "all" :) The original addrconf_sysctl_register() calls the introduced function, passing the inet6_device, device name and ifindex (to be used as procname and ctl_name) into it. 
Thanks to Herbert again for pointing out, that we can shrink the argument list to 1 :) Signed-off-by: Pavel Emelyanov Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2d2886a0b66..ea1673d2907 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -101,7 +101,7 @@ #define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b))) #ifdef CONFIG_SYSCTL -static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p); +static void addrconf_sysctl_register(struct inet6_dev *idev); static void addrconf_sysctl_unregister(struct ipv6_devconf *p); #endif @@ -400,7 +400,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) NET_IPV6_NEIGH, "ipv6", &ndisc_ifinfo_sysctl_change, NULL); - addrconf_sysctl_register(ndev, &ndev->cnf); + addrconf_sysctl_register(ndev); #endif /* protected by rtnl_lock */ rcu_assign_pointer(dev->ip6_ptr, ndev); @@ -2386,7 +2386,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, NET_IPV6, NET_IPV6_NEIGH, "ipv6", &ndisc_ifinfo_sysctl_change, NULL); - addrconf_sysctl_register(idev, &idev->cnf); + addrconf_sysctl_register(idev); #endif err = snmp6_register_dev(idev); if (err) @@ -4118,12 +4118,11 @@ static struct addrconf_sysctl_table }, }; -static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p) +static void __addrconf_sysctl_register(char *dev_name, int ctl_name, + struct inet6_dev *idev, struct ipv6_devconf *p) { int i; - struct net_device *dev = idev ? 
idev->dev : NULL; struct addrconf_sysctl_table *t; - char *dev_name = NULL; t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); if (t == NULL) @@ -4133,13 +4132,6 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ } - if (dev) { - dev_name = dev->name; - t->addrconf_dev[0].ctl_name = dev->ifindex; - } else { - dev_name = "default"; - t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT; - } /* * Make a copy of dev_name, because '.procname' is regarded as const @@ -4150,6 +4142,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf if (!dev_name) goto free; + t->addrconf_dev[0].ctl_name = ctl_name; t->addrconf_dev[0].procname = dev_name; t->addrconf_dev[0].child = t->addrconf_vars; @@ -4172,6 +4165,12 @@ out: return; } +static void addrconf_sysctl_register(struct inet6_dev *idev) +{ + __addrconf_sysctl_register(idev->dev->name, idev->dev->ifindex, + idev, &idev->cnf); +} + static void addrconf_sysctl_unregister(struct ipv6_devconf *p) { if (p->sysctl) { @@ -4270,9 +4269,10 @@ int __init addrconf_init(void) ipv6_addr_label_rtnl_register(); #ifdef CONFIG_SYSCTL - addrconf_sysctl.sysctl_header = - register_sysctl_table(addrconf_sysctl.addrconf_root_dir); - addrconf_sysctl_register(NULL, &ipv6_devconf_dflt); + __addrconf_sysctl_register("all", NET_PROTO_CONF_ALL, + NULL, &ipv6_devconf); + __addrconf_sysctl_register("default", NET_PROTO_CONF_DEFAULT, + NULL, &ipv6_devconf_dflt); #endif return 0; -- cgit v1.2.3 From 1dab62226dd9e0c0051229e7868363663546c772 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sun, 2 Dec 2007 00:59:38 +1100 Subject: [IPV6]: Use ctl paths to register addrconf sysctls This looks very much like the patch for ipv4's devinet. This is also intended to help us with the net namespaces and saves the ipv6.ko size by ~320 bytes. 
The difference from the first version is just the patch offsets, that changed due to changes in the patch #2. Signed-off-by: Pavel Emelyanov Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 80 ++++++++++++++--------------------------------------- 1 file changed, 20 insertions(+), 60 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ea1673d2907..dbff389b700 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3848,10 +3848,7 @@ static struct addrconf_sysctl_table { struct ctl_table_header *sysctl_header; ctl_table addrconf_vars[__NET_IPV6_MAX]; - ctl_table addrconf_dev[2]; - ctl_table addrconf_conf_dir[2]; - ctl_table addrconf_proto_dir[2]; - ctl_table addrconf_root_dir[2]; + char *dev_name; } addrconf_sysctl __read_mostly = { .sysctl_header = NULL, .addrconf_vars = { @@ -4072,50 +4069,6 @@ static struct addrconf_sysctl_table .ctl_name = 0, /* sentinel */ } }, - .addrconf_dev = { - { - .ctl_name = NET_PROTO_CONF_ALL, - .procname = "all", - .mode = 0555, - .child = addrconf_sysctl.addrconf_vars, - }, - { - .ctl_name = 0, /* sentinel */ - } - }, - .addrconf_conf_dir = { - { - .ctl_name = NET_IPV6_CONF, - .procname = "conf", - .mode = 0555, - .child = addrconf_sysctl.addrconf_dev, - }, - { - .ctl_name = 0, /* sentinel */ - } - }, - .addrconf_proto_dir = { - { - .ctl_name = NET_IPV6, - .procname = "ipv6", - .mode = 0555, - .child = addrconf_sysctl.addrconf_conf_dir, - }, - { - .ctl_name = 0, /* sentinel */ - } - }, - .addrconf_root_dir = { - { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - .child = addrconf_sysctl.addrconf_proto_dir, - }, - { - .ctl_name = 0, /* sentinel */ - } - }, }; static void __addrconf_sysctl_register(char *dev_name, int ctl_name, @@ -4124,6 +4077,17 @@ static void __addrconf_sysctl_register(char *dev_name, int ctl_name, int i; struct addrconf_sysctl_table *t; +#define ADDRCONF_CTL_PATH_DEV 3 + + struct ctl_path addrconf_ctl_path[] = { + { 
.procname = "net", .ctl_name = CTL_NET, }, + { .procname = "ipv6", .ctl_name = NET_IPV6, }, + { .procname = "conf", .ctl_name = NET_IPV6_CONF, }, + { /* to be set */ }, + { }, + }; + + t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); if (t == NULL) goto out; @@ -4138,19 +4102,15 @@ static void __addrconf_sysctl_register(char *dev_name, int ctl_name, * by sysctl and we wouldn't want anyone to change it under our feet * (see SIOCSIFNAME). */ - dev_name = kstrdup(dev_name, GFP_KERNEL); - if (!dev_name) + t->dev_name = kstrdup(dev_name, GFP_KERNEL); + if (!t->dev_name) goto free; - t->addrconf_dev[0].ctl_name = ctl_name; - t->addrconf_dev[0].procname = dev_name; - - t->addrconf_dev[0].child = t->addrconf_vars; - t->addrconf_conf_dir[0].child = t->addrconf_dev; - t->addrconf_proto_dir[0].child = t->addrconf_conf_dir; - t->addrconf_root_dir[0].child = t->addrconf_proto_dir; + addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].procname = t->dev_name; + addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].ctl_name = ctl_name; - t->sysctl_header = register_sysctl_table(t->addrconf_root_dir); + t->sysctl_header = register_sysctl_paths(addrconf_ctl_path, + t->addrconf_vars); if (t->sysctl_header == NULL) goto free_procname; @@ -4158,7 +4118,7 @@ static void __addrconf_sysctl_register(char *dev_name, int ctl_name, return; free_procname: - kfree(dev_name); + kfree(t->dev_name); free: kfree(t); out: @@ -4177,7 +4137,7 @@ static void addrconf_sysctl_unregister(struct ipv6_devconf *p) struct addrconf_sysctl_table *t = p->sysctl; p->sysctl = NULL; unregister_sysctl_table(t->sysctl_header); - kfree(t->addrconf_dev[0].procname); + kfree(t->dev_name); kfree(t); } } -- cgit v1.2.3 From cb75994ec311b2cd50e5205efdcc0696abd6675d Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Mon, 3 Dec 2007 22:33:28 +1100 Subject: [UDP]: Defer InDataGrams increment until recvmsg() does checksum Thanks dave, herbert, gerrit, andi and other people for your discussion about this problem. 
UdpInDatagrams can be confusing because it counts packets that might be dropped later. Move UdpInDatagrams into recvmsg() as allowed by the RFC. Signed-off-by: Wang Chen Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/udp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ee1cc3f8599..b0474a618bb 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -164,6 +164,8 @@ try_again: if (err) goto out_free; + UDP6_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); + sock_recv_timestamp(msg, sk, skb); /* Copy the address. */ @@ -292,7 +294,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag); goto drop; } - UDP6_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); + return 0; drop: UDP6_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag); -- cgit v1.2.3 From b2bf1e2659b1cba5e65f81781cfd530be447f80b Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Mon, 3 Dec 2007 22:34:16 +1100 Subject: [UDP]: Clean up for IS_UDPLITE macro Since we have macro IS_UDPLITE, we can use it. Signed-off-by: Wang Chen Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/udp.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b0474a618bb..77ab31b9923 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -260,6 +260,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int rc; + int is_udplite = IS_UDPLITE(sk); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto drop; @@ -267,7 +268,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) /* * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). 
*/ - if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { if (up->pcrlen == 0) { /* full coverage was set */ LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage" @@ -291,13 +292,13 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) - UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag); + UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); goto drop; } return 0; drop: - UDP6_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag); + UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return -1; } @@ -525,6 +526,7 @@ static int udp_v6_push_pending_frames(struct sock *sk) struct inet_sock *inet = inet_sk(sk); struct flowi *fl = &inet->cork.fl; int err = 0; + int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; /* Grab the skbuff where UDP header space exists. */ @@ -540,7 +542,7 @@ static int udp_v6_push_pending_frames(struct sock *sk) uh->len = htons(up->len); uh->check = 0; - if (up->pcflag) + if (is_udplite) csum = udplite_csum_outgoing(sk, skb); else csum = udp_csum_outgoing(sk, skb); @@ -556,7 +558,7 @@ out: up->len = 0; up->pending = 0; if (!err) - UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag); + UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); return err; } @@ -580,7 +582,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int err; int connected = 0; - int is_udplite = up->pcflag; + int is_udplite = IS_UDPLITE(sk); int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); /* destination address check */ -- cgit v1.2.3 From 2fcb45b6b87914f072314e5b5d9c196f45984683 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 3 Dec 2007 22:54:12 -0800 Subject: [IPSEC]: Use the correct family for input state lookup When merging the input paths of IPsec I accidentally left a 
hard-coded AF_INET for the state lookup call. This broke IPv6 obviously. This patch fixes it by getting the input callers to specify the family through skb->cb. Credit goes to Kazunori Miyazawa for diagnosing this and providing an initial patch. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_input.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index e2c3efd2579..74f3aacebb5 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -23,6 +23,7 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb) int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) { + XFRM_SPI_SKB_CB(skb)->family = AF_INET6; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); return xfrm_input(skb, nexthdr, spi, 0); } -- cgit v1.2.3 From d3c5ee6d545b5372fd525ebe16988a5b6efeceb0 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 4 Dec 2007 23:24:03 -0800 Subject: [NETFILTER]: x_tables: consistent and unique symbol names Give all Netfilter modules consistent and unique symbol names. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/ip6t_HL.c | 37 +++++++++++++++---------------- net/ipv6/netfilter/ip6t_LOG.c | 36 ++++++++++++++----------------- net/ipv6/netfilter/ip6t_REJECT.c | 37 +++++++++++++++---------------- net/ipv6/netfilter/ip6t_ah.c | 37 +++++++++++++------------------ net/ipv6/netfilter/ip6t_eui64.c | 28 +++++++++++------------- net/ipv6/netfilter/ip6t_frag.c | 38 ++++++++++++++------------------ net/ipv6/netfilter/ip6t_hbh.c | 42 ++++++++++++++++-------------------- net/ipv6/netfilter/ip6t_hl.c | 24 ++++++++++----------- net/ipv6/netfilter/ip6t_ipv6header.c | 38 ++++++++++++++------------------ net/ipv6/netfilter/ip6t_mh.c | 37 +++++++++++++------------------ net/ipv6/netfilter/ip6t_owner.c | 38 ++++++++++++++------------------ net/ipv6/netfilter/ip6t_rt.c | 37 +++++++++++++------------------ 12 files changed, 185 insertions(+), 244 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index 9afc836fd45..cefb4253711 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -18,12 +18,10 @@ MODULE_AUTHOR("Maciej Soltysiak "); MODULE_DESCRIPTION("IP6 tables Hop Limit modification module"); MODULE_LICENSE("GPL"); -static unsigned int ip6t_hl_target(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +static unsigned int +hl_tg6(struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, unsigned int hooknum, + const struct xt_target *target, const void *targinfo) { struct ipv6hdr *ip6h; const struct ip6t_HL_info *info = targinfo; @@ -58,11 +56,10 @@ static unsigned int ip6t_hl_target(struct sk_buff *skb, return XT_CONTINUE; } -static bool ip6t_hl_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool +hl_tg6_check(const char *tablename, const void *entry, + 
const struct xt_target *target, void *targinfo, + unsigned int hook_mask) { const struct ip6t_HL_info *info = targinfo; @@ -79,25 +76,25 @@ static bool ip6t_hl_checkentry(const char *tablename, return true; } -static struct xt_target ip6t_HL __read_mostly = { +static struct xt_target hl_tg6_reg __read_mostly = { .name = "HL", .family = AF_INET6, - .target = ip6t_hl_target, + .target = hl_tg6, .targetsize = sizeof(struct ip6t_HL_info), .table = "mangle", - .checkentry = ip6t_hl_checkentry, + .checkentry = hl_tg6_check, .me = THIS_MODULE }; -static int __init ip6t_hl_init(void) +static int __init hl_tg6_init(void) { - return xt_register_target(&ip6t_HL); + return xt_register_target(&hl_tg6_reg); } -static void __exit ip6t_hl_fini(void) +static void __exit hl_tg6_exit(void) { - xt_unregister_target(&ip6t_HL); + xt_unregister_target(&hl_tg6_reg); } -module_init(ip6t_hl_init); -module_exit(ip6t_hl_fini); +module_init(hl_tg6_init); +module_exit(hl_tg6_exit); diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 7a48c342df4..cd51c42727f 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -431,12 +431,9 @@ ip6t_log_packet(unsigned int pf, } static unsigned int -ip6t_log_target(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +log_tg6(struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, unsigned int hooknum, + const struct xt_target *target, const void *targinfo) { const struct ip6t_log_info *loginfo = targinfo; struct nf_loginfo li; @@ -450,11 +447,10 @@ ip6t_log_target(struct sk_buff *skb, } -static bool ip6t_log_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool +log_tg6_check(const char *tablename, const void *entry, + const struct xt_target *target, void *targinfo, + unsigned 
int hook_mask) { const struct ip6t_log_info *loginfo = targinfo; @@ -470,12 +466,12 @@ static bool ip6t_log_checkentry(const char *tablename, return true; } -static struct xt_target ip6t_log_reg __read_mostly = { +static struct xt_target log_tg6_reg __read_mostly = { .name = "LOG", .family = AF_INET6, - .target = ip6t_log_target, + .target = log_tg6, .targetsize = sizeof(struct ip6t_log_info), - .checkentry = ip6t_log_checkentry, + .checkentry = log_tg6_check, .me = THIS_MODULE, }; @@ -485,22 +481,22 @@ static struct nf_logger ip6t_logger = { .me = THIS_MODULE, }; -static int __init ip6t_log_init(void) +static int __init log_tg6_init(void) { int ret; - ret = xt_register_target(&ip6t_log_reg); + ret = xt_register_target(&log_tg6_reg); if (ret < 0) return ret; nf_log_register(PF_INET6, &ip6t_logger); return 0; } -static void __exit ip6t_log_fini(void) +static void __exit log_tg6_exit(void) { nf_log_unregister(&ip6t_logger); - xt_unregister_target(&ip6t_log_reg); + xt_unregister_target(&log_tg6_reg); } -module_init(ip6t_log_init); -module_exit(ip6t_log_fini); +module_init(log_tg6_init); +module_exit(log_tg6_exit); diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 960ba1780a9..a951c2cb6de 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -170,12 +170,10 @@ send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); } -static unsigned int reject6_target(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +static unsigned int +reject_tg6(struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, unsigned int hooknum, + const struct xt_target *target, const void *targinfo) { const struct ip6t_reject_info *reject = targinfo; @@ -214,11 +212,10 @@ static unsigned int reject6_target(struct 
sk_buff *skb, return NF_DROP; } -static bool check(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool +reject_tg6_check(const char *tablename, const void *entry, + const struct xt_target *target, void *targinfo, + unsigned int hook_mask) { const struct ip6t_reject_info *rejinfo = targinfo; const struct ip6t_entry *e = entry; @@ -237,27 +234,27 @@ static bool check(const char *tablename, return true; } -static struct xt_target ip6t_reject_reg __read_mostly = { +static struct xt_target reject_tg6_reg __read_mostly = { .name = "REJECT", .family = AF_INET6, - .target = reject6_target, + .target = reject_tg6, .targetsize = sizeof(struct ip6t_reject_info), .table = "filter", .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT), - .checkentry = check, + .checkentry = reject_tg6_check, .me = THIS_MODULE }; -static int __init ip6t_reject_init(void) +static int __init reject_tg6_init(void) { - return xt_register_target(&ip6t_reject_reg); + return xt_register_target(&reject_tg6_reg); } -static void __exit ip6t_reject_fini(void) +static void __exit reject_tg6_exit(void) { - xt_unregister_target(&ip6t_reject_reg); + xt_unregister_target(&reject_tg6_reg); } -module_init(ip6t_reject_init); -module_exit(ip6t_reject_fini); +module_init(reject_tg6_init); +module_exit(reject_tg6_exit); diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 2a25fe25e0e..f5d08a8c011 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -37,14 +37,9 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) } static bool -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +ah_mt6(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const 
struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) { struct ip_auth_hdr _ah; const struct ip_auth_hdr *ah; @@ -100,11 +95,9 @@ match(const struct sk_buff *skb, /* Called when user tries to insert an entry of this type. */ static bool -checkentry(const char *tablename, - const void *entry, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +ah_mt6_check(const char *tablename, const void *entry, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) { const struct ip6t_ah *ahinfo = matchinfo; @@ -115,24 +108,24 @@ checkentry(const char *tablename, return true; } -static struct xt_match ah_match __read_mostly = { +static struct xt_match ah_mt6_reg __read_mostly = { .name = "ah", .family = AF_INET6, - .match = match, + .match = ah_mt6, .matchsize = sizeof(struct ip6t_ah), - .checkentry = checkentry, + .checkentry = ah_mt6_check, .me = THIS_MODULE, }; -static int __init ip6t_ah_init(void) +static int __init ah_mt6_init(void) { - return xt_register_match(&ah_match); + return xt_register_match(&ah_mt6_reg); } -static void __exit ip6t_ah_fini(void) +static void __exit ah_mt6_exit(void) { - xt_unregister_match(&ah_match); + xt_unregister_match(&ah_mt6_reg); } -module_init(ip6t_ah_init); -module_exit(ip6t_ah_fini); +module_init(ah_mt6_init); +module_exit(ah_mt6_exit); diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index ff71269579d..dd9e67df914 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -20,14 +20,10 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andras Kis-Szabo "); static bool -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +eui64_mt6(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct xt_match *match, + 
const void *matchinfo, int offset, unsigned int protoff, + bool *hotdrop) { unsigned char eui64[8]; int i = 0; @@ -62,25 +58,25 @@ match(const struct sk_buff *skb, return false; } -static struct xt_match eui64_match __read_mostly = { +static struct xt_match eui64_mt6_reg __read_mostly = { .name = "eui64", .family = AF_INET6, - .match = match, + .match = eui64_mt6, .matchsize = sizeof(int), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD), .me = THIS_MODULE, }; -static int __init ip6t_eui64_init(void) +static int __init eui64_mt6_init(void) { - return xt_register_match(&eui64_match); + return xt_register_match(&eui64_mt6_reg); } -static void __exit ip6t_eui64_fini(void) +static void __exit eui64_mt6_exit(void) { - xt_unregister_match(&eui64_match); + xt_unregister_match(&eui64_mt6_reg); } -module_init(ip6t_eui64_init); -module_exit(ip6t_eui64_fini); +module_init(eui64_mt6_init); +module_exit(eui64_mt6_exit); diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 968aeba0207..ae8c714a80d 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -35,14 +35,10 @@ id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) } static bool -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +frag_mt6(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, + bool *hotdrop) { struct frag_hdr _frag; const struct frag_hdr *fh; @@ -116,11 +112,9 @@ match(const struct sk_buff *skb, /* Called when user tries to insert an entry of this type. 
*/ static bool -checkentry(const char *tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +frag_mt6_check(const char *tablename, const void *ip, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) { const struct ip6t_frag *fraginfo = matchinfo; @@ -131,24 +125,24 @@ checkentry(const char *tablename, return true; } -static struct xt_match frag_match __read_mostly = { +static struct xt_match frag_mt6_reg __read_mostly = { .name = "frag", .family = AF_INET6, - .match = match, + .match = frag_mt6, .matchsize = sizeof(struct ip6t_frag), - .checkentry = checkentry, + .checkentry = frag_mt6_check, .me = THIS_MODULE, }; -static int __init ip6t_frag_init(void) +static int __init frag_mt6_init(void) { - return xt_register_match(&frag_match); + return xt_register_match(&frag_mt6_reg); } -static void __exit ip6t_frag_fini(void) +static void __exit frag_mt6_exit(void) { - xt_unregister_match(&frag_match); + xt_unregister_match(&frag_mt6_reg); } -module_init(ip6t_frag_init); -module_exit(ip6t_frag_fini); +module_init(frag_mt6_init); +module_exit(frag_mt6_exit); diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index e6ca6018b1e..b76e27dc73d 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -42,14 +42,10 @@ MODULE_ALIAS("ip6t_dst"); */ static bool -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +hbh_mt6(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, + bool *hotdrop) { struct ipv6_opt_hdr _optsh; const struct ipv6_opt_hdr *oh; @@ -171,11 +167,9 @@ match(const struct sk_buff *skb, /* Called when user tries to insert an entry of this type. 
*/ static bool -checkentry(const char *tablename, - const void *entry, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +hbh_mt6_check(const char *tablename, const void *entry, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) { const struct ip6t_opts *optsinfo = matchinfo; @@ -186,36 +180,36 @@ checkentry(const char *tablename, return true; } -static struct xt_match opts_match[] __read_mostly = { +static struct xt_match hbh_mt6_reg[] __read_mostly = { { .name = "hbh", .family = AF_INET6, - .match = match, + .match = hbh_mt6, .matchsize = sizeof(struct ip6t_opts), - .checkentry = checkentry, + .checkentry = hbh_mt6_check, .me = THIS_MODULE, .data = NEXTHDR_HOP, }, { .name = "dst", .family = AF_INET6, - .match = match, + .match = hbh_mt6, .matchsize = sizeof(struct ip6t_opts), - .checkentry = checkentry, + .checkentry = hbh_mt6_check, .me = THIS_MODULE, .data = NEXTHDR_DEST, }, }; -static int __init ip6t_hbh_init(void) +static int __init hbh_mt6_init(void) { - return xt_register_matches(opts_match, ARRAY_SIZE(opts_match)); + return xt_register_matches(hbh_mt6_reg, ARRAY_SIZE(hbh_mt6_reg)); } -static void __exit ip6t_hbh_fini(void) +static void __exit hbh_mt6_exit(void) { - xt_unregister_matches(opts_match, ARRAY_SIZE(opts_match)); + xt_unregister_matches(hbh_mt6_reg, ARRAY_SIZE(hbh_mt6_reg)); } -module_init(ip6t_hbh_init); -module_exit(ip6t_hbh_fini); +module_init(hbh_mt6_init); +module_exit(hbh_mt6_exit); diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c index ca29ec00dc1..8f2d7d0ab40 100644 --- a/net/ipv6/netfilter/ip6t_hl.c +++ b/net/ipv6/netfilter/ip6t_hl.c @@ -19,10 +19,10 @@ MODULE_AUTHOR("Maciej Soltysiak "); MODULE_DESCRIPTION("IP tables Hop Limit matching module"); MODULE_LICENSE("GPL"); -static bool match(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, 
bool *hotdrop) +static bool +hl_mt6(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) { const struct ip6t_hl_info *info = matchinfo; const struct ipv6hdr *ip6h = ipv6_hdr(skb); @@ -49,23 +49,23 @@ static bool match(const struct sk_buff *skb, return false; } -static struct xt_match hl_match __read_mostly = { +static struct xt_match hl_mt6_reg __read_mostly = { .name = "hl", .family = AF_INET6, - .match = match, + .match = hl_mt6, .matchsize = sizeof(struct ip6t_hl_info), .me = THIS_MODULE, }; -static int __init ip6t_hl_init(void) +static int __init hl_mt6_init(void) { - return xt_register_match(&hl_match); + return xt_register_match(&hl_mt6_reg); } -static void __exit ip6t_hl_fini(void) +static void __exit hl_mt6_exit(void) { - xt_unregister_match(&hl_match); + xt_unregister_match(&hl_mt6_reg); } -module_init(ip6t_hl_init); -module_exit(ip6t_hl_fini); +module_init(hl_mt6_init); +module_exit(hl_mt6_exit); diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 2c65c2f9a4a..ae497e7ac11 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -27,14 +27,10 @@ MODULE_DESCRIPTION("IPv6 headers match"); MODULE_AUTHOR("Andras Kis-Szabo "); static bool -ipv6header_match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +ipv6header_mt6(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, + bool *hotdrop) { const struct ip6t_ipv6header_info *info = matchinfo; unsigned int temp; @@ -125,11 +121,9 @@ ipv6header_match(const struct sk_buff *skb, } static bool -ipv6header_checkentry(const char 
*tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +ipv6header_mt6_check(const char *tablename, const void *ip, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) { const struct ip6t_ipv6header_info *info = matchinfo; @@ -141,25 +135,25 @@ ipv6header_checkentry(const char *tablename, return true; } -static struct xt_match ip6t_ipv6header_match __read_mostly = { +static struct xt_match ipv6header_mt6_reg __read_mostly = { .name = "ipv6header", .family = AF_INET6, - .match = &ipv6header_match, + .match = ipv6header_mt6, .matchsize = sizeof(struct ip6t_ipv6header_info), - .checkentry = &ipv6header_checkentry, + .checkentry = ipv6header_mt6_check, .destroy = NULL, .me = THIS_MODULE, }; -static int __init ipv6header_init(void) +static int __init ipv6header_mt6_init(void) { - return xt_register_match(&ip6t_ipv6header_match); + return xt_register_match(&ipv6header_mt6_reg); } -static void __exit ipv6header_exit(void) +static void __exit ipv6header_mt6_exit(void) { - xt_unregister_match(&ip6t_ipv6header_match); + xt_unregister_match(&ipv6header_mt6_reg); } -module_init(ipv6header_init); -module_exit(ipv6header_exit); +module_init(ipv6header_mt6_init); +module_exit(ipv6header_mt6_exit); diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c index 0fa714092dc..618e6b94b03 100644 --- a/net/ipv6/netfilter/ip6t_mh.c +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -38,14 +38,9 @@ type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert) } static bool -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +mh_mt6(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) { struct ip6_mh _mh; const 
struct ip6_mh *mh; @@ -77,11 +72,9 @@ match(const struct sk_buff *skb, /* Called when user tries to insert an entry of this type. */ static bool -mh_checkentry(const char *tablename, - const void *entry, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +mh_mt6_check(const char *tablename, const void *entry, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) { const struct ip6t_mh *mhinfo = matchinfo; @@ -89,25 +82,25 @@ mh_checkentry(const char *tablename, return !(mhinfo->invflags & ~IP6T_MH_INV_MASK); } -static struct xt_match mh_match __read_mostly = { +static struct xt_match mh_mt6_reg __read_mostly = { .name = "mh", .family = AF_INET6, - .checkentry = mh_checkentry, - .match = match, + .checkentry = mh_mt6_check, + .match = mh_mt6, .matchsize = sizeof(struct ip6t_mh), .proto = IPPROTO_MH, .me = THIS_MODULE, }; -static int __init ip6t_mh_init(void) +static int __init mh_mt6_init(void) { - return xt_register_match(&mh_match); + return xt_register_match(&mh_mt6_reg); } -static void __exit ip6t_mh_fini(void) +static void __exit mh_mt6_exit(void) { - xt_unregister_match(&mh_match); + xt_unregister_match(&mh_mt6_reg); } -module_init(ip6t_mh_init); -module_exit(ip6t_mh_fini); +module_init(mh_mt6_init); +module_exit(mh_mt6_exit); diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index 1e0dc4a972c..6a52ed98516 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -24,14 +24,10 @@ MODULE_LICENSE("GPL"); static bool -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +owner_mt6(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, + bool *hotdrop) { const struct ip6t_owner_info *info = 
matchinfo; @@ -52,11 +48,9 @@ match(const struct sk_buff *skb, } static bool -checkentry(const char *tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +owner_mt6_check(const char *tablename, const void *ip, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) { const struct ip6t_owner_info *info = matchinfo; @@ -68,26 +62,26 @@ checkentry(const char *tablename, return true; } -static struct xt_match owner_match __read_mostly = { +static struct xt_match owner_mt6_reg __read_mostly = { .name = "owner", .family = AF_INET6, - .match = match, + .match = owner_mt6, .matchsize = sizeof(struct ip6t_owner_info), .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_POST_ROUTING), - .checkentry = checkentry, + .checkentry = owner_mt6_check, .me = THIS_MODULE, }; -static int __init ip6t_owner_init(void) +static int __init owner_mt6_init(void) { - return xt_register_match(&owner_match); + return xt_register_match(&owner_mt6_reg); } -static void __exit ip6t_owner_fini(void) +static void __exit owner_mt6_exit(void) { - xt_unregister_match(&owner_match); + xt_unregister_match(&owner_mt6_reg); } -module_init(ip6t_owner_init); -module_exit(ip6t_owner_fini); +module_init(owner_mt6_init); +module_exit(owner_mt6_exit); diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 357cea703bd..038cea6407d 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -37,14 +37,9 @@ segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) } static bool -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +rt_mt6(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) { struct 
ipv6_rt_hdr _route; const struct ipv6_rt_hdr *rh; @@ -195,11 +190,9 @@ match(const struct sk_buff *skb, /* Called when user tries to insert an entry of this type. */ static bool -checkentry(const char *tablename, - const void *entry, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +rt_mt6_check(const char *tablename, const void *entry, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) { const struct ip6t_rt *rtinfo = matchinfo; @@ -218,24 +211,24 @@ checkentry(const char *tablename, return true; } -static struct xt_match rt_match __read_mostly = { +static struct xt_match rt_mt6_reg __read_mostly = { .name = "rt", .family = AF_INET6, - .match = match, + .match = rt_mt6, .matchsize = sizeof(struct ip6t_rt), - .checkentry = checkentry, + .checkentry = rt_mt6_check, .me = THIS_MODULE, }; -static int __init ip6t_rt_init(void) +static int __init rt_mt6_init(void) { - return xt_register_match(&rt_match); + return xt_register_match(&rt_mt6_reg); } -static void __exit ip6t_rt_fini(void) +static void __exit rt_mt6_exit(void) { - xt_unregister_match(&rt_match); + xt_unregister_match(&rt_mt6_reg); } -module_init(ip6t_rt_init); -module_exit(ip6t_rt_fini); +module_init(rt_mt6_init); +module_exit(rt_mt6_exit); -- cgit v1.2.3 From 259d4e41f3ec25f22169daece42729f597b89f9a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Dec 2007 23:24:56 -0800 Subject: [NETFILTER]: x_tables: struct xt_table_info diet Instead of using a big array of NR_CPUS entries, we can compute the size needed at runtime, using nr_cpu_ids This should save some ram (especially on David's machines where NR_CPUS=4096 : 32 KB can be saved per table, and 64KB for dynamically allocated ones (because of slab/slub alignments) ) In particular, the 'bootstrap' tables are not any more static (in data section) but on stack as their size is now very small.
This also should reduce the size used on stack in compat functions (get_info() declares an automatic variable, that could be bigger than kernel stack size for big NR_CPUS) Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6_tables.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index e1e87eff468..e60c1b4b1ec 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1042,8 +1042,7 @@ do_replace(void __user *user, unsigned int len) return -EFAULT; /* overflow check */ - if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS - - SMP_CACHE_BYTES) + if (tmp.size >= INT_MAX / num_possible_cpus()) return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; @@ -1339,7 +1338,7 @@ int ip6t_register_table(struct xt_table *table, { int ret; struct xt_table_info *newinfo; - static struct xt_table_info bootstrap + struct xt_table_info bootstrap = { 0, 0, 0, { 0 }, { 0 }, { } }; void *loc_cpu_entry; -- cgit v1.2.3 From 9e67d5a739327c44885adebb4f3a538050be73e4 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 4 Dec 2007 23:25:26 -0800 Subject: [NETFILTER]: x_tables: remove obsolete overflow check We're not multiplying the size with the number of CPUs anymore, so the check is obsolete. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/ip6_tables.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index e60c1b4b1ec..d3e884a5c6a 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1042,8 +1042,6 @@ do_replace(void __user *user, unsigned int len) return -EFAULT; /* overflow check */ - if (tmp.size >= INT_MAX / num_possible_cpus()) - return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; -- cgit v1.2.3 From 0265ab44bacc1a1e0e3f5873d8ca2d5a29e33db2 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 4 Dec 2007 23:27:38 -0800 Subject: [NETFILTER]: merge ipt_owner/ip6t_owner in xt_owner xt_owner merges ipt_owner and ip6t_owner, and adds a flag to match on socket (non-)existence. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/Kconfig | 9 ----- net/ipv6/netfilter/Makefile | 1 - net/ipv6/netfilter/ip6t_owner.c | 87 ----------------------------------------- 3 files changed, 97 deletions(-) delete mode 100644 net/ipv6/netfilter/ip6t_owner.c (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 838b8ddee8c..30d48529d98 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -89,15 +89,6 @@ config IP6_NF_MATCH_HL To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_MATCH_OWNER - tristate "Owner match support" - depends on IP6_NF_IPTABLES - help - Packet owner matching allows you to match locally-generated packets - based on who created them: the user, group, process or session. - - To compile it as a module, choose M here. If unsure, say N. 
- config IP6_NF_MATCH_IPV6HEADER tristate "IPv6 Extension Headers Match" depends on IP6_NF_IPTABLES diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index e789ec44d23..fbf2c14ed88 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -23,7 +23,6 @@ obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o -obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c deleted file mode 100644 index 6a52ed98516..00000000000 --- a/net/ipv6/netfilter/ip6t_owner.c +++ /dev/null @@ -1,87 +0,0 @@ -/* Kernel module to match various things tied to sockets associated with - locally generated outgoing packets. */ - -/* (C) 2000-2001 Marc Boucher - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -MODULE_AUTHOR("Marc Boucher "); -MODULE_DESCRIPTION("IP6 tables owner matching module"); -MODULE_LICENSE("GPL"); - - -static bool -owner_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) -{ - const struct ip6t_owner_info *info = matchinfo; - - if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file) - return false; - - if (info->match & IP6T_OWNER_UID) - if ((skb->sk->sk_socket->file->f_uid != info->uid) ^ - !!(info->invert & IP6T_OWNER_UID)) - return false; - - if (info->match & IP6T_OWNER_GID) - if ((skb->sk->sk_socket->file->f_gid != info->gid) ^ - !!(info->invert & IP6T_OWNER_GID)) - return false; - - return true; -} - -static bool -owner_mt6_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) -{ - const struct ip6t_owner_info *info = matchinfo; - - if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) { - printk("ipt_owner: pid and sid matching " - "not supported anymore\n"); - return false; - } - return true; -} - -static struct xt_match owner_mt6_reg __read_mostly = { - .name = "owner", - .family = AF_INET6, - .match = owner_mt6, - .matchsize = sizeof(struct ip6t_owner_info), - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING), - .checkentry = owner_mt6_check, - .me = THIS_MODULE, -}; - -static int __init owner_mt6_init(void) -{ - return xt_register_match(&owner_mt6_reg); -} - -static void __exit owner_mt6_exit(void) -{ - xt_unregister_match(&owner_mt6_reg); -} - -module_init(owner_mt6_init); -module_exit(owner_mt6_exit); -- cgit v1.2.3 From 4c37799ccf6c722e0dad6a0677af22d1c23fb897 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 4 Dec 2007 23:31:59 -0800 Subject: [NETFILTER]: Use lowercase names for matches in Kconfig Unify 
netfilter match kconfig descriptions Consistently use lowercase for matches in kconfig one-line descriptions and name the match module. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/Kconfig | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 30d48529d98..5374c665f8d 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -54,7 +54,7 @@ config IP6_NF_IPTABLES # The simple matches. config IP6_NF_MATCH_RT - tristate "Routing header match support" + tristate '"rt" Routing header match support' depends on IP6_NF_IPTABLES help rt matching allows you to match packets based on the routing @@ -63,7 +63,7 @@ config IP6_NF_MATCH_RT To compile it as a module, choose M here. If unsure, say N. config IP6_NF_MATCH_OPTS - tristate "Hop-by-hop and Dst opts header match support" + tristate '"hopbyhop" and "dst" opts header match support' depends on IP6_NF_IPTABLES help This allows one to match packets based on the hop-by-hop @@ -72,7 +72,7 @@ config IP6_NF_MATCH_OPTS To compile it as a module, choose M here. If unsure, say N. config IP6_NF_MATCH_FRAG - tristate "Fragmentation header match support" + tristate '"frag" Fragmentation header match support' depends on IP6_NF_IPTABLES help frag matching allows you to match packets based on the fragmentation @@ -81,7 +81,7 @@ config IP6_NF_MATCH_FRAG To compile it as a module, choose M here. If unsure, say N. config IP6_NF_MATCH_HL - tristate "HL match support" + tristate '"hl" match support' depends on IP6_NF_IPTABLES help HL matching allows you to match packets based on the hop @@ -90,7 +90,7 @@ config IP6_NF_MATCH_HL To compile it as a module, choose M here. If unsure, say N. 
config IP6_NF_MATCH_IPV6HEADER - tristate "IPv6 Extension Headers Match" + tristate '"ipv6header" IPv6 Extension Headers Match' depends on IP6_NF_IPTABLES help This module allows one to match packets based upon @@ -99,7 +99,7 @@ config IP6_NF_MATCH_IPV6HEADER To compile it as a module, choose M here. If unsure, say N. config IP6_NF_MATCH_AH - tristate "AH match support" + tristate '"ah" match support' depends on IP6_NF_IPTABLES help This module allows one to match AH packets. @@ -107,7 +107,7 @@ config IP6_NF_MATCH_AH To compile it as a module, choose M here. If unsure, say N. config IP6_NF_MATCH_MH - tristate "MH match support" + tristate '"mh" match support' depends on IP6_NF_IPTABLES help This module allows one to match MH packets. @@ -115,7 +115,7 @@ config IP6_NF_MATCH_MH To compile it as a module, choose M here. If unsure, say N. config IP6_NF_MATCH_EUI64 - tristate "EUI64 address check" + tristate '"eui64" address check' depends on IP6_NF_IPTABLES help This module performs checking on the IPv6 source address -- cgit v1.2.3 From 17dfc93f6d7e5792c9c36dd70c8612721a091ae8 Mon Sep 17 00:00:00 2001 From: Maciej Soltysiak Date: Tue, 4 Dec 2007 23:50:38 -0800 Subject: [NETFILTER]: {ip,ip6}t_LOG: log GID Log GID in addition to UID Signed-off-by: Maciej Soltysiak Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/ip6t_LOG.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index cd51c42727f..19523242991 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -362,7 +362,9 @@ static void dump_packet(const struct nf_loginfo *info, if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) { read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) - printk("UID=%u ", skb->sk->sk_socket->file->f_uid); + printk("UID=%u GID=%u", + skb->sk->sk_socket->file->f_uid, + skb->sk->sk_socket->file->f_gid); read_unlock_bh(&skb->sk->sk_callback_lock); } } -- cgit v1.2.3 From 1841a4c7ae106b7a3e2521db55f4d8bb8a0988d5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 5 Dec 2007 01:22:05 -0800 Subject: [NETFILTER]: nf_ct_h323: remove ipv6 module dependency nf_conntrack_h323 needs ip6_route_output for the call forwarding filter. Add a ->route function to nf_afinfo and use that to avoid pulling in the ipv6 module. Fix the #ifdef for the IPv6 code while I'm at it - the IPv6 support is only needed when IPv6 conntrack is enabled. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 175e19f8025..281f732e3c9 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -81,6 +81,12 @@ static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info) return 0; } +static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl) +{ + *dst = ip6_route_output(NULL, fl); + return (*dst)->error; +} + __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol) { @@ -118,6 +124,7 @@ EXPORT_SYMBOL(nf_ip6_checksum); static struct nf_afinfo nf_ip6_afinfo = { .family = AF_INET6, .checksum = nf_ip6_checksum, + .route = nf_ip6_route, .saveroute = nf_ip6_saveroute, .reroute = nf_ip6_reroute, .route_key_size = sizeof(struct ip6_rt_info), -- cgit v1.2.3 From 1999414a4ece2b8cea3fb3c4dc8fe06796256269 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 5 Dec 2007 01:23:00 -0800 Subject: [NETFILTER]: Mark hooks __read_mostly Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/ip6table_filter.c | 2 +- net/ipv6/netfilter/ip6table_mangle.c | 2 +- net/ipv6/netfilter/ip6table_raw.c | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 0ae072dd692..87d38d08aad 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -90,7 +90,7 @@ ip6t_local_out_hook(unsigned int hook, return ip6t_do_table(skb, hook, in, out, &packet_filter); } -static struct nf_hook_ops ip6t_ops[] = { +static struct nf_hook_ops ip6t_ops[] __read_mostly = { { .hook = ip6t_hook, .owner = THIS_MODULE, diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 8e62b231682..d6082600bc5 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -120,7 +120,7 @@ ip6t_local_hook(unsigned int hook, return ret; } -static struct nf_hook_ops ip6t_ops[] = { +static struct nf_hook_ops ip6t_ops[] __read_mostly = { { .hook = ip6t_route_hook, .owner = THIS_MODULE, diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 4fecd8de8cc..eccbaaa104a 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -54,7 +54,7 @@ ip6t_hook(unsigned int hook, return ip6t_do_table(skb, hook, in, out, &packet_raw); } -static struct nf_hook_ops ip6t_ops[] = { +static struct nf_hook_ops ip6t_ops[] __read_mostly = { { .hook = ip6t_hook, .pf = PF_INET6, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 50f46787fda..97a553036dd 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -258,7 +258,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, return ipv6_conntrack_in(hooknum, skb, in, out, okfn); } 
-static struct nf_hook_ops ipv6_conntrack_ops[] = { +static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { { .hook = ipv6_defrag, .owner = THIS_MODULE, -- cgit v1.2.3 From e3ac5298159c5286cef86f0865d4fa6a606bd391 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 5 Dec 2007 01:23:57 -0800 Subject: [NETFILTER]: nf_queue: make queue_handler const Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index e273605eef8..d6e971bd9fe 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -634,7 +634,7 @@ static const struct file_operations ip6_queue_proc_fops = { .owner = THIS_MODULE, }; -static struct nf_queue_handler nfqh = { +static const struct nf_queue_handler nfqh = { .name = "ip6_queue", .outfn = &ipq_enqueue_packet, }; -- cgit v1.2.3 From f9d8928f8340ab8e76f1da4799cb19a6ff58b83d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 5 Dec 2007 01:24:30 -0800 Subject: [NETFILTER]: nf_queue: remove unused data pointer Remove the data pointer from struct nf_queue_handler. It has never been used and is useless for the only handler that really matters, nfnetlink_queue, since the handler is shared between all instances. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/ip6_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index d6e971bd9fe..5a9ca0d4fb2 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -269,7 +269,7 @@ nlmsg_failure: static int ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, - unsigned int queuenum, void *data) + unsigned int queuenum) { int status = -EINVAL; struct sk_buff *nskb; -- cgit v1.2.3 From c01cd429fc118c5db92475c5f08b307718aa4efc Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 5 Dec 2007 01:24:48 -0800 Subject: [NETFILTER]: nf_queue: move queueing related functions/struct to separate header Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter.c | 1 + net/ipv6/netfilter/ip6_queue.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 281f732e3c9..55ea9c6ec74 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -8,6 +8,7 @@ #include #include #include +#include int ip6_route_me_harder(struct sk_buff *skb) { diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 5a9ca0d4fb2..7ff9915750a 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 0ac41e81462de20f87242caac2b9084c202c33b7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 5 Dec 2007 01:25:03 -0800 Subject: [NETFILTER]: {nf_netlink,ip,ip6}_queue: use list_for_each_entry Use list_add_tail/list_for_each_entry instead of list_add and list_for_each_prev as a preparation for switching to RCU. Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- net/ipv6/netfilter/ip6_queue.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 7ff9915750a..243a00bcd3d 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -71,7 +71,7 @@ ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict) static inline void __ipq_enqueue_entry(struct ipq_queue_entry *entry) { - list_add(&entry->list, &queue_list); + list_add_tail(&entry->list, &queue_list); queue_total++; } @@ -82,11 +82,9 @@ __ipq_enqueue_entry(struct ipq_queue_entry *entry) static inline struct ipq_queue_entry * __ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data) { - struct list_head *p; - - list_for_each_prev(p, &queue_list) { - struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p; + struct ipq_queue_entry *entry; + list_for_each_entry(entry, &queue_list, list) { if (!cmpfn || cmpfn(entry, data)) return entry; } -- cgit v1.2.3 From 171b7fc4fc178a004aec8d06eb745c30ae726fb6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 5 Dec 2007 01:26:02 -0800 Subject: [NETFILTER]: ip6_queue: deobfuscate entry lookups A queue entry lookup currently looks like this: ipq_find_dequeue_entry -> __ipq_find_dequeue_entry -> __ipq_find_entry -> cmpfn -> id_cmp Use simple open-coded list walking and kill the cmpfn for ipq_find_dequeue_entry. Instead add it to ipq_flush (after similar cleanups) and use ipq_flush for both complete flushes and flushing entries related to a device. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/ip6_queue.c | 101 +++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 64 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 243a00bcd3d..7d0780d02d0 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -75,52 +75,6 @@ __ipq_enqueue_entry(struct ipq_queue_entry *entry) queue_total++; } -/* - * Find and return a queued entry matched by cmpfn, or return the last - * entry if cmpfn is NULL. - */ -static inline struct ipq_queue_entry * -__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data) -{ - struct ipq_queue_entry *entry; - - list_for_each_entry(entry, &queue_list, list) { - if (!cmpfn || cmpfn(entry, data)) - return entry; - } - return NULL; -} - -static inline void -__ipq_dequeue_entry(struct ipq_queue_entry *entry) -{ - list_del(&entry->list); - queue_total--; -} - -static inline struct ipq_queue_entry * -__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data) -{ - struct ipq_queue_entry *entry; - - entry = __ipq_find_entry(cmpfn, data); - if (entry == NULL) - return NULL; - - __ipq_dequeue_entry(entry); - return entry; -} - - -static inline void -__ipq_flush(int verdict) -{ - struct ipq_queue_entry *entry; - - while ((entry = __ipq_find_dequeue_entry(NULL, 0))) - ipq_issue_verdict(entry, verdict); -} - static inline int __ipq_set_mode(unsigned char mode, unsigned int range) { @@ -147,31 +101,59 @@ __ipq_set_mode(unsigned char mode, unsigned int range) return status; } +static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data); + static inline void __ipq_reset(void) { peer_pid = 0; net_disable_timestamp(); __ipq_set_mode(IPQ_COPY_NONE, 0); - __ipq_flush(NF_DROP); + __ipq_flush(NULL, 0); } static struct ipq_queue_entry * -ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data) +ipq_find_dequeue_entry(unsigned long id) { - struct ipq_queue_entry *entry; + struct ipq_queue_entry *entry = NULL, *i; 
write_lock_bh(&queue_lock); - entry = __ipq_find_dequeue_entry(cmpfn, data); + + list_for_each_entry(i, &queue_list, list) { + if ((unsigned long)i == id) { + entry = i; + break; + } + } + + if (entry) { + list_del(&entry->list); + queue_total--; + } + write_unlock_bh(&queue_lock); return entry; } static void -ipq_flush(int verdict) +__ipq_flush(ipq_cmpfn cmpfn, unsigned long data) +{ + struct ipq_queue_entry *entry, *next; + + list_for_each_entry_safe(entry, next, &queue_list, list) { + if (!cmpfn || cmpfn(entry, data)) { + list_del(&entry->list); + queue_total--; + ipq_issue_verdict(entry, NF_DROP); + } + } +} + +static void +ipq_flush(ipq_cmpfn cmpfn, unsigned long data) { write_lock_bh(&queue_lock); - __ipq_flush(verdict); + __ipq_flush(cmpfn, data); write_unlock_bh(&queue_lock); } @@ -364,12 +346,6 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) return 0; } -static inline int -id_cmp(struct ipq_queue_entry *e, unsigned long id) -{ - return (id == (unsigned long )e); -} - static int ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) { @@ -378,7 +354,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) if (vmsg->value > NF_MAX_VERDICT) return -EINVAL; - entry = ipq_find_dequeue_entry(id_cmp, vmsg->id); + entry = ipq_find_dequeue_entry(vmsg->id); if (entry == NULL) return -ENOENT; else { @@ -449,10 +425,7 @@ dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex) static void ipq_dev_drop(int ifindex) { - struct ipq_queue_entry *entry; - - while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL) - ipq_issue_verdict(entry, NF_DROP); + ipq_flush(dev_cmp, ifindex); } #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) @@ -689,7 +662,7 @@ static void __exit ip6_queue_fini(void) { nf_unregister_queue_handlers(&nfqh); synchronize_net(); - ipq_flush(NF_DROP); + ipq_flush(NULL, 0); unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); -- 
cgit v1.2.3 From 7a6c6653b3a977087ec64d76817c7ee6e1df5b60 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 5 Dec 2007 01:26:18 -0800 Subject: [NETFILTER]: ip6_queue: resync dev-index based flushing Resync dev_cmp to take bridge devices into account. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6_queue.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 7d0780d02d0..9c50cb19b39 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -418,7 +418,16 @@ dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex) if (entry->info->outdev) if (entry->info->outdev->ifindex == ifindex) return 1; - +#ifdef CONFIG_BRIDGE_NETFILTER + if (entry->skb->nf_bridge) { + if (entry->skb->nf_bridge->physindev && + entry->skb->nf_bridge->physindev->ifindex == ifindex) + return 1; + if (entry->skb->nf_bridge->physoutdev && + entry->skb->nf_bridge->physoutdev->ifindex == ifindex) + return 1; + } +#endif return 0; } -- cgit v1.2.3 From 02f014d88831f73b895c1fe09badb66c88e932d3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 5 Dec 2007 01:26:33 -0800 Subject: [NETFILTER]: nf_queue: move list_head/skb/id to struct nf_info Move common fields for queue management to struct nf_info and rename it to struct nf_queue_entry. This avoids one allocation/free per packet and simplifies the code a bit. Alternatively we could add some private room at the tail, but since all current users use identical structs this seems easier. Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- net/ipv6/netfilter.c | 14 +++++---- net/ipv6/netfilter/ip6_queue.c | 67 +++++++++++++++--------------------------- 2 files changed, 31 insertions(+), 50 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 55ea9c6ec74..945e6ae1956 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -57,11 +57,12 @@ struct ip6_rt_info { struct in6_addr saddr; }; -static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info) +static void nf_ip6_saveroute(const struct sk_buff *skb, + struct nf_queue_entry *entry) { - struct ip6_rt_info *rt_info = nf_info_reroute(info); + struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); - if (info->hook == NF_INET_LOCAL_OUT) { + if (entry->hook == NF_INET_LOCAL_OUT) { struct ipv6hdr *iph = ipv6_hdr(skb); rt_info->daddr = iph->daddr; @@ -69,11 +70,12 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info) } } -static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info) +static int nf_ip6_reroute(struct sk_buff *skb, + const struct nf_queue_entry *entry) { - struct ip6_rt_info *rt_info = nf_info_reroute(info); + struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); - if (info->hook == NF_INET_LOCAL_OUT) { + if (entry->hook == NF_INET_LOCAL_OUT) { struct ipv6hdr *iph = ipv6_hdr(skb); if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr)) diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 9c50cb19b39..9014adae4fb 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -39,13 +39,7 @@ #define NET_IPQ_QMAX 2088 #define NET_IPQ_QMAX_NAME "ip6_queue_maxlen" -struct ipq_queue_entry { - struct list_head list; - struct nf_info *info; - struct sk_buff *skb; -}; - -typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); +typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); static unsigned char 
copy_mode __read_mostly = IPQ_COPY_NONE; static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; @@ -60,16 +54,15 @@ static LIST_HEAD(queue_list); static DEFINE_MUTEX(ipqnl_mutex); static void -ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict) +ipq_issue_verdict(struct nf_queue_entry *entry, int verdict) { local_bh_disable(); - nf_reinject(entry->skb, entry->info, verdict); + nf_reinject(entry, verdict); local_bh_enable(); - kfree(entry); } static inline void -__ipq_enqueue_entry(struct ipq_queue_entry *entry) +__ipq_enqueue_entry(struct nf_queue_entry *entry) { list_add_tail(&entry->list, &queue_list); queue_total++; @@ -112,10 +105,10 @@ __ipq_reset(void) __ipq_flush(NULL, 0); } -static struct ipq_queue_entry * +static struct nf_queue_entry * ipq_find_dequeue_entry(unsigned long id) { - struct ipq_queue_entry *entry = NULL, *i; + struct nf_queue_entry *entry = NULL, *i; write_lock_bh(&queue_lock); @@ -138,7 +131,7 @@ ipq_find_dequeue_entry(unsigned long id) static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data) { - struct ipq_queue_entry *entry, *next; + struct nf_queue_entry *entry, *next; list_for_each_entry_safe(entry, next, &queue_list, list) { if (!cmpfn || cmpfn(entry, data)) { @@ -158,7 +151,7 @@ ipq_flush(ipq_cmpfn cmpfn, unsigned long data) } static struct sk_buff * -ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) +ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) { sk_buff_data_t old_tail; size_t size = 0; @@ -215,20 +208,20 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->timestamp_sec = tv.tv_sec; pmsg->timestamp_usec = tv.tv_usec; pmsg->mark = entry->skb->mark; - pmsg->hook = entry->info->hook; + pmsg->hook = entry->hook; pmsg->hw_protocol = entry->skb->protocol; - if (entry->info->indev) - strcpy(pmsg->indev_name, entry->info->indev->name); + if (entry->indev) + strcpy(pmsg->indev_name, entry->indev->name); else pmsg->indev_name[0] = '\0'; - if 
(entry->info->outdev) - strcpy(pmsg->outdev_name, entry->info->outdev->name); + if (entry->outdev) + strcpy(pmsg->outdev_name, entry->outdev->name); else pmsg->outdev_name[0] = '\0'; - if (entry->info->indev && entry->skb->dev) { + if (entry->indev && entry->skb->dev) { pmsg->hw_type = entry->skb->dev->type; pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); } @@ -249,28 +242,17 @@ nlmsg_failure: } static int -ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, - unsigned int queuenum) +ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) { int status = -EINVAL; struct sk_buff *nskb; - struct ipq_queue_entry *entry; if (copy_mode == IPQ_COPY_NONE) return -EAGAIN; - entry = kmalloc(sizeof(*entry), GFP_ATOMIC); - if (entry == NULL) { - printk(KERN_ERR "ip6_queue: OOM in ipq_enqueue_packet()\n"); - return -ENOMEM; - } - - entry->info = info; - entry->skb = skb; - nskb = ipq_build_packet_message(entry, &status); if (nskb == NULL) - goto err_out_free; + return status; write_lock_bh(&queue_lock); @@ -304,14 +286,11 @@ err_out_free_nskb: err_out_unlock: write_unlock_bh(&queue_lock); - -err_out_free: - kfree(entry); return status; } static int -ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) +ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e) { int diff; int err; @@ -349,7 +328,7 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) static int ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) { - struct ipq_queue_entry *entry; + struct nf_queue_entry *entry; if (vmsg->value > NF_MAX_VERDICT) return -EINVAL; @@ -409,14 +388,14 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg, } static int -dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex) +dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) { - if (entry->info->indev) - if (entry->info->indev->ifindex == ifindex) + if (entry->indev) + if (entry->indev->ifindex == ifindex) return 1; - if (entry->info->outdev) 
- if (entry->info->outdev->ifindex == ifindex) + if (entry->outdev) + if (entry->outdev->ifindex == ifindex) return 1; #ifdef CONFIG_BRIDGE_NETFILTER if (entry->skb->nf_bridge) { -- cgit v1.2.3 From 4b3d15ef4a88683d93d1b76351297d2298a02a99 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 5 Dec 2007 01:27:02 -0800 Subject: [NETFILTER]: {nfnetlink,ip,ip6}_queue: kill issue_verdict Now that issue_verdict doesn't need to free the queue entries anymore, all it does is disable local BHs and call nf_reinject. Move the BH disabling to the okfn invocation in nf_reinject and kill the issue_verdict functions. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6_queue.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 9014adae4fb..e5b0059582f 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -53,14 +53,6 @@ static struct sock *ipqnl __read_mostly; static LIST_HEAD(queue_list); static DEFINE_MUTEX(ipqnl_mutex); -static void -ipq_issue_verdict(struct nf_queue_entry *entry, int verdict) -{ - local_bh_disable(); - nf_reinject(entry, verdict); - local_bh_enable(); -} - static inline void __ipq_enqueue_entry(struct nf_queue_entry *entry) { @@ -137,7 +129,7 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data) if (!cmpfn || cmpfn(entry, data)) { list_del(&entry->list); queue_total--; - ipq_issue_verdict(entry, NF_DROP); + nf_reinject(entry, NF_DROP); } } } @@ -343,7 +335,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) if (ipq_mangle_ipv6(vmsg, entry) < 0) verdict = NF_DROP; - ipq_issue_verdict(entry, verdict); + nf_reinject(entry, verdict); return 0; } } -- cgit v1.2.3 From 4a61b586cd7eaab6242eca58e8e6e3c8ebd88bd2 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 5 Dec 2007 01:43:25 -0800 Subject: [IPV6]: Make the ipv6/sysctl_net_ipv6.c compilation cleaner 
Since this file is entirely enclosed with the #ifdef CONFIG_SYSCTL/#endif pair, it's OK to move this CONFIG_ into a Makefile. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/Makefile | 4 ++-- net/ipv6/sysctl_net_ipv6.c | 7 ------- 2 files changed, 2 insertions(+), 9 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 5ffa9800305..24f3aa0f2a3 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -8,9 +8,9 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ - exthdrs.o sysctl_net_ipv6.o datagram.o \ - ip6_flowlabel.o inet6_connection_sock.o + exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o +ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o ipv6-$(CONFIG_NETFILTER) += netfilter.o diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 68bb2548e46..227efa726ac 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -14,8 +14,6 @@ #include #include -#ifdef CONFIG_SYSCTL - static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_ROUTE, @@ -115,8 +113,3 @@ void ipv6_sysctl_unregister(void) { unregister_sysctl_table(ipv6_sysctl_header); } - -#endif /* CONFIG_SYSCTL */ - - - -- cgit v1.2.3 From 4d43b78ac27ca50fe42718192ac7c80474417389 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 5 Dec 2007 01:44:02 -0800 Subject: [IPV6]: Use sysctl paths to register ipv6 sysctl tables I have already done this for core, ipv4 and tr tables, so repeat this for the ipv6 ones. This makes the ipv6.ko smaller and creates the ground needed for net namespaces support in ipv6.ko sysctls. Signed-off-by: Pavel Emelyanov Signed-off-by: David S.
Miller --- net/ipv6/sysctl_net_ipv6.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 227efa726ac..0b5bec3cb79 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -82,31 +82,17 @@ static ctl_table ipv6_table[] = { { .ctl_name = 0 } }; -static struct ctl_table_header *ipv6_sysctl_header; - -static ctl_table ipv6_net_table[] = { - { - .ctl_name = NET_IPV6, - .procname = "ipv6", - .mode = 0555, - .child = ipv6_table - }, - { .ctl_name = 0 } +static struct ctl_path ipv6_ctl_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "ipv6", .ctl_name = NET_IPV6, }, + { }, }; -static ctl_table ipv6_root_table[] = { - { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - .child = ipv6_net_table - }, - { .ctl_name = 0 } -}; +static struct ctl_table_header *ipv6_sysctl_header; void ipv6_sysctl_register(void) { - ipv6_sysctl_header = register_sysctl_table(ipv6_root_table); + ipv6_sysctl_header = register_sysctl_paths(ipv6_ctl_path, ipv6_table); } void ipv6_sysctl_unregister(void) -- cgit v1.2.3 From c8fecf2242a0ab7230210665986b8ef915e1ae9e Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 5 Dec 2007 01:50:24 -0800 Subject: [IPV6]: Eliminate difference in actions of sysctl and proc handler for conf.all.forwarding The only difference in this case is that updating all.forwarding causes the update in default.forwarding when done via proc, but not via the system call. Besides, this consolidates a good portion of code. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 56 +++++++++++++++++++---------------------------------- 1 file changed, 20 insertions(+), 36 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index dbff389b700..95cf3aa41c9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -476,6 +476,21 @@ static void addrconf_forward_change(void) } read_unlock(&dev_base_lock); } + +static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) +{ + if (p == &ipv6_devconf_dflt.forwarding) + return; + + if (p == &ipv6_devconf.forwarding) { + ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding; + addrconf_forward_change(); + } else if ((!*p) ^ (!old)) + dev_forward_change((struct inet6_dev *)table->extra1); + + if (*p) + rt6_purge_dflt_routers(); +} #endif /* Nobody refers to this ifaddr, destroy it */ @@ -3771,22 +3786,8 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - if (write && valp != &ipv6_devconf_dflt.forwarding) { - if (valp != &ipv6_devconf.forwarding) { - if ((!*valp) ^ (!val)) { - struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1; - if (idev == NULL) - return ret; - dev_forward_change(idev); - } - } else { - ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding; - addrconf_forward_change(); - } - if (*valp) - rt6_purge_dflt_routers(); - } - + if (write) + addrconf_fixup_forwarding(ctl, valp, val); return ret; } @@ -3797,6 +3798,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table, void __user *newval, size_t newlen) { int *valp = table->data; + int val = *valp; int new; if (!newval || !newlen) @@ -3821,26 +3823,8 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table, } } - if (valp != &ipv6_devconf_dflt.forwarding) { - if (valp != &ipv6_devconf.forwarding) { - struct inet6_dev *idev = (struct inet6_dev *)table->extra1; - int changed; - if (unlikely(idev == NULL)) - return -ENODEV; - changed = 
(!*valp) ^ (!new); - *valp = new; - if (changed) - dev_forward_change(idev); - } else { - *valp = new; - addrconf_forward_change(); - } - - if (*valp) - rt6_purge_dflt_routers(); - } else - *valp = new; - + *valp = new; + addrconf_fixup_forwarding(table, valp, val); return 1; } -- cgit v1.2.3 From 27ab2568649d5ba6c5a20212079b7c4f6da4ca0d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 5 Dec 2007 01:51:58 -0800 Subject: [UDP]: Avoid repeated counting of checksum errors due to peeking Currently it is possible for two processes to peek on the same socket and end up incrementing the error counter twice for the same packet. This patch fixes it by making skb_kill_datagram return whether it succeeded in unlinking the packet and only incrementing the counter if it did. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 77ab31b9923..87bccec9882 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -207,8 +207,8 @@ out: return err; csum_copy_err: - UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); - skb_kill_datagram(sk, skb, flags); + if (!skb_kill_datagram(sk, skb, flags)) + UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); if (flags & MSG_DONTWAIT) return -EAGAIN; -- cgit v1.2.3 From 1781f7f5804e52ee2d35328b129602146a8d8254 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 11 Dec 2007 11:30:32 -0800 Subject: [UDP]: Restore missing inDatagrams increments The previous move of the UDP inDatagrams counter caused the counting of encapsulated packets, SUNRPC data (as opposed to call) packets and RXRPC packets to go missing. This patch restores all of these. Signed-off-by: Herbert Xu Signed-off-by: David S.
Miller --- net/ipv6/proc.c | 1 + net/ipv6/udp.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 44937616057..41e9980b3e0 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 87bccec9882..36bdcd2e1b5 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -51,6 +52,7 @@ #include "udp_impl.h" DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; +EXPORT_SYMBOL(udp_stats_in6); static inline int udp_v6_get_port(struct sock *sk, unsigned short snum) { -- cgit v1.2.3 From a59322be07c964e916d15be3df473fb7ba20c41e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 5 Dec 2007 01:53:40 -0800 Subject: [UDP]: Only increment counter on first peek/recv The previous move of the UDP inDatagrams counter caused each peek of the same packet to be counted separately. This may be undesirable. This patch fixes this by adding a bit to sk_buff to record whether this packet has already been seen through skb_recv_datagram. We then only increment the counter when the packet is seen for the first time. The only dodgy part is the fact that skb_recv_datagram doesn't have a good way of returning this new bit of information. So I've added a new function __skb_recv_datagram that does return this and made skb_recv_datagram a wrapper around it. The plan is to eventually replace all uses of skb_recv_datagram with this new function at which time it can be renamed to its proper name. Signed-off-by: Herbert Xu Signed-off-by: David S.
Miller --- net/ipv6/udp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 36bdcd2e1b5..fa640765385 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -123,6 +123,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; unsigned int ulen, copied; + int peeked; int err; int is_udplite = IS_UDPLITE(sk); @@ -133,7 +134,8 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, return ipv6_recv_error(sk, msg, len); try_again: - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), + &peeked, &err); if (!skb) goto out; @@ -166,7 +168,8 @@ try_again: if (err) goto out_free; - UDP6_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); + if (!peeked) + UDP6_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); sock_recv_timestamp(msg, sk, skb); -- cgit v1.2.3 From 5a3e55d68ec5baac578bf32ba67607088c763657 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Fri, 7 Dec 2007 00:38:10 -0800 Subject: [NET]: Multiple namespaces in the all dst_ifdown routines. Move dst entries to a namespace loopback to catch refcounting leaks. Signed-off-by: Denis V. Lunev Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 7 +++++-- net/ipv6/xfrm6_policy.c | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e2c980dbe52..452111fa4c5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -216,9 +216,12 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; + struct net_device *loopback_dev = + dev->nd_net->loopback_dev; - if (dev != init_net.loopback_dev && idev != NULL && idev->dev == dev) { - struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev); + if (dev != loopback_dev && idev != NULL && idev->dev == dev) { + struct inet6_dev *loopback_idev = + in6_dev_get(loopback_dev); if (loopback_idev != NULL) { rt->rt6i_idev = loopback_idev; in6_dev_put(idev); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index a31dd531e19..4d54951cea0 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -229,7 +229,8 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, xdst = (struct xfrm_dst *)dst; if (xdst->u.rt6.rt6i_idev->dev == dev) { - struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev); + struct inet6_dev *loopback_idev = + in6_dev_get(dev->nd_net->loopback_dev); BUG_ON(!loopback_idev); do { -- cgit v1.2.3 From d63bddbe90c4fd924b2155ca92a879393d856170 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 7 Dec 2007 00:40:34 -0800 Subject: [IPV6]: Make fib6_init to return an error code. If there is an error in the initialization function, nothing is followed up to the caller. So I add a return value to be set for the init function. Signed-off-by: Daniel Lezcano Acked-by: Benjamin Thery Signed-off-by: David S. 
Miller --- net/ipv6/ip6_fib.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 31b60a02512..c100b44f2b8 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1473,16 +1473,24 @@ void fib6_run_gc(unsigned long dummy) spin_unlock_bh(&fib6_gc_lock); } -void __init fib6_init(void) +int __init fib6_init(void) { + int ret; fib6_node_kmem = kmem_cache_create("fib6_nodes", sizeof(struct fib6_node), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); - fib6_tables_init(); - __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib); + ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib); + if (ret) + goto out_kmem_cache_create; +out: + return ret; + +out_kmem_cache_create: + kmem_cache_destroy(fib6_node_kmem); + goto out; } void fib6_gc_cleanup(void) -- cgit v1.2.3 From 0013cabab30ec55830ce63d34c0bdd887eb87644 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 7 Dec 2007 00:42:11 -0800 Subject: [IPV6]: Make xfrm6_init to return an error code. The xfrm initialization function does not return any error code, so if there is an error, the caller cannot be advised of it. This patch checks the return code of the different called functions in order to return a successful or failed initialization. Signed-off-by: Daniel Lezcano Acked-by: Benjamin Thery Signed-off-by: David S.
Miller --- net/ipv6/xfrm6_policy.c | 22 +++++++++++++++++----- net/ipv6/xfrm6_state.c | 4 ++-- 2 files changed, 19 insertions(+), 7 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 4d54951cea0..181cf91538f 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -269,9 +269,9 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .fill_dst = xfrm6_fill_dst, }; -static void __init xfrm6_policy_init(void) +static int __init xfrm6_policy_init(void) { - xfrm_policy_register_afinfo(&xfrm6_policy_afinfo); + return xfrm_policy_register_afinfo(&xfrm6_policy_afinfo); } static void xfrm6_policy_fini(void) @@ -279,10 +279,22 @@ static void xfrm6_policy_fini(void) xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo); } -void __init xfrm6_init(void) +int __init xfrm6_init(void) { - xfrm6_policy_init(); - xfrm6_state_init(); + int ret; + + ret = xfrm6_policy_init(); + if (ret) + goto out; + + ret = xfrm6_state_init(); + if (ret) + goto out_policy; +out: + return ret; +out_policy: + xfrm6_policy_fini(); + goto out; } void xfrm6_fini(void) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index a7a7e8fd6a3..dc817e035e2 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -198,9 +198,9 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .transport_finish = xfrm6_transport_finish, }; -void __init xfrm6_state_init(void) +int __init xfrm6_state_init(void) { - xfrm_state_register_afinfo(&xfrm6_state_afinfo); + return xfrm_state_register_afinfo(&xfrm6_state_afinfo); } void xfrm6_state_fini(void) -- cgit v1.2.3 From 9eb87f3f7e0686a256c5bb4f886dede0171245f2 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 7 Dec 2007 00:42:52 -0800 Subject: [IPV6]: Make fib6_rules_init to return an error code. When the fib_rules initialization finished, no return code is provided so there is no way to know, for the caller, if the initialization has been successful or has failed. 
This patch fixes that. Signed-off-by: Daniel Lezcano Acked-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/fib6_rules.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 428c6b0e26d..9ce2e0a6748 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -265,10 +265,23 @@ static int __init fib6_default_rules_init(void) return 0; } -void __init fib6_rules_init(void) +int __init fib6_rules_init(void) { - BUG_ON(fib6_default_rules_init()); - fib_rules_register(&fib6_rules_ops); + int ret; + + ret = fib6_default_rules_init(); + if (ret) + goto out; + + ret = fib_rules_register(&fib6_rules_ops); + if (ret) + goto out_default_rules_init; +out: + return ret; + +out_default_rules_init: + fib_rules_cleanup_ops(&fib6_rules_ops); + goto out; } void fib6_rules_cleanup(void) -- cgit v1.2.3 From 433d49c3bb14b8a2351fe97df8359e4ad0de4c7c Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 7 Dec 2007 00:43:48 -0800 Subject: [IPV6]: Make ip6_route_init to return an error code. The route initialization function does not return any value to notify if the initialization is successful or not. This patch checks all calls made for the initialization in order to return a value for the caller. Unfortunately, proc_net_fops_create will return a NULL pointer if CONFIG_PROC_FS is off, so we cannot check the return code without an ifdef CONFIG_PROC_FS block in the ip6_route_init function. Signed-off-by: Daniel Lezcano Acked-by: Benjamin Thery Signed-off-by: David S.
Miller --- net/ipv6/route.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 11 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 452111fa4c5..d7754abf921 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2468,26 +2468,70 @@ ctl_table ipv6_route_table[] = { #endif -void __init ip6_route_init(void) +int __init ip6_route_init(void) { + int ret; + ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep; - fib6_init(); - proc_net_fops_create(&init_net, "ipv6_route", 0, &ipv6_route_proc_fops); - proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); + ret = fib6_init(); + if (ret) + goto out_kmem_cache; + +#ifdef CONFIG_PROC_FS + ret = -ENOMEM; + if (!proc_net_fops_create(&init_net, "ipv6_route", + 0, &ipv6_route_proc_fops)) + goto out_fib6_init; + + if (!proc_net_fops_create(&init_net, "rt6_stats", + S_IRUGO, &rt6_stats_seq_fops)) + goto out_proc_ipv6_route; +#endif + #ifdef CONFIG_XFRM - xfrm6_init(); + ret = xfrm6_init(); + if (ret) + goto out_proc_rt6_stats; #endif #ifdef CONFIG_IPV6_MULTIPLE_TABLES - fib6_rules_init(); + ret = fib6_rules_init(); + if (ret) + goto xfrm6_init; #endif + ret = -ENOBUFS; + if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) || + __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) || + __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL)) + goto fib6_rules_init; - __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL); - __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL); - __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL); + ret = 0; +out: + return ret; + +fib6_rules_init: +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + fib6_rules_cleanup(); +xfrm6_init: +#endif +#ifdef CONFIG_XFRM + xfrm6_fini(); +out_proc_rt6_stats: 
+#endif +#ifdef CONFIG_PROC_FS + proc_net_remove(&init_net, "rt6_stats"); +out_proc_ipv6_route: + proc_net_remove(&init_net, "ipv6_route"); +out_fib6_init: +#endif + rt6_ifdown(NULL); + fib6_gc_cleanup(); +out_kmem_cache: + kmem_cache_destroy(ip6_dst_ops.kmem_cachep); + goto out; } void ip6_route_cleanup(void) @@ -2495,10 +2539,8 @@ void ip6_route_cleanup(void) #ifdef CONFIG_IPV6_MULTIPLE_TABLES fib6_rules_cleanup(); #endif -#ifdef CONFIG_PROC_FS proc_net_remove(&init_net, "ipv6_route"); proc_net_remove(&init_net, "rt6_stats"); -#endif #ifdef CONFIG_XFRM xfrm6_fini(); #endif -- cgit v1.2.3 From e2fddf5e96df4ac26f2e9ce63053d51cdf3cfe1e Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 7 Dec 2007 00:44:29 -0800 Subject: [IPV6]: Make af_inet6 to check ip6_route_init return value. The af_inet6 initialization function does not check the return code of the route initialization, so if something goes wrong, the protocol initialization will continue anyway. This patch takes into account the modification made in the different route's initialization subroutines to check the return value and to make the protocol initialization fail. Signed-off-by: Daniel Lezcano Acked-by: Benjamin Thery Signed-off-by: David S.
Miller --- net/ipv6/af_inet6.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 64135e2a309..5ab8ba7a586 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -849,7 +849,9 @@ static int __init inet6_init(void) if (if6_proc_init()) goto proc_if6_fail; #endif - ip6_route_init(); + err = ip6_route_init(); + if (err) + goto ip6_route_fail; ip6_flowlabel_init(); err = addrconf_init(); if (err) @@ -874,6 +876,7 @@ out: addrconf_fail: ip6_flowlabel_cleanup(); ip6_route_cleanup(); +ip6_route_fail: #ifdef CONFIG_PROC_FS if6_proc_exit(); proc_if6_fail: @@ -904,6 +907,7 @@ icmp_fail: cleanup_ipv6_mibs(); out_unregister_sock: sock_unregister(PF_INET6); + rtnl_unregister_all(PF_INET6); out_unregister_raw_proto: proto_unregister(&rawv6_prot); out_unregister_udplite_proto: -- cgit v1.2.3 From f845ab6b7dd872d027c27146c264e46bc16c656a Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 7 Dec 2007 00:45:16 -0800 Subject: [IPV6] route6/fib6: Don't panic a kmem_cache_create. If the kmem_cache_creation fails, the kernel will panic. It is acceptable if the system is booting, but if the ipv6 protocol is compiled as a module and it is loaded after the system has booted, do we want to panic instead of just failing to initialize the protocol? The init function is now returning an error and this one is checked for protocol initialization. So the ipv6 protocol will safely fail. Signed-off-by: Daniel Lezcano Acked-by: Benjamin Thery Signed-off-by: David S.
Miller --- net/ipv6/ip6_fib.c | 5 ++++- net/ipv6/route.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index c100b44f2b8..5fae04506ad 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1478,8 +1478,11 @@ int __init fib6_init(void) int ret; fib6_node_kmem = kmem_cache_create("fib6_nodes", sizeof(struct fib6_node), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!fib6_node_kmem) + return -ENOMEM; + fib6_tables_init(); ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d7754abf921..6f833cacfcf 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2474,7 +2474,10 @@ int __init ip6_route_init(void) ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + SLAB_HWCACHE_ALIGN, NULL); + if (!ip6_dst_ops.kmem_cachep) + return -ENOMEM; + ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep; ret = fib6_init(); -- cgit v1.2.3 From 75314fb38364c81a573cd222f74d792409a7afba Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sat, 8 Dec 2007 00:13:32 -0800 Subject: [IPV6]: create route6 proc init-fini functions Make the proc creation/destruction to be a separate function. That allows to remove the #ifdef CONFIG_PROC_FS in the init/fini function and make them more readable. Signed-off-by: Daniel Lezcano Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 58 ++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 18 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6f833cacfcf..dbdae143ef5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2358,6 +2358,40 @@ static const struct file_operations rt6_stats_seq_fops = { .llseek = seq_lseek, .release = single_release, }; + +static int ipv6_route_proc_init(struct net *net) +{ + int ret = -ENOMEM; + if (!proc_net_fops_create(net, "ipv6_route", + 0, &ipv6_route_proc_fops)) + goto out; + + if (!proc_net_fops_create(net, "rt6_stats", + S_IRUGO, &rt6_stats_seq_fops)) + goto out_ipv6_route; + + ret = 0; +out: + return ret; +out_ipv6_route: + proc_net_remove(net, "ipv6_route"); + goto out; +} + +static void ipv6_route_proc_fini(struct net *net) +{ + proc_net_remove(net, "ipv6_route"); + proc_net_remove(net, "rt6_stats"); +} +#else +static inline int ipv6_route_proc_init(struct net *net) +{ + return 0; +} +static inline void ipv6_route_proc_fini(struct net *net) +{ + return ; +} #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_SYSCTL @@ -2484,21 +2518,14 @@ int __init ip6_route_init(void) if (ret) goto out_kmem_cache; -#ifdef CONFIG_PROC_FS - ret = -ENOMEM; - if (!proc_net_fops_create(&init_net, "ipv6_route", - 0, &ipv6_route_proc_fops)) + ret = ipv6_route_proc_init(&init_net); + if (ret) goto out_fib6_init; - if (!proc_net_fops_create(&init_net, "rt6_stats", - S_IRUGO, &rt6_stats_seq_fops)) - goto out_proc_ipv6_route; -#endif - #ifdef CONFIG_XFRM ret = xfrm6_init(); if (ret) - goto out_proc_rt6_stats; + goto out_proc_init; #endif #ifdef CONFIG_IPV6_MULTIPLE_TABLES ret = fib6_rules_init(); @@ -2522,14 +2549,10 @@ xfrm6_init: #endif #ifdef CONFIG_XFRM xfrm6_fini(); -out_proc_rt6_stats: #endif -#ifdef CONFIG_PROC_FS - proc_net_remove(&init_net, "rt6_stats"); -out_proc_ipv6_route: - proc_net_remove(&init_net, "ipv6_route"); +out_proc_init: + ipv6_route_proc_fini(&init_net); 
out_fib6_init: -#endif rt6_ifdown(NULL); fib6_gc_cleanup(); out_kmem_cache: @@ -2542,8 +2565,7 @@ void ip6_route_cleanup(void) #ifdef CONFIG_IPV6_MULTIPLE_TABLES fib6_rules_cleanup(); #endif - proc_net_remove(&init_net, "ipv6_route"); - proc_net_remove(&init_net, "rt6_stats"); + ipv6_route_proc_fini(&init_net); #ifdef CONFIG_XFRM xfrm6_fini(); #endif -- cgit v1.2.3 From c35b7e72cd48bc7163b6900fb3689fa54b572bba Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sat, 8 Dec 2007 00:14:11 -0800 Subject: [IPV6]: remove ifdef in route6 for xfrm6 The following patch creates the usual static inline functions to disable the xfrm6_init and xfrm6_fini functions when XFRM is off. That allows removing some ifdefs and makes the code a little clearer. Signed-off-by: Daniel Lezcano Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/route.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dbdae143ef5..c4e890abb2d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2522,11 +2522,10 @@ int __init ip6_route_init(void) if (ret) goto out_fib6_init; -#ifdef CONFIG_XFRM ret = xfrm6_init(); if (ret) goto out_proc_init; -#endif + #ifdef CONFIG_IPV6_MULTIPLE_TABLES ret = fib6_rules_init(); if (ret) @@ -2547,9 +2546,7 @@ fib6_rules_init: fib6_rules_cleanup(); xfrm6_init: #endif -#ifdef CONFIG_XFRM xfrm6_fini(); -#endif out_proc_init: ipv6_route_proc_fini(&init_net); out_fib6_init: @@ -2566,9 +2563,7 @@ void ip6_route_cleanup(void) fib6_rules_cleanup(); #endif ipv6_route_proc_fini(&init_net); -#ifdef CONFIG_XFRM xfrm6_fini(); -#endif rt6_ifdown(NULL); fib6_gc_cleanup(); kmem_cache_destroy(ip6_dst_ops.kmem_cachep); -- cgit v1.2.3 From 7e5449c21562f1554d2c355db1ec9d3e4f434288 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sat, 8 Dec 2007 00:14:54 -0800 Subject: [IPV6]: route6 remove ifdef for fib_rules The patch defines the usual static inline functions when the code is
disabled for fib6_rules. This allows removing some ifdefs in the route.c file and makes the code a little clearer. Signed-off-by: Daniel Lezcano Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/route.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c4e890abb2d..11ef456d67c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2526,11 +2526,10 @@ int __init ip6_route_init(void) if (ret) goto out_proc_init; -#ifdef CONFIG_IPV6_MULTIPLE_TABLES ret = fib6_rules_init(); if (ret) goto xfrm6_init; -#endif + ret = -ENOBUFS; if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) || __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) || @@ -2542,10 +2541,8 @@ out: return ret; fib6_rules_init: -#ifdef CONFIG_IPV6_MULTIPLE_TABLES fib6_rules_cleanup(); xfrm6_init: -#endif xfrm6_fini(); out_proc_init: ipv6_route_proc_fini(&init_net); @@ -2559,9 +2556,7 @@ out_kmem_cache: void ip6_route_cleanup(void) { -#ifdef CONFIG_IPV6_MULTIPLE_TABLES fib6_rules_cleanup(); -#endif ipv6_route_proc_fini(&init_net); xfrm6_fini(); rt6_ifdown(NULL); -- cgit v1.2.3 From c69bce20dda7f79160856a338298d65a284ba303 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Jan 2008 22:31:45 -0800 Subject: [NET]: Remove unused "mibalign" argument for snmp_mib_init(). With fixes from Arnaldo Carvalho de Melo. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S.
Miller --- net/ipv6/addrconf.c | 9 +++------ net/ipv6/af_inet6.c | 17 ++++++++--------- 2 files changed, 11 insertions(+), 15 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 95cf3aa41c9..ba6f7925c17 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -256,16 +256,13 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, static int snmp6_alloc_dev(struct inet6_dev *idev) { if (snmp_mib_init((void **)idev->stats.ipv6, - sizeof(struct ipstats_mib), - __alignof__(struct ipstats_mib)) < 0) + sizeof(struct ipstats_mib)) < 0) goto err_ip; if (snmp_mib_init((void **)idev->stats.icmpv6, - sizeof(struct icmpv6_mib), - __alignof__(struct icmpv6_mib)) < 0) + sizeof(struct icmpv6_mib)) < 0) goto err_icmp; if (snmp_mib_init((void **)idev->stats.icmpv6msg, - sizeof(struct icmpv6msg_mib), - __alignof__(struct icmpv6msg_mib)) < 0) + sizeof(struct icmpv6msg_mib)) < 0) goto err_icmpmsg; return 0; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 5ab8ba7a586..90d2f723fc4 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -714,20 +714,19 @@ EXPORT_SYMBOL_GPL(ipv6_opt_accepted); static int __init init_ipv6_mibs(void) { - if (snmp_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib), - __alignof__(struct ipstats_mib)) < 0) + if (snmp_mib_init((void **)ipv6_statistics, + sizeof(struct ipstats_mib)) < 0) goto err_ip_mib; - if (snmp_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib), - __alignof__(struct icmpv6_mib)) < 0) + if (snmp_mib_init((void **)icmpv6_statistics, + sizeof(struct icmpv6_mib)) < 0) goto err_icmp_mib; if (snmp_mib_init((void **)icmpv6msg_statistics, - sizeof (struct icmpv6msg_mib), __alignof__(struct icmpv6_mib)) < 0) + sizeof(struct icmpv6msg_mib)) < 0) goto err_icmpmsg_mib; - if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib), - __alignof__(struct udp_mib)) < 0) + if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib)) < 0) goto 
err_udp_mib; - if (snmp_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib), - __alignof__(struct udp_mib)) < 0) + if (snmp_mib_init((void **)udplite_stats_in6, + sizeof (struct udp_mib)) < 0) goto err_udplite_mib; return 0; -- cgit v1.2.3 From 0a3e78ac2c555441f5bc00588070058533bc8d6b Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 11 Dec 2007 02:23:18 -0800 Subject: [IPV6]: make flowlabel to return an error This patch makes the flowlab subsystem to return an error code and makes some cleanup with procfs ifdefs. The af_inet6 will use the flowlabel init return code to check the initialization was correct. Signed-off-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 5 ++++- net/ipv6/ip6_flowlabel.c | 30 +++++++++++++++++++++++------- 2 files changed, 27 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 90d2f723fc4..614f3d905dd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -851,7 +851,9 @@ static int __init inet6_init(void) err = ip6_route_init(); if (err) goto ip6_route_fail; - ip6_flowlabel_init(); + err = ip6_flowlabel_init(); + if (err) + goto ip6_flowlabel_fail; err = addrconf_init(); if (err) goto addrconf_fail; @@ -874,6 +876,7 @@ out: addrconf_fail: ip6_flowlabel_cleanup(); +ip6_flowlabel_fail: ip6_route_cleanup(); ip6_route_fail: #ifdef CONFIG_PROC_FS diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b12cc22e774..d0babea8981 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -692,20 +692,36 @@ static const struct file_operations ip6fl_seq_fops = { .llseek = seq_lseek, .release = seq_release_private, }; -#endif +static int ip6_flowlabel_proc_init(struct net *net) +{ + if (!proc_net_fops_create(net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops)) + return -ENOMEM; + return 0; +} -void ip6_flowlabel_init(void) +static void ip6_flowlabel_proc_fini(struct net *net) { -#ifdef CONFIG_PROC_FS - 
proc_net_fops_create(&init_net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops); + proc_net_remove(net, "ip6_flowlabel"); +} +#else +static inline int ip6_flowlabel_proc_init(struct net *net) +{ + return 0; +} +static inline void ip6_flowlabel_proc_fini(struct net *net) +{ + return ; +} #endif + +int ip6_flowlabel_init(void) +{ + return ip6_flowlabel_proc_init(&init_net); } void ip6_flowlabel_cleanup(void) { del_timer(&ip6_fl_gc_timer); -#ifdef CONFIG_PROC_FS - proc_net_remove(&init_net, "ip6_flowlabel"); -#endif + ip6_flowlabel_proc_fini(&init_net); } -- cgit v1.2.3 From 248b238dc960a42aa235057ba0a51a98ae2b0f0d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 11 Dec 2007 02:23:54 -0800 Subject: [IPV6]: make extended headers to return an error at initialization This patch factorize the code for the differents init functions for rthdr, nodata, destopt in a single function exthdrs_init. This function returns an error so the af_inet6 module can check correctly the initialization. Signed-off-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 10 ++++++--- net/ipv6/exthdrs.c | 64 ++++++++++++++++++++++++++++++++--------------------- 2 files changed, 46 insertions(+), 28 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 614f3d905dd..442c298c1d7 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -859,10 +859,11 @@ static int __init inet6_init(void) goto addrconf_fail; /* Init v6 extension headers. */ - ipv6_rthdr_init(); + err = ipv6_exthdrs_init(); + if (err) + goto ipv6_exthdrs_fail; + ipv6_frag_init(); - ipv6_nodata_init(); - ipv6_destopt_init(); /* Init v6 transport protocols. */ udpv6_init(); @@ -874,6 +875,8 @@ static int __init inet6_init(void) out: return err; +ipv6_exthdrs_fail: + addrconf_cleanup(); addrconf_fail: ip6_flowlabel_cleanup(); ip6_flowlabel_fail: @@ -932,6 +935,7 @@ static void __exit inet6_exit(void) /* Cleanup code parts. 
*/ ipv6_packet_cleanup(); + ipv6_exthdrs_exit(); addrconf_cleanup(); ip6_flowlabel_cleanup(); ip6_route_cleanup(); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index cee06b1655c..2df34ed276f 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -308,28 +308,6 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) return -1; } -static struct inet6_protocol destopt_protocol = { - .handler = ipv6_destopt_rcv, - .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, -}; - -void __init ipv6_destopt_init(void) -{ - if (inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS) < 0) - printk(KERN_ERR "ipv6_destopt_init: Could not register protocol\n"); -} - -static struct inet6_protocol nodata_protocol = { - .handler = dst_discard, - .flags = INET6_PROTO_NOPOLICY, -}; - -void __init ipv6_nodata_init(void) -{ - if (inet6_add_protocol(&nodata_protocol, IPPROTO_NONE) < 0) - printk(KERN_ERR "ipv6_nodata_init: Could not register protocol\n"); -} - /******************************** Routing header. 
********************************/ @@ -527,12 +505,48 @@ static struct inet6_protocol rthdr_protocol = { .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, }; -void __init ipv6_rthdr_init(void) +static struct inet6_protocol destopt_protocol = { + .handler = ipv6_destopt_rcv, + .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, +}; + +static struct inet6_protocol nodata_protocol = { + .handler = dst_discard, + .flags = INET6_PROTO_NOPOLICY, +}; + +int __init ipv6_exthdrs_init(void) { - if (inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING) < 0) - printk(KERN_ERR "ipv6_rthdr_init: Could not register protocol\n"); + int ret; + + ret = inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING); + if (ret) + goto out; + + ret = inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS); + if (ret) + goto out_rthdr; + + ret = inet6_add_protocol(&nodata_protocol, IPPROTO_NONE); + if (ret) + goto out_destopt; + +out: + return ret; +out_rthdr: + inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING); +out_destopt: + inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS); + goto out; }; +void ipv6_exthdrs_exit(void) +{ + inet6_del_protocol(&nodata_protocol, IPPROTO_NONE); + inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS); + inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING); +} + /********************************** Hop-by-hop options. **********************************/ -- cgit v1.2.3 From 853cbbaaa4ccdf221be5ab6afe967aa9998546b7 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 11 Dec 2007 02:24:29 -0800 Subject: [IPV6]: make frag to return an error at initialization This patch makes the frag_init to return an error code, so the af_inet6 module can handle the error. Signed-off-by: Daniel Lezcano Signed-off-by: David S. 
Miller --- net/ipv6/af_inet6.c | 8 ++++++-- net/ipv6/reassembly.c | 16 +++++++++++++--- 2 files changed, 19 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 442c298c1d7..a75c4bc9281 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -863,7 +863,9 @@ static int __init inet6_init(void) if (err) goto ipv6_exthdrs_fail; - ipv6_frag_init(); + err = ipv6_frag_init(); + if (err) + goto ipv6_frag_fail; /* Init v6 transport protocols. */ udpv6_init(); @@ -875,6 +877,8 @@ static int __init inet6_init(void) out: return err; +ipv6_frag_fail: + ipv6_exthdrs_exit(); ipv6_exthdrs_fail: addrconf_cleanup(); addrconf_fail: @@ -934,7 +938,7 @@ static void __exit inet6_exit(void) /* Cleanup code parts. */ ipv6_packet_cleanup(); - + ipv6_frag_exit(); ipv6_exthdrs_exit(); addrconf_cleanup(); ip6_flowlabel_cleanup(); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 76c88a93b9b..bf4173daecb 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -632,11 +632,13 @@ static struct inet6_protocol frag_protocol = .flags = INET6_PROTO_NOPOLICY, }; -void __init ipv6_frag_init(void) +int __init ipv6_frag_init(void) { - if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0) - printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n"); + int ret; + ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT); + if (ret) + goto out; ip6_frags.ctl = &ip6_frags_ctl; ip6_frags.hashfn = ip6_hashfn; ip6_frags.constructor = ip6_frag_init; @@ -646,4 +648,12 @@ void __init ipv6_frag_init(void) ip6_frags.match = ip6_frag_match; ip6_frags.frag_expire = ip6_frag_expire; inet_frags_init(&ip6_frags); +out: + return ret; +} + +void ipv6_frag_exit(void) +{ + inet_frags_fini(&ip6_frags); + inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); } -- cgit v1.2.3 From 87c3efbfdd1f98af14a1f60ff19f73d9a8d8da98 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 11 Dec 2007 02:25:01 -0800 Subject: [IPV6]: 
make inet6_register_protosw to return an error code This patch makes the inet6_register_protosw to return an error code. The different protocols can be aware the registration was successful or not and can pass the error to the initial caller, af_inet6. Signed-off-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a75c4bc9281..53b06de696b 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -565,21 +565,23 @@ static struct inet_protosw rawv6_protosw = { .flags = INET_PROTOSW_REUSE, }; -void -inet6_register_protosw(struct inet_protosw *p) +int inet6_register_protosw(struct inet_protosw *p) { struct list_head *lh; struct inet_protosw *answer; - int protocol = p->protocol; struct list_head *last_perm; + int protocol = p->protocol; + int ret; spin_lock_bh(&inetsw6_lock); + ret = -EINVAL; if (p->type >= SOCK_MAX) goto out_illegal; /* If we are trying to override a permanent protocol, bail. */ answer = NULL; + ret = -EPERM; last_perm = &inetsw6[p->type]; list_for_each(lh, &inetsw6[p->type]) { answer = list_entry(lh, struct inet_protosw, list); @@ -603,9 +605,10 @@ inet6_register_protosw(struct inet_protosw *p) * system automatically returns to the old behavior. */ list_add_rcu(&p->list, last_perm); + ret = 0; out: spin_unlock_bh(&inetsw6_lock); - return; + return ret; out_permanent: printk(KERN_ERR "Attempt to override permanent protocol %d.\n", -- cgit v1.2.3 From 7f4e4868f3ce0e946f116c28fa4fe033be5e4ba9 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 11 Dec 2007 02:25:35 -0800 Subject: [IPV6]: make the protocol initialization to return an error code This patchset makes the different protocols to return an error code, so the af_inet6 module can check the initialization was correct or not. 
The raw6 was taken into account to be consistent with the rest of the protocols, but the registration is at the same place. Because the raw6 has its own init function, the proto and the ops structure can be moved inside the raw6.c file. Signed-off-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 77 +++++++++++++++++++++--------------------------- net/ipv6/ipv6_sockglue.c | 3 +- net/ipv6/raw.c | 52 ++++++++++++++++++++++++++++++++ net/ipv6/tcp_ipv6.c | 36 +++++++++++++++++----- net/ipv6/udp.c | 26 +++++++++++++--- net/ipv6/udplite.c | 25 +++++++++++++--- 6 files changed, 159 insertions(+), 60 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 53b06de696b..34c20533ba5 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -529,42 +529,6 @@ static struct net_proto_family inet6_family_ops = { .owner = THIS_MODULE, }; -/* Same as inet6_dgram_ops, sans udp_poll. */ -static const struct proto_ops inet6_sockraw_ops = { - .family = PF_INET6, - .owner = THIS_MODULE, - .release = inet6_release, - .bind = inet6_bind, - .connect = inet_dgram_connect, /* ok */ - .socketpair = sock_no_socketpair, /* a do nothing */ - .accept = sock_no_accept, /* a do nothing */ - .getname = inet6_getname, - .poll = datagram_poll, /* ok */ - .ioctl = inet6_ioctl, /* must change */ - .listen = sock_no_listen, /* ok */ - .shutdown = inet_shutdown, /* ok */ - .setsockopt = sock_common_setsockopt, /* ok */ - .getsockopt = sock_common_getsockopt, /* ok */ - .sendmsg = inet_sendmsg, /* ok */ - .recvmsg = sock_common_recvmsg, /* ok */ - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif -}; - -static struct inet_protosw rawv6_protosw = { - .type = SOCK_RAW, - .protocol = IPPROTO_IP, /* wild card */ - .prot = &rawv6_prot, - .ops = &inet6_sockraw_ops, - .capability = CAP_NET_RAW, - 
.no_check = UDP_CSUM_DEFAULT, - .flags = INET_PROTOSW_REUSE, -}; - int inet6_register_protosw(struct inet_protosw *p) { struct list_head *lh; @@ -771,7 +735,6 @@ static int __init inet6_init(void) __this_module.can_unload = &ipv6_unload; #endif #endif - err = proto_register(&tcpv6_prot, 1); if (err) goto out; @@ -796,14 +759,16 @@ static int __init inet6_init(void) /* We MUST register RAW sockets before we create the ICMP6, * IGMP6, or NDISC control sockets. */ - inet6_register_protosw(&rawv6_protosw); + err = rawv6_init(); + if (err) + goto out_unregister_raw_proto; /* Register the family here so that the init calls below will * be able to create sockets. (?? is this dangerous ??) */ err = sock_register(&inet6_family_ops); if (err) - goto out_unregister_raw_proto; + goto out_sock_register_fail; /* Initialise ipv6 mibs */ err = init_ipv6_mibs(); @@ -871,15 +836,32 @@ static int __init inet6_init(void) goto ipv6_frag_fail; /* Init v6 transport protocols. */ - udpv6_init(); - udplitev6_init(); - tcpv6_init(); + err = udpv6_init(); + if (err) + goto udpv6_fail; - ipv6_packet_init(); - err = 0; + err = udplitev6_init(); + if (err) + goto udplitev6_fail; + + err = tcpv6_init(); + if (err) + goto tcpv6_fail; + + err = ipv6_packet_init(); + if (err) + goto ipv6_packet_fail; out: return err; +ipv6_packet_fail: + tcpv6_exit(); +tcpv6_fail: + udplitev6_exit(); +udplitev6_fail: + udpv6_exit(); +udpv6_fail: + ipv6_frag_exit(); ipv6_frag_fail: ipv6_exthdrs_exit(); ipv6_exthdrs_fail: @@ -920,6 +902,8 @@ icmp_fail: out_unregister_sock: sock_unregister(PF_INET6); rtnl_unregister_all(PF_INET6); +out_sock_register_fail: + rawv6_exit(); out_unregister_raw_proto: proto_unregister(&rawv6_prot); out_unregister_udplite_proto: @@ -939,6 +923,10 @@ static void __exit inet6_exit(void) /* Disallow any further netlink messages */ rtnl_unregister_all(PF_INET6); + udpv6_exit(); + udplitev6_exit(); + tcpv6_exit(); + /* Cleanup code parts. 
*/ ipv6_packet_cleanup(); ipv6_frag_exit(); @@ -961,6 +949,7 @@ static void __exit inet6_exit(void) igmp6_cleanup(); ndisc_cleanup(); icmpv6_cleanup(); + rawv6_exit(); #ifdef CONFIG_SYSCTL ipv6_sysctl_unregister(); #endif diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 8c5f80fd03a..20fece4ad3d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1128,9 +1128,10 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_ipv6_getsockopt); #endif -void __init ipv6_packet_init(void) +int __init ipv6_packet_init(void) { dev_add_pack(&ipv6_packet_type); + return 0; } void ipv6_packet_cleanup(void) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index b34631e1b01..850b83e430b 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1273,3 +1273,55 @@ void raw6_proc_exit(void) proc_net_remove(&init_net, "raw6"); } #endif /* CONFIG_PROC_FS */ + +/* Same as inet6_dgram_ops, sans udp_poll. */ +static const struct proto_ops inet6_sockraw_ops = { + .family = PF_INET6, + .owner = THIS_MODULE, + .release = inet6_release, + .bind = inet6_bind, + .connect = inet_dgram_connect, /* ok */ + .socketpair = sock_no_socketpair, /* a do nothing */ + .accept = sock_no_accept, /* a do nothing */ + .getname = inet6_getname, + .poll = datagram_poll, /* ok */ + .ioctl = inet6_ioctl, /* must change */ + .listen = sock_no_listen, /* ok */ + .shutdown = inet_shutdown, /* ok */ + .setsockopt = sock_common_setsockopt, /* ok */ + .getsockopt = sock_common_getsockopt, /* ok */ + .sendmsg = inet_sendmsg, /* ok */ + .recvmsg = sock_common_recvmsg, /* ok */ + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, +#endif +}; + +static struct inet_protosw rawv6_protosw = { + .type = SOCK_RAW, + .protocol = IPPROTO_IP, /* wild card */ + .prot = &rawv6_prot, + .ops = &inet6_sockraw_ops, + .capability = 
CAP_NET_RAW, + .no_check = UDP_CSUM_DEFAULT, + .flags = INET_PROTOSW_REUSE, +}; + +int __init rawv6_init(void) +{ + int ret; + + ret = inet6_register_protosw(&rawv6_protosw); + if (ret) + goto out; +out: + return ret; +} + +void __exit rawv6_exit(void) +{ + inet6_unregister_protosw(&rawv6_protosw); +} diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 93980c3b83e..9544beb6d1c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2166,14 +2166,36 @@ static struct inet_protosw tcpv6_protosw = { INET_PROTOSW_ICSK, }; -void __init tcpv6_init(void) +int __init tcpv6_init(void) { + int ret; + + ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP); + if (ret) + goto out; + /* register inet6 protocol */ - if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0) - printk(KERN_ERR "tcpv6_init: Could not register protocol\n"); - inet6_register_protosw(&tcpv6_protosw); + ret = inet6_register_protosw(&tcpv6_protosw); + if (ret) + goto out_tcpv6_protocol; + + ret = inet_csk_ctl_sock_create(&tcp6_socket, PF_INET6, + SOCK_RAW, IPPROTO_TCP); + if (ret) + goto out_tcpv6_protosw; +out: + return ret; - if (inet_csk_ctl_sock_create(&tcp6_socket, PF_INET6, SOCK_RAW, - IPPROTO_TCP) < 0) - panic("Failed to create the TCPv6 control socket.\n"); +out_tcpv6_protocol: + inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); +out_tcpv6_protosw: + inet6_unregister_protosw(&tcpv6_protosw); + goto out; +} + +void __exit tcpv6_exit(void) +{ + sock_release(tcp6_socket); + inet6_unregister_protosw(&tcpv6_protosw); + inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index fa640765385..1e3bd39f54e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1016,9 +1016,27 @@ static struct inet_protosw udpv6_protosw = { }; -void __init udpv6_init(void) +int __init udpv6_init(void) { - if (inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP) < 0) - printk(KERN_ERR "udpv6_init: Could not register protocol\n"); - inet6_register_protosw(&udpv6_protosw); 
+ int ret; + + ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP); + if (ret) + goto out; + + ret = inet6_register_protosw(&udpv6_protosw); + if (ret) + goto out_udpv6_protocol; +out: + return ret; + +out_udpv6_protocol: + inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); + goto out; +} + +void __exit udpv6_exit(void) +{ + inet6_unregister_protosw(&udpv6_protosw); + inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); } diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 5a0379f7141..f20b376689f 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -77,12 +77,29 @@ static struct inet_protosw udplite6_protosw = { .flags = INET_PROTOSW_PERMANENT, }; -void __init udplitev6_init(void) +int __init udplitev6_init(void) { - if (inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE) < 0) - printk(KERN_ERR "%s: Could not register.\n", __FUNCTION__); + int ret; - inet6_register_protosw(&udplite6_protosw); + ret = inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); + if (ret) + goto out; + + ret = inet6_register_protosw(&udplite6_protosw); + if (ret) + goto out_udplitev6_protocol; +out: + return ret; + +out_udplitev6_protocol: + inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); + goto out; +} + +void __exit udplitev6_exit(void) +{ + inet6_unregister_protosw(&udplite6_protosw); + inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); } #ifdef CONFIG_PROC_FS -- cgit v1.2.3 From d5422efe680fc55010c6ddca2370ca9548a96355 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 12 Dec 2007 10:44:16 -0800 Subject: [IPSEC]: Added xfrm_decode_session_reverse and xfrmX_policy_check_reverse RFC 4301 requires us to relookup ICMP traffic that does not match any policies using the reverse of its payload. This patch adds the functions xfrm_decode_session_reverse and xfrmX_policy_check_reverse so we can get the reverse flow to perform such a lookup. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/xfrm6_policy.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 181cf91538f..d26b7dc3f33 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -123,7 +123,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) } static inline void -_decode_session6(struct sk_buff *skb, struct flowi *fl) +_decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) { u16 offset = skb_network_header_len(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -132,8 +132,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl) u8 nexthdr = nh[IP6CB(skb)->nhoff]; memset(fl, 0, sizeof(struct flowi)); - ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr); - ipv6_addr_copy(&fl->fl6_src, &hdr->saddr); + ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr); + ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr); while (pskb_may_pull(skb, nh + offset + 1 - skb->data)) { nh = skb_network_header(skb); @@ -156,8 +156,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl) if (pskb_may_pull(skb, nh + offset + 4 - skb->data)) { __be16 *ports = (__be16 *)exthdr; - fl->fl_ip_sport = ports[0]; - fl->fl_ip_dport = ports[1]; + fl->fl_ip_sport = ports[!!reverse]; + fl->fl_ip_dport = ports[!reverse]; } fl->proto = nexthdr; return; -- cgit v1.2.3 From 8b7817f3a959ed99d7443afc12f78a7e1fcc2063 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 12 Dec 2007 10:44:43 -0800 Subject: [IPSEC]: Add ICMP host relookup support RFC 4301 requires us to relookup ICMP traffic that does not match any policies using the reverse of its payload. This patch implements this for ICMP traffic that originates from or terminates on localhost. This is activated on outbound with the new policy flag XFRM_POLICY_ICMP, and on inbound by the new state flag XFRM_STATE_ICMP. 
On inbound the policy check is now performed by the ICMP protocol so that it can repeat the policy check where necessary. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 93c96cfd5ee..c0bea7bfaa8 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -86,7 +87,7 @@ static int icmpv6_rcv(struct sk_buff *skb); static struct inet6_protocol icmpv6_protocol = { .handler = icmpv6_rcv, - .flags = INET6_PROTO_FINAL, + .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; static __inline__ int icmpv6_xmit_lock(void) @@ -310,8 +311,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, struct ipv6_pinfo *np; struct in6_addr *saddr = NULL; struct dst_entry *dst; + struct dst_entry *dst2; struct icmp6hdr tmp_hdr; struct flowi fl; + struct flowi fl2; struct icmpv6_msg msg; int iif = 0; int addr_type = 0; @@ -418,9 +421,42 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, goto out_dst_release; } - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + /* No need to clone since we're just using its address. 
*/ + dst2 = dst; + + err = xfrm_lookup(&dst, &fl, sk, 0); + switch (err) { + case 0: + if (dst != dst2) + goto route_done; + break; + case -EPERM: + dst = NULL; + break; + default: + goto out; + } + + if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6)) + goto out; + + if (ip6_dst_lookup(sk, &dst2, &fl)) goto out; + err = xfrm_lookup(&dst2, &fl, sk, XFRM_LOOKUP_ICMP); + if (err == -ENOENT) { + if (!dst) + goto out; + goto route_done; + } + + dst_release(dst); + dst = dst2; + + if (err) + goto out; + +route_done: if (ipv6_addr_is_multicast(&fl.fl6_dst)) hlimit = np->mcast_hops; else @@ -608,6 +644,22 @@ static int icmpv6_rcv(struct sk_buff *skb) struct icmp6hdr *hdr; int type; + if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb) && + skb->sp->xvec[skb->sp->len - 1]->props.flags & XFRM_STATE_ICMP) { + int nh; + + if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr))) + goto drop_no_count; + + nh = skb_network_offset(skb); + skb_set_network_header(skb, sizeof(*hdr)); + + if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb)) + goto drop_no_count; + + skb_set_network_header(skb, nh); + } + ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS); saddr = &ipv6_hdr(skb)->saddr; @@ -630,8 +682,7 @@ static int icmpv6_rcv(struct sk_buff *skb) } } - if (!pskb_pull(skb, sizeof(struct icmp6hdr))) - goto discard_it; + __skb_pull(skb, sizeof(*hdr)); hdr = icmp6_hdr(skb); @@ -717,6 +768,7 @@ static int icmpv6_rcv(struct sk_buff *skb) discard_it: ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS); +drop_no_count: kfree_skb(skb); return 0; } -- cgit v1.2.3 From 7233b9f33e72ca477034ff5cf901c89efba3a5bc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 12 Dec 2007 18:47:48 -0800 Subject: [IPSEC]: Fix reversed ICMP6 policy check The policy check I added for ICMP on IPv6 is reversed. This patch fixes that. It also adds an skb->sp check so that unprotected packets that fail the policy check do not crash the machine. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index c0bea7bfaa8..1659d2fb01f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -644,7 +644,7 @@ static int icmpv6_rcv(struct sk_buff *skb) struct icmp6hdr *hdr; int type; - if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb) && + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb) && skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags & XFRM_STATE_ICMP) { int nh; -- cgit v1.2.3 From bb72845e699d3c84e5f861b51db686107a51dea5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 12 Dec 2007 18:48:58 -0800 Subject: [IPSEC]: Make callers of xfrm_lookup to use XFRM_LOOKUP_WAIT This patch converts all callers of xfrm_lookup that used an explicit value of 1 to indicate blocking to use the new flag XFRM_LOOKUP_WAIT. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 5d4245ab418..f49a06aa97d 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -177,7 +177,7 @@ ipv4_connected: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) { + if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 850b83e430b..2663fd1323e 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -871,7 +871,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) { + if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) diff --git
a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9544beb6d1c..0ef99864ef8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -265,7 +265,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) { + if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1e3bd39f54e..8cbdcc9602d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -757,7 +757,7 @@ do_udp_sendmsg: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) { + if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) -- cgit v1.2.3 From aebcf82c1fe9231be5cb4f9c1362d5db39e7d7b2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 12 Dec 2007 18:54:16 -0800 Subject: [IPSEC]: Do not let packets pass when ICMP flag is off This fixes a logical error in ICMP policy checks which lets packets through if the state ICMP flag is off. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/icmp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 1659d2fb01f..c3bbd868730 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -644,10 +644,13 @@ static int icmpv6_rcv(struct sk_buff *skb) struct icmp6hdr *hdr; int type; - if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb) && skb->sp && - skb->sp->xvec[skb->sp->len - 1]->props.flags & XFRM_STATE_ICMP) { + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { int nh; + if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags & + XFRM_STATE_ICMP)) + goto drop_no_count; + if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr))) goto drop_no_count; -- cgit v1.2.3 From 09f7709f4929666006931f1d4efc498a6d419bbc Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 13 Dec 2007 05:34:58 -0800 Subject: [IPV6]: fix section mismatch warnings Removed useless and buggy __exit section in the different ipv6 subsystems. Otherwise they will be called inside an init section during rollbacking in case of an error in the protocol initialization. Signed-off-by: Daniel Lezcano Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 2 +- net/ipv6/udplite.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ba6f7925c17..6a48bb88f46 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4223,7 +4223,7 @@ errout: return err; } -void __exit addrconf_cleanup(void) +void addrconf_cleanup(void) { struct net_device *dev; struct inet6_ifaddr *ifa; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 2663fd1323e..45a580e843d 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1321,7 +1321,7 @@ out: return ret; } -void __exit rawv6_exit(void) +void rawv6_exit(void) { inet6_unregister_protosw(&rawv6_protosw); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0ef99864ef8..0268e118f0b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2193,7 +2193,7 @@ out_tcpv6_protosw: goto out; } -void __exit tcpv6_exit(void) +void tcpv6_exit(void) { sock_release(tcp6_socket); inet6_unregister_protosw(&tcpv6_protosw); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8cbdcc9602d..8b3e6d61bf5 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1035,7 +1035,7 @@ out_udpv6_protocol: goto out; } -void __exit udpv6_exit(void) +void udpv6_exit(void) { inet6_unregister_protosw(&udpv6_protosw); inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index f20b376689f..39f070518e6 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -96,7 +96,7 @@ out_udplitev6_protocol: goto out; } -void __exit udplitev6_exit(void) +void udplitev6_exit(void) { inet6_unregister_protosw(&udplite6_protosw); inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); -- cgit v1.2.3 From 528c4ceb427dad4a3893ba3d1913782efae0cd0e Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 13 Dec 2007 09:45:12 -0800 Subject: [IPV6]: Always pass a valid nl_info to inet6_rt_notify. 
This makes the code in the inet6_rt_notify more straightforward and provides ground for namespace passing. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 3 ++- net/ipv6/route.c | 27 +++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5fae04506ad..df05c6f2189 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1315,6 +1315,7 @@ static int fib6_walk(struct fib6_walker_t *w) static int fib6_clean_node(struct fib6_walker_t *w) { + struct nl_info info = {}; int res; struct rt6_info *rt; struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w); @@ -1323,7 +1324,7 @@ static int fib6_clean_node(struct fib6_walker_t *w) res = c->func(rt, c->arg); if (res < 0) { w->leaf = rt; - res = fib6_del(rt, NULL); + res = fib6_del(rt, &info); if (res) { #if RT6_DEBUG >= 2 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 11ef456d67c..b80ef578420 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -609,7 +609,8 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) int ip6_ins_rt(struct rt6_info *rt) { - return __ip6_ins_rt(rt, NULL); + struct nl_info info = {}; + return __ip6_ins_rt(rt, &info); } static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, @@ -1266,7 +1267,8 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) int ip6_del_rt(struct rt6_info *rt) { - return __ip6_del_rt(rt, NULL); + struct nl_info info = {}; + return __ip6_del_rt(rt, &info); } static int ip6_route_del(struct fib6_config *cfg) @@ -2243,29 +2245,26 @@ errout: void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) { struct sk_buff *skb; - u32 pid = 0, seq = 0; - struct nlmsghdr *nlh = NULL; - int err = -ENOBUFS; - - if (info) { - pid = info->pid; - nlh = info->nlh; -
if (nlh) - seq = nlh->nlmsg_seq; - } + u32 seq; + int err; + + err = -ENOBUFS; + seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0; skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); if (skb == NULL) goto errout; - err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0); + err = rt6_fill_node(skb, rt, NULL, NULL, 0, + event, info->pid, seq, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); + err = rtnl_notify(skb, &init_net, info->pid, + RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any()); errout: if (err < 0) rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err); -- cgit v1.2.3 From 8a4a50f98bc13670bee94c40b94bc169e1263cd9 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Thu, 13 Dec 2007 09:47:00 -0800 Subject: [IPV6] sit: Rebinding of SIT tunnels to other interfaces This is similar to the change already done for IPIP tunnels. Once created, a SIT tunnel can't be bound to another device. To reproduce: # create a tunnel: ip tunnel add tunneltest0 mode sit remote 10.0.0.1 dev eth0 # try to change the bounding device from eth0 to eth1: ip tunnel change tunneltest0 dev eth1 # show the result: ip tunnel show tunneltest0 tunneltest0: ipv6/ip remote 10.0.0.1 local any dev eth0 ttl inherit Notice the bound device has not changed from eth0 to eth1. This patch fixes it. When changing the binding, it also recalculates the MTU according to the new bound device's MTU. Signed-off-by: Michal Schmidt Signed-off-by: David S. 
Miller --- net/ipv6/sit.c | 70 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 28 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index b3b8513e9cb..1c6fddb80b3 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -669,6 +669,42 @@ tx_error: return 0; } +static void ipip6_tunnel_bind_dev(struct net_device *dev) +{ + struct net_device *tdev = NULL; + struct ip_tunnel *tunnel; + struct iphdr *iph; + + tunnel = netdev_priv(dev); + iph = &tunnel->parms.iph; + + if (iph->daddr) { + struct flowi fl = { .nl_u = { .ip4_u = + { .daddr = iph->daddr, + .saddr = iph->saddr, + .tos = RT_TOS(iph->tos) } }, + .oif = tunnel->parms.link, + .proto = IPPROTO_IPV6 }; + struct rtable *rt; + if (!ip_route_output_key(&rt, &fl)) { + tdev = rt->u.dst.dev; + ip_rt_put(rt); + } + dev->flags |= IFF_POINTOPOINT; + } + + if (!tdev && tunnel->parms.link) + tdev = __dev_get_by_index(&init_net, tunnel->parms.link); + + if (tdev) { + dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); + dev->mtu = tdev->mtu - sizeof(struct iphdr); + if (dev->mtu < IPV6_MIN_MTU) + dev->mtu = IPV6_MIN_MTU; + } + dev->iflink = tunnel->parms.link; +} + static int ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) { @@ -740,6 +776,11 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) if (cmd == SIOCCHGTUNNEL) { t->parms.iph.ttl = p.iph.ttl; t->parms.iph.tos = p.iph.tos; + if (t->parms.link != p.link) { + t->parms.link = p.link; + ipip6_tunnel_bind_dev(dev); + netdev_state_change(dev); + } } if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) err = -EFAULT; @@ -808,12 +849,9 @@ static void ipip6_tunnel_setup(struct net_device *dev) static int ipip6_tunnel_init(struct net_device *dev) { - struct net_device *tdev = NULL; struct ip_tunnel *tunnel; - struct iphdr *iph; tunnel = netdev_priv(dev); - iph = &tunnel->parms.iph; tunnel->dev = dev; strcpy(tunnel->parms.name, 
dev->name); @@ -821,31 +859,7 @@ static int ipip6_tunnel_init(struct net_device *dev) memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); - if (iph->daddr) { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = iph->daddr, - .saddr = iph->saddr, - .tos = RT_TOS(iph->tos) } }, - .oif = tunnel->parms.link, - .proto = IPPROTO_IPV6 }; - struct rtable *rt; - if (!ip_route_output_key(&rt, &fl)) { - tdev = rt->u.dst.dev; - ip_rt_put(rt); - } - dev->flags |= IFF_POINTOPOINT; - } - - if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(&init_net, tunnel->parms.link); - - if (tdev) { - dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); - dev->mtu = tdev->mtu - sizeof(struct iphdr); - if (dev->mtu < IPV6_MIN_MTU) - dev->mtu = IPV6_MIN_MTU; - } - dev->iflink = tunnel->parms.link; + ipip6_tunnel_bind_dev(dev); return 0; } -- cgit v1.2.3 From 9055e051b8d4b266054fe511a65a9888d30fa64f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 14 Dec 2007 11:25:26 -0800 Subject: [UDP]: Move udp_stats_in6 into net/ipv4/udp.c Now that external users may increment the counters directly, we need to ensure that udp_stats_in6 is always available. Otherwise we'd either have to require the external users to be built as modules or ipv6 to be built-in. This isn't too bad because udp_stats_in6 is just a pair of pointers plus an EXPORT, e.g., just 40 (16 + 24) bytes on x86-64. Signed-off-by: Herbert Xu Signed-off-by: David S.
Miller --- net/ipv6/udp.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8b3e6d61bf5..c9a97b40551 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -51,9 +51,6 @@ #include #include "udp_impl.h" -DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; -EXPORT_SYMBOL(udp_stats_in6); - static inline int udp_v6_get_port(struct sock *sk, unsigned short snum) { return udp_get_port(sk, snum, ipv6_rcv_saddr_equal); -- cgit v1.2.3 From 72f36ec14fb5006886bc0655ec2b43bf1ad53a26 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 17 Dec 2007 21:48:02 -0800 Subject: [NETFILTER]: ip6_tables: kill a few useless defines/forward declarations Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6_tables.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index d3e884a5c6a..b73e6b6d554 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -31,9 +31,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("IPv6 packet filter"); -#define IPV6_HDR_LEN (sizeof(struct ipv6hdr)) -#define IPV6_OPTHDR_LEN (sizeof(struct ipv6_opt_hdr)) - /*#define DEBUG_IP_FIREWALL*/ /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */ /*#define DEBUG_IP_FIREWALL_USER*/ @@ -76,12 +73,6 @@ do { \ Hence the start of any table is given by get_table() below. 
*/ -#if 0 -#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) -#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) -#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0) -#endif - /* Check for an extension */ int ip6t_ext_hdr(u8 nexthdr) @@ -399,9 +390,8 @@ ip6t_do_table(struct sk_buff *skb, goto no_match; ADD_COUNTER(e->counters, - ntohs(ipv6_hdr(skb)->payload_len) - + IPV6_HDR_LEN, - 1); + ntohs(ipv6_hdr(skb)->payload_len) + + sizeof(struct ipv6hdr), 1); t = ip6t_get_target(e); IP_NF_ASSERT(t->u.kernel.target); @@ -657,8 +647,6 @@ err: return ret; } -static struct xt_target ip6t_standard_target; - static inline int check_entry(struct ip6t_entry *e, const char *name, unsigned int size, unsigned int *i) -- cgit v1.2.3 From f173c8a1f2c0ca39f45bb15b82ad5e6fe908556d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 17 Dec 2007 21:48:17 -0800 Subject: [NETFILTER]: ip6_tables: move entry, match and target checks to seperate functions Resync with ip_tables.c as preparation for compat support. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/ip6_tables.c | 129 +++++++++++++++++++++++++--------------- 1 file changed, 82 insertions(+), 47 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index b73e6b6d554..655c221acd1 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -607,11 +607,55 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i) } static inline int -check_match(struct ip6t_entry_match *m, - const char *name, - const struct ip6t_ip6 *ipv6, - unsigned int hookmask, - unsigned int *i) +check_entry(struct ip6t_entry *e, const char *name) +{ + struct ip6t_entry_target *t; + + if (!ip6_checkentry(&e->ipv6)) { + duprintf("ip_tables: ip check failed %p %s.\n", e, name); + return -EINVAL; + } + + if (e->target_offset + sizeof(struct ip6t_entry_target) > + e->next_offset) + return -EINVAL; + + t = ip6t_get_target(e); + if (e->target_offset + t->u.target_size > e->next_offset) + return -EINVAL; + + return 0; +} + +static inline int check_match(struct ip6t_entry_match *m, const char *name, + const struct ip6t_ip6 *ipv6, + unsigned int hookmask, unsigned int *i) +{ + struct xt_match *match; + int ret; + + match = m->u.kernel.match; + ret = xt_check_match(match, AF_INET6, m->u.match_size - sizeof(*m), + name, hookmask, ipv6->proto, + ipv6->invflags & IP6T_INV_PROTO); + if (!ret && m->u.kernel.match->checkentry + && !m->u.kernel.match->checkentry(name, ipv6, match, m->data, + hookmask)) { + duprintf("ip_tables: check failed for `%s'.\n", + m->u.kernel.match->name); + ret = -EINVAL; + } + if (!ret) + (*i)++; + return ret; +} + +static inline int +find_check_match(struct ip6t_entry_match *m, + const char *name, + const struct ip6t_ip6 *ipv6, + unsigned int hookmask, + unsigned int *i) { struct xt_match *match; int ret; @@ -620,86 +664,77 @@ check_match(struct ip6t_entry_match *m, m->u.user.revision), "ip6t_%s", m->u.user.name); if (IS_ERR(match) || !match) { - 
duprintf("check_match: `%s' not found\n", m->u.user.name); + duprintf("find_check_match: `%s' not found\n", m->u.user.name); return match ? PTR_ERR(match) : -ENOENT; } m->u.kernel.match = match; - ret = xt_check_match(match, AF_INET6, m->u.match_size - sizeof(*m), - name, hookmask, ipv6->proto, - ipv6->invflags & IP6T_INV_PROTO); + ret = check_match(m, name, ipv6, hookmask, i); if (ret) goto err; - if (m->u.kernel.match->checkentry - && !m->u.kernel.match->checkentry(name, ipv6, match, m->data, - hookmask)) { - duprintf("ip_tables: check failed for `%s'.\n", - m->u.kernel.match->name); - ret = -EINVAL; - goto err; - } - - (*i)++; return 0; err: module_put(m->u.kernel.match->me); return ret; } -static inline int -check_entry(struct ip6t_entry *e, const char *name, unsigned int size, - unsigned int *i) +static inline int check_target(struct ip6t_entry *e, const char *name) { struct ip6t_entry_target *t; struct xt_target *target; int ret; - unsigned int j; - if (!ip6_checkentry(&e->ipv6)) { - duprintf("ip_tables: ip check failed %p %s.\n", e, name); - return -EINVAL; + t = ip6t_get_target(e); + target = t->u.kernel.target; + ret = xt_check_target(target, AF_INET6, t->u.target_size - sizeof(*t), + name, e->comefrom, e->ipv6.proto, + e->ipv6.invflags & IP6T_INV_PROTO); + if (!ret && t->u.kernel.target->checkentry + && !t->u.kernel.target->checkentry(name, e, target, t->data, + e->comefrom)) { + duprintf("ip_tables: check failed for `%s'.\n", + t->u.kernel.target->name); + ret = -EINVAL; } + return ret; +} - if (e->target_offset + sizeof(struct ip6t_entry_target) > - e->next_offset) - return -EINVAL; +static inline int +find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size, + unsigned int *i) +{ + struct ip6t_entry_target *t; + struct xt_target *target; + int ret; + unsigned int j; + + ret = check_entry(e, name); + if (ret) + return ret; j = 0; - ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, e->comefrom, &j); + ret = IP6T_MATCH_ITERATE(e, 
find_check_match, name, &e->ipv6, + e->comefrom, &j); if (ret != 0) goto cleanup_matches; t = ip6t_get_target(e); - ret = -EINVAL; - if (e->target_offset + t->u.target_size > e->next_offset) - goto cleanup_matches; target = try_then_request_module(xt_find_target(AF_INET6, t->u.user.name, t->u.user.revision), "ip6t_%s", t->u.user.name); if (IS_ERR(target) || !target) { - duprintf("check_entry: `%s' not found\n", t->u.user.name); + duprintf("find_check_entry: `%s' not found\n", t->u.user.name); ret = target ? PTR_ERR(target) : -ENOENT; goto cleanup_matches; } t->u.kernel.target = target; - ret = xt_check_target(target, AF_INET6, t->u.target_size - sizeof(*t), - name, e->comefrom, e->ipv6.proto, - e->ipv6.invflags & IP6T_INV_PROTO); + ret = check_target(e, name); if (ret) goto err; - if (t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, target, t->data, - e->comefrom)) { - duprintf("ip_tables: check failed for `%s'.\n", - t->u.kernel.target->name); - ret = -EINVAL; - goto err; - } - (*i)++; return 0; err: @@ -834,7 +869,7 @@ translate_table(const char *name, /* Finally, each sanity check must pass */ i = 0; ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, - check_entry, name, size, &i); + find_check_entry, name, size, &i); if (ret != 0) { IP6T_ENTRY_ITERATE(entry0, newinfo->size, -- cgit v1.2.3 From 3b84e92b0d54864b0731c3ab3c20dd140bb3d7d9 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 17 Dec 2007 21:48:33 -0800 Subject: [NETFILTER]: ip6_tables: use vmalloc_node() Consistently use vmalloc_node for all counter allocations. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/ip6_tables.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 655c221acd1..d0b5fa6661f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -959,7 +959,7 @@ copy_entries_to_user(unsigned int total_size, (other than comefrom, which userspace doesn't care about). */ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc(countersize); + counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) return -ENOMEM; @@ -1080,7 +1080,8 @@ do_replace(void __user *user, unsigned int len) goto free_newinfo; } - counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters)); + counters = vmalloc_node(tmp.num_counters * sizeof(struct xt_counters), + numa_node_id()); if (!counters) { ret = -ENOMEM; goto free_newinfo; @@ -1186,7 +1187,7 @@ do_add_counters(void __user *user, unsigned int len) if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters)) return -EINVAL; - paddc = vmalloc(len); + paddc = vmalloc_node(len, numa_node_id()); if (!paddc) return -ENOMEM; -- cgit v1.2.3 From ed1a6f5e77441c4020b8541b3f03f03e37d638e1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 17 Dec 2007 21:49:51 -0800 Subject: [NETFILTER]: ip6_tables: move counter allocation to seperate function More resyncing with ip_tables.c as preparation for compat support. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/ip6_tables.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index d0b5fa6661f..02be4fcb915 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -943,17 +943,11 @@ get_counters(const struct xt_table_info *t, } } -static int -copy_entries_to_user(unsigned int total_size, - struct xt_table *table, - void __user *userptr) +static inline struct xt_counters *alloc_counters(struct xt_table *table) { - unsigned int off, num, countersize; - struct ip6t_entry *e; + unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - int ret = 0; - void *loc_cpu_entry; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -962,13 +956,32 @@ copy_entries_to_user(unsigned int total_size, counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); /* First, sum counters... 
*/ write_lock_bh(&table->lock); get_counters(private, counters); write_unlock_bh(&table->lock); + return counters; +} + +static int +copy_entries_to_user(unsigned int total_size, + struct xt_table *table, + void __user *userptr) +{ + unsigned int off, num; + struct ip6t_entry *e; + struct xt_counters *counters; + struct xt_table_info *private = table->private; + int ret = 0; + void *loc_cpu_entry; + + counters = alloc_counters(table); + if (IS_ERR(counters)) + return PTR_ERR(counters); + /* choose the copy that is on ourc node/cpu */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { -- cgit v1.2.3 From 433665c9d110d783ea4043c59657f0437fcc31dd Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 17 Dec 2007 21:50:05 -0800 Subject: [NETFILTER]: ip6_tables: move IP6T_SO_GET_INFO handling to seperate function Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6_tables.c | 89 ++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 42 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 02be4fcb915..681316e40c6 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1037,6 +1037,50 @@ copy_entries_to_user(unsigned int total_size, return ret; } +static int get_info(void __user *user, int *len) +{ + char name[IP6T_TABLE_MAXNAMELEN]; + struct xt_table *t; + int ret; + + if (*len != sizeof(struct ip6t_getinfo)) { + duprintf("length %u != %u\n", *len, + sizeof(struct ip6t_getinfo)); + return -EINVAL; + } + + if (copy_from_user(name, user, sizeof(name)) != 0) + return -EFAULT; + + name[IP6T_TABLE_MAXNAMELEN-1] = '\0'; + + t = try_then_request_module(xt_find_table_lock(AF_INET6, name), + "ip6table_%s", name); + if (t && !IS_ERR(t)) { + struct ip6t_getinfo info; + struct xt_table_info *private = t->private; + + info.valid_hooks = t->valid_hooks; + 
memcpy(info.hook_entry, private->hook_entry, + sizeof(info.hook_entry)); + memcpy(info.underflow, private->underflow, + sizeof(info.underflow)); + info.num_entries = private->number; + info.size = private->size; + memcpy(info.name, name, sizeof(info.name)); + + if (copy_to_user(user, &info, *len) != 0) + ret = -EFAULT; + else + ret = 0; + + xt_table_unlock(t); + module_put(t->me); + } else + ret = t ? PTR_ERR(t) : -ENOENT; + return ret; +} + static int get_entries(const struct ip6t_get_entries *entries, struct ip6t_get_entries __user *uptr) @@ -1274,48 +1318,9 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return -EPERM; switch (cmd) { - case IP6T_SO_GET_INFO: { - char name[IP6T_TABLE_MAXNAMELEN]; - struct xt_table *t; - - if (*len != sizeof(struct ip6t_getinfo)) { - duprintf("length %u != %u\n", *len, - sizeof(struct ip6t_getinfo)); - ret = -EINVAL; - break; - } - - if (copy_from_user(name, user, sizeof(name)) != 0) { - ret = -EFAULT; - break; - } - name[IP6T_TABLE_MAXNAMELEN-1] = '\0'; - - t = try_then_request_module(xt_find_table_lock(AF_INET6, name), - "ip6table_%s", name); - if (t && !IS_ERR(t)) { - struct ip6t_getinfo info; - struct xt_table_info *private = t->private; - - info.valid_hooks = t->valid_hooks; - memcpy(info.hook_entry, private->hook_entry, - sizeof(info.hook_entry)); - memcpy(info.underflow, private->underflow, - sizeof(info.underflow)); - info.num_entries = private->number; - info.size = private->size; - memcpy(info.name, name, sizeof(info.name)); - - if (copy_to_user(user, &info, *len) != 0) - ret = -EFAULT; - else - ret = 0; - xt_table_unlock(t); - module_put(t->me); - } else - ret = t ? 
PTR_ERR(t) : -ENOENT; - } - break; + case IP6T_SO_GET_INFO: + ret = get_info(user, len); + break; case IP6T_SO_GET_ENTRIES: { struct ip6t_get_entries get; -- cgit v1.2.3 From d924357c50d83e76d30dd5b81b5804815a2ae31c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 17 Dec 2007 21:50:22 -0800 Subject: [NETFILTER]: ip6_tables: resync get_entries() with ip_tables Resync get_entries() with ip_tables.c by moving the checks from the setsockopt handler to the function itself. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6_tables.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 681316e40c6..6fcc0d5bc27 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1082,17 +1082,29 @@ static int get_info(void __user *user, int *len) } static int -get_entries(const struct ip6t_get_entries *entries, - struct ip6t_get_entries __user *uptr) +get_entries(struct ip6t_get_entries __user *uptr, int *len) { int ret; + struct ip6t_get_entries get; struct xt_table *t; - t = xt_find_table_lock(AF_INET6, entries->name); + if (*len < sizeof(get)) { + duprintf("get_entries: %u < %u\n", *len, sizeof(get)); + return -EINVAL; + } + if (copy_from_user(&get, uptr, sizeof(get)) != 0) + return -EFAULT; + if (*len != sizeof(struct ip6t_get_entries) + get.size) { + duprintf("get_entries: %u != %u\n", *len, + sizeof(struct ip6t_get_entries) + get.size); + return -EINVAL; + } + + t = xt_find_table_lock(AF_INET6, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", private->number); - if (entries->size == private->size) + if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); else { @@ -1322,22 +1334,9 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) 
ret = get_info(user, len); break; - case IP6T_SO_GET_ENTRIES: { - struct ip6t_get_entries get; - - if (*len < sizeof(get)) { - duprintf("get_entries: %u < %u\n", *len, sizeof(get)); - ret = -EINVAL; - } else if (copy_from_user(&get, user, sizeof(get)) != 0) { - ret = -EFAULT; - } else if (*len != sizeof(struct ip6t_get_entries) + get.size) { - duprintf("get_entries: %u != %u\n", *len, - sizeof(struct ip6t_get_entries) + get.size); - ret = -EINVAL; - } else - ret = get_entries(&get, user); + case IP6T_SO_GET_ENTRIES: + ret = get_entries(user, len); break; - } case IP6T_SO_GET_REVISION_MATCH: case IP6T_SO_GET_REVISION_TARGET: { -- cgit v1.2.3 From 3bc3fe5eed5e866c0871db6d745f3bf58af004ef Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 17 Dec 2007 21:50:37 -0800 Subject: [NETFILTER]: ip6_tables: add compat support Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6_tables.c | 823 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 767 insertions(+), 56 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 6fcc0d5bc27..db0dc96be55 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -19,9 +19,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -1037,7 +1039,80 @@ copy_entries_to_user(unsigned int total_size, return ret; } -static int get_info(void __user *user, int *len) +#ifdef CONFIG_COMPAT +static void compat_standard_from_user(void *dst, void *src) +{ + int v = *(compat_int_t *)src; + + if (v > 0) + v += xt_compat_calc_jump(AF_INET6, v); + memcpy(dst, &v, sizeof(v)); +} + +static int compat_standard_to_user(void __user *dst, void *src) +{ + compat_int_t cv = *(int *)src; + + if (cv > 0) + cv -= xt_compat_calc_jump(AF_INET6, cv); + return copy_to_user(dst, &cv, sizeof(cv)) ? 
-EFAULT : 0; +} + +static inline int +compat_calc_match(struct ip6t_entry_match *m, int *size) +{ + *size += xt_compat_match_offset(m->u.kernel.match); + return 0; +} + +static int compat_calc_entry(struct ip6t_entry *e, + const struct xt_table_info *info, + void *base, struct xt_table_info *newinfo) +{ + struct ip6t_entry_target *t; + unsigned int entry_offset; + int off, i, ret; + + off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); + entry_offset = (void *)e - base; + IP6T_MATCH_ITERATE(e, compat_calc_match, &off); + t = ip6t_get_target(e); + off += xt_compat_target_offset(t->u.kernel.target); + newinfo->size -= off; + ret = xt_compat_add_offset(AF_INET6, entry_offset, off); + if (ret) + return ret; + + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + if (info->hook_entry[i] && + (e < (struct ip6t_entry *)(base + info->hook_entry[i]))) + newinfo->hook_entry[i] -= off; + if (info->underflow[i] && + (e < (struct ip6t_entry *)(base + info->underflow[i]))) + newinfo->underflow[i] -= off; + } + return 0; +} + +static int compat_table_info(const struct xt_table_info *info, + struct xt_table_info *newinfo) +{ + void *loc_cpu_entry; + + if (!newinfo || !info) + return -EINVAL; + + /* we dont care about newinfo->entries[] */ + memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); + newinfo->initial_entries = 0; + loc_cpu_entry = info->entries[raw_smp_processor_id()]; + return IP6T_ENTRY_ITERATE(loc_cpu_entry, info->size, + compat_calc_entry, info, loc_cpu_entry, + newinfo); +} +#endif + +static int get_info(void __user *user, int *len, int compat) { char name[IP6T_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -1053,13 +1128,24 @@ static int get_info(void __user *user, int *len) return -EFAULT; name[IP6T_TABLE_MAXNAMELEN-1] = '\0'; - +#ifdef CONFIG_COMPAT + if (compat) + xt_compat_lock(AF_INET6); +#endif t = try_then_request_module(xt_find_table_lock(AF_INET6, name), "ip6table_%s", name); if (t && !IS_ERR(t)) { struct ip6t_getinfo info; struct 
xt_table_info *private = t->private; +#ifdef CONFIG_COMPAT + if (compat) { + struct xt_table_info tmp; + ret = compat_table_info(private, &tmp); + xt_compat_flush_offsets(AF_INET6); + private = &tmp; + } +#endif info.valid_hooks = t->valid_hooks; memcpy(info.hook_entry, private->hook_entry, sizeof(info.hook_entry)); @@ -1078,6 +1164,10 @@ static int get_info(void __user *user, int *len) module_put(t->me); } else ret = t ? PTR_ERR(t) : -ENOENT; +#ifdef CONFIG_COMPAT + if (compat) + xt_compat_unlock(AF_INET6); +#endif return ret; } @@ -1121,65 +1211,40 @@ get_entries(struct ip6t_get_entries __user *uptr, int *len) } static int -do_replace(void __user *user, unsigned int len) +__do_replace(const char *name, unsigned int valid_hooks, + struct xt_table_info *newinfo, unsigned int num_counters, + void __user *counters_ptr) { int ret; - struct ip6t_replace tmp; struct xt_table *t; - struct xt_table_info *newinfo, *oldinfo; + struct xt_table_info *oldinfo; struct xt_counters *counters; - void *loc_cpu_entry, *loc_cpu_old_entry; - - if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) - return -EFAULT; - - /* overflow check */ - if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) - return -ENOMEM; - - newinfo = xt_alloc_table_info(tmp.size); - if (!newinfo) - return -ENOMEM; - - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; - if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), - tmp.size) != 0) { - ret = -EFAULT; - goto free_newinfo; - } + void *loc_cpu_old_entry; - counters = vmalloc_node(tmp.num_counters * sizeof(struct xt_counters), + ret = 0; + counters = vmalloc_node(num_counters * sizeof(struct xt_counters), numa_node_id()); if (!counters) { ret = -ENOMEM; - goto free_newinfo; + goto out; } - ret = translate_table(tmp.name, tmp.valid_hooks, - newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, - tmp.hook_entry, tmp.underflow); - if (ret != 0) - goto free_newinfo_counters; - - duprintf("ip_tables: 
Translated table\n"); - - t = try_then_request_module(xt_find_table_lock(AF_INET6, tmp.name), - "ip6table_%s", tmp.name); + t = try_then_request_module(xt_find_table_lock(AF_INET6, name), + "ip6table_%s", name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free_newinfo_counters_untrans; } /* You lied! */ - if (tmp.valid_hooks != t->valid_hooks) { + if (valid_hooks != t->valid_hooks) { duprintf("Valid hook crap: %08X vs %08X\n", - tmp.valid_hooks, t->valid_hooks); + valid_hooks, t->valid_hooks); ret = -EINVAL; goto put_module; } - oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret); + oldinfo = xt_replace_table(t, num_counters, newinfo, &ret); if (!oldinfo) goto put_module; @@ -1197,10 +1262,11 @@ do_replace(void __user *user, unsigned int len) get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); + IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, + NULL); xt_free_table_info(oldinfo); - if (copy_to_user(tmp.counters, counters, - sizeof(struct xt_counters) * tmp.num_counters) != 0) + if (copy_to_user(counters_ptr, counters, + sizeof(struct xt_counters) * num_counters) != 0) ret = -EFAULT; vfree(counters); xt_table_unlock(t); @@ -1210,9 +1276,54 @@ do_replace(void __user *user, unsigned int len) module_put(t->me); xt_table_unlock(t); free_newinfo_counters_untrans: - IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL); - free_newinfo_counters: vfree(counters); + out: + return ret; +} + +static int +do_replace(void __user *user, unsigned int len) +{ + int ret; + struct ip6t_replace tmp; + struct xt_table_info *newinfo; + void *loc_cpu_entry; + + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; + + /* overflow check */ + if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) + return -ENOMEM; + + newinfo = 
xt_alloc_table_info(tmp.size); + if (!newinfo) + return -ENOMEM; + + /* choose the copy that is on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), + tmp.size) != 0) { + ret = -EFAULT; + goto free_newinfo; + } + + ret = translate_table(tmp.name, tmp.valid_hooks, + newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, + tmp.hook_entry, tmp.underflow); + if (ret != 0) + goto free_newinfo; + + duprintf("ip_tables: Translated table\n"); + + ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, + tmp.num_counters, tmp.counters); + if (ret) + goto free_newinfo_untrans; + return 0; + + free_newinfo_untrans: + IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1241,31 +1352,59 @@ add_counter_to_entry(struct ip6t_entry *e, } static int -do_add_counters(void __user *user, unsigned int len) +do_add_counters(void __user *user, unsigned int len, int compat) { unsigned int i; - struct xt_counters_info tmp, *paddc; + struct xt_counters_info tmp; + struct xt_counters *paddc; + unsigned int num_counters; + char *name; + int size; + void *ptmp; struct xt_table_info *private; struct xt_table *t; int ret = 0; void *loc_cpu_entry; +#ifdef CONFIG_COMPAT + struct compat_xt_counters_info compat_tmp; - if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + if (compat) { + ptmp = &compat_tmp; + size = sizeof(struct compat_xt_counters_info); + } else +#endif + { + ptmp = &tmp; + size = sizeof(struct xt_counters_info); + } + + if (copy_from_user(ptmp, user, size) != 0) return -EFAULT; - if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters)) +#ifdef CONFIG_COMPAT + if (compat) { + num_counters = compat_tmp.num_counters; + name = compat_tmp.name; + } else +#endif + { + num_counters = tmp.num_counters; + name = tmp.name; + } + + if (len != size + num_counters * sizeof(struct xt_counters)) return -EINVAL; - paddc = 
vmalloc_node(len, numa_node_id()); + paddc = vmalloc_node(len - size, numa_node_id()); if (!paddc) return -ENOMEM; - if (copy_from_user(paddc, user, len) != 0) { + if (copy_from_user(paddc, user + size, len - size) != 0) { ret = -EFAULT; goto free; } - t = xt_find_table_lock(AF_INET6, tmp.name); + t = xt_find_table_lock(AF_INET6, name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1273,7 +1412,7 @@ do_add_counters(void __user *user, unsigned int len) write_lock_bh(&t->lock); private = t->private; - if (private->number != tmp.num_counters) { + if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } @@ -1284,7 +1423,7 @@ do_add_counters(void __user *user, unsigned int len) IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, - paddc->counters, + paddc, &i); unlock_up_free: write_unlock_bh(&t->lock); @@ -1296,6 +1435,567 @@ do_add_counters(void __user *user, unsigned int len) return ret; } +#ifdef CONFIG_COMPAT +struct compat_ip6t_replace { + char name[IP6T_TABLE_MAXNAMELEN]; + u32 valid_hooks; + u32 num_entries; + u32 size; + u32 hook_entry[NF_INET_NUMHOOKS]; + u32 underflow[NF_INET_NUMHOOKS]; + u32 num_counters; + compat_uptr_t counters; /* struct ip6t_counters * */ + struct compat_ip6t_entry entries[0]; +}; + +static int +compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, + compat_uint_t *size, struct xt_counters *counters, + unsigned int *i) +{ + struct ip6t_entry_target *t; + struct compat_ip6t_entry __user *ce; + u_int16_t target_offset, next_offset; + compat_uint_t origsize; + int ret; + + ret = -EFAULT; + origsize = *size; + ce = (struct compat_ip6t_entry __user *)*dstptr; + if (copy_to_user(ce, e, sizeof(struct ip6t_entry))) + goto out; + + if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i]))) + goto out; + + *dstptr += sizeof(struct compat_ip6t_entry); + *size -= sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); + + ret = IP6T_MATCH_ITERATE(e, 
xt_compat_match_to_user, dstptr, size); + target_offset = e->target_offset - (origsize - *size); + if (ret) + goto out; + t = ip6t_get_target(e); + ret = xt_compat_target_to_user(t, dstptr, size); + if (ret) + goto out; + ret = -EFAULT; + next_offset = e->next_offset - (origsize - *size); + if (put_user(target_offset, &ce->target_offset)) + goto out; + if (put_user(next_offset, &ce->next_offset)) + goto out; + + (*i)++; + return 0; +out: + return ret; +} + +static inline int +compat_find_calc_match(struct ip6t_entry_match *m, + const char *name, + const struct ip6t_ip6 *ipv6, + unsigned int hookmask, + int *size, int *i) +{ + struct xt_match *match; + + match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name, + m->u.user.revision), + "ip6t_%s", m->u.user.name); + if (IS_ERR(match) || !match) { + duprintf("compat_check_calc_match: `%s' not found\n", + m->u.user.name); + return match ? PTR_ERR(match) : -ENOENT; + } + m->u.kernel.match = match; + *size += xt_compat_match_offset(match); + + (*i)++; + return 0; +} + +static inline int +compat_release_match(struct ip6t_entry_match *m, unsigned int *i) +{ + if (i && (*i)-- == 0) + return 1; + + module_put(m->u.kernel.match->me); + return 0; +} + +static inline int +compat_release_entry(struct compat_ip6t_entry *e, unsigned int *i) +{ + struct ip6t_entry_target *t; + + if (i && (*i)-- == 0) + return 1; + + /* Cleanup all matches */ + COMPAT_IP6T_MATCH_ITERATE(e, compat_release_match, NULL); + t = compat_ip6t_get_target(e); + module_put(t->u.kernel.target->me); + return 0; +} + +static inline int +check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, + struct xt_table_info *newinfo, + unsigned int *size, + unsigned char *base, + unsigned char *limit, + unsigned int *hook_entries, + unsigned int *underflows, + unsigned int *i, + const char *name) +{ + struct ip6t_entry_target *t; + struct xt_target *target; + unsigned int entry_offset; + int ret, off, h, j; + + 
duprintf("check_compat_entry_size_and_hooks %p\n", e); + if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 + || (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) { + duprintf("Bad offset %p, limit = %p\n", e, limit); + return -EINVAL; + } + + if (e->next_offset < sizeof(struct compat_ip6t_entry) + + sizeof(struct compat_xt_entry_target)) { + duprintf("checking: element %p size %u\n", + e, e->next_offset); + return -EINVAL; + } + + /* For purposes of check_entry casting the compat entry is fine */ + ret = check_entry((struct ip6t_entry *)e, name); + if (ret) + return ret; + + off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); + entry_offset = (void *)e - (void *)base; + j = 0; + ret = COMPAT_IP6T_MATCH_ITERATE(e, compat_find_calc_match, name, + &e->ipv6, e->comefrom, &off, &j); + if (ret != 0) + goto release_matches; + + t = compat_ip6t_get_target(e); + target = try_then_request_module(xt_find_target(AF_INET6, + t->u.user.name, + t->u.user.revision), + "ip6t_%s", t->u.user.name); + if (IS_ERR(target) || !target) { + duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", + t->u.user.name); + ret = target ? 
PTR_ERR(target) : -ENOENT; + goto release_matches; + } + t->u.kernel.target = target; + + off += xt_compat_target_offset(target); + *size += off; + ret = xt_compat_add_offset(AF_INET6, entry_offset, off); + if (ret) + goto out; + + /* Check hooks & underflows */ + for (h = 0; h < NF_INET_NUMHOOKS; h++) { + if ((unsigned char *)e - base == hook_entries[h]) + newinfo->hook_entry[h] = hook_entries[h]; + if ((unsigned char *)e - base == underflows[h]) + newinfo->underflow[h] = underflows[h]; + } + + /* Clear counters and comefrom */ + memset(&e->counters, 0, sizeof(e->counters)); + e->comefrom = 0; + + (*i)++; + return 0; + +out: + module_put(t->u.kernel.target->me); +release_matches: + IP6T_MATCH_ITERATE(e, compat_release_match, &j); + return ret; +} + +static int +compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, + unsigned int *size, const char *name, + struct xt_table_info *newinfo, unsigned char *base) +{ + struct ip6t_entry_target *t; + struct xt_target *target; + struct ip6t_entry *de; + unsigned int origsize; + int ret, h; + + ret = 0; + origsize = *size; + de = (struct ip6t_entry *)*dstptr; + memcpy(de, e, sizeof(struct ip6t_entry)); + memcpy(&de->counters, &e->counters, sizeof(e->counters)); + + *dstptr += sizeof(struct ip6t_entry); + *size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); + + ret = COMPAT_IP6T_MATCH_ITERATE(e, xt_compat_match_from_user, + dstptr, size); + if (ret) + return ret; + de->target_offset = e->target_offset - (origsize - *size); + t = compat_ip6t_get_target(e); + target = t->u.kernel.target; + xt_compat_target_from_user(t, dstptr, size); + + de->next_offset = e->next_offset - (origsize - *size); + for (h = 0; h < NF_INET_NUMHOOKS; h++) { + if ((unsigned char *)de - base < newinfo->hook_entry[h]) + newinfo->hook_entry[h] -= origsize - *size; + if ((unsigned char *)de - base < newinfo->underflow[h]) + newinfo->underflow[h] -= origsize - *size; + } + return ret; +} + +static inline int 
compat_check_entry(struct ip6t_entry *e, const char *name, + unsigned int *i) +{ + int j, ret; + + j = 0; + ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, + e->comefrom, &j); + if (ret) + goto cleanup_matches; + + ret = check_target(e, name); + if (ret) + goto cleanup_matches; + + (*i)++; + return 0; + + cleanup_matches: + IP6T_MATCH_ITERATE(e, cleanup_match, &j); + return ret; +} + +static int +translate_compat_table(const char *name, + unsigned int valid_hooks, + struct xt_table_info **pinfo, + void **pentry0, + unsigned int total_size, + unsigned int number, + unsigned int *hook_entries, + unsigned int *underflows) +{ + unsigned int i, j; + struct xt_table_info *newinfo, *info; + void *pos, *entry0, *entry1; + unsigned int size; + int ret; + + info = *pinfo; + entry0 = *pentry0; + size = total_size; + info->number = number; + + /* Init all hooks to impossible value. */ + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + info->hook_entry[i] = 0xFFFFFFFF; + info->underflow[i] = 0xFFFFFFFF; + } + + duprintf("translate_compat_table: size %u\n", info->size); + j = 0; + xt_compat_lock(AF_INET6); + /* Walk through entries, checking offsets. 
*/ + ret = COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size, + check_compat_entry_size_and_hooks, + info, &size, entry0, + entry0 + total_size, + hook_entries, underflows, &j, name); + if (ret != 0) + goto out_unlock; + + ret = -EINVAL; + if (j != number) { + duprintf("translate_compat_table: %u not %u entries\n", + j, number); + goto out_unlock; + } + + /* Check hooks all assigned */ + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + /* Only hooks which are valid */ + if (!(valid_hooks & (1 << i))) + continue; + if (info->hook_entry[i] == 0xFFFFFFFF) { + duprintf("Invalid hook entry %u %u\n", + i, hook_entries[i]); + goto out_unlock; + } + if (info->underflow[i] == 0xFFFFFFFF) { + duprintf("Invalid underflow %u %u\n", + i, underflows[i]); + goto out_unlock; + } + } + + ret = -ENOMEM; + newinfo = xt_alloc_table_info(size); + if (!newinfo) + goto out_unlock; + + newinfo->number = number; + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + newinfo->hook_entry[i] = info->hook_entry[i]; + newinfo->underflow[i] = info->underflow[i]; + } + entry1 = newinfo->entries[raw_smp_processor_id()]; + pos = entry1; + size = total_size; + ret = COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size, + compat_copy_entry_from_user, + &pos, &size, name, newinfo, entry1); + xt_compat_flush_offsets(AF_INET6); + xt_compat_unlock(AF_INET6); + if (ret) + goto free_newinfo; + + ret = -ELOOP; + if (!mark_source_chains(newinfo, valid_hooks, entry1)) + goto free_newinfo; + + i = 0; + ret = IP6T_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, + name, &i); + if (ret) { + j -= i; + COMPAT_IP6T_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, + compat_release_entry, &j); + IP6T_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); + xt_free_table_info(newinfo); + return ret; + } + + /* And one copy for every other CPU */ + for_each_possible_cpu(i) + if (newinfo->entries[i] && newinfo->entries[i] != entry1) + memcpy(newinfo->entries[i], entry1, newinfo->size); + + *pinfo = newinfo; + *pentry0 = entry1; + 
xt_free_table_info(info); + return 0; + +free_newinfo: + xt_free_table_info(newinfo); +out: + COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); + return ret; +out_unlock: + xt_compat_flush_offsets(AF_INET6); + xt_compat_unlock(AF_INET6); + goto out; +} + +static int +compat_do_replace(void __user *user, unsigned int len) +{ + int ret; + struct compat_ip6t_replace tmp; + struct xt_table_info *newinfo; + void *loc_cpu_entry; + + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; + + /* overflow check */ + if (tmp.size >= INT_MAX / num_possible_cpus()) + return -ENOMEM; + if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) + return -ENOMEM; + + newinfo = xt_alloc_table_info(tmp.size); + if (!newinfo) + return -ENOMEM; + + /* choose the copy that is our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), + tmp.size) != 0) { + ret = -EFAULT; + goto free_newinfo; + } + + ret = translate_compat_table(tmp.name, tmp.valid_hooks, + &newinfo, &loc_cpu_entry, tmp.size, + tmp.num_entries, tmp.hook_entry, + tmp.underflow); + if (ret != 0) + goto free_newinfo; + + duprintf("compat_do_replace: Translated table\n"); + + ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, + tmp.num_counters, compat_ptr(tmp.counters)); + if (ret) + goto free_newinfo_untrans; + return 0; + + free_newinfo_untrans: + IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + free_newinfo: + xt_free_table_info(newinfo); + return ret; +} + +static int +compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, + unsigned int len) +{ + int ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case IP6T_SO_SET_REPLACE: + ret = compat_do_replace(user, len); + break; + + case IP6T_SO_SET_ADD_COUNTERS: + ret = do_add_counters(user, len, 1); + break; + + default: + duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd); + ret = -EINVAL; + } 
+ + return ret; +} + +struct compat_ip6t_get_entries { + char name[IP6T_TABLE_MAXNAMELEN]; + compat_uint_t size; + struct compat_ip6t_entry entrytable[0]; +}; + +static int +compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, + void __user *userptr) +{ + struct xt_counters *counters; + struct xt_table_info *private = table->private; + void __user *pos; + unsigned int size; + int ret = 0; + void *loc_cpu_entry; + unsigned int i = 0; + + counters = alloc_counters(table); + if (IS_ERR(counters)) + return PTR_ERR(counters); + + /* choose the copy that is on our node/cpu, ... + * This choice is lazy (because current thread is + * allowed to migrate to another cpu) + */ + loc_cpu_entry = private->entries[raw_smp_processor_id()]; + pos = userptr; + size = total_size; + ret = IP6T_ENTRY_ITERATE(loc_cpu_entry, total_size, + compat_copy_entry_to_user, + &pos, &size, counters, &i); + + vfree(counters); + return ret; +} + +static int +compat_get_entries(struct compat_ip6t_get_entries __user *uptr, int *len) +{ + int ret; + struct compat_ip6t_get_entries get; + struct xt_table *t; + + if (*len < sizeof(get)) { + duprintf("compat_get_entries: %u < %u\n", + *len, (unsigned int)sizeof(get)); + return -EINVAL; + } + + if (copy_from_user(&get, uptr, sizeof(get)) != 0) + return -EFAULT; + + if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) { + duprintf("compat_get_entries: %u != %u\n", *len, + (unsigned int)(sizeof(struct compat_ip6t_get_entries) + + get.size)); + return -EINVAL; + } + + xt_compat_lock(AF_INET6); + t = xt_find_table_lock(AF_INET6, get.name); + if (t && !IS_ERR(t)) { + struct xt_table_info *private = t->private; + struct xt_table_info info; + duprintf("t->private->number = %u\n", + private->number); + ret = compat_table_info(private, &info); + if (!ret && get.size == info.size) { + ret = compat_copy_entries_to_user(private->size, + t, uptr->entrytable); + } else if (!ret) { + duprintf("compat_get_entries: I've got %u not %u!\n", + 
private->size, + get.size); + ret = -EINVAL; + } + xt_compat_flush_offsets(AF_INET6); + module_put(t->me); + xt_table_unlock(t); + } else + ret = t ? PTR_ERR(t) : -ENOENT; + + xt_compat_unlock(AF_INET6); + return ret; +} + +static int do_ip6t_get_ctl(struct sock *, int, void __user *, int *); + +static int +compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) +{ + int ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case IP6T_SO_GET_INFO: + ret = get_info(user, len, 1); + break; + case IP6T_SO_GET_ENTRIES: + ret = compat_get_entries(user, len); + break; + default: + ret = do_ip6t_get_ctl(sk, cmd, user, len); + } + return ret; +} +#endif + static int do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { @@ -1310,7 +2010,7 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) break; case IP6T_SO_SET_ADD_COUNTERS: - ret = do_add_counters(user, len); + ret = do_add_counters(user, len, 0); break; default: @@ -1331,7 +2031,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) switch (cmd) { case IP6T_SO_GET_INFO: - ret = get_info(user, len); + ret = get_info(user, len, 0); break; case IP6T_SO_GET_ENTRIES: @@ -1483,6 +2183,11 @@ static struct xt_target ip6t_standard_target __read_mostly = { .name = IP6T_STANDARD_TARGET, .targetsize = sizeof(int), .family = AF_INET6, +#ifdef CONFIG_COMPAT + .compatsize = sizeof(compat_int_t), + .compat_from_user = compat_standard_from_user, + .compat_to_user = compat_standard_to_user, +#endif }; static struct xt_target ip6t_error_target __read_mostly = { @@ -1497,9 +2202,15 @@ static struct nf_sockopt_ops ip6t_sockopts = { .set_optmin = IP6T_BASE_CTL, .set_optmax = IP6T_SO_SET_MAX+1, .set = do_ip6t_set_ctl, +#ifdef CONFIG_COMPAT + .compat_set = compat_do_ip6t_set_ctl, +#endif .get_optmin = IP6T_BASE_CTL, .get_optmax = IP6T_SO_GET_MAX+1, .get = do_ip6t_get_ctl, +#ifdef CONFIG_COMPAT + .compat_get = 
compat_do_ip6t_get_ctl, +#endif .owner = THIS_MODULE, }; -- cgit v1.2.3 From 9c54795950d198e77144a18c94e7ed52ea0f3c77 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 17 Dec 2007 21:52:00 -0800 Subject: [NETFILTER]: {ip,ip6}_tables: reformat to eliminate differences Reformat ip_tables.c and ip6_tables.c in order to eliminate non-functional differences and minimize diff output. This allows to get a view of the real differences using: sed -e 's/IP6T/IPT/g' \ -e 's/IP6/IP/g' \ -e 's/INET6/INET/g' \ -e 's/ip6t/ipt/g' \ -e 's/ip6/ip/g' \ -e 's/ipv6/ip/g' \ -e 's/icmp6/icmp/g' \ net/ipv6/netfilter/ip6_tables.c | \ diff -wup /dev/stdin net/ipv4/netfilter/ip_tables.c Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6_tables.c | 46 ++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index db0dc96be55..0a2ec4b346f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -371,8 +371,8 @@ ip6t_do_table(struct sk_buff *skb, * match it. 
*/ read_lock_bh(&table->lock); - private = table->private; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); + private = table->private; table_base = (void *)private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -496,9 +496,7 @@ mark_source_chains(struct xt_table_info *newinfo, to 0 as we leave), and comefrom to save source hook bitmask */ for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; - struct ip6t_entry *e - = (struct ip6t_entry *)(entry0 + pos); - int visited = e->comefrom & (1 << hook); + struct ip6t_entry *e = (struct ip6t_entry *)(entry0 + pos); if (!(valid_hooks & (1 << hook))) continue; @@ -509,14 +507,14 @@ mark_source_chains(struct xt_table_info *newinfo, for (;;) { struct ip6t_standard_target *t = (void *)ip6t_get_target(e); + int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { printk("iptables: loop hook %u pos %u %08X.\n", hook, pos, e->comefrom); return 0; } - e->comefrom - |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); + e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ if ((e->target_offset == sizeof(struct ip6t_entry) @@ -663,7 +661,7 @@ find_check_match(struct ip6t_entry_match *m, int ret; match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name, - m->u.user.revision), + m->u.user.revision), "ip6t_%s", m->u.user.name); if (IS_ERR(match) || !match) { duprintf("find_check_match: `%s' not found\n", m->u.user.name); @@ -885,7 +883,7 @@ translate_table(const char *name, memcpy(newinfo->entries[i], entry0, newinfo->size); } - return 0; + return ret; } /* Gets counters. */ @@ -984,7 +982,10 @@ copy_entries_to_user(unsigned int total_size, if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy that is on ourc node/cpu */ + /* choose the copy that is on our node/cpu, ... 
+ * This choice is lazy (because current thread is + * allowed to migrate to another cpu) + */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; @@ -1199,7 +1200,7 @@ get_entries(struct ip6t_get_entries __user *uptr, int *len) t, uptr->entrytable); else { duprintf("get_entries: I've got %u not %u!\n", - private->size, entries->size); + private->size, get.size); ret = -EINVAL; } module_put(t->me); @@ -1361,8 +1362,8 @@ do_add_counters(void __user *user, unsigned int len, int compat) char *name; int size; void *ptmp; - struct xt_table_info *private; struct xt_table *t; + struct xt_table_info *private; int ret = 0; void *loc_cpu_entry; #ifdef CONFIG_COMPAT @@ -1829,7 +1830,7 @@ compat_do_replace(void __user *user, unsigned int len) if (!newinfo) return -ENOMEM; - /* choose the copy that is our node/cpu */ + /* choose the copy that is on our node/cpu */ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { @@ -1950,16 +1951,14 @@ compat_get_entries(struct compat_ip6t_get_entries __user *uptr, int *len) if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; struct xt_table_info info; - duprintf("t->private->number = %u\n", - private->number); + duprintf("t->private->number = %u\n", private->number); ret = compat_table_info(private, &info); if (!ret && get.size == info.size) { ret = compat_copy_entries_to_user(private->size, t, uptr->entrytable); } else if (!ret) { duprintf("compat_get_entries: I've got %u not %u!\n", - private->size, - get.size); + private->size, get.size); ret = -EINVAL; } xt_compat_flush_offsets(AF_INET6); @@ -2072,8 +2071,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -int ip6t_register_table(struct xt_table *table, - const struct ip6t_replace *repl) +int ip6t_register_table(struct xt_table *table, const struct ip6t_replace *repl) { int 
ret; struct xt_table_info *newinfo; @@ -2085,7 +2083,7 @@ int ip6t_register_table(struct xt_table *table, if (!newinfo) return -ENOMEM; - /* choose the copy on our node/cpu */ + /* choose the copy on our node/cpu, but dont care about preemption */ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); @@ -2141,17 +2139,18 @@ icmp6_match(const struct sk_buff *skb, unsigned int protoff, bool *hotdrop) { - struct icmp6hdr _icmp, *ic; + struct icmp6hdr _icmph, *ic; const struct ip6t_icmp *icmpinfo = matchinfo; /* Must not be a fragment. */ if (offset) return false; - ic = skb_header_pointer(skb, protoff, sizeof(_icmp), &_icmp); + ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph); if (ic == NULL) { /* We've been asked to examine this packet, and we - can't. Hence, no choice but to drop. */ + * can't. Hence, no choice but to drop. + */ duprintf("Dropping evil ICMP tinygram.\n"); *hotdrop = true; return false; @@ -2216,7 +2215,7 @@ static struct nf_sockopt_ops ip6t_sockopts = { static struct xt_match icmp6_matchstruct __read_mostly = { .name = "icmp6", - .match = &icmp6_match, + .match = icmp6_match, .matchsize = sizeof(struct ip6t_icmp), .checkentry = icmp6_checkentry, .proto = IPPROTO_ICMPV6, @@ -2265,6 +2264,7 @@ err1: static void __exit ip6_tables_fini(void) { nf_unregister_sockopt(&ip6t_sockopts); + xt_unregister_match(&icmp6_matchstruct); xt_unregister_target(&ip6t_error_target); xt_unregister_target(&ip6t_standard_target); -- cgit v1.2.3 From c9d8fe13175140c79982f9d29c6921328f9afad6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 17 Dec 2007 21:52:15 -0800 Subject: [NETFILTER]: {ip,ip6}_tables: fix format strings Use %zu for sizeof() and remove casts. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/ip6_tables.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 0a2ec4b346f..40893fc0592 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1120,7 +1120,7 @@ static int get_info(void __user *user, int *len, int compat) int ret; if (*len != sizeof(struct ip6t_getinfo)) { - duprintf("length %u != %u\n", *len, + duprintf("length %u != %zu\n", *len, sizeof(struct ip6t_getinfo)); return -EINVAL; } @@ -1180,14 +1180,14 @@ get_entries(struct ip6t_get_entries __user *uptr, int *len) struct xt_table *t; if (*len < sizeof(get)) { - duprintf("get_entries: %u < %u\n", *len, sizeof(get)); + duprintf("get_entries: %u < %zu\n", *len, sizeof(get)); return -EINVAL; } if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; if (*len != sizeof(struct ip6t_get_entries) + get.size) { - duprintf("get_entries: %u != %u\n", *len, - sizeof(struct ip6t_get_entries) + get.size); + duprintf("get_entries: %u != %zu\n", + *len, sizeof(get) + get.size); return -EINVAL; } @@ -1931,8 +1931,7 @@ compat_get_entries(struct compat_ip6t_get_entries __user *uptr, int *len) struct xt_table *t; if (*len < sizeof(get)) { - duprintf("compat_get_entries: %u < %u\n", - *len, (unsigned int)sizeof(get)); + duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get)); return -EINVAL; } @@ -1940,9 +1939,8 @@ compat_get_entries(struct compat_ip6t_get_entries __user *uptr, int *len) return -EFAULT; if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) { - duprintf("compat_get_entries: %u != %u\n", *len, - (unsigned int)(sizeof(struct compat_ip6t_get_entries) + - get.size)); + duprintf("compat_get_entries: %u != %zu\n", + *len, sizeof(get) + get.size); return -EINVAL; } -- cgit v1.2.3 From b5dd674b2a1de5925955a088b0a10f81484e975a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 17 Dec 2007 21:52:35 
-0800 Subject: [NETFILTER]: ip6_tables: fix stack leagage Fix leakage of local variable on stack. This already got fixed in ip_tables silently by the compat patches. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6_tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 40893fc0592..fc4c62fddfe 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1154,7 +1154,7 @@ static int get_info(void __user *user, int *len, int compat) sizeof(info.underflow)); info.num_entries = private->number; info.size = private->size; - memcpy(info.name, name, sizeof(info.name)); + strcpy(info.name, name); if (copy_to_user(user, &info, *len) != 0) ret = -EFAULT; -- cgit v1.2.3 From da4d0f6b3d3c7bcd00e097d48416e0a1dfde2a0f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 17 Dec 2007 21:52:52 -0800 Subject: [NETFILTER]: ip6_tables: use raw_smp_processor_id() in do_add_counters() Use raw_smp_processor_id() in do_add_counters() as in ip_tables.c. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/ip6_tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index fc4c62fddfe..d910d56d22d 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1420,7 +1420,7 @@ do_add_counters(void __user *user, unsigned int len, int compat) i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[smp_processor_id()]; + loc_cpu_entry = private->entries[raw_smp_processor_id()]; IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, -- cgit v1.2.3 From 77236b6e33b06aaf756a86ed1965ca7d460b1b53 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 17 Dec 2007 22:29:45 -0800 Subject: [NETFILTER]: ctnetlink: use netlink attribute helpers Use NLA_PUT_BE32, nla_get_be32() etc. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index e99384f9764..44689d44441 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -213,12 +213,9 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff, static int icmpv6_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *t) { - NLA_PUT(skb, CTA_PROTO_ICMPV6_ID, sizeof(u_int16_t), - &t->src.u.icmp.id); - NLA_PUT(skb, CTA_PROTO_ICMPV6_TYPE, sizeof(u_int8_t), - &t->dst.u.icmp.type); - NLA_PUT(skb, CTA_PROTO_ICMPV6_CODE, sizeof(u_int8_t), - &t->dst.u.icmp.code); + NLA_PUT_BE16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id); + NLA_PUT_U8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type); + NLA_PUT_U8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code); return 0; @@ -240,12 +237,9 @@ static int 
icmpv6_nlattr_to_tuple(struct nlattr *tb[], || !tb[CTA_PROTO_ICMPV6_ID]) return -EINVAL; - tuple->dst.u.icmp.type = - *(u_int8_t *)nla_data(tb[CTA_PROTO_ICMPV6_TYPE]); - tuple->dst.u.icmp.code = - *(u_int8_t *)nla_data(tb[CTA_PROTO_ICMPV6_CODE]); - tuple->src.u.icmp.id = - *(__be16 *)nla_data(tb[CTA_PROTO_ICMPV6_ID]); + tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]); + tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]); + tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]); if (tuple->dst.u.icmp.type < 128 || tuple->dst.u.icmp.type - 128 >= sizeof(invmap) -- cgit v1.2.3 From f01ffbd6e7d001ccf9168b33507958a51ce0ffcf Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 17 Dec 2007 22:38:49 -0800 Subject: [NETFILTER]: nf_log: move logging stuff to seperate header Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6_tables.c | 1 + net/ipv6/netfilter/ip6t_LOG.c | 1 + net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 1 + 3 files changed, 3 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index d910d56d22d..bb50d0e6673 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -28,6 +28,7 @@ #include #include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 19523242991..e6a2b1e9469 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -23,6 +23,7 @@ #include #include #include +#include MODULE_AUTHOR("Jan Rekorajski "); MODULE_DESCRIPTION("IP6 tables LOG target module"); diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 44689d44441..02d60dfbab8 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -24,6 +24,7 @@ #include #include #include +#include 
static unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ; -- cgit v1.2.3 From 7b2f9631e789c3e7d59201c21f09a24cd6ce3b1a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 17 Dec 2007 22:39:08 -0800 Subject: [NETFILTER]: nf_log: constify struct nf_logger and nf_log_packet loginfo arg Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6t_LOG.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index e6a2b1e9469..474c2b12621 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -478,7 +478,7 @@ static struct xt_target log_tg6_reg __read_mostly = { .me = THIS_MODULE, }; -static struct nf_logger ip6t_logger = { +static const struct nf_logger ip6t_logger = { .name = "ip6t_LOG", .logfn = &ip6t_log_packet, .me = THIS_MODULE, -- cgit v1.2.3 From 1e796fda00f06bac584f0e4ad8750ab9430d79d3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 17 Dec 2007 22:42:27 -0800 Subject: [NETFILTER]: constify nf_afinfo Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 945e6ae1956..2e06724dc34 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -124,7 +124,7 @@ __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, EXPORT_SYMBOL(nf_ip6_checksum); -static struct nf_afinfo nf_ip6_afinfo = { +static const struct nf_afinfo nf_ip6_afinfo = { .family = AF_INET6, .checksum = nf_ip6_checksum, .route = nf_ip6_route, -- cgit v1.2.3 From e79ec50b9587c175f65f98550d66ad5b96c05dd9 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 17 Dec 2007 22:44:06 -0800 Subject: [NETFILTER]: Parenthesize macro parameters Parenthesize macro parameters. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/ip6_tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index bb50d0e6673..4ed16d254b9 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -102,7 +102,7 @@ ip6_packet_match(const struct sk_buff *skb, unsigned long ret; const struct ipv6hdr *ipv6 = ipv6_hdr(skb); -#define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg)) +#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg))) if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, &ip6info->src), IP6T_INV_SRCIP) -- cgit v1.2.3 From 33b8e776056202aceaf4c90f465d0f4ee53432ac Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 17 Dec 2007 22:47:05 -0800 Subject: [NETFILTER]: Add CONFIG_NETFILTER_ADVANCED option The NETFILTER_ADVANCED option hides lots of the rather obscure netfilter options when disabled and provides defaults (M) that should allow to run a distribution firewall without further thinking. Defaults to 'y' to avoid breaking current configurations. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/Kconfig | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 5374c665f8d..a6b4a9a1053 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -8,6 +8,7 @@ menu "IPv6: Netfilter Configuration (EXPERIMENTAL)" config NF_CONNTRACK_IPV6 tristate "IPv6 connection tracking support (EXPERIMENTAL)" depends on INET && IPV6 && EXPERIMENTAL && NF_CONNTRACK + default m if NETFILTER_ADVANCED=n ---help--- Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related @@ -22,6 +23,7 @@ config NF_CONNTRACK_IPV6 config IP6_NF_QUEUE tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)" depends on INET && IPV6 && NETFILTER && EXPERIMENTAL + depends on NETFILTER_ADVANCED ---help--- This option adds a queue handler to the kernel for IPv6 @@ -44,6 +46,7 @@ config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 && EXPERIMENTAL select NETFILTER_XTABLES + default m if NETFILTER_ADVANCED=n help ip6tables is a general, extensible packet identification framework. Currently only the packet filtering and packet mangling subsystem @@ -56,6 +59,7 @@ config IP6_NF_IPTABLES config IP6_NF_MATCH_RT tristate '"rt" Routing header match support' depends on IP6_NF_IPTABLES + depends on NETFILTER_ADVANCED help rt matching allows you to match packets based on the routing header of the packet. @@ -65,6 +69,7 @@ config IP6_NF_MATCH_RT config IP6_NF_MATCH_OPTS tristate '"hopbyhop" and "dst" opts header match support' depends on IP6_NF_IPTABLES + depends on NETFILTER_ADVANCED help This allows one to match packets based on the hop-by-hop and destination options headers of a packet. 
@@ -74,6 +79,7 @@ config IP6_NF_MATCH_OPTS config IP6_NF_MATCH_FRAG tristate '"frag" Fragmentation header match support' depends on IP6_NF_IPTABLES + depends on NETFILTER_ADVANCED help frag matching allows you to match packets based on the fragmentation header of the packet. @@ -83,6 +89,7 @@ config IP6_NF_MATCH_FRAG config IP6_NF_MATCH_HL tristate '"hl" match support' depends on IP6_NF_IPTABLES + depends on NETFILTER_ADVANCED help HL matching allows you to match packets based on the hop limit of the packet. @@ -92,6 +99,7 @@ config IP6_NF_MATCH_HL config IP6_NF_MATCH_IPV6HEADER tristate '"ipv6header" IPv6 Extension Headers Match' depends on IP6_NF_IPTABLES + depends on NETFILTER_ADVANCED help This module allows one to match packets based upon the ipv6 extension headers. @@ -101,6 +109,7 @@ config IP6_NF_MATCH_IPV6HEADER config IP6_NF_MATCH_AH tristate '"ah" match support' depends on IP6_NF_IPTABLES + depends on NETFILTER_ADVANCED help This module allows one to match AH packets. @@ -109,6 +118,7 @@ config IP6_NF_MATCH_AH config IP6_NF_MATCH_MH tristate '"mh" match support' depends on IP6_NF_IPTABLES + depends on NETFILTER_ADVANCED help This module allows one to match MH packets. 
@@ -117,6 +127,7 @@ config IP6_NF_MATCH_MH config IP6_NF_MATCH_EUI64 tristate '"eui64" address check' depends on IP6_NF_IPTABLES + depends on NETFILTER_ADVANCED help This module performs checking on the IPv6 source address Compares the last 64 bits with the EUI64 (delivered @@ -128,6 +139,7 @@ config IP6_NF_MATCH_EUI64 config IP6_NF_FILTER tristate "Packet filtering" depends on IP6_NF_IPTABLES + default m if NETFILTER_ADVANCED=n help Packet filtering defines a table `filter', which has a series of rules for simple packet filtering at local input, forwarding and @@ -138,6 +150,7 @@ config IP6_NF_FILTER config IP6_NF_TARGET_LOG tristate "LOG target support" depends on IP6_NF_FILTER + default m if NETFILTER_ADVANCED=n help This option adds a `LOG' target, which allows you to create rules in any iptables table which records the packet header to the syslog. @@ -147,6 +160,7 @@ config IP6_NF_TARGET_LOG config IP6_NF_TARGET_REJECT tristate "REJECT target support" depends on IP6_NF_FILTER + default m if NETFILTER_ADVANCED=n help The REJECT target allows a filtering rule to specify that an ICMPv6 error should be issued in response to an incoming packet, rather @@ -157,6 +171,7 @@ config IP6_NF_TARGET_REJECT config IP6_NF_MANGLE tristate "Packet mangling" depends on IP6_NF_IPTABLES + default m if NETFILTER_ADVANCED=n help This option adds a `mangle' table to iptables: see the man page for iptables(8). This table is used for various packet alterations @@ -167,27 +182,29 @@ config IP6_NF_MANGLE config IP6_NF_TARGET_HL tristate 'HL (hoplimit) target support' depends on IP6_NF_MANGLE + depends on NETFILTER_ADVANCED help This option adds a `HL' target, which enables the user to decrement the hoplimit value of the IPv6 header or set it to a given (lower) value. - + While it is safe to decrement the hoplimit value, this option also enables functionality to increment and set the hoplimit value of the IPv6 header to arbitrary values. 
This is EXTREMELY DANGEROUS since you can easily create immortal packets that loop forever on the - network. + network. To compile it as a module, choose M here. If unsure, say N. config IP6_NF_RAW tristate 'raw table support (required for TRACE)' depends on IP6_NF_IPTABLES + depends on NETFILTER_ADVANCED help This option adds a `raw' table to ip6tables. This table is the very first in the netfilter framework and hooks in at the PREROUTING and OUTPUT chains. - + If you want to compile it as a module, say M here and read . If unsure, say `N'. -- cgit v1.2.3 From 195ad6a3ac8b5c4eef4916efcb673e96e6f09d89 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 20 Dec 2007 13:53:40 -0800 Subject: [IPSEC]: Rename tunnel-mode functions to avoid collisions with tunnels It appears that I've managed to create two different functions both called xfrm6_tunnel_output. This is because we have the plain tunnel encapsulation named xfrmX_tunnel as well as the tunnel-mode encapsulation which lives in the files xfrmX_mode_tunnel.c. This patch renames functions from the latter to use the xfrmX_mode_tunnel prefix to avoid name-space conflicts. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_mode_tunnel.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index f7d0d661265..0c742faaa30 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -29,7 +29,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) * * The top IP header will be constructed per RFC 2401. 
*/ -static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) +static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { struct dst_entry *dst = skb->dst; struct ipv6hdr *top_iph; @@ -58,7 +58,7 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } -static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) +static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; const unsigned char *old_mac; @@ -89,21 +89,21 @@ out: } static struct xfrm_mode xfrm6_tunnel_mode = { - .input2 = xfrm6_tunnel_input, + .input2 = xfrm6_mode_tunnel_input, .input = xfrm_prepare_input, - .output2 = xfrm6_tunnel_output, + .output2 = xfrm6_mode_tunnel_output, .output = xfrm6_prepare_output, .owner = THIS_MODULE, .encap = XFRM_MODE_TUNNEL, .flags = XFRM_MODE_FLAG_TUNNEL, }; -static int __init xfrm6_tunnel_init(void) +static int __init xfrm6_mode_tunnel_init(void) { return xfrm_register_mode(&xfrm6_tunnel_mode, AF_INET6); } -static void __exit xfrm6_tunnel_exit(void) +static void __exit xfrm6_mode_tunnel_exit(void) { int err; @@ -111,7 +111,7 @@ static void __exit xfrm6_tunnel_exit(void) BUG_ON(err); } -module_init(xfrm6_tunnel_init); -module_exit(xfrm6_tunnel_exit); +module_init(xfrm6_mode_tunnel_init); +module_exit(xfrm6_mode_tunnel_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TUNNEL); -- cgit v1.2.3 From a1b051405bc16222d92c73b0c26d65b333a154ee Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Thu, 20 Dec 2007 20:41:12 -0800 Subject: [XFRM] IPv6: Fix dst/routing check at transformation. IPv6 specific thing is wrongly removed from transformation at net-2.6.25. This patch recovers it with current design. o Update "path" of xfrm_dst since IPv6 transformation should care about routing changes. It is required by MIPv6 and off-link destined IPsec. 
o Rename nfheader_len which is for non-fragment transformation used by MIPv6 to rt6i_nfheader_len as IPv6 name space. Signed-off-by: Masahide NAKAMURA Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 4 ++-- net/ipv6/xfrm6_policy.c | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d54da616e3a..4686646058d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1126,7 +1126,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) - - rt->nfheader_len; + rt->rt6i_nfheader_len; length += exthdrlen; transhdrlen += exthdrlen; } else { @@ -1141,7 +1141,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); - fragheaderlen = sizeof(struct ipv6hdr) + rt->nfheader_len + + fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? 
opt->opt_nflen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index d26b7dc3f33..cf373b46a1b 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -98,6 +98,20 @@ static int xfrm6_get_tos(struct flowi *fl) return 0; } +static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, + int nfheader_len) +{ + if (dst->ops->family == AF_INET6) { + struct rt6_info *rt = (struct rt6_info*)dst; + if (rt->rt6i_node) + path->path_cookie = rt->rt6i_node->fn_sernum; + } + + path->u.rt6.rt6i_nfheader_len = nfheader_len; + + return 0; +} + static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) { struct rt6_info *rt = (struct rt6_info*)xdst->route; @@ -115,6 +129,8 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) RTF_LOCAL); xdst->u.rt6.rt6i_metric = rt->rt6i_metric; xdst->u.rt6.rt6i_node = rt->rt6i_node; + if (rt->rt6i_node) + xdst->route_cookie = rt->rt6i_node->fn_sernum; xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; xdst->u.rt6.rt6i_dst = rt->rt6i_dst; xdst->u.rt6.rt6i_src = rt->rt6i_src; @@ -266,6 +282,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .find_bundle = __xfrm6_find_bundle, .decode_session = _decode_session6, .get_tos = xfrm6_get_tos, + .init_path = xfrm6_init_path, .fill_dst = xfrm6_fill_dst, }; -- cgit v1.2.3 From 9473e1f631de339c50bde1e3bd09e1045fe90fd5 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Thu, 20 Dec 2007 20:41:57 -0800 Subject: [XFRM] MIPv6: Fix to input RO state correctly. Disable spin_lock during xfrm_type.input() function. Follow design as IPsec inbound does. Signed-off-by: Masahide NAKAMURA Signed-off-by: David S. 
Miller --- net/ipv6/xfrm6_input.c | 54 +++++++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 25 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 74f3aacebb5..f835ab458f5 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -63,10 +63,26 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, struct xfrm_state *x = NULL; int wildcard = 0; xfrm_address_t *xany; - struct xfrm_state *xfrm_vec_one = NULL; int nh = 0; int i = 0; + /* Allocate new secpath or COW existing one. */ + if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { + struct sec_path *sp; + + sp = secpath_dup(skb->sp); + if (!sp) { + goto drop; + } + if (skb->sp) + secpath_put(skb->sp); + skb->sp = sp; + } + + if (1 + skb->sp->len == XFRM_MAX_DEPTH) { + goto drop; + } + xany = (xfrm_address_t *)&in6addr_any; for (i = 0; i < 3; i++) { @@ -119,47 +135,35 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, continue; } + spin_unlock(&x->lock); + nh = x->type->input(x, skb); if (nh <= 0) { - spin_unlock(&x->lock); xfrm_state_put(x); x = NULL; continue; } - x->curlft.bytes += skb->len; - x->curlft.packets++; - - spin_unlock(&x->lock); - - xfrm_vec_one = x; + /* Found a state */ break; } - if (!xfrm_vec_one) + if (!x) { goto drop; - - /* Allocate new secpath or COW existing one. 
*/ - if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { - struct sec_path *sp; - sp = secpath_dup(skb->sp); - if (!sp) - goto drop; - if (skb->sp) - secpath_put(skb->sp); - skb->sp = sp; } - if (1 + skb->sp->len > XFRM_MAX_DEPTH) - goto drop; + skb->sp->xvec[skb->sp->len++] = x; + + spin_lock(&x->lock); - skb->sp->xvec[skb->sp->len] = xfrm_vec_one; - skb->sp->len ++; + x->curlft.bytes += skb->len; + x->curlft.packets++; + + spin_unlock(&x->lock); return 1; + drop: - if (xfrm_vec_one) - xfrm_state_put(xfrm_vec_one); return -1; } -- cgit v1.2.3 From 0aa647746e5602e608220c10e51f49709a030f5d Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Thu, 20 Dec 2007 20:43:36 -0800 Subject: [XFRM]: Support to increment packet dropping statistics. Signed-off-by: Masahide NAKAMURA Signed-off-by: David S. Miller --- net/ipv6/xfrm6_input.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index f835ab458f5..6644fc6d542 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -72,6 +72,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, sp = secpath_dup(skb->sp); if (!sp) { + XFRM_INC_STATS(LINUX_MIB_XFRMINERROR); goto drop; } if (skb->sp) @@ -80,6 +81,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, } if (1 + skb->sp->len == XFRM_MAX_DEPTH) { + XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR); goto drop; } @@ -149,6 +151,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, } if (!x) { + XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES); goto drop; } -- cgit v1.2.3 From 9cb5734e5b9b26097c7fa28a9c6426a204cc15e3 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Sat, 12 Jan 2008 02:16:03 -0800 Subject: [TCP]: Convert several length variable to unsigned. Several length variables cannot be negative, so convert int to unsigned int. This also allows us to do sane shift operations on those variables. 
Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0268e118f0b..00c08399837 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -733,7 +733,7 @@ static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, struct in6_addr *saddr, struct in6_addr *daddr, struct tcphdr *th, int protocol, - int tcplen) + unsigned int tcplen) { struct scatterlist sg[4]; __u16 data_len; @@ -818,7 +818,7 @@ static int tcp_v6_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, struct dst_entry *dst, struct request_sock *req, struct tcphdr *th, int protocol, - int tcplen) + unsigned int tcplen) { struct in6_addr *saddr, *daddr; @@ -985,7 +985,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; struct flowi fl; - int tot_len = sizeof(*th); + unsigned int tot_len = sizeof(*th); #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; #endif @@ -1085,7 +1085,7 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw, struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; struct flowi fl; - int tot_len = sizeof(struct tcphdr); + unsigned int tot_len = sizeof(struct tcphdr); __be32 *topt; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; -- cgit v1.2.3 From afeb14b49098ba7a51c96e083a4105a0301f94c4 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 21 Dec 2007 14:58:11 -0800 Subject: [XFRM]: RFC4303 compliant auditing This patch adds a number of new IPsec audit events to meet the auditing requirements of RFC4303. This includes audit hooks for the following events: * Could not find a valid SA [sections 2.1, 3.4.2] . xfrm_audit_state_notfound() . xfrm_audit_state_notfound_simple() * Sequence number overflow [section 3.3.3] . xfrm_audit_state_replay_overflow() * Replayed packet [section 3.4.3] . 
xfrm_audit_state_replay() * Integrity check failure [sections 3.4.4.1, 3.4.4.2] . xfrm_audit_state_icvfail() While RFC4304 deals only with ESP most of the changes in this patch apply to IPsec in general, i.e. both AH and ESP. The one case, integrity check failure, where ESP specific code had to be modified the same was done to the AH code for the sake of consistency. Signed-off-by: Paul Moore Acked-by: James Morris Signed-off-by: David S. Miller --- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 1 + net/ipv6/xfrm6_input.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 1b51d1eedbd..2d32772c87c 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -381,7 +381,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) if (err) goto unlock; if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) { - LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n"); + xfrm_audit_state_icvfail(x, skb, IPPROTO_AH); err = -EBADMSG; } } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 5bd5292ad9f..e10f10bfe2c 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -186,6 +186,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) BUG(); if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) { + xfrm_audit_state_icvfail(x, skb, IPPROTO_ESP); ret = -EBADMSG; goto unlock; } diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 6644fc6d542..063ce6ed1bd 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -152,6 +152,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, if (!x) { XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES); + xfrm_audit_state_notfound_simple(skb, AF_INET6); goto drop; } -- cgit v1.2.3 From 426b5303eb435d98b9bee37a807be386bc2b3320 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Thu, 24 Jan 2008 00:13:18 -0800 Subject: [NETNS]: Modify the neighbour table code so it handles multiple network namespaces I'm actually surprised at how much was involved. At first glance it appears that the neighbour table data structures are already split by network device so all that should be needed is to modify the user interface commands to filter the set of neighbours by the network namespace of their devices. However a couple things turned up while I was reading through the code. The proxy neighbour table allows entries with no network device, and the neighbour parms are per network device (except for the defaults) so they now need a per network namespace default. So I updated the two structures (which surprised me) with their very own network namespace parameter. Updated the relevant lookup and destroy routines with a network namespace parameter and modified the code that interacts with users to filter out neighbour table entries for devices of other namespaces. I'm a little concerned that we can modify and display the global table configuration from all network namespaces. But this appears good enough for now. I keep thinking modifying the neighbour table to have per network namespace instances of each table type would be cleaner. The hash table is already dynamically sized so it is not a limiter. The default parameter would be straightforward to take care of. However when I look at how the network table is built and used I still find some assumptions that there is only a single neighbour table for each type of table in the kernel. The netlink operations, neigh_seq_start, the non-core network users that call neigh_lookup. So while it might be doable it would require more refactoring than my current approach of just doing a little extra filtering in the code. Signed-off-by: Eric W. Biederman Signed-off-by: Daniel Lezcano Signed-off-by: David S.
Miller --- net/ipv6/ip6_output.c | 2 +- net/ipv6/ndisc.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4686646058d..ba7c8aaf278 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -449,7 +449,7 @@ int ip6_forward(struct sk_buff *skb) /* XXX: idev->cnf.proxy_ndp? */ if (ipv6_devconf.proxy_ndp && - pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) { + pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) return ip6_input(skb); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b87f9d245e2..b66a1f81bd8 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -789,7 +789,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && - (pneigh = pneigh_lookup(&nd_tbl, + (pneigh = pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) != NULL)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && @@ -930,7 +930,7 @@ static void ndisc_recv_na(struct sk_buff *skb) */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp && - pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) { + pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) { /* XXX: idev->cnf.prixy_ndp */ goto out; } -- cgit v1.2.3 From 0883ae0e5599656b5f3b0e9ce474e01dee7dfee4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 30 Dec 2007 21:10:14 -0800 Subject: [IPSEC]: Fix transport-mode async resume on intput without netfilter When netfilter is off the transport-mode async resumption doesn't work because we don't push back the IP header. This patch fixes that by moving most of the code outside of ifdef NETFILTER since the only part that's not common is the short-circuit in the protocol handler. 
Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_input.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 063ce6ed1bd..a4714d76ae6 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -34,19 +34,17 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) skb_network_header(skb)[IP6CB(skb)->nhoff] = XFRM_MODE_SKB_CB(skb)->protocol; -#ifdef CONFIG_NETFILTER +#ifndef CONFIG_NETFILTER + if (!async) + return 1; +#endif + ipv6_hdr(skb)->payload_len = htons(skb->len); __skb_push(skb, skb->data - skb_network_header(skb)); NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, ip6_rcv_finish); return -1; -#else - if (async) - return ip6_rcv_finish(skb); - - return 1; -#endif } int xfrm6_rcv(struct sk_buff *skb) -- cgit v1.2.3 From 9dd3245a2ac1834797191072705015e6a12f55bf Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 30 Dec 2007 21:10:30 -0800 Subject: [IPSEC]: Move all calls to xfrm_audit_state_icvfail to xfrm_input Let's nip the code duplication in the bud :) Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/ah6.c | 4 +--- net/ipv6/esp6.c | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 2d32772c87c..fb0d07a15e9 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -380,10 +380,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) err = ah_mac_digest(ahp, skb, ah->auth_data); if (err) goto unlock; - if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) { - xfrm_audit_state_icvfail(x, skb, IPPROTO_AH); + if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) err = -EBADMSG; - } } unlock: spin_unlock(&x->lock); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index e10f10bfe2c..5bd5292ad9f 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -186,7 +186,6 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) BUG(); if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) { - xfrm_audit_state_icvfail(x, skb, IPPROTO_ESP); ret = -EBADMSG; goto unlock; } -- cgit v1.2.3 From a06b494b61de44617dd58612164bdde56fca7bfb Mon Sep 17 00:00:00 2001 From: Gui Jianfeng Date: Sun, 30 Dec 2007 23:27:10 -0800 Subject: [IPV6]: Remove useless code from fib6_del_route(). There are useless codes in fib6_del_route(). The following patch has been tested, every thing looks fine, as usual. Signed-off-by: Gui Jianfeng Signed-off-by: David S. 
Miller --- net/ipv6/ip6_fib.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index df05c6f2189..7165a5e90f4 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1126,9 +1126,6 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, rt->u.dst.rt6_next = NULL; - if (fn->leaf == NULL && fn->fn_flags&RTN_TL_ROOT) - fn->leaf = &ip6_null_entry; - /* If it was last route, expunge its radix tree node */ if (fn->leaf == NULL) { fn->fn_flags &= ~RTN_RTINFO; -- cgit v1.2.3 From 95766fff6b9a78d11fc2d3812dd035381690b55d Mon Sep 17 00:00:00 2001 From: Hideo Aoki Date: Mon, 31 Dec 2007 00:29:24 -0800 Subject: [UDP]: Add memory accounting. Signed-off-by: Takahiro Yasui Signed-off-by: Hideo Aoki Signed-off-by: David S. Miller --- net/ipv6/udp.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c9a97b40551..bf58acab206 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -204,13 +204,17 @@ try_again: err = ulen; out_free: + lock_sock(sk); skb_free_datagram(sk, skb); + release_sock(sk); out: return err; csum_copy_err: + lock_sock(sk); if (!skb_kill_datagram(sk, skb, flags)) UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + release_sock(sk); if (flags & MSG_DONTWAIT) return -EAGAIN; @@ -366,10 +370,21 @@ static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr, while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr, uh->source, saddr, dif))) { struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); - if (buff) - udpv6_queue_rcv_skb(sk2, buff); + if (buff) { + bh_lock_sock_nested(sk2); + if (!sock_owned_by_user(sk2)) + udpv6_queue_rcv_skb(sk2, buff); + else + sk_add_backlog(sk2, buff); + bh_unlock_sock(sk2); + } } - udpv6_queue_rcv_skb(sk, skb); + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + udpv6_queue_rcv_skb(sk, skb); + else + 
sk_add_backlog(sk, skb); + bh_unlock_sock(sk); out: read_unlock(&udp_hash_lock); return 0; @@ -482,7 +497,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], /* deliver */ - udpv6_queue_rcv_skb(sk, skb); + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + udpv6_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); sock_put(sk); return 0; @@ -994,6 +1014,10 @@ struct proto udpv6_prot = { .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, + .sysctl_wmem = &sysctl_udp_wmem_min, + .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp6_sock), #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, -- cgit v1.2.3 From 9a429c4983deae020f1e757ecc8f547b6d4e2f2b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Jan 2008 21:58:02 -0800 Subject: [NET]: Add some acquires/releases sparse annotations. Add __acquires() and __releases() annotations to suppress some sparse warnings. example of warnings : net/ipv4/udp.c:1555:14: warning: context imbalance in 'udp_seq_start' - wrong count at exit net/ipv4/udp.c:1571:13: warning: context imbalance in 'udp_seq_stop' - unexpected unlock Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/anycast.c | 2 ++ net/ipv6/ip6_flowlabel.c | 2 ++ net/ipv6/mcast.c | 4 ++++ 3 files changed, 8 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index f915c4df982..5c4190060e7 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -504,6 +504,7 @@ static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos) } static void *ac6_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(dev_base_lock) { read_lock(&dev_base_lock); return ac6_get_idx(seq, *pos); @@ -518,6 +519,7 @@ static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void ac6_seq_stop(struct seq_file *seq, void *v) + __releases(dev_base_lock) { struct ac6_iter_state *state = ac6_seq_private(seq); if (likely(state->idev != NULL)) { diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index d0babea8981..2b7d9ee9883 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -629,6 +629,7 @@ static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos) } static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(ip6_fl_lock) { read_lock_bh(&ip6_fl_lock); return *pos ? 
ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; @@ -647,6 +648,7 @@ static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void ip6fl_seq_stop(struct seq_file *seq, void *v) + __releases(ip6_fl_lock) { read_unlock_bh(&ip6_fl_lock); } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 82b12940c2a..ab228d1ea11 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2373,6 +2373,7 @@ static struct ifmcaddr6 *igmp6_mc_get_idx(struct seq_file *seq, loff_t pos) } static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(dev_base_lock) { read_lock(&dev_base_lock); return igmp6_mc_get_idx(seq, *pos); @@ -2387,6 +2388,7 @@ static void *igmp6_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void igmp6_mc_seq_stop(struct seq_file *seq, void *v) + __releases(dev_base_lock) { struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); if (likely(state->idev != NULL)) { @@ -2516,6 +2518,7 @@ static struct ip6_sf_list *igmp6_mcf_get_idx(struct seq_file *seq, loff_t pos) } static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(dev_base_lock) { read_lock(&dev_base_lock); return *pos ? 
igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; @@ -2533,6 +2536,7 @@ static void *igmp6_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v) + __releases(dev_base_lock) { struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); if (likely(state->im != NULL)) { -- cgit v1.2.3 From 65f7651788e18fadb2fbb7276af935d7871e1803 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jan 2008 20:46:48 -0800 Subject: [NET]: prot_inuse cleanups and optimizations 1) Cleanups (all functions are prefixed by sock_prot_inuse) sock_prot_inc_use(prot) -> sock_prot_inuse_add(prot,1) sock_prot_dec_use(prot) -> sock_prot_inuse_add(prot,-1) sock_prot_inuse() -> sock_prot_inuse_get() New functions : sock_prot_inuse_init() and sock_prot_inuse_free() to abstract pcounter use. 2) if CONFIG_PROC_FS=n, we can zap 'inuse' member from "struct proto", since nobody wants to read the inuse value. This saves 1372 bytes on i386/SMP and some cpu cycles. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/inet6_hashtables.c | 4 ++-- net/ipv6/ipv6_sockglue.c | 8 ++++---- net/ipv6/proc.c | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 0765d8bd380..a66a7d8e281 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -43,7 +43,7 @@ void __inet6_hash(struct inet_hashinfo *hashinfo, } __sk_add_node(sk, list); - sock_prot_inc_use(sk->sk_prot); + sock_prot_inuse_add(sk->sk_prot, 1); write_unlock(lock); } EXPORT_SYMBOL(__inet6_hash); @@ -216,7 +216,7 @@ unique: BUG_TRAP(sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sk->sk_hash = hash; - sock_prot_inc_use(sk->sk_prot); + sock_prot_inuse_add(sk->sk_prot, 1); write_unlock(lock); if (twp != NULL) { diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 20fece4ad3d..bf2a686aa13 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -268,8 +268,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, struct inet_connection_sock *icsk = inet_csk(sk); local_bh_disable(); - sock_prot_dec_use(sk->sk_prot); - sock_prot_inc_use(&tcp_prot); + sock_prot_inuse_add(sk->sk_prot, -1); + sock_prot_inuse_add(&tcp_prot, 1); local_bh_enable(); sk->sk_prot = &tcp_prot; icsk->icsk_af_ops = &ipv4_specific; @@ -282,8 +282,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (sk->sk_protocol == IPPROTO_UDPLITE) prot = &udplite_prot; local_bh_disable(); - sock_prot_dec_use(sk->sk_prot); - sock_prot_inc_use(prot); + sock_prot_inuse_add(sk->sk_prot, -1); + sock_prot_inuse_add(prot, 1); local_bh_enable(); sk->sk_prot = prot; sk->sk_socket->ops = &inet_dgram_ops; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 41e9980b3e0..571d95a21c1 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -36,13 +36,13 @@ static struct proc_dir_entry *proc_net_devsnmp6; static int sockstat6_seq_show(struct seq_file *seq, void *v) { 
seq_printf(seq, "TCP6: inuse %d\n", - sock_prot_inuse(&tcpv6_prot)); + sock_prot_inuse_get(&tcpv6_prot)); seq_printf(seq, "UDP6: inuse %d\n", - sock_prot_inuse(&udpv6_prot)); + sock_prot_inuse_get(&udpv6_prot)); seq_printf(seq, "UDPLITE6: inuse %d\n", - sock_prot_inuse(&udplitev6_prot)); + sock_prot_inuse_get(&udplitev6_prot)); seq_printf(seq, "RAW6: inuse %d\n", - sock_prot_inuse(&rawv6_prot)); + sock_prot_inuse_get(&rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", ip6_frag_nqueues(), ip6_frag_mem()); return 0; -- cgit v1.2.3 From 3d7cc2ba628dcc6b55a2bafc7eaf35019fdcc201 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 9 Jan 2008 00:33:11 -0800 Subject: [NETFILTER]: Switch to using ctl_paths in nf_queue and conntrack modules This includes the most simple cases for netfilter. The first part is the queue modules for ipv4 and ipv6, on which the net/ipv4/ and net/ipv6/ paths are reused from the appropriate ipv4 and ipv6 code. The conntrack module is also patched, but this hunk is very small and simple. Signed-off-by: Pavel Emelyanov Acked-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/ip6_queue.c | 22 +--------------------- net/ipv6/sysctl_net_ipv6.c | 6 ++++-- 2 files changed, 5 insertions(+), 23 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index e5b0059582f..a20db0bb5a1 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -529,26 +529,6 @@ static ctl_table ipq_table[] = { { .ctl_name = 0 } }; -static ctl_table ipq_dir_table[] = { - { - .ctl_name = NET_IPV6, - .procname = "ipv6", - .mode = 0555, - .child = ipq_table - }, - { .ctl_name = 0 } -}; - -static ctl_table ipq_root_table[] = { - { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - .child = ipq_dir_table - }, - { .ctl_name = 0 } -}; - static int ip6_queue_show(struct seq_file *m, void *v) { read_lock_bh(&queue_lock); @@ -614,7 +594,7 @@ static int __init ip6_queue_init(void) } register_netdevice_notifier(&ipq_dev_notifier); - ipq_sysctl_header = register_sysctl_table(ipq_root_table); + ipq_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, ipq_table); status = nf_register_queue_handler(PF_INET6, &nfqh); if (status < 0) { diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 0b5bec3cb79..4ad8d9d3cb7 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -82,17 +82,19 @@ static ctl_table ipv6_table[] = { { .ctl_name = 0 } }; -static struct ctl_path ipv6_ctl_path[] = { +struct ctl_path net_ipv6_ctl_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "ipv6", .ctl_name = NET_IPV6, }, { }, }; +EXPORT_SYMBOL_GPL(net_ipv6_ctl_path); static struct ctl_table_header *ipv6_sysctl_header; void ipv6_sysctl_register(void) { - ipv6_sysctl_header = register_sysctl_paths(ipv6_ctl_path, ipv6_table); + ipv6_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, + ipv6_table); } void ipv6_sysctl_unregister(void) -- cgit v1.2.3 From 291480c09a9452a3d8852a9bfeb5ba2cbcfe662c Mon Sep 17 00:00:00 2001 From: 
Daniel Lezcano Date: Thu, 10 Jan 2008 02:47:55 -0800 Subject: [NETNS][IPV6]: Make ipv6_sysctl_register to return a value. This patch makes the function ipv6_sysctl_register to return a value. The af_inet6 init function is now able to handle an error and catch it from the initialization of the sysctl. Signed-off-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 5 ++++- net/ipv6/sysctl_net_ipv6.c | 9 +++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 34c20533ba5..a2842400a09 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -783,7 +783,9 @@ static int __init inet6_init(void) */ #ifdef CONFIG_SYSCTL - ipv6_sysctl_register(); + err = ipv6_sysctl_register(); + if (err) + goto sysctl_fail; #endif err = icmpv6_init(&inet6_family_ops); if (err) @@ -897,6 +899,7 @@ ndisc_fail: icmp_fail: #ifdef CONFIG_SYSCTL ipv6_sysctl_unregister(); +sysctl_fail: #endif cleanup_ipv6_mibs(); out_unregister_sock: diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 4ad8d9d3cb7..f713fbf34c2 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -91,10 +91,15 @@ EXPORT_SYMBOL_GPL(net_ipv6_ctl_path); static struct ctl_table_header *ipv6_sysctl_header; -void ipv6_sysctl_register(void) +int ipv6_sysctl_register(void) { ipv6_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, - ipv6_table); + ipv6_table); + if (!ipv6_sysctl_header) + return -ENOMEM; + + return 0; + } void ipv6_sysctl_unregister(void) -- cgit v1.2.3 From 81c1c17804ac52036e07b0ba95cb637bdd4784d6 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 10 Jan 2008 02:48:33 -0800 Subject: [NETNS][IPV6]: Make a subsystem for af_inet6. This patch add a network namespace subsystem for the af_inet6 module. It does nothing right now, but one of its purpose is to receive the different variables for sysctl in order to initialize them. 
When the sysctl variable will be moved to the network namespace structure, they will be no longer initialized as global static variables, so we must find a place to initialize them. Because the sysctl can be disabled, it has no sense to store them in the sysctl_net_ipv6 file. Signed-off-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a2842400a09..72b898fe2dd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -719,6 +719,21 @@ static void cleanup_ipv6_mibs(void) snmp_mib_free((void **)udplite_stats_in6); } +static int inet6_net_init(struct net *net) +{ + return 0; +} + +static void inet6_net_exit(struct net *net) +{ + return; +} + +static struct pernet_operations inet6_net_ops = { + .init = inet6_net_init, + .exit = inet6_net_exit, +}; + static int __init inet6_init(void) { struct sk_buff *dummy_skb; @@ -782,6 +797,10 @@ static int __init inet6_init(void) * able to communicate via both network protocols. */ + err = register_pernet_subsys(&inet6_net_ops); + if (err) + goto register_pernet_fail; + #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) @@ -901,6 +920,8 @@ icmp_fail: ipv6_sysctl_unregister(); sysctl_fail: #endif + unregister_pernet_subsys(&inet6_net_ops); +register_pernet_fail: cleanup_ipv6_mibs(); out_unregister_sock: sock_unregister(PF_INET6); @@ -956,6 +977,7 @@ static void __exit inet6_exit(void) #ifdef CONFIG_SYSCTL ipv6_sysctl_unregister(); #endif + unregister_pernet_subsys(&inet6_net_ops); cleanup_ipv6_mibs(); proto_unregister(&rawv6_prot); proto_unregister(&udplitev6_prot); -- cgit v1.2.3 From 89918fc270bb77cb1a0703f0ea566a692b32e324 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 10 Jan 2008 02:49:34 -0800 Subject: [NETNS][IPV6]: Make the ipv6 sysctl to be a netns subsystem. 
The initialization of the sysctl for the ipv6 protocol is changed to a network namespace subsystem. That means when a new network namespace is created the initialization function for the sysctl will be called. That does not change the behavior of the sysctl in case of the kernel with the network namespace disabled. Signed-off-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/ipv6/sysctl_net_ipv6.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index f713fbf34c2..7329decf1f9 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -91,10 +91,10 @@ EXPORT_SYMBOL_GPL(net_ipv6_ctl_path); static struct ctl_table_header *ipv6_sysctl_header; -int ipv6_sysctl_register(void) +static int ipv6_sysctl_net_init(struct net *net) { - ipv6_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, - ipv6_table); + ipv6_sysctl_header = register_net_sysctl_table(net, net_ipv6_ctl_path, + ipv6_table); if (!ipv6_sysctl_header) return -ENOMEM; @@ -102,7 +102,22 @@ int ipv6_sysctl_register(void) } +static void ipv6_sysctl_net_exit(struct net *net) +{ + unregister_net_sysctl_table(ipv6_sysctl_header); +} + +static struct pernet_operations ipv6_sysctl_net_ops = { + .init = ipv6_sysctl_net_init, + .exit = ipv6_sysctl_net_exit, +}; + +int ipv6_sysctl_register(void) +{ + return register_pernet_subsys(&ipv6_sysctl_net_ops); +} + void ipv6_sysctl_unregister(void) { - unregister_sysctl_table(ipv6_sysctl_header); + unregister_pernet_subsys(&ipv6_sysctl_net_ops); } -- cgit v1.2.3 From 760f2d0186225f06d46e07232d65219c5055cad3 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 10 Jan 2008 02:53:43 -0800 Subject: [NETNS][IPV6]: Make multiple instance of sysctl tables. Each network namespace wants its own set of sysctl value, eg. 
we should not be able from a namespace to set a sysctl value for another namespace, especially for the initial network namespace. This patch duplicates the sysctl table when we register a new network namespace for ipv6. The duplicated tables are postfixed with the "template" word to notify the developer that the table is cloned. Signed-off-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 12 ++++++++- net/ipv6/route.c | 11 +++++++- net/ipv6/sysctl_net_ipv6.c | 67 +++++++++++++++++++++++++++++++++++++++------- 3 files changed, 78 insertions(+), 12 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index c3bbd868730..dfe3b37c43e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -907,7 +907,7 @@ int icmpv6_err_convert(int type, int code, int *err) EXPORT_SYMBOL(icmpv6_err_convert); #ifdef CONFIG_SYSCTL -ctl_table ipv6_icmp_table[] = { +ctl_table ipv6_icmp_table_template[] = { { .ctl_name = NET_IPV6_ICMP_RATELIMIT, .procname = "ratelimit", @@ -918,5 +918,15 @@ ctl_table ipv6_icmp_table[] = { }, { .ctl_name = 0 }, }; + +struct ctl_table *ipv6_icmp_sysctl_init(struct net *net) +{ + struct ctl_table *table; + + table = kmemdup(ipv6_icmp_table_template, + sizeof(ipv6_icmp_table_template), + GFP_KERNEL); + return table; +} #endif diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b80ef578420..0c7382f4fb8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2409,7 +2409,7 @@ int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, return -EINVAL; } -ctl_table ipv6_route_table[] = { +ctl_table ipv6_route_table_template[] = { { .procname = "flush", .data = &flush_delay, @@ -2499,6 +2499,15 @@ ctl_table ipv6_route_table[] = { { .ctl_name = 0 } }; +struct ctl_table *ipv6_route_sysctl_init(struct net *net) +{ + struct ctl_table *table; + + table = kmemdup(ipv6_route_table_template, + sizeof(ipv6_route_table_template), + GFP_KERNEL); + return table; +} #endif int __init ip6_route_init(void) diff 
--git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 7329decf1f9..7970f3366f8 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -14,20 +14,23 @@ #include #include -static ctl_table ipv6_table[] = { +extern struct ctl_table *ipv6_route_sysctl_init(struct net *net); +extern struct ctl_table *ipv6_icmp_sysctl_init(struct net *net); + +static ctl_table ipv6_table_template[] = { { .ctl_name = NET_IPV6_ROUTE, .procname = "route", .maxlen = 0, .mode = 0555, - .child = ipv6_route_table + .child = ipv6_route_table_template }, { .ctl_name = NET_IPV6_ICMP, .procname = "icmp", .maxlen = 0, .mode = 0555, - .child = ipv6_icmp_table + .child = ipv6_icmp_table_template }, { .ctl_name = NET_IPV6_BINDV6ONLY, @@ -89,22 +92,66 @@ struct ctl_path net_ipv6_ctl_path[] = { }; EXPORT_SYMBOL_GPL(net_ipv6_ctl_path); -static struct ctl_table_header *ipv6_sysctl_header; - static int ipv6_sysctl_net_init(struct net *net) { - ipv6_sysctl_header = register_net_sysctl_table(net, net_ipv6_ctl_path, - ipv6_table); - if (!ipv6_sysctl_header) + struct ctl_table *ipv6_table; + struct ctl_table *ipv6_route_table; + struct ctl_table *ipv6_icmp_table; + int err; + + err = -ENOMEM; + ipv6_table = kmemdup(ipv6_table_template, sizeof(ipv6_table_template), + GFP_KERNEL); + if (!ipv6_table) + goto out; + + ipv6_route_table = ipv6_route_sysctl_init(net); + if (!ipv6_route_table) + goto out_ipv6_table; + + ipv6_icmp_table = ipv6_icmp_sysctl_init(net); + if (!ipv6_icmp_table) + goto out_ipv6_route_table; + + ipv6_table[0].child = ipv6_route_table; + ipv6_table[1].child = ipv6_icmp_table; + + net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path, + ipv6_table); + if (!net->ipv6.sysctl.table) return -ENOMEM; - return 0; + if (!net->ipv6.sysctl.table) + goto out_ipv6_icmp_table; + + err = 0; +out: + return err; +out_ipv6_icmp_table: + kfree(ipv6_icmp_table); +out_ipv6_route_table: + kfree(ipv6_route_table); +out_ipv6_table: + kfree(ipv6_table); + goto 
out; } static void ipv6_sysctl_net_exit(struct net *net) { - unregister_net_sysctl_table(ipv6_sysctl_header); + struct ctl_table *ipv6_table; + struct ctl_table *ipv6_route_table; + struct ctl_table *ipv6_icmp_table; + + ipv6_table = net->ipv6.sysctl.table->ctl_table_arg; + ipv6_route_table = ipv6_table[0].child; + ipv6_icmp_table = ipv6_table[1].child; + + unregister_net_sysctl_table(net->ipv6.sysctl.table); + + kfree(ipv6_table); + kfree(ipv6_route_table); + kfree(ipv6_icmp_table); } static struct pernet_operations ipv6_sysctl_net_ops = { -- cgit v1.2.3 From 99bc9c4e45e7e783cf0b0a25cc03a103c038f254 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 10 Jan 2008 02:54:53 -0800 Subject: [NETNS][IPV6]: Make bindv6only sysctl per namespace. This patch moves the bindv6only sysctl to the network namespace structure. Until the ipv6 protocol is not per namespace, the sysctl variable is always from the initial network namespace. Signed-off-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 5 ++--- net/ipv6/sysctl_net_ipv6.c | 4 +++- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 72b898fe2dd..70662bf8ab9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -66,8 +66,6 @@ MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); MODULE_LICENSE("GPL"); -int sysctl_ipv6_bindv6only __read_mostly; - /* The inetsw6 table contains everything that inet6_create needs to * build a new socket. */ @@ -193,7 +191,7 @@ lookup_protocol: np->mcast_hops = -1; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; - np->ipv6only = sysctl_ipv6_bindv6only; + np->ipv6only = init_net.ipv6.sysctl.bindv6only; /* Init the ipv4 part of the socket since we can have sockets * using v6 API for ipv4. 
@@ -721,6 +719,7 @@ static void cleanup_ipv6_mibs(void) static int inet6_net_init(struct net *net) { + net->ipv6.sysctl.bindv6only = 0; return 0; } diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 7970f3366f8..13be97a928c 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -35,7 +35,7 @@ static ctl_table ipv6_table_template[] = { { .ctl_name = NET_IPV6_BINDV6ONLY, .procname = "bindv6only", - .data = &sysctl_ipv6_bindv6only, + .data = &init_net.ipv6.sysctl.bindv6only, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -116,6 +116,8 @@ static int ipv6_sysctl_net_init(struct net *net) ipv6_table[0].child = ipv6_route_table; ipv6_table[1].child = ipv6_icmp_table; + ipv6_table[2].data = &net->ipv6.sysctl.bindv6only; + net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path, ipv6_table); if (!net->ipv6.sysctl.table) -- cgit v1.2.3 From e71e0349eb32bc438fa80d8990c6f3592967d111 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 10 Jan 2008 02:56:03 -0800 Subject: [NETNS][IPV6]: Make ip6_frags per namespace. The ip6_frags is moved to the network namespace structure. Because there can be multiple instances of the network namespaces, and the ip6_frags is no longer a global static variable, a helper function has been added to facilitate the initialization of the variables. Until the ipv6 protocol is not per namespace, the variables are accessed relatively from the initial network namespace. Signed-off-by: Daniel Lezcano Signed-off-by: David S. 
Miller --- net/ipv6/af_inet6.c | 8 ++++++++ net/ipv6/reassembly.c | 16 +++++++--------- net/ipv6/sysctl_net_ipv6.c | 12 ++++++++---- 3 files changed, 23 insertions(+), 13 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 70662bf8ab9..c4a1882fa80 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -72,6 +72,8 @@ MODULE_LICENSE("GPL"); static struct list_head inetsw6[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw6_lock); +void ipv6_frag_sysctl_init(struct net *net); + static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) { const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo); @@ -720,6 +722,12 @@ static void cleanup_ipv6_mibs(void) static int inet6_net_init(struct net *net) { net->ipv6.sysctl.bindv6only = 0; + net->ipv6.sysctl.frags.high_thresh = 256 * 1024; + net->ipv6.sysctl.frags.low_thresh = 192 * 1024; + net->ipv6.sysctl.frags.timeout = IPV6_FRAG_TIMEOUT; + net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ; + ipv6_frag_sysctl_init(net); + return 0; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index bf4173daecb..5cd0bc693a5 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -82,13 +82,6 @@ struct frag_queue __u16 nhoffset; }; -struct inet_frags_ctl ip6_frags_ctl __read_mostly = { - .high_thresh = 256 * 1024, - .low_thresh = 192 * 1024, - .timeout = IPV6_FRAG_TIMEOUT, - .secret_interval = 10 * 60 * HZ, -}; - static struct inet_frags ip6_frags; int ip6_frag_nqueues(void) @@ -605,7 +598,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - if (atomic_read(&ip6_frags.mem) > ip6_frags_ctl.high_thresh) + if (atomic_read(&ip6_frags.mem) > init_net.ipv6.sysctl.frags.high_thresh) ip6_evictor(ip6_dst_idev(skb->dst)); if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr, @@ -632,6 +625,11 @@ static struct inet6_protocol frag_protocol = .flags = INET6_PROTO_NOPOLICY, }; +void ipv6_frag_sysctl_init(struct net *net) +{ + ip6_frags.ctl = 
&net->ipv6.sysctl.frags; +} + int __init ipv6_frag_init(void) { int ret; @@ -639,7 +637,7 @@ int __init ipv6_frag_init(void) ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT); if (ret) goto out; - ip6_frags.ctl = &ip6_frags_ctl; + ip6_frags.hashfn = ip6_hashfn; ip6_frags.constructor = ip6_frag_init; ip6_frags.destructor = NULL; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 13be97a928c..ae3cfd1b8e0 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -43,7 +43,7 @@ static ctl_table ipv6_table_template[] = { { .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH, .procname = "ip6frag_high_thresh", - .data = &ip6_frags_ctl.high_thresh, + .data = &init_net.ipv6.sysctl.frags.high_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -51,7 +51,7 @@ static ctl_table ipv6_table_template[] = { { .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH, .procname = "ip6frag_low_thresh", - .data = &ip6_frags_ctl.low_thresh, + .data = &init_net.ipv6.sysctl.frags.low_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -59,7 +59,7 @@ static ctl_table ipv6_table_template[] = { { .ctl_name = NET_IPV6_IP6FRAG_TIME, .procname = "ip6frag_time", - .data = &ip6_frags_ctl.timeout, + .data = &init_net.ipv6.sysctl.frags.timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -68,7 +68,7 @@ static ctl_table ipv6_table_template[] = { { .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL, .procname = "ip6frag_secret_interval", - .data = &ip6_frags_ctl.secret_interval, + .data = &init_net.ipv6.sysctl.frags.secret_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -117,6 +117,10 @@ static int ipv6_sysctl_net_init(struct net *net) ipv6_table[1].child = ipv6_icmp_table; ipv6_table[2].data = &net->ipv6.sysctl.bindv6only; + ipv6_table[3].data = &net->ipv6.sysctl.frags.high_thresh; + ipv6_table[4].data = &net->ipv6.sysctl.frags.low_thresh; + 
ipv6_table[5].data = &net->ipv6.sysctl.frags.timeout; + ipv6_table[6].data = &net->ipv6.sysctl.frags.secret_interval; net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path, ipv6_table); -- cgit v1.2.3 From 7c76509d0da99f29289b9b7ab134791e45d49b15 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 10 Jan 2008 02:57:43 -0800 Subject: [NETNS][IPV6]: Make mld_max_msf readonly in other namespaces. The mld_max_msf protects the system with a maximum allowed multicast source filters. Making this variable per namespace can potentially be a problem if someone inside a namespace sets it to a big value, that will impact the whole system including other namespaces. I don't see any benefits to have it per namespace for now, so in order to keep a directory entry in a newly created namespace, I make it read-only when we are not in the initial network namespace. Signed-off-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/ipv6/sysctl_net_ipv6.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index ae3cfd1b8e0..d223159638d 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -122,6 +122,12 @@ static int ipv6_sysctl_net_init(struct net *net) ipv6_table[5].data = &net->ipv6.sysctl.frags.timeout; ipv6_table[6].data = &net->ipv6.sysctl.frags.secret_interval; + /* We don't want this value to be per namespace, it should be global + to all namespaces, so make it read-only when we are not in the + init network namespace */ + if (net != &init_net) + ipv6_table[7].mode = 0444; + net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path, ipv6_table); if (!net->ipv6.sysctl.table) -- cgit v1.2.3 From 4990509f19e8f1e000a83a88fc46328f73b8a88a Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 10 Jan 2008 03:01:01 -0800 Subject: [NETNS][IPV6]: Make sysctls route per namespace. 
All the sysctl concerning the routes are moved to the network namespace structure. A helper function is called to initialize the variables. Because the ipv6 protocol is not yet per namespace, the variables are accessed relatively from the network namespace. Signed-off-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 8 ++++++++ net/ipv6/ip6_fib.c | 14 ++++++++----- net/ipv6/route.c | 49 +++++++++++++++++++--------------------------- net/ipv6/sysctl_net_ipv6.c | 11 +++++++++++ 4 files changed, 48 insertions(+), 34 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c4a1882fa80..3aea84a1822 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -726,6 +726,14 @@ static int inet6_net_init(struct net *net) net->ipv6.sysctl.frags.low_thresh = 192 * 1024; net->ipv6.sysctl.frags.timeout = IPV6_FRAG_TIMEOUT; net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ; + net->ipv6.sysctl.flush_delay = 0; + net->ipv6.sysctl.ip6_rt_max_size = 4096; + net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; + net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; + net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; + net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; + net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; + net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; ipv6_frag_sysctl_init(net); return 0; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 7165a5e90f4..0e83164aa3e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -681,13 +681,15 @@ static __inline__ void fib6_start_gc(struct rt6_info *rt) { if (ip6_fib_timer.expires == 0 && (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE))) - mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); + mod_timer(&ip6_fib_timer, jiffies + + init_net.ipv6.sysctl.ip6_rt_gc_interval); } void fib6_force_start_gc(void) { if (ip6_fib_timer.expires == 0) - mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); + mod_timer(&ip6_fib_timer, jiffies + + 
init_net.ipv6.sysctl.ip6_rt_gc_interval); } /* @@ -1447,7 +1449,8 @@ void fib6_run_gc(unsigned long dummy) { if (dummy != ~0UL) { spin_lock_bh(&fib6_gc_lock); - gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval; + gc_args.timeout = dummy ? (int)dummy : + init_net.ipv6.sysctl.ip6_rt_gc_interval; } else { local_bh_disable(); if (!spin_trylock(&fib6_gc_lock)) { @@ -1455,7 +1458,7 @@ void fib6_run_gc(unsigned long dummy) local_bh_enable(); return; } - gc_args.timeout = ip6_rt_gc_interval; + gc_args.timeout = init_net.ipv6.sysctl.ip6_rt_gc_interval; } gc_args.more = 0; @@ -1463,7 +1466,8 @@ void fib6_run_gc(unsigned long dummy) fib6_clean_all(fib6_age, 0, NULL); if (gc_args.more) - mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); + mod_timer(&ip6_fib_timer, jiffies + + init_net.ipv6.sysctl.ip6_rt_gc_interval); else { del_timer(&ip6_fib_timer); ip6_fib_timer.expires = 0; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0c7382f4fb8..d2b3cf695af 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -73,14 +73,6 @@ #define CLONE_OFFLINK_ROUTE 0 -static int ip6_rt_max_size = 4096; -static int ip6_rt_gc_min_interval = HZ / 2; -static int ip6_rt_gc_timeout = 60*HZ; -int ip6_rt_gc_interval = 30*HZ; -static int ip6_rt_gc_elasticity = 9; -static int ip6_rt_mtu_expires = 10*60*HZ; -static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; - static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static struct dst_entry *ip6_negative_advice(struct dst_entry *); @@ -894,8 +886,8 @@ static inline unsigned int ipv6_advmss(unsigned int mtu) { mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); - if (mtu < ip6_rt_min_advmss) - mtu = ip6_rt_min_advmss; + if (mtu < init_net.ipv6.sysctl.ip6_rt_min_advmss) + mtu = init_net.ipv6.sysctl.ip6_rt_min_advmss; /* * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and @@ -995,19 +987,19 @@ static int ip6_dst_gc(void) static unsigned long last_gc; 
unsigned long now = jiffies; - if (time_after(last_gc + ip6_rt_gc_min_interval, now) && - atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size) + if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) && + atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size) goto out; expire++; fib6_run_gc(expire); last_gc = now; if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh) - expire = ip6_rt_gc_timeout>>1; + expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1; out: - expire -= expire>>ip6_rt_gc_elasticity; - return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size); + expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity; + return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size); } /* Clean host part of a prefix. Not necessary in radix tree, @@ -1513,7 +1505,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, rt->u.dst.metrics[RTAX_MTU-1] = pmtu; if (allfrag) rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; - dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires); + dst_set_expires(&rt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires); rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; goto out; } @@ -1539,7 +1531,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, * which is 10 mins. After 10 mins the decreased pmtu is expired * and detecting PMTU increase will be automatically happened. 
*/ - dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); + dst_set_expires(&nrt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires); nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; ip6_ins_rt(nrt); @@ -2395,15 +2387,14 @@ static inline void ipv6_route_proc_fini(struct net *net) #ifdef CONFIG_SYSCTL -static int flush_delay; - static int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) { + int delay = init_net.ipv6.sysctl.flush_delay; if (write) { proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay); + fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay); return 0; } else return -EINVAL; @@ -2412,7 +2403,7 @@ int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, ctl_table ipv6_route_table_template[] = { { .procname = "flush", - .data = &flush_delay, + .data = &init_net.ipv6.sysctl.flush_delay, .maxlen = sizeof(int), .mode = 0200, .proc_handler = &ipv6_sysctl_rtcache_flush @@ -2428,7 +2419,7 @@ ctl_table ipv6_route_table_template[] = { { .ctl_name = NET_IPV6_ROUTE_MAX_SIZE, .procname = "max_size", - .data = &ip6_rt_max_size, + .data = &init_net.ipv6.sysctl.ip6_rt_max_size, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -2436,7 +2427,7 @@ ctl_table ipv6_route_table_template[] = { { .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL, .procname = "gc_min_interval", - .data = &ip6_rt_gc_min_interval, + .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -2445,7 +2436,7 @@ ctl_table ipv6_route_table_template[] = { { .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT, .procname = "gc_timeout", - .data = &ip6_rt_gc_timeout, + .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -2454,7 +2445,7 @@ ctl_table ipv6_route_table_template[] = { { .ctl_name = 
NET_IPV6_ROUTE_GC_INTERVAL, .procname = "gc_interval", - .data = &ip6_rt_gc_interval, + .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -2463,7 +2454,7 @@ ctl_table ipv6_route_table_template[] = { { .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY, .procname = "gc_elasticity", - .data = &ip6_rt_gc_elasticity, + .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -2472,7 +2463,7 @@ ctl_table ipv6_route_table_template[] = { { .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES, .procname = "mtu_expires", - .data = &ip6_rt_mtu_expires, + .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -2481,7 +2472,7 @@ ctl_table ipv6_route_table_template[] = { { .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS, .procname = "min_adv_mss", - .data = &ip6_rt_min_advmss, + .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -2490,7 +2481,7 @@ ctl_table ipv6_route_table_template[] = { { .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, .procname = "gc_min_interval_ms", - .data = &ip6_rt_gc_min_interval, + .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_ms_jiffies, diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index d223159638d..b4ba422f271 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -113,7 +113,18 @@ static int ipv6_sysctl_net_init(struct net *net) if (!ipv6_icmp_table) goto out_ipv6_route_table; + ipv6_route_table[0].data = &net->ipv6.sysctl.flush_delay; + /* ipv6_route_table[1].data will be handled when we have + routes per namespace */ + ipv6_route_table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; + ipv6_route_table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; + 
ipv6_route_table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; + ipv6_route_table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; + ipv6_route_table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; + ipv6_route_table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; + ipv6_route_table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; ipv6_table[0].child = ipv6_route_table; + ipv6_table[1].child = ipv6_icmp_table; ipv6_table[2].data = &net->ipv6.sysctl.bindv6only; -- cgit v1.2.3 From 41a76906b3225997036efd88cbaae69d60b1e947 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 10 Jan 2008 03:02:40 -0800 Subject: [NETNS][IPV6]: Make icmpv6_time sysctl per namespace. This patch moves the icmpv6_time sysctl to the network namespace structure. Because the ipv6 protocol is not yet per namespace, the variable is accessed relatively to the initial network namespace. Signed-off-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 1 + net/ipv6/icmp.c | 6 ++---- net/ipv6/sysctl_net_ipv6.c | 1 + 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3aea84a1822..218b8b3050a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -734,6 +734,7 @@ static int inet6_net_init(struct net *net) net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; + net->ipv6.sysctl.icmpv6_time = 1*HZ; ipv6_frag_sysctl_init(net); return 0; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index dfe3b37c43e..5395afe55ca 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -154,8 +154,6 @@ static int is_ineligible(struct sk_buff *skb) return 0; } -static int sysctl_icmpv6_time __read_mostly = 1*HZ; - /* * Check the ICMP output rate limit */ @@ -186,7 +184,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type, res = 1; } else { struct rt6_info *rt = (struct rt6_info *)dst; - int tmo = 
sysctl_icmpv6_time; + int tmo = init_net.ipv6.sysctl.icmpv6_time; /* Give more bandwidth to wider prefixes. */ if (rt->rt6i_dst.plen < 128) @@ -911,7 +909,7 @@ ctl_table ipv6_icmp_table_template[] = { { .ctl_name = NET_IPV6_ICMP_RATELIMIT, .procname = "ratelimit", - .data = &sysctl_icmpv6_time, + .data = &init_net.ipv6.sysctl.icmpv6_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index b4ba422f271..5e0af4d4632 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -125,6 +125,7 @@ static int ipv6_sysctl_net_init(struct net *net) ipv6_route_table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; ipv6_table[0].child = ipv6_route_table; + ipv6_icmp_table[0].data = &net->ipv6.sysctl.icmpv6_time; ipv6_table[1].child = ipv6_icmp_table; ipv6_table[2].data = &net->ipv6.sysctl.bindv6only; -- cgit v1.2.3 From f8c26b8d589867aed8251db2935f8aa03aa68717 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 10 Jan 2008 03:17:29 -0800 Subject: [NETNS]: Add netns parameter to fib_rules_(un)register. The patch extends the different fib rules API in order to pass the network namespace pointer. That will allow to access the different tables from a namespace relative object. As usual, the pointer to the init_net variable is passed as parameter so we don't break the network. Acked-by: Benjamin Thery Acked-by: Daniel Lezcano Signed-off-by: Denis V. Lunev Signed-off-by: David S. 
Miller --- net/ipv6/fib6_rules.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 9ce2e0a6748..e4d7e5a08b4 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -273,7 +273,7 @@ int __init fib6_rules_init(void) if (ret) goto out; - ret = fib_rules_register(&fib6_rules_ops); + ret = fib_rules_register(&init_net, &fib6_rules_ops); if (ret) goto out_default_rules_init; out: @@ -286,5 +286,5 @@ out_default_rules_init: void fib6_rules_cleanup(void) { - fib_rules_unregister(&fib6_rules_ops); + fib_rules_unregister(&init_net, &fib6_rules_ops); } -- cgit v1.2.3 From 868d13ac811746e28e4c806f2b1bd8575796f9af Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 10 Jan 2008 03:18:25 -0800 Subject: [NETNS]: Pass fib_rules_ops into default_pref method. fib_rules_ops contains operations and the list of configured rules. ops will become per/namespace soon, so we need them to be known in the default_pref callback. Acked-by: Benjamin Thery Acked-by: Daniel Lezcano Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/ipv6/fib6_rules.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index e4d7e5a08b4..76437a1fcab 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -223,7 +223,7 @@ nla_put_failure: return -ENOBUFS; } -static u32 fib6_rule_default_pref(void) +static u32 fib6_rule_default_pref(struct fib_rules_ops *ops) { return 0x3FFF; } -- cgit v1.2.3 From 6b175b26c1048d331508940ad3516ead1998084f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 10 Jan 2008 03:25:28 -0800 Subject: [NETNS]: Add netns parameter to inet_(dev_)addr_type. The patch extends the inet_addr_type and inet_dev_addr_type with the network namespace pointer. That allows access to the different tables relative to the network namespace.
The modification of the signature function is reported in all the callers of the inet_addr_type using the pointer to the well known init_net. Acked-by: Benjamin Thery Acked-by: Daniel Lezcano Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 218b8b3050a..ac8772dd968 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -280,7 +280,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Check if the address belongs to the host. */ if (addr_type == IPV6_ADDR_MAPPED) { v4addr = addr->sin6_addr.s6_addr32[3]; - if (inet_addr_type(v4addr) != RTN_LOCAL) { + if (inet_addr_type(&init_net, v4addr) != RTN_LOCAL) { err = -EADDRNOTAVAIL; goto out; } -- cgit v1.2.3 From 4d1169c1e781e5853317c6b75620d678b2c4854e Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 10 Jan 2008 03:26:13 -0800 Subject: [NETNS]: Add netns to nl_info structure. nl_info is used to track the end-user destination of routing change notification. This is a natural object to hold a namespace on. Place it there and utilize the context in the appropriate places. Acked-by: Benjamin Thery Acked-by: Daniel Lezcano Signed-off-by: Denis V. Lunev Signed-off-by: David S. 
Miller --- net/ipv6/ip6_fib.c | 4 +++- net/ipv6/route.c | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 0e83164aa3e..f93407cf651 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1314,7 +1314,9 @@ static int fib6_walk(struct fib6_walker_t *w) static int fib6_clean_node(struct fib6_walker_t *w) { - struct nl_info info = {}; + struct nl_info info = { + .nl_net = &init_net, + }; int res; struct rt6_info *rt; struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d2b3cf695af..48c8d7cb902 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -601,7 +601,9 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) int ip6_ins_rt(struct rt6_info *rt) { - struct nl_info info = {}; + struct nl_info info = { + .nl_net = &init_net, + }; return __ip6_ins_rt(rt, &info); } @@ -1259,7 +1261,9 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) int ip6_del_rt(struct rt6_info *rt) { - struct nl_info info = {}; + struct nl_info info = { + .nl_net = &init_net, + }; return __ip6_del_rt(rt, &info); } -- cgit v1.2.3 From 408c4768cd0843f43a13a442c76215dd9cadf23d Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 10 Jan 2008 17:41:21 -0800 Subject: [NETNS]: Clean out the ipv6-related sysctls creation/destruction The addrconf sysctls and neigh sysctls are registered and unregistered always in pairs, so they can be joined into one (well, two) functions, that accept the struct inet6_dev and do all the job. This also gets rid of unneeded ifdefs inside the code. Signed-off-by: Pavel Emelyanov Signed-off-by: David S.
Miller --- net/ipv6/addrconf.c | 63 +++++++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 29 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6a48bb88f46..27b35ddeeab 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -102,7 +102,15 @@ #ifdef CONFIG_SYSCTL static void addrconf_sysctl_register(struct inet6_dev *idev); -static void addrconf_sysctl_unregister(struct ipv6_devconf *p); +static void addrconf_sysctl_unregister(struct inet6_dev *idev); +#else +static inline void addrconf_sysctl_register(struct inet6_dev *idev) +{ +} + +static inline void addrconf_sysctl_unregister(struct inet6_dev *idev) +{ +} #endif #ifdef CONFIG_IPV6_PRIVACY @@ -392,13 +400,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) ipv6_mc_init_dev(ndev); ndev->tstamp = jiffies; -#ifdef CONFIG_SYSCTL - neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6, - NET_IPV6_NEIGH, "ipv6", - &ndisc_ifinfo_sysctl_change, - NULL); addrconf_sysctl_register(ndev); -#endif /* protected by rtnl_lock */ rcu_assign_pointer(dev->ip6_ptr, ndev); @@ -2391,15 +2393,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, case NETDEV_CHANGENAME: if (idev) { snmp6_unregister_dev(idev); -#ifdef CONFIG_SYSCTL - addrconf_sysctl_unregister(&idev->cnf); - neigh_sysctl_unregister(idev->nd_parms); - neigh_sysctl_register(dev, idev->nd_parms, - NET_IPV6, NET_IPV6_NEIGH, "ipv6", - &ndisc_ifinfo_sysctl_change, - NULL); + addrconf_sysctl_unregister(idev); addrconf_sysctl_register(idev); -#endif err = snmp6_register_dev(idev); if (err) return notifier_from_errno(err); @@ -2523,10 +2518,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Shot the device (if unregistered) */ if (how == 1) { -#ifdef CONFIG_SYSCTL - addrconf_sysctl_unregister(&idev->cnf); - neigh_sysctl_unregister(idev->nd_parms); -#endif + addrconf_sysctl_unregister(idev); neigh_parms_release(&nd_tbl, idev->nd_parms); 
neigh_ifdown(&nd_tbl, dev); in6_dev_put(idev); @@ -4106,21 +4098,34 @@ out: return; } +static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) +{ + struct addrconf_sysctl_table *t; + + if (p->sysctl == NULL) + return; + + t = p->sysctl; + p->sysctl = NULL; + unregister_sysctl_table(t->sysctl_header); + kfree(t->dev_name); + kfree(t); +} + static void addrconf_sysctl_register(struct inet6_dev *idev) { + neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6, + NET_IPV6_NEIGH, "ipv6", + &ndisc_ifinfo_sysctl_change, + NULL); __addrconf_sysctl_register(idev->dev->name, idev->dev->ifindex, idev, &idev->cnf); } -static void addrconf_sysctl_unregister(struct ipv6_devconf *p) +static void addrconf_sysctl_unregister(struct inet6_dev *idev) { - if (p->sysctl) { - struct addrconf_sysctl_table *t = p->sysctl; - p->sysctl = NULL; - unregister_sysctl_table(t->sysctl_header); - kfree(t->dev_name); - kfree(t); - } + __addrconf_sysctl_unregister(&idev->cnf); + neigh_sysctl_unregister(idev->nd_parms); } @@ -4232,8 +4237,8 @@ void addrconf_cleanup(void) unregister_netdevice_notifier(&ipv6_dev_notf); #ifdef CONFIG_SYSCTL - addrconf_sysctl_unregister(&ipv6_devconf_dflt); - addrconf_sysctl_unregister(&ipv6_devconf); + __addrconf_sysctl_unregister(&ipv6_devconf_dflt); + __addrconf_sysctl_unregister(&ipv6_devconf); #endif rtnl_lock(); -- cgit v1.2.3 From 9589731220edfebeb6a05c52d0838a99dee20893 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 10 Jan 2008 17:41:45 -0800 Subject: [NETNS]: Make the __addrconf_sysctl_register return an error This error code will be needed to abort the namespace creation if needed. Probably, this is to be checked when a new device is created (currently it is ignored). Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 27b35ddeeab..18d43349013 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4044,7 +4044,7 @@ static struct addrconf_sysctl_table }, }; -static void __addrconf_sysctl_register(char *dev_name, int ctl_name, +static int __addrconf_sysctl_register(char *dev_name, int ctl_name, struct inet6_dev *idev, struct ipv6_devconf *p) { int i; @@ -4088,14 +4088,14 @@ static void __addrconf_sysctl_register(char *dev_name, int ctl_name, goto free_procname; p->sysctl = t; - return; + return 0; free_procname: kfree(t->dev_name); free: kfree(t); out: - return; + return -ENOBUFS; } static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) -- cgit v1.2.3 From bff16c2f991386883dc81ec969ba15eb270a0c7f Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 10 Jan 2008 17:42:13 -0800 Subject: [NETNS]: Make the ctl-tables per-namespace This includes passing the net to __addrconf_sysctl_register and saving this on the ctl_table->extra2 to be used in handlers (those, needing it). Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 18d43349013..bde50c68672 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -456,13 +456,13 @@ static void dev_forward_change(struct inet6_dev *idev) } -static void addrconf_forward_change(void) +static void addrconf_forward_change(struct net *net) { struct net_device *dev; struct inet6_dev *idev; read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { @@ -478,12 +478,15 @@ static void addrconf_forward_change(void) static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) { + struct net *net; + + net = (struct net *)table->extra2; if (p == &ipv6_devconf_dflt.forwarding) return; if (p == &ipv6_devconf.forwarding) { ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding; - addrconf_forward_change(); + addrconf_forward_change(net); } else if ((!*p) ^ (!old)) dev_forward_change((struct inet6_dev *)table->extra1); @@ -4044,8 +4047,8 @@ static struct addrconf_sysctl_table }, }; -static int __addrconf_sysctl_register(char *dev_name, int ctl_name, - struct inet6_dev *idev, struct ipv6_devconf *p) +static int __addrconf_sysctl_register(struct net *net, char *dev_name, + int ctl_name, struct inet6_dev *idev, struct ipv6_devconf *p) { int i; struct addrconf_sysctl_table *t; @@ -4068,6 +4071,7 @@ static int __addrconf_sysctl_register(char *dev_name, int ctl_name, for (i=0; t->addrconf_vars[i].data; i++) { t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ + t->addrconf_vars[i].extra2 = net; } /* @@ -4082,7 +4086,7 @@ static int __addrconf_sysctl_register(char *dev_name, int ctl_name, addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].procname = t->dev_name; 
addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].ctl_name = ctl_name; - t->sysctl_header = register_sysctl_paths(addrconf_ctl_path, + t->sysctl_header = register_net_sysctl_table(net, addrconf_ctl_path, t->addrconf_vars); if (t->sysctl_header == NULL) goto free_procname; @@ -4118,8 +4122,8 @@ static void addrconf_sysctl_register(struct inet6_dev *idev) NET_IPV6_NEIGH, "ipv6", &ndisc_ifinfo_sysctl_change, NULL); - __addrconf_sysctl_register(idev->dev->name, idev->dev->ifindex, - idev, &idev->cnf); + __addrconf_sysctl_register(idev->dev->nd_net, idev->dev->name, + idev->dev->ifindex, idev, &idev->cnf); } static void addrconf_sysctl_unregister(struct inet6_dev *idev) @@ -4215,9 +4219,9 @@ int __init addrconf_init(void) ipv6_addr_label_rtnl_register(); #ifdef CONFIG_SYSCTL - __addrconf_sysctl_register("all", NET_PROTO_CONF_ALL, + __addrconf_sysctl_register(&init_net, "all", NET_PROTO_CONF_ALL, NULL, &ipv6_devconf); - __addrconf_sysctl_register("default", NET_PROTO_CONF_DEFAULT, + __addrconf_sysctl_register(&init_net, "default", NET_PROTO_CONF_DEFAULT, NULL, &ipv6_devconf_dflt); #endif -- cgit v1.2.3 From e0da5a480cafc7ca228d6b5a05dbd77344a6bd29 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 10 Jan 2008 17:42:55 -0800 Subject: [NETNS]: Create ipv6 devconf-s for namespaces This is the core. Declare and register the pernet subsys for addrconf. The init callback will then create the devconf-s. The init_net will reuse the existing statically declared confs, so that accessing them from inside the ipv6 code will still work. The register_pernet_subsys() is moved above the ipv6_add_dev() call for loopback, because this function will need the net->devconf_dflt pointer to be already set. Signed-off-by: Pavel Emelyanov Signed-off-by: David S.
Miller --- net/ipv6/addrconf.c | 82 +++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 70 insertions(+), 12 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index bde50c68672..3ad081e9366 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4135,6 +4135,70 @@ static void addrconf_sysctl_unregister(struct inet6_dev *idev) #endif +static int addrconf_init_net(struct net *net) +{ + int err; + struct ipv6_devconf *all, *dflt; + + err = -ENOMEM; + all = &ipv6_devconf; + dflt = &ipv6_devconf_dflt; + + if (net != &init_net) { + all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL); + if (all == NULL) + goto err_alloc_all; + + dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); + if (dflt == NULL) + goto err_alloc_dflt; + } + + net->ipv6.devconf_all = all; + net->ipv6.devconf_dflt = dflt; + +#ifdef CONFIG_SYSCTL + err = __addrconf_sysctl_register(net, "all", NET_PROTO_CONF_ALL, + NULL, all); + if (err < 0) + goto err_reg_all; + + err = __addrconf_sysctl_register(net, "default", NET_PROTO_CONF_DEFAULT, + NULL, dflt); + if (err < 0) + goto err_reg_dflt; +#endif + return 0; + +#ifdef CONFIG_SYSCTL +err_reg_dflt: + __addrconf_sysctl_unregister(all); +err_reg_all: + kfree(dflt); +#endif +err_alloc_dflt: + kfree(all); +err_alloc_all: + return err; +} + +static void addrconf_exit_net(struct net *net) +{ +#ifdef CONFIG_SYSCTL + __addrconf_sysctl_unregister(net->ipv6.devconf_dflt); + __addrconf_sysctl_unregister(net->ipv6.devconf_all); +#endif + if (net != &init_net) { + kfree(net->ipv6.devconf_dflt); + kfree(net->ipv6.devconf_all); + } +} + +static struct pernet_operations addrconf_ops = { + .init = addrconf_init_net, + .exit = addrconf_exit_net, +}; + /* * Device notifier */ @@ -4167,6 +4231,8 @@ int __init addrconf_init(void) return err; } + register_pernet_subsys(&addrconf_ops); + /* The addrconf netdev notifier requires that loopback_dev * has it's ipv6 private information allocated and setup * 
before it can bring up and give link-local addresses @@ -4190,7 +4256,7 @@ int __init addrconf_init(void) err = -ENOMEM; rtnl_unlock(); if (err) - return err; + goto errlo; ip6_null_entry.u.dst.dev = init_net.loopback_dev; ip6_null_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev); @@ -4218,16 +4284,11 @@ int __init addrconf_init(void) ipv6_addr_label_rtnl_register(); -#ifdef CONFIG_SYSCTL - __addrconf_sysctl_register(&init_net, "all", NET_PROTO_CONF_ALL, - NULL, &ipv6_devconf); - __addrconf_sysctl_register(&init_net, "default", NET_PROTO_CONF_DEFAULT, - NULL, &ipv6_devconf_dflt); -#endif - return 0; errout: unregister_netdevice_notifier(&ipv6_dev_notf); +errlo: + unregister_pernet_subsys(&addrconf_ops); return err; } @@ -4240,10 +4301,7 @@ void addrconf_cleanup(void) unregister_netdevice_notifier(&ipv6_dev_notf); -#ifdef CONFIG_SYSCTL - __addrconf_sysctl_unregister(&ipv6_devconf_dflt); - __addrconf_sysctl_unregister(&ipv6_devconf); -#endif + unregister_pernet_subsys(&addrconf_ops); rtnl_lock(); -- cgit v1.2.3 From 441fc2a2393a9b9ffbacb97f4427cce743579411 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 10 Jan 2008 17:43:22 -0800 Subject: [NETNS]: Use the per-net ipv6_devconf_dflt All its users are in net/ipv6/addrconf.c's sysctl handlers. Since they already have the struct net to get from, the per-net ipv6_devconf_dflt can already be used. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3ad081e9366..9b96de3ba5e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -334,7 +334,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) rwlock_init(&ndev->lock); ndev->dev = dev; - memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf)); + memcpy(&ndev->cnf, dev->nd_net->ipv6.devconf_dflt, sizeof(ndev->cnf)); ndev->cnf.mtu6 = dev->mtu; ndev->cnf.sysctl = NULL; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); @@ -481,11 +481,11 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) struct net *net; net = (struct net *)table->extra2; - if (p == &ipv6_devconf_dflt.forwarding) + if (p == &net->ipv6.devconf_dflt->forwarding) return; if (p == &ipv6_devconf.forwarding) { - ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding; + net->ipv6.devconf_dflt->forwarding = ipv6_devconf.forwarding; addrconf_forward_change(net); } else if ((!*p) ^ (!old)) dev_forward_change((struct inet6_dev *)table->extra1); -- cgit v1.2.3 From e186932b3d26bd975022a1e254407e95dddceae7 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 10 Jan 2008 17:43:50 -0800 Subject: [NETNS]: Use the per-net ipv6_devconf(_all) in sysctl handlers Actually the net->ipv6.devconf_all can be used in a few places, but to keep the /proc/sys/net/ipv6/conf/ sysctls work consistently in the namespace we should use the per-net devconf_all in the sysctl "forwarding" handler. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9b96de3ba5e..cd90f9a6da3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -456,7 +456,7 @@ static void dev_forward_change(struct inet6_dev *idev) } -static void addrconf_forward_change(struct net *net) +static void addrconf_forward_change(struct net *net, __s32 newf) { struct net_device *dev; struct inet6_dev *idev; @@ -466,8 +466,8 @@ static void addrconf_forward_change(struct net *net) rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { - int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding); - idev->cnf.forwarding = ipv6_devconf.forwarding; + int changed = (!idev->cnf.forwarding) ^ (!newf); + idev->cnf.forwarding = newf; if (changed) dev_forward_change(idev); } @@ -484,9 +484,10 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) if (p == &net->ipv6.devconf_dflt->forwarding) return; - if (p == &ipv6_devconf.forwarding) { - net->ipv6.devconf_dflt->forwarding = ipv6_devconf.forwarding; - addrconf_forward_change(net); + if (p == &net->ipv6.devconf_all->forwarding) { + __s32 newf = net->ipv6.devconf_all->forwarding; + net->ipv6.devconf_dflt->forwarding = newf; + addrconf_forward_change(net, newf); } else if ((!*p) ^ (!old)) dev_forward_change((struct inet6_dev *)table->extra1); -- cgit v1.2.3 From 3c40090a0f5b69deecc5ca615f994957f949333d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 10 Jan 2008 22:42:49 -0800 Subject: [NETNS][IPV6]: inet6_addr - isolate inet6 addresses from proc file Make /proc/net/if_inet6 show only inet6 addresses belonging to the namespace. Signed-off-by: Daniel Lezcano Signed-off-by: Benjamin Thery Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cd90f9a6da3..d7b440343e9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2736,6 +2736,7 @@ static void addrconf_dad_run(struct inet6_dev *idev) { #ifdef CONFIG_PROC_FS struct if6_iter_state { + struct seq_net_private p; int bucket; }; @@ -2743,9 +2744,13 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) { struct inet6_ifaddr *ifa = NULL; struct if6_iter_state *state = seq->private; + struct net *net = state->p.net; for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { ifa = inet6_addr_lst[state->bucket]; + + while (ifa && ifa->idev->dev->nd_net != net) + ifa = ifa->lst_next; if (ifa) break; } @@ -2755,13 +2760,22 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa) { struct if6_iter_state *state = seq->private; + struct net *net = state->p.net; ifa = ifa->lst_next; try_again: + if (ifa) { + if (ifa->idev->dev->nd_net != net) { + ifa = ifa->lst_next; + goto try_again; + } + } + if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) { ifa = inet6_addr_lst[state->bucket]; goto try_again; } + return ifa; } @@ -2818,8 +2832,8 @@ static const struct seq_operations if6_seq_ops = { static int if6_seq_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &if6_seq_ops, - sizeof(struct if6_iter_state)); + return seq_open_net(inode, file, &if6_seq_ops, + sizeof(struct if6_iter_state)); } static const struct file_operations if6_fops = { @@ -2827,19 +2841,34 @@ static const struct file_operations if6_fops = { .open = if6_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; -int __init if6_proc_init(void) +static int if6_proc_net_init(struct 
net *net) { - if (!proc_net_fops_create(&init_net, "if_inet6", S_IRUGO, &if6_fops)) + if (!proc_net_fops_create(net, "if_inet6", S_IRUGO, &if6_fops)) return -ENOMEM; return 0; } +static void if6_proc_net_exit(struct net *net) +{ + proc_net_remove(net, "if_inet6"); +} + +static struct pernet_operations if6_proc_net_ops = { + .init = if6_proc_net_init, + .exit = if6_proc_net_exit, +}; + +int __init if6_proc_init(void) +{ + return register_pernet_subsys(&if6_proc_net_ops); +} + void if6_proc_exit(void) { - proc_net_remove(&init_net, "if_inet6"); + unregister_pernet_subsys(&if6_proc_net_ops); } #endif /* CONFIG_PROC_FS */ -- cgit v1.2.3 From bfeade087005278fc8cafe230b7658a4f40c5acb Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 10 Jan 2008 22:43:18 -0800 Subject: [NETNS][IPV6]: inet6_addr - check ipv6 address per namespace When a new address is added, we must check if the new address does not already exist. This patch makes this check aware of the network namespace, so the check will determine whether the address already exists for the specified network namespace. While the addresses are browsed, the addresses which do not belong to the namespace are skipped. Signed-off-by: Daniel Lezcano Signed-off-by: Benjamin Thery Signed-off-by: David S.
Miller --- net/ipv6/addrconf.c | 6 ++++-- net/ipv6/af_inet6.c | 3 ++- net/ipv6/anycast.c | 2 +- net/ipv6/datagram.c | 3 ++- net/ipv6/icmp.c | 2 +- net/ipv6/ip6_tunnel.c | 8 ++++---- net/ipv6/ndisc.c | 2 +- net/ipv6/raw.c | 3 ++- 8 files changed, 17 insertions(+), 12 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d7b440343e9..f35c3df410d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1206,13 +1206,16 @@ static int ipv6_count_addresses(struct inet6_dev *idev) return cnt; } -int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict) +int ipv6_chk_addr(struct net *net, struct in6_addr *addr, + struct net_device *dev, int strict) { struct inet6_ifaddr * ifp; u8 hash = ipv6_addr_hash(addr); read_lock_bh(&addrconf_hash_lock); for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + if (ifp->idev->dev->nd_net != net) + continue; if (ipv6_addr_equal(&ifp->addr, addr) && !(ifp->flags&IFA_F_TENTATIVE)) { if (dev == NULL || ifp->idev->dev == dev || @@ -1223,7 +1226,6 @@ int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict) read_unlock_bh(&addrconf_hash_lock); return ifp != NULL; } - EXPORT_SYMBOL(ipv6_chk_addr); static diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index ac8772dd968..3150c4be3c0 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -314,7 +314,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) */ v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST)) { - if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) { + if (!ipv6_chk_addr(&init_net, &addr->sin6_addr, + dev, 0)) { if (dev) dev_put(dev); err = -EADDRNOTAVAIL; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 5c4190060e7..9c7f83fbc3a 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -89,7 +89,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) return -EPERM; if (ipv6_addr_is_multicast(addr)) return 
-EINVAL; - if (ipv6_chk_addr(addr, NULL, 0)) + if (ipv6_chk_addr(&init_net, addr, NULL, 0)) return -EINVAL; pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index f49a06aa97d..94fa6ae77cf 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -549,7 +549,8 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, return -ENODEV; } } - if (!ipv6_chk_addr(&src_info->ipi6_addr, dev, 0)) { + if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr, + dev, 0)) { if (dev) dev_put(dev); err = -EINVAL; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 5395afe55ca..cbb5b9cf84a 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -332,7 +332,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, */ addr_type = ipv6_addr_type(&hdr->daddr); - if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0)) + if (ipv6_chk_addr(&init_net, &hdr->daddr, skb->dev, 0)) saddr = &hdr->daddr; /* diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 29b5321e39c..425c9ae8b31 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -653,8 +653,8 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t) ldev = dev_get_by_index(&init_net, p->link); if ((ipv6_addr_is_multicast(&p->laddr) || - likely(ipv6_chk_addr(&p->laddr, ldev, 0))) && - likely(!ipv6_chk_addr(&p->raddr, NULL, 0))) + likely(ipv6_chk_addr(&init_net, &p->laddr, ldev, 0))) && + likely(!ipv6_chk_addr(&init_net, &p->raddr, NULL, 0))) ret = 1; if (ldev) @@ -788,12 +788,12 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) if (p->link) ldev = dev_get_by_index(&init_net, p->link); - if (unlikely(!ipv6_chk_addr(&p->laddr, ldev, 0))) + if (unlikely(!ipv6_chk_addr(&init_net, &p->laddr, ldev, 0))) printk(KERN_WARNING "%s xmit: Local address not yet configured!\n", p->name); else if (!ipv6_addr_is_multicast(&p->raddr) && - unlikely(ipv6_chk_addr(&p->raddr, NULL, 0))) + unlikely(ipv6_chk_addr(&init_net, &p->raddr, NULL, 0))) 
printk(KERN_WARNING "%s xmit: Routing loop! " "Remote address found on this node!\n", diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b66a1f81bd8..e217d3ff00f 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -653,7 +653,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; int probes = atomic_read(&neigh->probes); - if (skb && ipv6_chk_addr(&ipv6_hdr(skb)->saddr, dev, 1)) + if (skb && ipv6_chk_addr(&init_net, &ipv6_hdr(skb)->saddr, dev, 1)) saddr = &ipv6_hdr(skb)->saddr; if ((probes -= neigh->parms->ucast_probes) < 0) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 45a580e843d..cb0b110a2ac 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -298,7 +298,8 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST)) { err = -EADDRNOTAVAIL; - if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) { + if (!ipv6_chk_addr(&init_net, &addr->sin6_addr, + dev, 0)) { if (dev) dev_put(dev); goto out; -- cgit v1.2.3 From 06bfe655e7db7719c0eb51eb420fb9c2a6aa1e00 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 10 Jan 2008 22:43:42 -0800 Subject: [NETNS][IPV6]: inet6_addr - ipv6_chk_same_addr namespace aware This patch makes ipv6_chk_same_addr function to be aware of the network namespace. The addresses not belonging to the network namespace are discarded. Signed-off-by: Daniel Lezcano Signed-off-by: Benjamin Thery Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f35c3df410d..41cc31ee297 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -149,7 +149,8 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); static void inet6_prefix_notify(int event, struct inet6_dev *idev, struct prefix_info *pinfo); -static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev); +static int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, + struct net_device *dev); static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); @@ -560,7 +561,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, write_lock(&addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ - if (ipv6_chk_same_addr(addr, idev->dev)) { + if (ipv6_chk_same_addr(&init_net, addr, idev->dev)) { ADBG(("ipv6_add_addr: already assigned\n")); err = -EEXIST; goto out; @@ -1229,12 +1230,15 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr, EXPORT_SYMBOL(ipv6_chk_addr); static -int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev) +int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, + struct net_device *dev) { struct inet6_ifaddr * ifp; u8 hash = ipv6_addr_hash(addr); for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + if (ifp->idev->dev->nd_net != net) + continue; if (ipv6_addr_equal(&ifp->addr, addr)) { if (dev == NULL || ifp->idev->dev == dev) break; -- cgit v1.2.3 From 1cab3da6be6c7659f62d0d297b389cc0e48b2178 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 10 Jan 2008 22:44:09 -0800 Subject: [NETNS][IPV6]: inet6_addr - ipv6_get_ifaddr namespace aware The inet6_addr_lst is browsed taking into account the network namespace specified as parameter. If an address does not belong to the specified namespace, it is ignored. 
Signed-off-by: Daniel Lezcano Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 11 +++++++---- net/ipv6/ip6_output.c | 3 ++- net/ipv6/ndisc.c | 9 +++++---- 3 files changed, 14 insertions(+), 9 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 41cc31ee297..c4df6cdff65 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1247,13 +1247,16 @@ int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, return ifp != NULL; } -struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict) +struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, struct in6_addr *addr, + struct net_device *dev, int strict) { struct inet6_ifaddr * ifp; u8 hash = ipv6_addr_hash(addr); read_lock_bh(&addrconf_hash_lock); for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + if (ifp->idev->dev->nd_net != net) + continue; if (ipv6_addr_equal(&ifp->addr, addr)) { if (dev == NULL || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { @@ -1739,7 +1742,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) ok: - ifp = ipv6_get_ifaddr(&addr, dev, 1); + ifp = ipv6_get_ifaddr(&init_net, &addr, dev, 1); if (ifp == NULL && valid_lft) { int max_addresses = in6_dev->cnf.max_addresses; @@ -3135,7 +3138,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) /* We ignore other flags so far. 
*/ ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS); - ifa = ipv6_get_ifaddr(pfx, dev, 1); + ifa = ipv6_get_ifaddr(net, pfx, dev, 1); if (ifa == NULL) { /* * It would be best to check for !NLM_F_CREATE here but @@ -3442,7 +3445,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, if (ifm->ifa_index) dev = __dev_get_by_index(&init_net, ifm->ifa_index); - if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { + if ((ifa = ipv6_get_ifaddr(net, addr, dev, 1)) == NULL) { err = -EADDRNOTAVAIL; goto errout; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ba7c8aaf278..15c4f6cee3e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -936,7 +936,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, struct flowi fl_gw; int redirect; - ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1); + ifp = ipv6_get_ifaddr(&init_net, &fl->fl6_src, + (*dst)->dev, 1); redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); if (ifp) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index e217d3ff00f..bdfc4ea6194 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -556,7 +556,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, }; /* for anycast or proxy, solicited_addr != src_addr */ - ifp = ipv6_get_ifaddr(solicited_addr, dev, 1); + ifp = ipv6_get_ifaddr(&init_net, solicited_addr, dev, 1); if (ifp) { src_addr = solicited_addr; if (ifp->flags & IFA_F_OPTIMISTIC) @@ -616,7 +616,8 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, * suppress the inclusion of the sllao. 
*/ if (send_sllao) { - struct inet6_ifaddr *ifp = ipv6_get_ifaddr(saddr, dev, 1); + struct inet6_ifaddr *ifp = ipv6_get_ifaddr(&init_net, saddr, + dev, 1); if (ifp) { if (ifp->flags & IFA_F_OPTIMISTIC) { send_sllao = 0; @@ -741,7 +742,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) inc = ipv6_addr_is_multicast(daddr); - if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1)) != NULL) { + if ((ifp = ipv6_get_ifaddr(&init_net, &msg->target, dev, 1)) != NULL) { if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { if (dad) { @@ -899,7 +900,7 @@ static void ndisc_recv_na(struct sk_buff *skb) return; } } - if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1))) { + if ((ifp = ipv6_get_ifaddr(&init_net, &msg->target, dev, 1))) { if (ifp->flags & IFA_F_TENTATIVE) { addrconf_dad_failure(ifp); return; -- cgit v1.2.3 From 389f661224cdbdf178553fb09a52dc6c8bf86890 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 10 Jan 2008 22:44:40 -0800 Subject: [NETNS][IPV6]: inet6_addr - make ipv6_chk_home_addr namespace aware Looks if the address is belonging to the network namespace, otherwise discard the address for the check. Signed-off-by: Daniel Lezcano Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 +++- net/ipv6/exthdrs.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c4df6cdff65..803caf1a389 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2883,13 +2883,15 @@ void if6_proc_exit(void) #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) /* Check if address is a home address configured on any interface. 
*/ -int ipv6_chk_home_addr(struct in6_addr *addr) +int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) { int ret = 0; struct inet6_ifaddr * ifp; u8 hash = ipv6_addr_hash(addr); read_lock_bh(&addrconf_hash_lock); for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { + if (ifp->idev->dev->nd_net != net) + continue; if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && (ifp->flags & IFA_F_HOMEADDRESS)) { ret = 1; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 2df34ed276f..3cd1c993d52 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -445,7 +445,7 @@ looped_back: kfree_skb(skb); return -1; } - if (!ipv6_chk_home_addr(addr)) { + if (!ipv6_chk_home_addr(&init_net, addr)) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); -- cgit v1.2.3 From 50eb431d6e98189eb40606fcd4d03ecd8e168afa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 12 Jan 2008 03:21:00 -0800 Subject: [IPV6] route: kill some bloat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/ipv6/route.c: ip6_pkt_prohibit_out | -130 ip6_pkt_discard | -261 ip6_pkt_discard_out | -130 ip6_pkt_prohibit | -261 4 functions changed, 782 bytes removed, diff: -782 net/ipv6/route.c: ip6_pkt_drop | +300 1 function changed, 300 bytes added, diff: +300 net/ipv6/route.o: 5 functions changed, 300 bytes added, 782 bytes removed, diff: -482 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 48c8d7cb902..a429900d16a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1763,8 +1763,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) * Drop the packet on the floor */ -static inline int ip6_pkt_drop(struct sk_buff *skb, int code, - int ipstats_mib_noroutes) +static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes) { int type; switch (ipstats_mib_noroutes) { -- cgit v1.2.3 From be185884b31093555dc10aa32efe0b73c835312e Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 14 Jan 2008 05:35:31 -0800 Subject: [NETNS][RAW]: Make ipv[46] raw sockets lookup namespaces aware. This just requires passing the appropriate struct net pointer into __raw_v[46]_lookup and skipping sockets that do not belong to the needed namespace. The proper net is taken from skb->dev in all cases. Signed-off-by: Pavel Emelyanov Signed-off-by: David S.
Miller --- net/ipv6/raw.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index cb0b110a2ac..6f20086064b 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -76,8 +76,9 @@ static void raw_v6_unhash(struct sock *sk) } -static struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, - struct in6_addr *loc_addr, struct in6_addr *rmt_addr, int dif) +static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, + unsigned short num, struct in6_addr *loc_addr, + struct in6_addr *rmt_addr, int dif) { struct hlist_node *node; int is_multicast = ipv6_addr_is_multicast(loc_addr); @@ -86,6 +87,9 @@ static struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, if (inet_sk(sk)->num == num) { struct ipv6_pinfo *np = inet6_sk(sk); + if (sk->sk_net != net) + continue; + if (!ipv6_addr_any(&np->daddr) && !ipv6_addr_equal(&np->daddr, rmt_addr)) continue; @@ -165,6 +169,7 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) struct sock *sk; int delivered = 0; __u8 hash; + struct net *net; saddr = &ipv6_hdr(skb)->saddr; daddr = saddr + 1; @@ -182,7 +187,8 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) if (sk == NULL) goto out; - sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); + net = skb->dev->nd_net; + sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); while (sk) { int filtered; @@ -225,7 +231,7 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) rawv6_rcv(sk, clone); } } - sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr, + sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr, IP6CB(skb)->iif); } out: @@ -359,6 +365,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, struct sock *sk; int hash; struct in6_addr *saddr, *daddr; + struct net *net; hash = nexthdr & (RAW_HTABLE_SIZE - 1); @@ -367,8 +374,9 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, if 
(sk != NULL) { saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; + net = skb->dev->nd_net; - while ((sk = __raw_v6_lookup(sk, nexthdr, saddr, daddr, + while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr, IP6CB(skb)->iif))) { rawv6_err(sk, skb, NULL, type, code, inner_offset, info); -- cgit v1.2.3 From f51d599fbecf4881a36466f0355da6b0b346ea70 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 14 Jan 2008 05:35:57 -0800 Subject: [NETNS][RAW]: Make /proc/net/raw(6) show per-namespace socket list. Pull the struct net pointer up to the showing functions to filter the sockets depending on their namespaces. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/raw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 6f20086064b..026fa910cb7 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1259,7 +1259,7 @@ static const struct seq_operations raw6_seq_ops = { static int raw6_seq_open(struct inode *inode, struct file *file) { - return raw_seq_open(file, &raw_v6_hashinfo, PF_INET6); + return raw_seq_open(inode, file, &raw_v6_hashinfo, PF_INET6); } static const struct file_operations raw6_seq_fops = { @@ -1267,7 +1267,7 @@ static const struct file_operations raw6_seq_fops = { .open = raw6_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; int __init raw6_proc_init(void) -- cgit v1.2.3 From e5ba31f11f6cae785e893d5d10abd612fef0b6bc Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 14 Jan 2008 05:36:27 -0800 Subject: [NETNS][RAW]: Eliminate explicit init_net references. Happily, in all the rest places (->bind callbacks only), that require the struct net, we have a socket, so get the net from it. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/raw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 026fa910cb7..970529e4754 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -291,7 +291,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!sk->sk_bound_dev_if) goto out; - dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); + dev = dev_get_by_index(sk->sk_net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out; @@ -304,7 +304,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST)) { err = -EADDRNOTAVAIL; - if (!ipv6_chk_addr(&init_net, &addr->sin6_addr, + if (!ipv6_chk_addr(sk->sk_net, &addr->sin6_addr, dev, 0)) { if (dev) dev_put(dev); -- cgit v1.2.3 From a308da1627d711fd0c7542bfe892abc78d65d215 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 14 Jan 2008 05:36:50 -0800 Subject: [NETNS][RAW]: Create the /proc/net/raw(6) in each namespace. To do so, just register the proper subsystem and create files in ->init callbacks. No other special per-namespace handling for raw sockets is required. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/raw.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 970529e4754..4d880551fe6 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1270,16 +1270,32 @@ static const struct file_operations raw6_seq_fops = { .release = seq_release_net, }; -int __init raw6_proc_init(void) +static int raw6_init_net(struct net *net) { - if (!proc_net_fops_create(&init_net, "raw6", S_IRUGO, &raw6_seq_fops)) + if (!proc_net_fops_create(net, "raw6", S_IRUGO, &raw6_seq_fops)) return -ENOMEM; + return 0; } +static void raw6_exit_net(struct net *net) +{ + proc_net_remove(net, "raw6"); +} + +static struct pernet_operations raw6_net_ops = { + .init = raw6_init_net, + .exit = raw6_exit_net, +}; + +int __init raw6_proc_init(void) +{ + return register_pernet_subsys(&raw6_net_ops); +} + void raw6_proc_exit(void) { - proc_net_remove(&init_net, "raw6"); + unregister_pernet_subsys(&raw6_net_ops); } #endif /* CONFIG_PROC_FS */ -- cgit v1.2.3 From 8ce22fcab432313717d393c96ad35f0aee016e83 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 14 Jan 2008 23:31:36 -0800 Subject: [NETFILTER]: Remove some EXPERIMENTAL dependencies Most of the netfilter modules are not considered experimental anymore, the only ones I want to keep marked as EXPERIMENTAL are: - TCPOPTSTRIP target, which is brand new. - SANE helper, which is quite new. - CLUSTERIP target, which I believe hasn't had much testing despite being in the kernel for quite a long time. - SCTP match and conntrack protocol, which are a mess and need to be reviewed and cleaned up before I would trust them. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/Kconfig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index a6b4a9a1053..4fc0b023cfd 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -2,12 +2,12 @@ # IP netfilter configuration # -menu "IPv6: Netfilter Configuration (EXPERIMENTAL)" - depends on INET && IPV6 && NETFILTER && EXPERIMENTAL +menu "IPv6: Netfilter Configuration" + depends on INET && IPV6 && NETFILTER config NF_CONNTRACK_IPV6 - tristate "IPv6 connection tracking support (EXPERIMENTAL)" - depends on INET && IPV6 && EXPERIMENTAL && NF_CONNTRACK + tristate "IPv6 connection tracking support" + depends on INET && IPV6 && NF_CONNTRACK default m if NETFILTER_ADVANCED=n ---help--- Connection tracking keeps a record of what packets have passed @@ -22,7 +22,7 @@ config NF_CONNTRACK_IPV6 config IP6_NF_QUEUE tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)" - depends on INET && IPV6 && NETFILTER && EXPERIMENTAL + depends on INET && IPV6 && NETFILTER depends on NETFILTER_ADVANCED ---help--- @@ -44,7 +44,7 @@ config IP6_NF_QUEUE config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" - depends on INET && IPV6 && EXPERIMENTAL + depends on INET && IPV6 select NETFILTER_XTABLES default m if NETFILTER_ADVANCED=n help -- cgit v1.2.3 From 2ae15b64e6a1608c840c60df38e8e5eef7b2b8c3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 14 Jan 2008 23:42:28 -0800 Subject: [NETFILTER]: Update modules' descriptions Updates the MODULE_DESCRIPTION() tags for all Netfilter modules, actually describing what the module does and not just "netfilter XYZ target". Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/ip6t_HL.c | 2 +- net/ipv6/netfilter/ip6t_LOG.c | 2 +- net/ipv6/netfilter/ip6t_REJECT.c | 2 +- net/ipv6/netfilter/ip6t_ah.c | 2 +- net/ipv6/netfilter/ip6t_eui64.c | 2 +- net/ipv6/netfilter/ip6t_frag.c | 2 +- net/ipv6/netfilter/ip6t_hbh.c | 2 +- net/ipv6/netfilter/ip6t_hl.c | 2 +- net/ipv6/netfilter/ip6t_ipv6header.c | 2 +- net/ipv6/netfilter/ip6t_mh.c | 2 +- net/ipv6/netfilter/ip6t_rt.c | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index cefb4253711..d5f8fd5f29d 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -15,7 +15,7 @@ #include MODULE_AUTHOR("Maciej Soltysiak "); -MODULE_DESCRIPTION("IP6 tables Hop Limit modification module"); +MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field modification target"); MODULE_LICENSE("GPL"); static unsigned int diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 474c2b12621..86a613810b6 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -26,7 +26,7 @@ #include MODULE_AUTHOR("Jan Rekorajski "); -MODULE_DESCRIPTION("IP6 tables LOG target module"); +MODULE_DESCRIPTION("Xtables: IPv6 packet logging to syslog"); MODULE_LICENSE("GPL"); struct in_device; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index a951c2cb6de..b23baa635fe 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -31,7 +31,7 @@ #include MODULE_AUTHOR("Yasuyuki KOZAKAI "); -MODULE_DESCRIPTION("IP6 tables REJECT target module"); +MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6"); MODULE_LICENSE("GPL"); /* Send RST reply */ diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index f5d08a8c011..429629fd63b 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -20,7 +20,7 @@ #include MODULE_LICENSE("GPL"); 
-MODULE_DESCRIPTION("IPv6 AH match"); +MODULE_DESCRIPTION("Xtables: IPv6 IPsec-AH match"); MODULE_AUTHOR("Andras Kis-Szabo "); /* Returns 1 if the spi is matched by the range, 0 otherwise */ diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index dd9e67df914..8f331f12b2e 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -15,7 +15,7 @@ #include #include -MODULE_DESCRIPTION("IPv6 EUI64 address checking match"); +MODULE_DESCRIPTION("Xtables: IPv6 EUI64 address match"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andras Kis-Szabo "); diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index ae8c714a80d..e2bbc63dba5 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -19,7 +19,7 @@ #include MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("IPv6 FRAG match"); +MODULE_DESCRIPTION("Xtables: IPv6 fragment match"); MODULE_AUTHOR("Andras Kis-Szabo "); /* Returns 1 if the id is matched by the range, 0 otherwise */ diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index b76e27dc73d..62e39ace058 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -21,7 +21,7 @@ #include MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("IPv6 opts match"); +MODULE_DESCRIPTION("Xtables: IPv6 Hop-By-Hop and Destination Header match"); MODULE_AUTHOR("Andras Kis-Szabo "); MODULE_ALIAS("ip6t_dst"); diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c index 8f2d7d0ab40..34567167384 100644 --- a/net/ipv6/netfilter/ip6t_hl.c +++ b/net/ipv6/netfilter/ip6t_hl.c @@ -16,7 +16,7 @@ #include MODULE_AUTHOR("Maciej Soltysiak "); -MODULE_DESCRIPTION("IP tables Hop Limit matching module"); +MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field match"); MODULE_LICENSE("GPL"); static bool diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index ae497e7ac11..3a940171f82 100644 --- 
a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -23,7 +23,7 @@ #include MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("IPv6 headers match"); +MODULE_DESCRIPTION("Xtables: IPv6 header types match"); MODULE_AUTHOR("Andras Kis-Szabo "); static bool diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c index 618e6b94b03..e06678d07ec 100644 --- a/net/ipv6/netfilter/ip6t_mh.c +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -21,7 +21,7 @@ #include #include -MODULE_DESCRIPTION("ip6t_tables match for MH"); +MODULE_DESCRIPTION("Xtables: IPv6 Mobility Header match"); MODULE_LICENSE("GPL"); #ifdef DEBUG_IP_FIREWALL_USER diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 038cea6407d..12a9efe9886 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -21,7 +21,7 @@ #include MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("IPv6 RT match"); +MODULE_DESCRIPTION("Xtables: IPv6 Routing Header match"); MODULE_AUTHOR("Andras Kis-Szabo "); /* Returns 1 if the id is matched by the range, 0 otherwise */ -- cgit v1.2.3 From 022748a9357c4c1a0113ec1ce5612f383b80156f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 14 Jan 2008 23:44:05 -0800 Subject: [NETFILTER]: {ip,ip6}_tables: remove some inlines This patch removes inlines except those which are used by packet matching code and thus are performance-critical. Before: $ size */*/*/ip*tables*.o text data bss dec hex filename 6402 500 16 6918 1b06 net/ipv4/netfilter/ip_tables.o 7130 500 16 7646 1dde net/ipv6/netfilter/ip6_tables.o After: $ size */*/*/ip*tables*.o text data bss dec hex filename 6307 500 16 6823 1aa7 net/ipv4/netfilter/ip_tables.o 7010 500 16 7526 1d66 net/ipv6/netfilter/ip6_tables.o Signed-off-by: Denys Vlasenko Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/ip6_tables.c | 42 ++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 4ed16d254b9..dd7860fea61 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -90,6 +90,7 @@ ip6t_ext_hdr(u8 nexthdr) } /* Returns whether matches rule or not. */ +/* Performance critical - called for every packet */ static inline bool ip6_packet_match(const struct sk_buff *skb, const char *indev, @@ -182,7 +183,7 @@ ip6_packet_match(const struct sk_buff *skb, } /* should be ip6 safe */ -static inline bool +static bool ip6_checkentry(const struct ip6t_ip6 *ipv6) { if (ipv6->flags & ~IP6T_F_MASK) { @@ -212,8 +213,9 @@ ip6t_error(struct sk_buff *skb, return NF_DROP; } -static inline -bool do_match(struct ip6t_entry_match *m, +/* Performance critical - called for every packet */ +static inline bool +do_match(struct ip6t_entry_match *m, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -236,6 +238,7 @@ get_entry(void *base, unsigned int offset) } /* All zeroes == unconditional rule. */ +/* Mildly perf critical (only if packet tracing is on) */ static inline int unconditional(const struct ip6t_ip6 *ipv6) { @@ -251,7 +254,7 @@ unconditional(const struct ip6t_ip6 *ipv6) #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) /* This cries for unification! 
*/ -static const char *hooknames[] = { +static const char *const hooknames[] = { [NF_INET_PRE_ROUTING] = "PREROUTING", [NF_INET_LOCAL_IN] = "INPUT", [NF_INET_FORWARD] = "FORWARD", @@ -265,7 +268,7 @@ enum nf_ip_trace_comments { NF_IP6_TRACE_COMMENT_POLICY, }; -static const char *comments[] = { +static const char *const comments[] = { [NF_IP6_TRACE_COMMENT_RULE] = "rule", [NF_IP6_TRACE_COMMENT_RETURN] = "return", [NF_IP6_TRACE_COMMENT_POLICY] = "policy", @@ -281,6 +284,7 @@ static struct nf_loginfo trace_loginfo = { }, }; +/* Mildly perf critical (only if packet tracing is on) */ static inline int get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e, char *hookname, char **chainname, @@ -595,7 +599,7 @@ mark_source_chains(struct xt_table_info *newinfo, return 1; } -static inline int +static int cleanup_match(struct ip6t_entry_match *m, unsigned int *i) { if (i && (*i)-- == 0) @@ -607,7 +611,7 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i) return 0; } -static inline int +static int check_entry(struct ip6t_entry *e, const char *name) { struct ip6t_entry_target *t; @@ -628,7 +632,7 @@ check_entry(struct ip6t_entry *e, const char *name) return 0; } -static inline int check_match(struct ip6t_entry_match *m, const char *name, +static int check_match(struct ip6t_entry_match *m, const char *name, const struct ip6t_ip6 *ipv6, unsigned int hookmask, unsigned int *i) { @@ -651,7 +655,7 @@ static inline int check_match(struct ip6t_entry_match *m, const char *name, return ret; } -static inline int +static int find_check_match(struct ip6t_entry_match *m, const char *name, const struct ip6t_ip6 *ipv6, @@ -680,7 +684,7 @@ err: return ret; } -static inline int check_target(struct ip6t_entry *e, const char *name) +static int check_target(struct ip6t_entry *e, const char *name) { struct ip6t_entry_target *t; struct xt_target *target; @@ -701,7 +705,7 @@ static inline int check_target(struct ip6t_entry *e, const char *name) return ret; } -static inline int 
+static int find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size, unsigned int *i) { @@ -745,7 +749,7 @@ find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size, return ret; } -static inline int +static int check_entry_size_and_hooks(struct ip6t_entry *e, struct xt_table_info *newinfo, unsigned char *base, @@ -788,7 +792,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e, return 0; } -static inline int +static int cleanup_entry(struct ip6t_entry *e, unsigned int *i) { struct ip6t_entry_target *t; @@ -944,7 +948,7 @@ get_counters(const struct xt_table_info *t, } } -static inline struct xt_counters *alloc_counters(struct xt_table *table) +static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; @@ -1494,7 +1498,7 @@ out: return ret; } -static inline int +static int compat_find_calc_match(struct ip6t_entry_match *m, const char *name, const struct ip6t_ip6 *ipv6, @@ -1518,7 +1522,7 @@ compat_find_calc_match(struct ip6t_entry_match *m, return 0; } -static inline int +static int compat_release_match(struct ip6t_entry_match *m, unsigned int *i) { if (i && (*i)-- == 0) @@ -1528,7 +1532,7 @@ compat_release_match(struct ip6t_entry_match *m, unsigned int *i) return 0; } -static inline int +static int compat_release_entry(struct compat_ip6t_entry *e, unsigned int *i) { struct ip6t_entry_target *t; @@ -1543,7 +1547,7 @@ compat_release_entry(struct compat_ip6t_entry *e, unsigned int *i) return 0; } -static inline int +static int check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, struct xt_table_info *newinfo, unsigned int *size, @@ -1666,7 +1670,7 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, return ret; } -static inline int compat_check_entry(struct ip6t_entry *e, const char *name, +static int compat_check_entry(struct ip6t_entry *e, const char *name, unsigned int *i) { int j, ret; -- cgit v1.2.3 From 
c56cc9c07b049acc9d2ca97be0b540978c0c80bf Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 14 Jan 2008 23:49:17 -0800 Subject: [NETFILTER]: nf_conntrack: remove print_conntrack function from l3protos It's unused and unlikely to ever be used. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 97a553036dd..cf42f5cfc33 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -60,12 +60,6 @@ static int ipv6_print_tuple(struct seq_file *s, NIP6(*((struct in6_addr *)tuple->dst.u3.ip6))); } -static int ipv6_print_conntrack(struct seq_file *s, - const struct nf_conn *conntrack) -{ - return 0; -} - /* * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c * @@ -376,7 +370,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .pkt_to_tuple = ipv6_pkt_to_tuple, .invert_tuple = ipv6_invert_tuple, .print_tuple = ipv6_print_tuple, - .print_conntrack = ipv6_print_conntrack, .get_l4proto = ipv6_get_l4proto, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nlattr = ipv6_tuple_to_nlattr, -- cgit v1.2.3 From c71e9167081a6a0d2a076cda674b696b89bb31c2 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 14 Jan 2008 23:49:37 -0800 Subject: [NETFILTER]: nf_conntrack: make print_conntrack function optional for l4protos This allows removing five empty implementations. Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 02d60dfbab8..da924c6b5f0 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -75,13 +75,6 @@ static int icmpv6_print_tuple(struct seq_file *s, ntohs(tuple->src.u.icmp.id)); } -/* Print out the private part of the conntrack. */ -static int icmpv6_print_conntrack(struct seq_file *s, - const struct nf_conn *conntrack) -{ - return 0; -} - /* Returns verdict for packet, or -1 for invalid. */ static int icmpv6_packet(struct nf_conn *ct, const struct sk_buff *skb, @@ -275,7 +268,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = .pkt_to_tuple = icmpv6_pkt_to_tuple, .invert_tuple = icmpv6_invert_tuple, .print_tuple = icmpv6_print_tuple, - .print_conntrack = icmpv6_print_conntrack, .packet = icmpv6_packet, .new = icmpv6_new, .error = icmpv6_error, -- cgit v1.2.3 From 569d36452ee26c08523cc9f658901c5188640853 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 18 Jan 2008 03:56:57 -0800 Subject: [NETNS][DST] dst: pass the dst_ops as parameter to the gc functions The garbage collection functions receive the dst_ops structure as a parameter. This is useful for the next incoming patchset because it will need the dst_ops (there will be several instances) and the network namespace pointer (contained in the dst_ops). The protocols which do not take care of the namespaces will not be impacted by this change (except for the function signature); they simply ignore the parameter. Signed-off-by: Daniel Lezcano Signed-off-by: David S.
Miller --- net/ipv6/route.c | 4 ++-- net/ipv6/xfrm6_policy.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a429900d16a..4004c5f0b8d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -79,7 +79,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); -static int ip6_dst_gc(void); +static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); static int ip6_pkt_discard_out(struct sk_buff *skb); @@ -983,7 +983,7 @@ int ndisc_dst_gc(int *more) return freed; } -static int ip6_dst_gc(void) +static int ip6_dst_gc(struct dst_ops *ops) { static unsigned expire = 30*HZ; static unsigned long last_gc; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index cf373b46a1b..c25a6b527fc 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -212,7 +212,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) } } -static inline int xfrm6_garbage_collect(void) +static inline int xfrm6_garbage_collect(struct dst_ops *ops) { xfrm6_policy_afinfo.garbage_collect(); return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2); -- cgit v1.2.3 From 7d460db953d6d205e4c8ecc2017aea1ec22b6c9a Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 18 Jan 2008 23:52:35 -0800 Subject: [IPV6]: Fix ip6_frag ctl Alexey Dobriyan reported an oops when unsharing the network indefinitely inside a loop. This is because the ip6_frag is not per namespace while the ctls are. The oops happens at fragment timer expiration: the inet_frag_secret_rebuild function is called and restarts the timer using the value stored inside the sysctl field.
"mod_timer(&f->secret_timer, now + f->ctl->secret_interval);" When the network is unshared, ip6_frag.ctl is initialized with the new sysctl instances, but ip6_frag has only one instance. A race in this case will appear because f->ctl can be modified during the read access in the timer callback. Until ip6_frag is made per namespace, I discard the assignment to the ctl field of ip6_frags in ipv6_frag_sysctl_init when the network namespace is not the init net. Signed-off-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/ipv6/reassembly.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 5cd0bc693a5..4dfcddc871c 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -627,6 +627,9 @@ static struct inet6_protocol frag_protocol = void ipv6_frag_sysctl_init(struct net *net) { + if (net != &init_net) + return; + ip6_frags.ctl = &net->ipv6.sysctl.frags; } -- cgit v1.2.3 From b7c6ba6eb1234e35a74fb8ba8123232a7b1ba9e4 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 28 Jan 2008 14:41:19 -0800 Subject: [NETNS]: Consolidate kernel netlink socket destruction. Create a specific helper for netlink kernel socket disposal. This just lets the code look better and provides a basis for proper disposal inside a namespace. Signed-off-by: Denis V. Lunev Tested-by: Alexey Dobriyan Signed-off-by: David S.
Miller --- net/ipv6/netfilter/ip6_queue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index a20db0bb5a1..56b4ea6d29e 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -609,7 +609,7 @@ cleanup_sysctl: proc_net_remove(&init_net, IPQ_PROC_FS_NAME); cleanup_ipqnl: - sock_release(ipqnl->sk_socket); + netlink_kernel_release(ipqnl); mutex_lock(&ipqnl_mutex); mutex_unlock(&ipqnl_mutex); @@ -628,7 +628,7 @@ static void __exit ip6_queue_fini(void) unregister_netdevice_notifier(&ipq_dev_notifier); proc_net_remove(&init_net, IPQ_PROC_FS_NAME); - sock_release(ipqnl->sk_socket); + netlink_kernel_release(ipqnl); mutex_lock(&ipqnl_mutex); mutex_unlock(&ipqnl_mutex); -- cgit v1.2.3 From 035923833369e4da5d3c4ad0700bc7c367a0fa37 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Sun, 20 Jan 2008 16:46:01 -0800 Subject: [FIB]: Add netns to fib_rules_ops. The backward link from FIB rules operations to the network namespace will allow to simplify the API a bit. Signed-off-by: Denis V. Lunev Acked-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/ipv6/fib6_rules.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv6') diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 76437a1fcab..ead5ab2da9a 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -249,6 +249,7 @@ static struct fib_rules_ops fib6_rules_ops = { .policy = fib6_rule_policy, .rules_list = LIST_HEAD_INIT(fib6_rules_ops.rules_list), .owner = THIS_MODULE, + .fro_net = &init_net, }; static int __init fib6_default_rules_init(void) -- cgit v1.2.3 From 9e3a548781fc1c0da617fc65769a515f074be740 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Sun, 20 Jan 2008 16:46:41 -0800 Subject: [NETNS]: FIB rules API cleanup. Remove struct net from fib_rules_register(unregister)/notify_change paths and diet code size a bit. 
add/remove: 0/0 grow/shrink: 10/12 up/down: 35/-100 (-65) function old new delta notify_rule_change 273 280 +7 trie_show_stats 471 475 +4 fn_trie_delete 473 477 +4 fib_rules_unregister 144 148 +4 fib4_rule_compare 119 123 +4 resize 2842 2845 +3 fn_trie_select_default 515 518 +3 inet_sk_rebuild_header 836 838 +2 fib_trie_seq_show 764 766 +2 __devinet_sysctl_register 276 278 +2 fn_trie_lookup 1124 1123 -1 ip_fib_check_default 133 131 -2 devinet_conf_sysctl 223 221 -2 snmp_fold_field 126 123 -3 fn_trie_insert 2091 2086 -5 inet_create 876 870 -6 fib4_rules_init 197 191 -6 fib_sync_down 452 444 -8 inet_gso_send_check 334 325 -9 fib_create_info 3003 2991 -12 fib_nl_delrule 568 553 -15 fib_nl_newrule 883 852 -31 Signed-off-by: Denis V. Lunev Acked-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/ipv6/fib6_rules.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index ead5ab2da9a..695c0ca8a41 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -274,7 +274,7 @@ int __init fib6_rules_init(void) if (ret) goto out; - ret = fib_rules_register(&init_net, &fib6_rules_ops); + ret = fib_rules_register(&fib6_rules_ops); if (ret) goto out_default_rules_init; out: @@ -287,5 +287,5 @@ out_default_rules_init: void fib6_rules_cleanup(void) { - fib_rules_unregister(&init_net, &fib6_rules_ops); + fib_rules_unregister(&fib6_rules_ops); } -- cgit v1.2.3 From d20b3109e9d122460929c50b857fcde251706ece Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 21 Jan 2008 00:48:43 -0800 Subject: [IPV6]: addrconf sparse warnings Get rid of a couple of sparse warnings in IPV6 addrconf code. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 803caf1a389..aba7b5d52a9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1900,7 +1900,7 @@ int addrconf_set_dstaddr(void __user *arg) p.iph.ihl = 5; p.iph.protocol = IPPROTO_IPV6; p.iph.ttl = 64; - ifr.ifr_ifru.ifru_data = (void __user *)&p; + ifr.ifr_ifru.ifru_data = (__force void __user *)&p; oldfs = get_fs(); set_fs(KERNEL_DS); err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); @@ -2799,6 +2799,7 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) } static void *if6_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(addrconf_hash_lock) { read_lock_bh(&addrconf_hash_lock); return if6_get_idx(seq, *pos); @@ -2814,6 +2815,7 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void if6_seq_stop(struct seq_file *seq, void *v) + __releases(addrconf_hash_lock) { read_unlock_bh(&addrconf_hash_lock); } -- cgit v1.2.3 From 77d0d350e96c9453be255d8eff8dc97555710b17 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 22 Jan 2008 17:09:55 +0900 Subject: [IPV6] UDP,UDPLITE: Sparse: {__udp6_lib,udp,udplite}_err() are of void. 
Fix following sparse warnings: | net/ipv6/udp.c:262:2: warning: returning void-valued expression | net/ipv6/udplite.c:29:2: warning: returning void-valued expression Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/udp.c | 2 +- net/ipv6/udplite.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index bf58acab206..bd4b9df8f61 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -259,7 +259,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info ) { - return __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash); + __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash); } int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 39f070518e6..87d4202522e 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -26,7 +26,7 @@ static void udplitev6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { - return __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash); + __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash); } static struct inet6_protocol udplitev6_protocol = { -- cgit v1.2.3 From 5e8b9df6e8786e4d5ee5ac951240cb2eaaac3014 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 22 Jan 2008 17:25:46 +0900 Subject: [IPV6] UDPLITE: Sparse: Declare non-static symbols in header. Fix the following sparse warnings: | net/ipv6/udplite.c:45:14: warning: symbol 'udplitev6_prot' was not declared. Should it be static? | net/ipv6/udplite.c:80:12: warning: symbol 'udplitev6_init' was not declared. Should it be static? | net/ipv6/udplite.c:99:6: warning: symbol 'udplitev6_exit' was not declared. Should it be static? 
Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/udp_impl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv6') diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 2d3fda60123..21be3a83e7b 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -5,6 +5,7 @@ #include #include #include +#include extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int ); extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, -- cgit v1.2.3 From 40fee36e11b49f92bc7c385bd45d7805c0127a34 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 22 Jan 2008 17:12:50 +0900 Subject: [IPV6] ADDRLABEL: Sparse: Make several functions static. Fix following sparse warnings: | net/ipv6/addrlabel.c:172:25: warning: symbol 'ip6addrlbl_alloc' was not declared. Should it be static? | net/ipv6/addrlabel.c:219:5: warning: symbol '__ip6addrlbl_add' was not declared. Should it be static? | net/ipv6/addrlabel.c:260:5: warning: symbol 'ip6addrlbl_add' was not declared. Should it be static? | net/ipv6/addrlabel.c:285:5: warning: symbol '__ip6addrlbl_del' was not declared. Should it be static? | net/ipv6/addrlabel.c:311:5: warning: symbol 'ip6addrlbl_del' was not declared. Should it be static? 
Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/addrlabel.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 6f1ca607edd..38674121ae5 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -169,9 +169,9 @@ u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex) } /* allocate one entry */ -struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, - int prefixlen, int ifindex, - u32 label) +static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, + int prefixlen, int ifindex, + u32 label) { struct ip6addrlbl_entry *newp; int addrtype; @@ -216,7 +216,7 @@ struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, } /* add a label */ -int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) +static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) { int ret = 0; @@ -257,8 +257,8 @@ out: } /* add a label */ -int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen, - int ifindex, u32 label, int replace) +static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen, + int ifindex, u32 label, int replace) { struct ip6addrlbl_entry *newp; int ret = 0; @@ -282,8 +282,8 @@ int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen, } /* remove a label */ -int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, - int ifindex) +static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, + int ifindex) { struct ip6addrlbl_entry *p = NULL; struct hlist_node *pos, *n; @@ -308,8 +308,8 @@ int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, return ret; } -int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, - int ifindex) +static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, + int ifindex) { struct in6_addr prefix_buf; int ret; -- cgit v1.2.3 From 
2334ecbdb27bd1745c0fc6d05cce09ed9585e4c1 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 22 Jan 2008 17:18:38 +0900 Subject: [IPV6]: Sparse: Declare non-static ipv6_{route,icmp,frag}_sysctl_init() in header. Fix the following sparse warnings: | net/ipv6/route.c:2491:18: warning: symbol 'ipv6_route_sysctl_init' was not declared. Should it be static? | net/ipv6/icmp.c:922:18: warning: symbol 'ipv6_icmp_sysctl_init' was not declared. Should it be static? | net/ipv6/reassembly.c:628:6: warning: symbol 'ipv6_frag_sysctl_init' was not declared. Should it be static? Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/af_inet6.c | 2 -- net/ipv6/sysctl_net_ipv6.c | 3 --- 2 files changed, 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3150c4be3c0..6738a7b0e67 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -72,8 +72,6 @@ MODULE_LICENSE("GPL"); static struct list_head inetsw6[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw6_lock); -void ipv6_frag_sysctl_init(struct net *net); - static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) { const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 5e0af4d4632..7197eb74a75 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -14,9 +14,6 @@ #include #include -extern struct ctl_table *ipv6_route_sysctl_init(struct net *net); -extern struct ctl_table *ipv6_icmp_sysctl_init(struct net *net); - static ctl_table ipv6_table_template[] = { { .ctl_name = NET_IPV6_ROUTE, -- cgit v1.2.3 From 5d5619b40c2474de01c64bdf6bb9f1211d3e967a Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 22 Jan 2008 17:29:40 +0900 Subject: [IPV6] ADDRCONF: Sparse: Make inet6_dump_addr() code paths more straight-forward. 
Fix the following sparse warning: | net/ipv6/addrconf.c:3384:2: warning: context imbalance in 'inet6_dump_addr' - different lock contexts for basic block Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/addrconf.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index aba7b5d52a9..e40213db9e4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3335,11 +3335,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, ifa = ifa->if_next, ip_idx++) { if (ip_idx < s_ip_idx) continue; - if ((err = inet6_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWADDR, - NLM_F_MULTI)) <= 0) - goto done; + err = inet6_fill_ifaddr(skb, ifa, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_NEWADDR, + NLM_F_MULTI); } break; case MULTICAST_ADDR: @@ -3348,11 +3348,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, ifmca = ifmca->next, ip_idx++) { if (ip_idx < s_ip_idx) continue; - if ((err = inet6_fill_ifmcaddr(skb, ifmca, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_GETMULTICAST, - NLM_F_MULTI)) <= 0) - goto done; + err = inet6_fill_ifmcaddr(skb, ifmca, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_GETMULTICAST, + NLM_F_MULTI); } break; case ANYCAST_ADDR: @@ -3361,11 +3361,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, ifaca = ifaca->aca_next, ip_idx++) { if (ip_idx < s_ip_idx) continue; - if ((err = inet6_fill_ifacaddr(skb, ifaca, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_GETANYCAST, - NLM_F_MULTI)) <= 0) - goto done; + err = inet6_fill_ifacaddr(skb, ifaca, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_GETANYCAST, + NLM_F_MULTI); } break; default: @@ -3373,14 +3373,12 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, } read_unlock_bh(&idev->lock); in6_dev_put(idev); 
+ + if (err <= 0) + break; cont: idx++; } -done: - if (err <= 0) { - read_unlock_bh(&idev->lock); - in6_dev_put(idev); - } cb->args[0] = idx; cb->args[1] = ip_idx; return skb->len; -- cgit v1.2.3 From 61cf46ad581ba43073d3bcb0be549eb60fbbf9f8 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 22 Jan 2008 17:32:53 +0900 Subject: [IPV6] NDISC: Sparse: Use different variable name for local use. Fix the following sparse warnings: | net/ipv6/ndisc.c:1300:21: warning: symbol 'opt' shadows an earlier one | net/ipv6/ndisc.c:1078:7: originally declared here Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ndisc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index bdfc4ea6194..0d33a7d3212 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1297,11 +1297,11 @@ skip_defrtr: } if (ndopts.nd_useropts) { - struct nd_opt_hdr *opt; - for (opt = ndopts.nd_useropts; - opt; - opt = ndisc_next_useropt(opt, ndopts.nd_useropts_end)) { - ndisc_ra_useropt(skb, opt); + struct nd_opt_hdr *p; + for (p = ndopts.nd_useropts; + p; + p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) { + ndisc_ra_useropt(skb, p); } } -- cgit v1.2.3 From 8d8354d2fb9277f165715a6e1cb92bcc89259975 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 22 Jan 2008 05:58:31 -0800 Subject: [NETNS][FRAGS]: Move ctl tables around. This is a preparation for sysctl netns-ization. Move the ctl tables to the files, where the tuning variables reside. Plus make the helpers to register the tables. This will simplify the later patches and will keep similar things closer to each other. ipv4, ipv6 and conntrack_reasm are patched differently, but the result is all the tables are in appropriate files. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/af_inet6.c | 5 -- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 29 ----------- net/ipv6/netfilter/nf_conntrack_reasm.c | 31 +++++++++++- net/ipv6/reassembly.c | 66 ++++++++++++++++++++++++-- net/ipv6/sysctl_net_ipv6.c | 40 +--------------- 5 files changed, 94 insertions(+), 77 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 6738a7b0e67..bddac0e8780 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -721,10 +721,6 @@ static void cleanup_ipv6_mibs(void) static int inet6_net_init(struct net *net) { net->ipv6.sysctl.bindv6only = 0; - net->ipv6.sysctl.frags.high_thresh = 256 * 1024; - net->ipv6.sysctl.frags.low_thresh = 192 * 1024; - net->ipv6.sysctl.frags.timeout = IPV6_FRAG_TIMEOUT; - net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ; net->ipv6.sysctl.flush_delay = 0; net->ipv6.sysctl.ip6_rt_max_size = 4096; net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; @@ -734,7 +730,6 @@ static int inet6_net_init(struct net *net) net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; net->ipv6.sysctl.icmpv6_time = 1*HZ; - ipv6_frag_sysctl_init(net); return 0; } diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index cf42f5cfc33..2d7b0246475 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -297,35 +297,6 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { }, }; -#ifdef CONFIG_SYSCTL -static ctl_table nf_ct_ipv6_sysctl_table[] = { - { - .procname = "nf_conntrack_frag6_timeout", - .data = &nf_frags_ctl.timeout, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH, - .procname = "nf_conntrack_frag6_low_thresh", - .data = &nf_frags_ctl.low_thresh, - .maxlen = sizeof(unsigned int), - .mode = 0644, - 
.proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, - .procname = "nf_conntrack_frag6_high_thresh", - .data = &nf_frags_ctl.high_thresh, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { .ctl_name = 0 } -}; -#endif - #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) #include diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index e170c67c47a..d631631189b 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -70,7 +70,7 @@ struct nf_ct_frag6_queue __u16 nhoffset; }; -struct inet_frags_ctl nf_frags_ctl __read_mostly = { +static struct inet_frags_ctl nf_frags_ctl __read_mostly = { .high_thresh = 256 * 1024, .low_thresh = 192 * 1024, .timeout = IPV6_FRAG_TIMEOUT, @@ -79,6 +79,35 @@ struct inet_frags_ctl nf_frags_ctl __read_mostly = { static struct inet_frags nf_frags; +#ifdef CONFIG_SYSCTL +struct ctl_table nf_ct_ipv6_sysctl_table[] = { + { + .procname = "nf_conntrack_frag6_timeout", + .data = &nf_frags_ctl.timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH, + .procname = "nf_conntrack_frag6_low_thresh", + .data = &nf_frags_ctl.low_thresh, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, + .procname = "nf_conntrack_frag6_high_thresh", + .data = &nf_frags_ctl.high_thresh, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0 } +}; +#endif + static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, struct in6_addr *daddr) { diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 4dfcddc871c..1815ff0cf62 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -625,12 +625,70 @@ static struct 
inet6_protocol frag_protocol = .flags = INET6_PROTO_NOPOLICY, }; -void ipv6_frag_sysctl_init(struct net *net) +#ifdef CONFIG_SYSCTL +static struct ctl_table ip6_frags_ctl_table[] = { + { + .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH, + .procname = "ip6frag_high_thresh", + .data = &init_net.ipv6.sysctl.frags.high_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH, + .procname = "ip6frag_low_thresh", + .data = &init_net.ipv6.sysctl.frags.low_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = NET_IPV6_IP6FRAG_TIME, + .procname = "ip6frag_time", + .data = &init_net.ipv6.sysctl.frags.timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { + .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL, + .procname = "ip6frag_secret_interval", + .data = &init_net.ipv6.sysctl.frags.secret_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies + }, + { } +}; + +static int ip6_frags_sysctl_register(struct net *net) +{ + struct ctl_table_header *hdr; + + hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, + ip6_frags_ctl_table); + return hdr == NULL ? 
-ENOMEM : 0; +} +#else +static inline int ip6_frags_sysctl_register(struct net *net) { - if (net != &init_net) - return; + return 0; +} +#endif +static int ipv6_frags_init_net(struct net *net) +{ ip6_frags.ctl = &net->ipv6.sysctl.frags; + + net->ipv6.sysctl.frags.high_thresh = 256 * 1024; + net->ipv6.sysctl.frags.low_thresh = 192 * 1024; + net->ipv6.sysctl.frags.timeout = IPV6_FRAG_TIMEOUT; + net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ; + + return ip6_frags_sysctl_register(net); } int __init ipv6_frag_init(void) @@ -641,6 +699,8 @@ int __init ipv6_frag_init(void) if (ret) goto out; + ipv6_frags_init_net(&init_net); + ip6_frags.hashfn = ip6_hashfn; ip6_frags.constructor = ip6_frag_init; ip6_frags.destructor = NULL; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 7197eb74a75..408691b777c 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -37,40 +37,6 @@ static ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = &proc_dointvec }, - { - .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH, - .procname = "ip6frag_high_thresh", - .data = &init_net.ipv6.sysctl.frags.high_thresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH, - .procname = "ip6frag_low_thresh", - .data = &init_net.ipv6.sysctl.frags.low_thresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_IPV6_IP6FRAG_TIME, - .procname = "ip6frag_time", - .data = &init_net.ipv6.sysctl.frags.timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, - }, - { - .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL, - .procname = "ip6frag_secret_interval", - .data = &init_net.ipv6.sysctl.frags.secret_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies - }, { .ctl_name = NET_IPV6_MLD_MAX_MSF, .procname =
"mld_max_msf", @@ -126,16 +92,12 @@ static int ipv6_sysctl_net_init(struct net *net) ipv6_table[1].child = ipv6_icmp_table; ipv6_table[2].data = &net->ipv6.sysctl.bindv6only; - ipv6_table[3].data = &net->ipv6.sysctl.frags.high_thresh; - ipv6_table[4].data = &net->ipv6.sysctl.frags.low_thresh; - ipv6_table[5].data = &net->ipv6.sysctl.frags.timeout; - ipv6_table[6].data = &net->ipv6.sysctl.frags.secret_interval; /* We don't want this value to be per namespace, it should be global to all namespaces, so make it read-only when we are not in the init network namespace */ if (net != &init_net) - ipv6_table[7].mode = 0444; + ipv6_table[3].mode = 0444; net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path, ipv6_table); -- cgit v1.2.3 From ac18e7509e7df327e30d6e073a787d922eaf211d Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 22 Jan 2008 06:02:14 -0800 Subject: [NETNS][FRAGS]: Make the inet_frag_queue lookup work in namespaces. Since fragment management code is consolidated, we cannot have the pointer from inet_frag_queue to struct net, since we must know what kind of fragment this is. So, I introduce the netns_frags structure. This one is currently empty, but will be eventually filled with per-namespace attributes. Each inet_frag_queue is tagged with this one. The conntrack_reasm is not "netns-izated", so it has one static netns_frags instance to keep working in init namespace. Signed-off-by: Pavel Emelyanov Signed-off-by: David S.
Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 3 ++- net/ipv6/reassembly.c | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index d631631189b..18accd4eab0 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -78,6 +78,7 @@ static struct inet_frags_ctl nf_frags_ctl __read_mostly = { }; static struct inet_frags nf_frags; +static struct netns_frags nf_init_frags; #ifdef CONFIG_SYSCTL struct ctl_table nf_ct_ipv6_sysctl_table[] = { @@ -212,7 +213,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) arg.dst = dst; hash = ip6qhashfn(id, src, dst); - q = inet_frag_find(&nf_frags, &arg, hash); + q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); if (q == NULL) goto oom; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 1815ff0cf62..ab2d53b81b7 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -234,7 +234,7 @@ out: } static __inline__ struct frag_queue * -fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, +fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, struct inet6_dev *idev) { struct inet_frag_queue *q; @@ -246,7 +246,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, arg.dst = dst; hash = ip6qhashfn(id, src, dst); - q = inet_frag_find(&ip6_frags, &arg, hash); + q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); if (q == NULL) goto oom; @@ -568,6 +568,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr = ipv6_hdr(skb); + struct net *net; IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS); @@ -598,10 +599,11 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } + net = skb->dev->nd_net; if (atomic_read(&ip6_frags.mem) > init_net.ipv6.sysctl.frags.high_thresh) 
ip6_evictor(ip6_dst_idev(skb->dst)); - if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr, + if ((fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, ip6_dst_idev(skb->dst))) != NULL) { int ret; -- cgit v1.2.3 From e5a2bb842cd9681d00d4ca963e63e4d3647e66f8 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 22 Jan 2008 06:06:23 -0800 Subject: [NETNS][FRAGS]: Make the nqueues counter per-namespace. This is simple - just move the variable from struct inet_frags to struct netns_frags and adjust the usage appropriately. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + net/ipv6/proc.c | 2 +- net/ipv6/reassembly.c | 6 ++++-- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 18accd4eab0..0b9d0097b68 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -712,6 +712,7 @@ int nf_ct_frag6_init(void) nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; + inet_frags_init_net(&nf_init_frags); inet_frags_init(&nf_frags); return 0; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 571d95a21c1..dec34c87cb4 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -44,7 +44,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "RAW6: inuse %d\n", sock_prot_inuse_get(&rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", - ip6_frag_nqueues(), ip6_frag_mem()); + ip6_frag_nqueues(&init_net), ip6_frag_mem()); return 0; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index ab2d53b81b7..77a874020f3 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -84,9 +84,9 @@ struct frag_queue static struct inet_frags ip6_frags; -int ip6_frag_nqueues(void) +int ip6_frag_nqueues(struct net *net) { - return 
ip6_frags.nqueues; + return net->ipv6.frags.nqueues; } int ip6_frag_mem(void) @@ -690,6 +690,8 @@ static int ipv6_frags_init_net(struct net *net) net->ipv6.sysctl.frags.timeout = IPV6_FRAG_TIMEOUT; net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ; + inet_frags_init_net(&net->ipv6.frags); + return ip6_frags_sysctl_register(net); } -- cgit v1.2.3 From 6ddc082223ef0f73717b4133fa7e648842bbfd02 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 22 Jan 2008 06:07:25 -0800 Subject: [NETNS][FRAGS]: Make the mem counter per-namespace. This is also simple, but introduces more changes, since then mem counter is altered in more places. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 14 +++++++------- net/ipv6/proc.c | 2 +- net/ipv6/reassembly.c | 28 +++++++++++++++------------- 3 files changed, 23 insertions(+), 21 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 0b9d0097b68..cb826bea4b1 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -155,7 +155,7 @@ static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work) { if (work) *work -= skb->truesize; - atomic_sub(skb->truesize, &nf_frags.mem); + atomic_sub(skb->truesize, &nf_init_frags.mem); nf_skb_free(skb); kfree_skb(skb); } @@ -177,7 +177,7 @@ static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq) static void nf_ct_frag6_evictor(void) { - inet_frag_evictor(&nf_frags); + inet_frag_evictor(&nf_init_frags, &nf_frags); } static void nf_ct_frag6_expire(unsigned long data) @@ -382,7 +382,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, skb->dev = NULL; fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; - atomic_add(skb->truesize, &nf_frags.mem); + atomic_add(skb->truesize, &nf_init_frags.mem); /* The first fragment. 
* nhoffset is obtained from the first fragment, of course. @@ -459,7 +459,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - atomic_add(clone->truesize, &nf_frags.mem); + atomic_add(clone->truesize, &nf_init_frags.mem); } /* We have to remove fragment header from datagram and to relocate @@ -473,7 +473,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &nf_frags.mem); + atomic_sub(head->truesize, &nf_init_frags.mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -483,7 +483,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &nf_frags.mem); + atomic_sub(fp->truesize, &nf_init_frags.mem); } head->next = NULL; @@ -633,7 +633,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) goto ret_orig; } - if (atomic_read(&nf_frags.mem) > nf_frags_ctl.high_thresh) + if (atomic_read(&nf_init_frags.mem) > nf_frags_ctl.high_thresh) nf_ct_frag6_evictor(); fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr); diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index dec34c87cb4..35e502a7249 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -44,7 +44,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "RAW6: inuse %d\n", sock_prot_inuse_get(&rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", - ip6_frag_nqueues(&init_net), ip6_frag_mem()); + ip6_frag_nqueues(&init_net), ip6_frag_mem(&init_net)); return 0; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 77a874020f3..241b2cc49bf 100644 --- a/net/ipv6/reassembly.c +++ 
b/net/ipv6/reassembly.c @@ -89,9 +89,9 @@ int ip6_frag_nqueues(struct net *net) return net->ipv6.frags.nqueues; } -int ip6_frag_mem(void) +int ip6_frag_mem(struct net *net) { - return atomic_read(&ip6_frags.mem); + return atomic_read(&net->ipv6.frags.mem); } static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, @@ -149,11 +149,12 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a) EXPORT_SYMBOL(ip6_frag_match); /* Memory Tracking Functions. */ -static inline void frag_kfree_skb(struct sk_buff *skb, int *work) +static inline void frag_kfree_skb(struct netns_frags *nf, + struct sk_buff *skb, int *work) { if (work) *work -= skb->truesize; - atomic_sub(skb->truesize, &ip6_frags.mem); + atomic_sub(skb->truesize, &nf->mem); kfree_skb(skb); } @@ -183,11 +184,11 @@ static __inline__ void fq_kill(struct frag_queue *fq) inet_frag_kill(&fq->q, &ip6_frags); } -static void ip6_evictor(struct inet6_dev *idev) +static void ip6_evictor(struct net *net, struct inet6_dev *idev) { int evicted; - evicted = inet_frag_evictor(&ip6_frags); + evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags); if (evicted) IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted); } @@ -389,7 +390,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, fq->q.fragments = next; fq->q.meat -= free_it->len; - frag_kfree_skb(free_it, NULL); + frag_kfree_skb(fq->q.net, free_it, NULL); } } @@ -409,7 +410,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; - atomic_add(skb->truesize, &ip6_frags.mem); + atomic_add(skb->truesize, &fq->q.net->mem); /* The first fragment. * nhoffset is obtained from the first fragment, of course. 
@@ -503,7 +504,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &ip6_frags.mem); + atomic_add(clone->truesize, &fq->q.net->mem); } /* We have to remove fragment header from datagram and to relocate @@ -518,7 +519,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &ip6_frags.mem); + atomic_sub(head->truesize, &fq->q.net->mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -528,7 +529,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &ip6_frags.mem); + atomic_sub(fp->truesize, &fq->q.net->mem); } head->next = NULL; @@ -600,8 +601,9 @@ static int ipv6_frag_rcv(struct sk_buff *skb) } net = skb->dev->nd_net; - if (atomic_read(&ip6_frags.mem) > init_net.ipv6.sysctl.frags.high_thresh) - ip6_evictor(ip6_dst_idev(skb->dst)); + if (atomic_read(&net->ipv6.frags.mem) > + init_net.ipv6.sysctl.frags.high_thresh) + ip6_evictor(net, ip6_dst_idev(skb->dst)); if ((fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, ip6_dst_idev(skb->dst))) != NULL) { -- cgit v1.2.3 From e4a2d5c2bccd5bd29de5ae4f14ff4448fac9cfc8 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 22 Jan 2008 06:08:36 -0800 Subject: [NETNS][FRAGS]: Duplicate sysctl tables for new namespaces. Each namespace has to have own tables to tune their different parameters, so duplicate the tables and register them. All the tables in sub-namespaces are temporarily made read-only. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/reassembly.c | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 241b2cc49bf..0300dcbf1a7 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -670,17 +670,52 @@ static struct ctl_table ip6_frags_ctl_table[] = { static int ip6_frags_sysctl_register(struct net *net) { + struct ctl_table *table; struct ctl_table_header *hdr; - hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, - ip6_frags_ctl_table); - return hdr == NULL ? -ENOMEM : 0; + table = ip6_frags_ctl_table; + if (net != &init_net) { + table = kmemdup(table, sizeof(ip6_frags_ctl_table), GFP_KERNEL); + if (table == NULL) + goto err_alloc; + + table[0].mode &= ~0222; + table[1].mode &= ~0222; + table[2].mode &= ~0222; + table[3].mode &= ~0222; + } + + hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table); + if (hdr == NULL) + goto err_reg; + + net->ipv6.sysctl.frags_hdr = hdr; + return 0; + +err_reg: + if (net != &init_net) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void ip6_frags_sysctl_unregister(struct net *net) +{ + struct ctl_table *table; + + table = net->ipv6.sysctl.frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr); + kfree(table); } #else static inline int ip6_frags_sysctl_register(struct net *net) { return 0; } + +static inline void ip6_frags_sysctl_unregister(struct net *net) +{ +} #endif static int ipv6_frags_init_net(struct net *net) -- cgit v1.2.3 From b2fd5321dd160ef309dfb6cfc78ed8de4a830659 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 22 Jan 2008 06:09:37 -0800 Subject: [NETNS][FRAGS]: Make the net.ipv4.ipfrag_timeout work in namespaces. Move it to the netns_frags, adjust the usage and make the appropriate ctl table writable. Now fragments that live in different namespaces can live for different times. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 4 ++-- net/ipv6/reassembly.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index cb826bea4b1..92a311ff79c 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -73,7 +73,6 @@ struct nf_ct_frag6_queue static struct inet_frags_ctl nf_frags_ctl __read_mostly = { .high_thresh = 256 * 1024, .low_thresh = 192 * 1024, - .timeout = IPV6_FRAG_TIMEOUT, .secret_interval = 10 * 60 * HZ, }; @@ -84,7 +83,7 @@ static struct netns_frags nf_init_frags; struct ctl_table nf_ct_ipv6_sysctl_table[] = { { .procname = "nf_conntrack_frag6_timeout", - .data = &nf_frags_ctl.timeout, + .data = &nf_init_frags.timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -712,6 +711,7 @@ int nf_ct_frag6_init(void) nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; + nf_init_frags.timeout = IPV6_FRAG_TIMEOUT; inet_frags_init_net(&nf_init_frags); inet_frags_init(&nf_frags); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 0300dcbf1a7..91761365b18 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -650,7 +650,7 @@ static struct ctl_table ip6_frags_ctl_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_TIME, .procname = "ip6frag_time", - .data = &init_net.ipv6.sysctl.frags.timeout, + .data = &init_net.ipv6.frags.timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -681,7 +681,7 @@ static int ip6_frags_sysctl_register(struct net *net) table[0].mode &= ~0222; table[1].mode &= ~0222; - table[2].mode &= ~0222; + table[2].data = &net->ipv6.frags.timeout; table[3].mode &= ~0222; } @@ -724,7 +724,7 @@ static int ipv6_frags_init_net(struct net *net) net->ipv6.sysctl.frags.high_thresh = 256 * 1024; 
net->ipv6.sysctl.frags.low_thresh = 192 * 1024; - net->ipv6.sysctl.frags.timeout = IPV6_FRAG_TIMEOUT; + net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ; inet_frags_init_net(&net->ipv6.frags); -- cgit v1.2.3 From e31e0bdc7e7fb9a4b09d2f3266c035a18fdcee9d Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 22 Jan 2008 06:10:13 -0800 Subject: [NETNS][FRAGS]: Make thresholds work in namespaces. This is the same as with the timeout variable. Currently, after exceeding the high threshold _all_ the fragments are evicted, but it will be fixed in later patch. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 12 ++++++------ net/ipv6/reassembly.c | 15 +++++++-------- 2 files changed, 13 insertions(+), 14 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 92a311ff79c..c75ac17e394 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -71,8 +71,6 @@ struct nf_ct_frag6_queue }; static struct inet_frags_ctl nf_frags_ctl __read_mostly = { - .high_thresh = 256 * 1024, - .low_thresh = 192 * 1024, .secret_interval = 10 * 60 * HZ, }; @@ -91,7 +89,7 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH, .procname = "nf_conntrack_frag6_low_thresh", - .data = &nf_frags_ctl.low_thresh, + .data = &nf_init_frags.low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -99,7 +97,7 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, .procname = "nf_conntrack_frag6_high_thresh", - .data = &nf_frags_ctl.high_thresh, + .data = &nf_init_frags.high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -632,7 +630,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) goto ret_orig; } - 
if (atomic_read(&nf_init_frags.mem) > nf_frags_ctl.high_thresh) + if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh) nf_ct_frag6_evictor(); fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr); @@ -712,6 +710,8 @@ int nf_ct_frag6_init(void) nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; nf_init_frags.timeout = IPV6_FRAG_TIMEOUT; + nf_init_frags.high_thresh = 256 * 1024; + nf_init_frags.low_thresh = 192 * 1024; inet_frags_init_net(&nf_init_frags); inet_frags_init(&nf_frags); @@ -722,6 +722,6 @@ void nf_ct_frag6_cleanup(void) { inet_frags_fini(&nf_frags); - nf_frags_ctl.low_thresh = 0; + nf_init_frags.low_thresh = 0; nf_ct_frag6_evictor(); } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 91761365b18..85f3fa38223 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -601,8 +601,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) } net = skb->dev->nd_net; - if (atomic_read(&net->ipv6.frags.mem) > - init_net.ipv6.sysctl.frags.high_thresh) + if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh) ip6_evictor(net, ip6_dst_idev(skb->dst)); if ((fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, @@ -634,7 +633,7 @@ static struct ctl_table ip6_frags_ctl_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH, .procname = "ip6frag_high_thresh", - .data = &init_net.ipv6.sysctl.frags.high_thresh, + .data = &init_net.ipv6.frags.high_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -642,7 +641,7 @@ static struct ctl_table ip6_frags_ctl_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH, .procname = "ip6frag_low_thresh", - .data = &init_net.ipv6.sysctl.frags.low_thresh, + .data = &init_net.ipv6.frags.low_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -679,8 +678,8 @@ static int ip6_frags_sysctl_register(struct net *net) if (table == NULL) goto err_alloc; - table[0].mode &= ~0222; - table[1].mode &= ~0222; + 
table[0].data = &net->ipv6.frags.high_thresh; + table[1].data = &net->ipv6.frags.low_thresh; table[2].data = &net->ipv6.frags.timeout; table[3].mode &= ~0222; } @@ -722,8 +721,8 @@ static int ipv6_frags_init_net(struct net *net) { ip6_frags.ctl = &net->ipv6.sysctl.frags; - net->ipv6.sysctl.frags.high_thresh = 256 * 1024; - net->ipv6.sysctl.frags.low_thresh = 192 * 1024; + net->ipv6.frags.high_thresh = 256 * 1024; + net->ipv6.frags.low_thresh = 192 * 1024; net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ; -- cgit v1.2.3 From 3b4bc4a2bfe80d01ebd4f2b6dcc58986c970ed16 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 22 Jan 2008 06:11:04 -0800 Subject: [NETNS][FRAGS]: Isolate the secret interval from namespaces. Since we have one hashtable to lookup the fragment, having different secret_interval-s for hash rebuild doesn't make sense, so move this one to inet_frags. The inet_frags_ctl becomes empty after this, so remove it. The appropriate ctl table is kept read-only in namespaces. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 6 +----- net/ipv6/reassembly.c | 6 ++---- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index c75ac17e394..6eed991a4a3 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -70,10 +70,6 @@ struct nf_ct_frag6_queue __u16 nhoffset; }; -static struct inet_frags_ctl nf_frags_ctl __read_mostly = { - .secret_interval = 10 * 60 * HZ, -}; - static struct inet_frags nf_frags; static struct netns_frags nf_init_frags; @@ -701,7 +697,6 @@ int nf_ct_frag6_kfree_frags(struct sk_buff *skb) int nf_ct_frag6_init(void) { - nf_frags.ctl = &nf_frags_ctl; nf_frags.hashfn = nf_hashfn; nf_frags.constructor = ip6_frag_init; nf_frags.destructor = NULL; @@ -709,6 +704,7 @@ int nf_ct_frag6_init(void) nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; + nf_frags.secret_interval = 10 * 60 * HZ; nf_init_frags.timeout = IPV6_FRAG_TIMEOUT; nf_init_frags.high_thresh = 256 * 1024; nf_init_frags.low_thresh = 192 * 1024; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 85f3fa38223..85207008730 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -658,7 +658,7 @@ static struct ctl_table ip6_frags_ctl_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL, .procname = "ip6frag_secret_interval", - .data = &init_net.ipv6.sysctl.frags.secret_interval, + .data = &ip6_frags.secret_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -719,12 +719,9 @@ static inline void ip6_frags_sysctl_unregister(struct net *net) static int ipv6_frags_init_net(struct net *net) { - ip6_frags.ctl = &net->ipv6.sysctl.frags; - net->ipv6.frags.high_thresh = 256 * 1024; net->ipv6.frags.low_thresh = 192 * 1024; net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; - 
net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ; inet_frags_init_net(&net->ipv6.frags); @@ -748,6 +745,7 @@ int __init ipv6_frag_init(void) ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.match = ip6_frag_match; ip6_frags.frag_expire = ip6_frag_expire; + ip6_frags.secret_interval = 10 * 60 * HZ; inet_frags_init(&ip6_frags); out: return ret; -- cgit v1.2.3 From 3140c25c82106645a6b1fc469dab7006a1d09fd0 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 22 Jan 2008 06:11:48 -0800 Subject: [NETNS][FRAGS]: Make the LRU list per namespace. The inet_frags.lru_list is used for evicting only, so we have to make it per-namespace, to evict only those fragments whose namespace exceeded its high threshold, but not the whole hash. Besides, this helps to avoid long loops in evictor. The spinlock is not per-namespace because it protects the hash table as well, which is global. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/reassembly.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6eed991a4a3..022da6ce4c0 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -385,7 +385,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, fq->q.last_in |= FIRST_IN; } write_lock(&nf_frags.lock); - list_move_tail(&fq->q.lru_list, &nf_frags.lru_list); + list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list); write_unlock(&nf_frags.lock); return 0; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 85207008730..0c4bc46dee0 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -424,7 +424,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, return ip6_frag_reasm(fq, prev, dev); write_lock(&ip6_frags.lock); - list_move_tail(&fq->q.lru_list, 
&ip6_frags.lru_list); + list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); write_unlock(&ip6_frags.lock); return -1; -- cgit v1.2.3 From 81566e8322c3f6c6f9a2277fe0e440fee8d917bd Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 22 Jan 2008 06:12:39 -0800 Subject: [NETNS][FRAGS]: Make the pernet subsystem for fragments. On namespace start we mainly prepare the ctl variables. When the namespace is stopped we have to kill all the fragments that point to this namespace. The inet_frags_exit_net() handles it. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/reassembly.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 0c4bc46dee0..f936d045a39 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -728,6 +728,17 @@ static int ipv6_frags_init_net(struct net *net) return ip6_frags_sysctl_register(net); } +static void ipv6_frags_exit_net(struct net *net) +{ + ip6_frags_sysctl_unregister(net); + inet_frags_exit_net(&net->ipv6.frags, &ip6_frags); +} + +static struct pernet_operations ip6_frags_ops = { + .init = ipv6_frags_init_net, + .exit = ipv6_frags_exit_net, +}; + int __init ipv6_frag_init(void) { int ret; @@ -736,7 +747,7 @@ int __init ipv6_frag_init(void) if (ret) goto out; - ipv6_frags_init_net(&init_net); + register_pernet_subsys(&ip6_frags_ops); ip6_frags.hashfn = ip6_hashfn; ip6_frags.constructor = ip6_frag_init; @@ -754,5 +765,6 @@ out: void ipv6_frag_exit(void) { inet_frags_fini(&ip6_frags); + unregister_pernet_subsys(&ip6_frags_ops); inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); } -- cgit v1.2.3 From f206351a50ea86250fabea96b9af8d8f8fc02603 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 22 Jan 2008 22:07:34 -0800 Subject: [NETNS]: Add namespace parameter to ip_route_output_key. Needed to propagate it down to the ip_route_output_flow. Signed-off-by: Denis V. Lunev Signed-off-by: David S. 
Miller --- net/ipv6/ip6_tunnel.c | 4 ++-- net/ipv6/sit.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 425c9ae8b31..9031e521c1d 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -533,7 +533,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.fl4_dst = eiph->saddr; fl.fl4_tos = RT_TOS(eiph->tos); fl.proto = IPPROTO_IPIP; - if (ip_route_output_key(&rt, &fl)) + if (ip_route_output_key(&init_net, &rt, &fl)) goto out; skb2->dev = rt->u.dst.dev; @@ -545,7 +545,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.fl4_dst = eiph->daddr; fl.fl4_src = eiph->saddr; fl.fl4_tos = eiph->tos; - if (ip_route_output_key(&rt, &fl) || + if (ip_route_output_key(&init_net, &rt, &fl) || rt->u.dst.dev->type != ARPHRD_TUNNEL) { ip_rt_put(rt); goto out; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1c6fddb80b3..e77239d02bf 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -557,7 +557,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) .tos = RT_TOS(tos) } }, .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; - if (ip_route_output_key(&rt, &fl)) { + if (ip_route_output_key(&init_net, &rt, &fl)) { tunnel->stat.tx_carrier_errors++; goto tx_error_icmp; } @@ -686,7 +686,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; struct rtable *rt; - if (!ip_route_output_key(&rt, &fl)) { + if (!ip_route_output_key(&init_net, &rt, &fl)) { tdev = rt->u.dst.dev; ip_rt_put(rt); } -- cgit v1.2.3 From 85040bcb4643cba578839e953f25e2d1965d83d0 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 28 Jan 2008 15:46:02 -0800 Subject: [IPV6] ADDRLABEL: Fix double free on label deletion. 
If an entry is being deleted because it has only one reference, we immediately delete it and blindly register the rcu handler for it. This results in an oops by double freeing that object. This patch fixes it by consolidating the code paths for the deletion; let its rcu handler delete the object if it has no more references. Bug was found by Mitsuru Chinen Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrlabel.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 38674121ae5..a3c5a72218f 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -106,6 +106,11 @@ static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) kfree(p); } +static void ip6addrlbl_free_rcu(struct rcu_head *h) +{ + ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); +} + static inline int ip6addrlbl_hold(struct ip6addrlbl_entry *p) { return atomic_inc_not_zero(&p->refcnt); @@ -114,12 +119,7 @@ static inline int ip6addrlbl_hold(struct ip6addrlbl_entry *p) static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) { if (atomic_dec_and_test(&p->refcnt)) - ip6addrlbl_free(p); -} - -static void ip6addrlbl_free_rcu(struct rcu_head *h) -{ - ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); + call_rcu(&p->rcu, ip6addrlbl_free_rcu); } /* Find label */ @@ -240,7 +240,6 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) } hlist_replace_rcu(&p->list, &newp->list); ip6addrlbl_put(p); - call_rcu(&p->rcu, ip6addrlbl_free_rcu); goto out; } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || (p->prefixlen < newp->prefixlen)) { @@ -300,7 +299,6 @@ static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, ipv6_addr_equal(&p->prefix, prefix)) { hlist_del_rcu(&p->list); ip6addrlbl_put(p); - call_rcu(&p->rcu, ip6addrlbl_free_rcu); ret = 0; break; } -- cgit v1.2.3 From 
44c45eb911845ec58fc3e26cc8994ba868ea2572 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 31 Jan 2008 00:26:10 +1100 Subject: Make !NETFILTER_ADVANCED enable IP6_NF_MATCH_IPV6HEADER We want IPV6HEADER matching for the non-advanced default netfilter configuration, since it's part of the standard netfilter setup of at least some distributions (eg Fedora). Otherwise NETFILTER_ADVANCED loses much of its point, since even non-advanced users would have to enable all the advanced options just to get a working IPv6 netfilter setup. Signed-off-by: Linus Torvalds --- net/ipv6/netfilter/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 4fc0b023cfd..6cae5475737 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -99,7 +99,7 @@ config IP6_NF_MATCH_HL config IP6_NF_MATCH_IPV6HEADER tristate '"ipv6header" IPv6 Extension Headers Match' depends on IP6_NF_IPTABLES - depends on NETFILTER_ADVANCED + default m if NETFILTER_ADVANCED=n help This module allows one to match packets based upon the ipv6 extension headers. -- cgit v1.2.3