diff options
Diffstat (limited to 'net/ipv4/netfilter/ip_conntrack_core.c')
-rw-r--r-- | net/ipv4/netfilter/ip_conntrack_core.c | 128 |
1 files changed, 70 insertions, 58 deletions
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 8556a4f4f60..07ba1dd136b 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -2,7 +2,7 @@ but required by, the NAT layer; it can also be used by an iptables extension. */ -/* (C) 1999-2001 Paul `Rusty' Russell +/* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> * * This program is free software; you can redistribute it and/or modify @@ -99,7 +99,7 @@ __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache) void ip_ct_deliver_cached_events(const struct ip_conntrack *ct) { struct ip_conntrack_ecache *ecache; - + local_bh_disable(); ecache = &__get_cpu_var(ip_conntrack_ecache); if (ecache->ct == ct) @@ -147,9 +147,9 @@ static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple, unsigned int size, unsigned int rnd) { return (jhash_3words((__force u32)tuple->src.ip, - ((__force u32)tuple->dst.ip ^ tuple->dst.protonum), - (tuple->src.u.all | (tuple->dst.u.all << 16)), - rnd) % size); + ((__force u32)tuple->dst.ip ^ tuple->dst.protonum), + (tuple->src.u.all | (tuple->dst.u.all << 16)), + rnd) % size); } static u_int32_t @@ -219,7 +219,7 @@ struct ip_conntrack_expect * __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) { struct ip_conntrack_expect *i; - + list_for_each_entry(i, &ip_conntrack_expect_list, list) { if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) return i; @@ -232,7 +232,7 @@ struct ip_conntrack_expect * ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) { struct ip_conntrack_expect *i; - + read_lock_bh(&ip_conntrack_lock); i = __ip_conntrack_expect_find(tuple); if (i) @@ -303,6 +303,7 @@ destroy_conntrack(struct nf_conntrack *nfct) struct ip_conntrack *ct = (struct ip_conntrack *)nfct; struct ip_conntrack_protocol *proto; struct ip_conntrack_helper *helper; + typeof(ip_conntrack_destroyed) destroyed; DEBUGP("destroy_conntrack(%p)\n", ct); IP_NF_ASSERT(atomic_read(&nfct->use) == 0); @@ -318,12 +319,16 @@ destroy_conntrack(struct nf_conntrack *nfct) /* To make sure we don't get any weird locking issues here: * destroy_conntrack() MUST NOT be called with a write lock * to ip_conntrack_lock!!! -HW */ + rcu_read_lock(); proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); if (proto && proto->destroy) proto->destroy(ct); - if (ip_conntrack_destroyed) - ip_conntrack_destroyed(ct); + destroyed = rcu_dereference(ip_conntrack_destroyed); + if (destroyed) + destroyed(ct); + + rcu_read_unlock(); write_lock_bh(&ip_conntrack_lock); /* Expectations will have been removed in clean_from_lists, @@ -398,7 +403,7 @@ ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple, static void __ip_conntrack_hash_insert(struct ip_conntrack *ct, unsigned int hash, - unsigned int repl_hash) + unsigned int repl_hash) { ct->id = ++ip_conntrack_next_id; list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list, @@ -446,15 +451,15 @@ __ip_conntrack_confirm(struct sk_buff **pskb) /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ /* No external references means noone else could have - confirmed us. */ + confirmed us. */ IP_NF_ASSERT(!is_confirmed(ct)); DEBUGP("Confirming conntrack %p\n", ct); write_lock_bh(&ip_conntrack_lock); /* See if there's one in the list already, including reverse: - NAT could have grabbed it without realizing, since we're - not in the hash. If there is, we lost race. */ + NAT could have grabbed it without realizing, since we're + not in the hash. If there is, we lost race. */ list_for_each_entry(h, &ip_conntrack_hash[hash], list) if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, &h->tuple)) @@ -536,7 +541,7 @@ static int early_drop(struct list_head *chain) if (del_timer(&ct->timeout)) { death_by_timeout((unsigned long)ct); dropped = 1; - CONNTRACK_STAT_INC(early_drop); + CONNTRACK_STAT_INC_ATOMIC(early_drop); } ip_conntrack_put(ct); return dropped; @@ -595,14 +600,14 @@ ip_conntrack_proto_find_get(u_int8_t protocol) { struct ip_conntrack_protocol *p; - preempt_disable(); + rcu_read_lock(); p = __ip_conntrack_proto_find(protocol); if (p) { if (!try_module_get(p->me)) p = &ip_conntrack_generic_protocol; } - preempt_enable(); - + rcu_read_unlock(); + return p; } @@ -638,14 +643,13 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, } } - conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); + conntrack = kmem_cache_zalloc(ip_conntrack_cachep, GFP_ATOMIC); if (!conntrack) { DEBUGP("Can't allocate conntrack.\n"); atomic_dec(&ip_conntrack_count); return ERR_PTR(-ENOMEM); } - memset(conntrack, 0, sizeof(*conntrack)); atomic_set(&conntrack->ct_general.use, 1); conntrack->ct_general.destroy = destroy_conntrack; conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; @@ -746,7 +750,7 @@ resolve_normal_ct(struct sk_buff *skb, IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0); - if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, + if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, &tuple,proto)) return NULL; @@ -771,7 +775,7 @@ resolve_normal_ct(struct sk_buff *skb, if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { DEBUGP("ip_conntrack_in: normal packet for %p\n", ct); - *ctinfo = IP_CT_ESTABLISHED; + *ctinfo = IP_CT_ESTABLISHED; } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { DEBUGP("ip_conntrack_in: related packet for %p\n", ct); @@ -803,7 +807,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum, /* Previously seen (loopback or untracked)? Ignore. */ if ((*pskb)->nfct) { - CONNTRACK_STAT_INC(ignore); + CONNTRACK_STAT_INC_ATOMIC(ignore); return NF_ACCEPT; } @@ -822,7 +826,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum, if ((*pskb)->pkt_type == PACKET_BROADCAST) { printk("Broadcast packet!\n"); return NF_ACCEPT; - } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF)) + } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF)) == htonl(0x000000FF)) { printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n", NIPQUAD((*pskb)->nh.iph->saddr), @@ -831,27 +835,28 @@ unsigned int ip_conntrack_in(unsigned int hooknum, } #endif + /* rcu_read_lock()ed by nf_hook_slow */ proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol); /* It may be an special packet, error, unclean... * inverse of the return code tells to the netfilter * core what to do with the packet. */ - if (proto->error != NULL + if (proto->error != NULL && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) { - CONNTRACK_STAT_INC(error); - CONNTRACK_STAT_INC(invalid); + CONNTRACK_STAT_INC_ATOMIC(error); + CONNTRACK_STAT_INC_ATOMIC(invalid); return -ret; } if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) { /* Not valid part of a connection */ - CONNTRACK_STAT_INC(invalid); + CONNTRACK_STAT_INC_ATOMIC(invalid); return NF_ACCEPT; } if (IS_ERR(ct)) { /* Too stressed to deal. */ - CONNTRACK_STAT_INC(drop); + CONNTRACK_STAT_INC_ATOMIC(drop); return NF_DROP; } @@ -863,7 +868,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum, * the netfilter core what to do*/ nf_conntrack_put((*pskb)->nfct); (*pskb)->nfct = NULL; - CONNTRACK_STAT_INC(invalid); + CONNTRACK_STAT_INC_ATOMIC(invalid); return -ret; } @@ -876,8 +881,15 @@ unsigned int ip_conntrack_in(unsigned int hooknum, int invert_tuplepr(struct ip_conntrack_tuple *inverse, const struct ip_conntrack_tuple *orig) { - return ip_ct_invert_tuple(inverse, orig, - __ip_conntrack_proto_find(orig->dst.protonum)); + struct ip_conntrack_protocol *proto; + int ret; + + rcu_read_lock(); + proto = __ip_conntrack_proto_find(orig->dst.protonum); + ret = ip_ct_invert_tuple(inverse, orig, proto); + rcu_read_unlock(); + + return ret; } /* Would two expected things clash? */ @@ -885,7 +897,7 @@ static inline int expect_clash(const struct ip_conntrack_expect *a, const struct ip_conntrack_expect *b) { /* Part covered by intersection of masks must be unequal, - otherwise they clash */ + otherwise they clash */ struct ip_conntrack_tuple intersect_mask = { { a->mask.src.ip & b->mask.src.ip, { a->mask.src.u.all & b->mask.src.u.all } }, @@ -923,7 +935,7 @@ void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp) } /* We don't increase the master conntrack refcount for non-fulfilled - * conntracks. During the conntrack destruction, the expectations are + * conntracks. During the conntrack destruction, the expectations are * always killed before the conntrack itself */ struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me) { @@ -1012,7 +1024,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect) } /* Will be over limit? */ - if (expect->master->helper->max_expected && + if (expect->master->helper->max_expected && expect->master->expecting >= expect->master->helper->max_expected) evict_oldest_expect(expect->master); @@ -1021,7 +1033,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect) ret = 0; out: write_unlock_bh(&ip_conntrack_lock); - return ret; + return ret; } /* Alter reply tuple (maybe alter helper). This is for NAT, and is @@ -1069,7 +1081,7 @@ static inline void unhelp(struct ip_conntrack_tuple_hash *i, const struct ip_conntrack_helper *me) { if (tuplehash_to_ctrack(i)->helper == me) { - ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i)); + ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i)); tuplehash_to_ctrack(i)->helper = NULL; } } @@ -1105,8 +1117,8 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) } /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ -void __ip_ct_refresh_acct(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, +void __ip_ct_refresh_acct(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, const struct sk_buff *skb, unsigned long extra_jiffies, int do_acct) @@ -1140,7 +1152,7 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct, #ifdef CONFIG_IP_NF_CT_ACCT if (do_acct) { ct->counters[CTINFO2DIR(ctinfo)].packets++; - ct->counters[CTINFO2DIR(ctinfo)].bytes += + ct->counters[CTINFO2DIR(ctinfo)].bytes += ntohs(skb->nh.iph->tot_len); if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) @@ -1194,7 +1206,7 @@ ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user) { skb_orphan(skb); - local_bh_disable(); + local_bh_disable(); skb = ip_defrag(skb, user); local_bh_enable(); @@ -1211,7 +1223,7 @@ static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) /* This ICMP is in reverse direction to the packet which caused it */ ct = ip_conntrack_get(skb, &ctinfo); - + if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; else @@ -1279,7 +1291,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) struct inet_sock *inet = inet_sk(sk); struct ip_conntrack_tuple_hash *h; struct ip_conntrack_tuple tuple; - + IP_CT_TUPLE_U_BLANK(&tuple); tuple.src.ip = inet->rcv_saddr; tuple.src.u.tcp.port = inet->sport; @@ -1347,7 +1359,7 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size) if (vmalloced) vfree(hash); else - free_pages((unsigned long)hash, + free_pages((unsigned long)hash, get_order(sizeof(struct list_head) * size)); } @@ -1355,11 +1367,11 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size) supposed to kill the mall. */ void ip_conntrack_cleanup(void) { - ip_ct_attach = NULL; + rcu_assign_pointer(ip_ct_attach, NULL); /* This makes sure all current packets have passed through - netfilter framework. Roll on, two-stage module - delete... */ + netfilter framework. Roll on, two-stage module + delete... */ synchronize_net(); ip_ct_event_cache_flush(); @@ -1385,11 +1397,11 @@ static struct list_head *alloc_hashtable(int size, int *vmalloced) struct list_head *hash; unsigned int i; - *vmalloced = 0; - hash = (void*)__get_free_pages(GFP_KERNEL, + *vmalloced = 0; + hash = (void*)__get_free_pages(GFP_KERNEL, get_order(sizeof(struct list_head) * size)); - if (!hash) { + if (!hash) { *vmalloced = 1; printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n"); hash = vmalloc(sizeof(struct list_head) * size); @@ -1422,7 +1434,7 @@ static int set_hashsize(const char *val, struct kernel_param *kp) if (!hash) return -ENOMEM; - /* We have to rehash for the new table anyway, so we also can + /* We have to rehash for the new table anyway, so we also can * use a new random seed */ get_random_bytes(&rnd, 4); @@ -1460,7 +1472,7 @@ int __init ip_conntrack_init(void) /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ - if (!ip_conntrack_htable_size) { + if (!ip_conntrack_htable_size) { ip_conntrack_htable_size = (((num_physpages << PAGE_SHIFT) / 16384) / sizeof(struct list_head)); @@ -1490,8 +1502,8 @@ int __init ip_conntrack_init(void) } ip_conntrack_cachep = kmem_cache_create("ip_conntrack", - sizeof(struct ip_conntrack), 0, - 0, NULL, NULL); + sizeof(struct ip_conntrack), 0, + 0, NULL, NULL); if (!ip_conntrack_cachep) { printk(KERN_ERR "Unable to create ip_conntrack slab cache\n"); goto err_free_hash; @@ -1508,15 +1520,15 @@ int __init ip_conntrack_init(void) /* Don't NEED lock here, but good form anyway. */ write_lock_bh(&ip_conntrack_lock); for (i = 0; i < MAX_IP_CT_PROTO; i++) - ip_ct_protos[i] = &ip_conntrack_generic_protocol; + rcu_assign_pointer(ip_ct_protos[i], &ip_conntrack_generic_protocol); /* Sew in builtin protocols. */ - ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp; - ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp; - ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp; + rcu_assign_pointer(ip_ct_protos[IPPROTO_TCP], &ip_conntrack_protocol_tcp); + rcu_assign_pointer(ip_ct_protos[IPPROTO_UDP], &ip_conntrack_protocol_udp); + rcu_assign_pointer(ip_ct_protos[IPPROTO_ICMP], &ip_conntrack_protocol_icmp); write_unlock_bh(&ip_conntrack_lock); /* For use by ipt_REJECT */ - ip_ct_attach = ip_conntrack_attach; + rcu_assign_pointer(ip_ct_attach, ip_conntrack_attach); /* Set up fake conntrack: - to never be deleted, not in any hashes */ |