From 9c8222b9e71b690c8388bb0ebe5c3e5a1469e884 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 18 Feb 2009 16:30:20 +0100 Subject: netfilter: x_tables: remove unneeded initializations Later patches change the locking on xt_table and the initialization of the lock element is not needed since the lock is always initialized in xt_table_register anyway. Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arptable_filter.c | 2 -- net/ipv4/netfilter/iptable_filter.c | 1 - net/ipv4/netfilter/iptable_mangle.c | 1 - net/ipv4/netfilter/iptable_raw.c | 1 - net/ipv4/netfilter/iptable_security.c | 1 - net/ipv4/netfilter/nf_nat_rule.c | 1 - 6 files changed, 7 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index e091187e864..6ecfdae7c58 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -48,8 +48,6 @@ static struct static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), - .private = NULL, .me = THIS_MODULE, .af = NFPROTO_ARP, }; diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 52cb6939d09..c30a969724f 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -56,7 +56,6 @@ static struct static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 3929d20b9e4..4087614d951 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -67,7 +67,6 @@ static struct static struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 7f65d18333e..e5356da1fb5 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -39,7 +39,6 @@ static struct static struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_raw.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index a52a35f4a58..29ab630f240 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -60,7 +60,6 @@ static struct static struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(security_table.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index a7eb0471904..6348a793936 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -61,7 +61,6 @@ static struct static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(nat_table.lock), .me = THIS_MODULE, .af = AF_INET, }; -- cgit v1.2.3 From ddc214c43a923e89741e04da2f10e3037a64e222 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Feb 2009 17:47:50 +0100 Subject: netfilter: arp_tables: unfold two critical loops in arp_packet_match() x86 and powerpc can perform long word accesses in an efficient maner. We can use this to unroll two loops in arp_packet_match(), to perform arithmetic on long words instead of bytes. This is a win on x86_64 for example. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 44 +++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 7ea88b61cb0..b5db4634261 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -73,6 +73,36 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, return (ret != 0); } +/* + * Unfortunatly, _b and _mask are not aligned to an int (or long int) + * Some arches dont care, unrolling the loop is a win on them. + */ +static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask) +{ +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + const unsigned long *a = (const unsigned long *)_a; + const unsigned long *b = (const unsigned long *)_b; + const unsigned long *mask = (const unsigned long *)_mask; + unsigned long ret; + + ret = (a[0] ^ b[0]) & mask[0]; + if (IFNAMSIZ > sizeof(unsigned long)) + ret |= (a[1] ^ b[1]) & mask[1]; + if (IFNAMSIZ > 2 * sizeof(unsigned long)) + ret |= (a[2] ^ b[2]) & mask[2]; + if (IFNAMSIZ > 3 * sizeof(unsigned long)) + ret |= (a[3] ^ b[3]) & mask[3]; + BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); +#else + unsigned long ret = 0; + int i; + + for (i = 0; i < IFNAMSIZ; i++) + ret |= (_a[i] ^ _b[i]) & _mask[i]; +#endif + return ret; +} + /* Returns whether packet matches rule or not. */ static inline int arp_packet_match(const struct arphdr *arphdr, struct net_device *dev, @@ -83,7 +113,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, const char *arpptr = (char *)(arphdr + 1); const char *src_devaddr, *tgt_devaddr; __be32 src_ipaddr, tgt_ipaddr; - int i, ret; + long ret; #define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg))) @@ -156,10 +186,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, } /* Look for ifname matches. */ - for (i = 0, ret = 0; i < IFNAMSIZ; i++) { - ret |= (indev[i] ^ arpinfo->iniface[i]) - & arpinfo->iniface_mask[i]; - } + ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask); if (FWINV(ret != 0, ARPT_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", @@ -168,10 +195,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, return 0; } - for (i = 0, ret = 0; i < IFNAMSIZ; i++) { - ret |= (outdev[i] ^ arpinfo->outiface[i]) - & arpinfo->outiface_mask[i]; - } + ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask); if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", @@ -221,7 +245,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, const struct net_device *out, struct xt_table *table) { - static const char nulldevname[IFNAMSIZ]; + static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; const struct arphdr *arp; bool hotdrop = false; -- cgit v1.2.3 From 563d36eb3fb22dd04da9aa6f12e1b9ba0ac115f3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 18 Feb 2009 18:38:40 +0100 Subject: netfilter: Combine ipt_TTL and ip6t_HL source Suggested by: James King Similarly to commit c9fd49680954714473d6cbd2546d6ff120f96840, merge TTL and HL. Since HL does not depend on any IPv6-specific function, no new module dependencies would arise. With slight adjustments to the Kconfig help text. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/Kconfig | 15 ------- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/ipt_TTL.c | 97 -------------------------------------------- 3 files changed, 113 deletions(-) delete mode 100644 net/ipv4/netfilter/ipt_TTL.c (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 3816e1dc929..3ad9f43b4c4 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -322,21 +322,6 @@ config IP_NF_TARGET_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_TTL - tristate 'TTL target support' - depends on IP_NF_MANGLE - depends on NETFILTER_ADVANCED - help - This option adds a `TTL' target, which enables the user to modify - the TTL value of the IP header. - - While it is safe to decrement/lower the TTL, this target also enables - functionality to increment and set the TTL value of the IP header to - arbitrary values. This is EXTREMELY DANGEROUS since you can easily - create immortal packets that loop forever on the network. - - To compile it as a module, choose M here. If unsure, say N. - # raw + specific targets config IP_NF_RAW tristate 'raw table support (required for NOTRACK/TRACE)' diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 5f9b650d90f..20b0c37155f 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -61,7 +61,6 @@ obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o -obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o # generic ARP tables diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c deleted file mode 100644 index 6d76aae90cc..00000000000 --- a/net/ipv4/netfilter/ipt_TTL.c +++ /dev/null @@ -1,97 +0,0 @@ -/* TTL modification target for IP tables - * (C) 2000,2005 by Harald Welte - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#include -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("Xtables: IPv4 TTL field modification target"); -MODULE_LICENSE("GPL"); - -static unsigned int -ttl_tg(struct sk_buff *skb, const struct xt_target_param *par) -{ - struct iphdr *iph; - const struct ipt_TTL_info *info = par->targinfo; - int new_ttl; - - if (!skb_make_writable(skb, skb->len)) - return NF_DROP; - - iph = ip_hdr(skb); - - switch (info->mode) { - case IPT_TTL_SET: - new_ttl = info->ttl; - break; - case IPT_TTL_INC: - new_ttl = iph->ttl + info->ttl; - if (new_ttl > 255) - new_ttl = 255; - break; - case IPT_TTL_DEC: - new_ttl = iph->ttl - info->ttl; - if (new_ttl < 0) - new_ttl = 0; - break; - default: - new_ttl = iph->ttl; - break; - } - - if (new_ttl != iph->ttl) { - csum_replace2(&iph->check, htons(iph->ttl << 8), - htons(new_ttl << 8)); - iph->ttl = new_ttl; - } - - return XT_CONTINUE; -} - -static bool ttl_tg_check(const struct xt_tgchk_param *par) -{ - const struct ipt_TTL_info *info = par->targinfo; - - if (info->mode > IPT_TTL_MAXMODE) { - printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", - info->mode); - return false; - } - if (info->mode != IPT_TTL_SET && info->ttl == 0) - return false; - return true; -} - -static struct xt_target ttl_tg_reg __read_mostly = { - .name = "TTL", - .family = NFPROTO_IPV4, - .target = ttl_tg, - .targetsize = sizeof(struct ipt_TTL_info), - .table = "mangle", - .checkentry = ttl_tg_check, - .me = THIS_MODULE, -}; - -static int __init ttl_tg_init(void) -{ - return xt_register_target(&ttl_tg_reg); -} - -static void __exit ttl_tg_exit(void) -{ - xt_unregister_target(&ttl_tg_reg); -} - -module_init(ttl_tg_init); -module_exit(ttl_tg_exit); -- cgit v1.2.3 From cfac5ef7b92a2d504563989ecd0beb563920444b Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 18 Feb 2009 18:39:31 +0100 Subject: netfilter: Combine ipt_ttl and ip6t_hl source Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/Kconfig | 9 ------- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/ipt_ttl.c | 63 -------------------------------------------- 3 files changed, 73 deletions(-) delete mode 100644 net/ipv4/netfilter/ipt_ttl.c (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 3ad9f43b4c4..40ad41f19b7 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -92,15 +92,6 @@ config IP_NF_MATCH_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_MATCH_TTL - tristate '"ttl" match support' - depends on NETFILTER_ADVANCED - help - This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user - to match packets by their TTL value. - - To compile it as a module, choose M here. If unsure, say N. - # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 20b0c37155f..48111594ee9 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -51,7 +51,6 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o -obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o # targets obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c deleted file mode 100644 index 297f1cbf4ff..00000000000 --- a/net/ipv4/netfilter/ipt_ttl.c +++ /dev/null @@ -1,63 +0,0 @@ -/* IP tables module for matching the value of the TTL - * - * (C) 2000,2001 by Harald Welte - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("Xtables: IPv4 TTL field match"); -MODULE_LICENSE("GPL"); - -static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct ipt_ttl_info *info = par->matchinfo; - const u8 ttl = ip_hdr(skb)->ttl; - - switch (info->mode) { - case IPT_TTL_EQ: - return ttl == info->ttl; - case IPT_TTL_NE: - return ttl != info->ttl; - case IPT_TTL_LT: - return ttl < info->ttl; - case IPT_TTL_GT: - return ttl > info->ttl; - default: - printk(KERN_WARNING "ipt_ttl: unknown mode %d\n", - info->mode); - return false; - } - - return false; -} - -static struct xt_match ttl_mt_reg __read_mostly = { - .name = "ttl", - .family = NFPROTO_IPV4, - .match = ttl_mt, - .matchsize = sizeof(struct ipt_ttl_info), - .me = THIS_MODULE, -}; - -static int __init ttl_mt_init(void) -{ - return xt_register_match(&ttl_mt_reg); -} - -static void __exit ttl_mt_exit(void) -{ - xt_unregister_match(&ttl_mt_reg); -} - -module_init(ttl_mt_init); -module_exit(ttl_mt_exit); -- cgit v1.2.3 From 4323362e49bd10b8ff3fe5cf183fdd52662ff4a3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 19 Feb 2009 11:16:03 +0100 Subject: netfilter: xtables: add backward-compat options Concern has been expressed about the changing Kconfig options. Provide the old options that forward-select. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/Kconfig | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 40ad41f19b7..f8d6180938d 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -92,6 +92,15 @@ config IP_NF_MATCH_ECN To compile it as a module, choose M here. If unsure, say N. +config IP_NF_MATCH_TTL + tristate '"ttl" match support' + depends on NETFILTER_ADVANCED + select NETFILTER_XT_MATCH_HL + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + COFNIG_NETFILTER_XT_MATCH_HL. + # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" @@ -313,6 +322,15 @@ config IP_NF_TARGET_ECN To compile it as a module, choose M here. If unsure, say N. +config IP_NF_TARGET_TTL + tristate '"TTL" target support' + depends on NETFILTER_ADVANCED + select NETFILTER_XT_TARGET_HL + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + COFNIG_NETFILTER_XT_TARGET_HL. + # raw + specific targets config IP_NF_RAW tristate 'raw table support (required for NOTRACK/TRACE)' -- cgit v1.2.3 From 784544739a25c30637397ace5489eeb6e15d7d49 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 20 Feb 2009 10:35:32 +0100 Subject: netfilter: iptables: lock free counters The reader/writer lock in ip_tables is acquired in the critical path of processing packets and is one of the reasons just loading iptables can cause a 20% performance loss. The rwlock serves two functions: 1) it prevents changes to table state (xt_replace) while table is in use. This is now handled by doing rcu on the xt_table. When table is replaced, the new table(s) are put in and the old one table(s) are freed after RCU period. 2) it provides synchronization when accesing the counter values. This is now handled by swapping in new table_info entries for each cpu then summing the old values, and putting the result back onto one cpu. On a busy system it may cause sampling to occur at different times on each cpu, but no packet/byte counts are lost in the process. Signed-off-by: Stephen Hemminger Sucessfully tested on my dual quad core machine too, but iptables only (no ipv6 here) BTW, my new "tbench 8" result is 2450 MB/s, (it was 2150 MB/s not so long ago) Acked-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 115 +++++++++++++++++++++++++++++--------- net/ipv4/netfilter/ip_tables.c | 120 +++++++++++++++++++++++++++++----------- 2 files changed, 175 insertions(+), 60 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index b5db4634261..64a7c6ce0b9 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -261,9 +261,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; - read_lock_bh(&table->lock); - private = table->private; - table_base = (void *)private->entries[smp_processor_id()]; + rcu_read_lock(); + private = rcu_dereference(table->private); + table_base = rcu_dereference(private->entries[smp_processor_id()]); + e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); @@ -335,7 +336,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = (void *)e + e->next_offset; } } while (!hotdrop); - read_unlock_bh(&table->lock); + + rcu_read_unlock(); if (hotdrop) return NF_DROP; @@ -738,11 +740,65 @@ static void get_counters(const struct xt_table_info *t, } } -static inline struct xt_counters *alloc_counters(struct xt_table *table) + +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct arpt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + +/* Take values from counters and add them back onto the current cpu */ +static void put_counters(struct xt_table_info *t, + const struct xt_counters counters[]) +{ + unsigned int i, cpu; + + local_bh_disable(); + cpu = smp_processor_id(); + i = 0; + ARPT_ENTRY_ITERATE(t->entries[cpu], + t->size, + add_counter_to_entry, + counters, + &i); + local_bh_enable(); +} + +static inline int +zero_entry_counter(struct arpt_entry *e, void *arg) +{ + e->counters.bcnt = 0; + e->counters.pcnt = 0; + return 0; +} + +static void +clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) +{ + unsigned int cpu; + const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; + + memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); + for_each_possible_cpu(cpu) { + memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); + ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, + zero_entry_counter, NULL); + } +} + +static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + struct xt_table_info *private = table->private; + struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -752,14 +808,30 @@ static inline struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - return ERR_PTR(-ENOMEM); + goto nomem; + + info = xt_alloc_table_info(private->size); + if (!info) + goto free_counters; - /* First, sum counters... */ - write_lock_bh(&table->lock); - get_counters(private, counters); - write_unlock_bh(&table->lock); + clone_counters(info, private); + + mutex_lock(&table->lock); + xt_table_entry_swap_rcu(private, info); + synchronize_net(); /* Wait until smoke has cleared */ + + get_counters(info, counters); + put_counters(private, counters); + mutex_unlock(&table->lock); + + xt_free_table_info(info); return counters; + + free_counters: + vfree(counters); + nomem: + return ERR_PTR(-ENOMEM); } static int copy_entries_to_user(unsigned int total_size, @@ -1099,20 +1171,6 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. - */ -static inline int add_counter_to_entry(struct arpt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { @@ -1172,13 +1230,14 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - write_lock_bh(&t->lock); + mutex_lock(&t->lock); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } + preempt_disable(); i = 0; /* Choose the copy that is on our node */ loc_cpu_entry = private->entries[smp_processor_id()]; @@ -1187,8 +1246,10 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, add_counter_to_entry, paddc, &i); + preempt_enable(); unlock_up_free: - write_unlock_bh(&t->lock); + mutex_unlock(&t->lock); + xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ef8b6ca068b..08cde5bd70a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -347,10 +347,12 @@ ipt_do_table(struct sk_buff *skb, mtpar.family = tgpar.family = NFPROTO_IPV4; tgpar.hooknum = hook; - read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - private = table->private; - table_base = (void *)private->entries[smp_processor_id()]; + + rcu_read_lock(); + private = rcu_dereference(table->private); + table_base = rcu_dereference(private->entries[smp_processor_id()]); + e = get_entry(table_base, private->hook_entry[hook]); /* For return from builtin chain */ @@ -445,7 +447,7 @@ ipt_do_table(struct sk_buff *skb, } } while (!hotdrop); - read_unlock_bh(&table->lock); + rcu_read_unlock(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -924,13 +926,68 @@ get_counters(const struct xt_table_info *t, counters, &i); } + +} + +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct ipt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + +/* Take values from counters and add them back onto the current cpu */ +static void put_counters(struct xt_table_info *t, + const struct xt_counters counters[]) +{ + unsigned int i, cpu; + + local_bh_disable(); + cpu = smp_processor_id(); + i = 0; + IPT_ENTRY_ITERATE(t->entries[cpu], + t->size, + add_counter_to_entry, + counters, + &i); + local_bh_enable(); +} + + +static inline int +zero_entry_counter(struct ipt_entry *e, void *arg) +{ + e->counters.bcnt = 0; + e->counters.pcnt = 0; + return 0; +} + +static void +clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) +{ + unsigned int cpu; + const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; + + memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); + for_each_possible_cpu(cpu) { + memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); + IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, + zero_entry_counter, NULL); + } } static struct xt_counters * alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + struct xt_table_info *private = table->private; + struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -939,14 +996,30 @@ static struct xt_counters * alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - return ERR_PTR(-ENOMEM); + goto nomem; - /* First, sum counters... */ - write_lock_bh(&table->lock); - get_counters(private, counters); - write_unlock_bh(&table->lock); + info = xt_alloc_table_info(private->size); + if (!info) + goto free_counters; + + clone_counters(info, private); + + mutex_lock(&table->lock); + xt_table_entry_swap_rcu(private, info); + synchronize_net(); /* Wait until smoke has cleared */ + + get_counters(info, counters); + put_counters(private, counters); + mutex_unlock(&table->lock); + + xt_free_table_info(info); return counters; + + free_counters: + vfree(counters); + nomem: + return ERR_PTR(-ENOMEM); } static int @@ -1312,27 +1385,6 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct ipt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ -#if 0 - duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n", - *i, - (long unsigned int)e->counters.pcnt, - (long unsigned int)e->counters.bcnt, - (long unsigned int)addme[*i].pcnt, - (long unsigned int)addme[*i].bcnt); -#endif - - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) @@ -1393,13 +1445,14 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat goto free; } - write_lock_bh(&t->lock); + mutex_lock(&t->lock); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } + preempt_disable(); i = 0; /* Choose the copy that is on our node */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; @@ -1408,8 +1461,9 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat add_counter_to_entry, paddc, &i); + preempt_enable(); unlock_up_free: - write_unlock_bh(&t->lock); + mutex_unlock(&t->lock); xt_table_unlock(t); module_put(t->me); free: -- cgit v1.2.3 From 08361aa807ae5e5007cd226ca9e34287512de737 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Feb 2009 11:03:33 +0100 Subject: netfilter: ip_tables: unfold two critical loops in ip_packet_match() While doing oprofile tests I noticed two loops are not properly unrolled by gcc Using a hand coded unrolled loop provides nice speedup : ipt_do_table credited of 2.52 % of cpu instead of 3.29 % in tbench. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_tables.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 08cde5bd70a..e5294aec967 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -74,6 +74,25 @@ do { \ Hence the start of any table is given by get_table() below. */ +static unsigned long ifname_compare(const char *_a, const char *_b, + const unsigned char *_mask) +{ + const unsigned long *a = (const unsigned long *)_a; + const unsigned long *b = (const unsigned long *)_b; + const unsigned long *mask = (const unsigned long *)_mask; + unsigned long ret; + + ret = (a[0] ^ b[0]) & mask[0]; + if (IFNAMSIZ > sizeof(unsigned long)) + ret |= (a[1] ^ b[1]) & mask[1]; + if (IFNAMSIZ > 2 * sizeof(unsigned long)) + ret |= (a[2] ^ b[2]) & mask[2]; + if (IFNAMSIZ > 3 * sizeof(unsigned long)) + ret |= (a[3] ^ b[3]) & mask[3]; + BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); + return ret; +} + /* Returns whether matches rule or not. */ /* Performance critical - called for every packet */ static inline bool @@ -83,7 +102,6 @@ ip_packet_match(const struct iphdr *ip, const struct ipt_ip *ipinfo, int isfrag) { - size_t i; unsigned long ret; #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) @@ -103,12 +121,7 @@ ip_packet_match(const struct iphdr *ip, return false; } - /* Look for ifname matches; this should unroll nicely. */ - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)indev)[i] - ^ ((const unsigned long *)ipinfo->iniface)[i]) - & ((const unsigned long *)ipinfo->iniface_mask)[i]; - } + ret = ifname_compare(indev, ipinfo->iniface, ipinfo->iniface_mask); if (FWINV(ret != 0, IPT_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", @@ -117,11 +130,7 @@ ip_packet_match(const struct iphdr *ip, return false; } - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)outdev)[i] - ^ ((const unsigned long *)ipinfo->outiface)[i]) - & ((const unsigned long *)ipinfo->outiface_mask)[i]; - } + ret = ifname_compare(outdev, ipinfo->outiface, ipinfo->outiface_mask); if (FWINV(ret != 0, IPT_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", -- cgit v1.2.3 From ca735b3aaa945626ba65a3e51145bfe4ecd9e222 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Mon, 16 Mar 2009 14:54:21 +0100 Subject: netfilter: use a linked list of loggers This patch modifies nf_log to use a linked list of loggers for each protocol. This list of loggers is read and write protected with a mutex. This patch separates registration and binding. To be used as logging module, a module has to register calling nf_log_register() and to bind to a protocol it has to call nf_log_bind_pf(). This patch also converts the logging modules to the new API. For nfnetlink_log, it simply switchs call to register functions to call to bind function and adds a call to nf_log_register() during init. For other modules, it just remove a const flag from the logger structure and replace it with a __read_mostly. Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_LOG.c | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 27a78fbbd92..acc44c69eb6 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -464,7 +464,7 @@ static struct xt_target log_tg_reg __read_mostly = { .me = THIS_MODULE, }; -static const struct nf_logger ipt_log_logger ={ +static struct nf_logger ipt_log_logger __read_mostly = { .name = "ipt_LOG", .logfn = &ipt_log_packet, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 18a2826b57c..d32cc4bb328 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -379,7 +379,7 @@ static struct xt_target ulog_tg_reg __read_mostly = { .me = THIS_MODULE, }; -static struct nf_logger ipt_ulog_logger = { +static struct nf_logger ipt_ulog_logger __read_mostly = { .name = "ipt_ULOG", .logfn = ipt_logfn, .me = THIS_MODULE, -- cgit v1.2.3 From 67c0d57930ff9a24c6c34abee1b01f7716a9b0e2 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 16 Mar 2009 15:17:23 +0100 Subject: netfilter: Kconfig spelling fixes (trivial) Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index f8d6180938d..1833bdbf980 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -31,7 +31,7 @@ config NF_CONNTRACK_PROC_COMPAT default y help This option enables /proc and sysctl compatibility with the old - layer 3 dependant connection tracking. This is needed to keep + layer 3 dependent connection tracking. This is needed to keep old programs that have not been adapted to the new names working. If unsure, say Y. @@ -99,7 +99,7 @@ config IP_NF_MATCH_TTL ---help--- This is a backwards-compat option for the user's convenience (e.g. when running oldconfig). It selects - COFNIG_NETFILTER_XT_MATCH_HL. + CONFIG_NETFILTER_XT_MATCH_HL. # `filter', generic and specific targets config IP_NF_FILTER @@ -329,7 +329,7 @@ config IP_NF_TARGET_TTL ---help--- This is a backwards-compat option for the user's convenience (e.g. when running oldconfig). It selects - COFNIG_NETFILTER_XT_TARGET_HL. + CONFIG_NETFILTER_XT_TARGET_HL. # raw + specific targets config IP_NF_RAW -- cgit v1.2.3 From 1db7a748dfd50d7615913730763c024444900030 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 16 Mar 2009 15:18:50 +0100 Subject: netfilter: conntrack: increase drop stats if sequence adjustment fails This patch increases the statistics of packets drop if the sequence adjustment fails in ipv4_confirm(). Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 4beb04fac58..8b681f24e27 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -120,8 +120,10 @@ static unsigned int ipv4_confirm(unsigned int hooknum, typeof(nf_nat_seq_adjust_hook) seq_adjust; seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); - if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) + if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) { + NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); return NF_DROP; + } } out: /* We've seen it coming out the other side: confirm it */ -- cgit v1.2.3 From 95ba434f898c3cb5c7457dce265bf0ab72ba8ce9 Mon Sep 17 00:00:00 2001 From: Scott James Remnant Date: Mon, 16 Mar 2009 15:31:10 +0100 Subject: netfilter: auto-load ip_queue module when socket opened The ip_queue module is missing the net-pf-16-proto-3 alias that would causae it to be auto-loaded when a socket of that type is opened. This patch adds the alias. Signed-off-by: Scott James Remnant Signed-off-by: Tim Gardner Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_queue.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 432ce9d1c11..5f22c91c6e1 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -640,6 +641,7 @@ static void __exit ip_queue_fini(void) MODULE_DESCRIPTION("IPv4 packet queue handler"); MODULE_AUTHOR("James Morris "); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL); module_init(ip_queue_init); module_exit(ip_queue_fini); -- cgit v1.2.3