From 4a2f965ca5a4e2593744bf75425d85e0e8ff814a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 18 Feb 2009 16:29:44 +0100 Subject: netfilter: x_tables: change elements in x_tables Change to proper type on private pointer rather than anonymous void. Keep active elements on same cache line. Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index c7ee8744d26..9fac88fc0e7 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -349,9 +349,6 @@ struct xt_table { struct list_head list; - /* A unique name... */ - const char name[XT_TABLE_MAXNAMELEN]; - /* What hooks you will enter on */ unsigned int valid_hooks; @@ -359,13 +356,15 @@ struct xt_table rwlock_t lock; /* Man behind the curtain... */ - //struct ip6t_table_info *private; - void *private; + struct xt_table_info *private; /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; u_int8_t af; /* address/protocol family */ + + /* A unique name... */ + const char name[XT_TABLE_MAXNAMELEN]; }; #include -- cgit v1.2.3 From 784544739a25c30637397ace5489eeb6e15d7d49 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 20 Feb 2009 10:35:32 +0100 Subject: netfilter: iptables: lock free counters The reader/writer lock in ip_tables is acquired in the critical path of processing packets and is one of the reasons just loading iptables can cause a 20% performance loss. The rwlock serves two functions: 1) it prevents changes to table state (xt_replace) while table is in use. This is now handled by doing rcu on the xt_table. When table is replaced, the new table(s) are put in and the old one table(s) are freed after RCU period. 2) it provides synchronization when accesing the counter values. This is now handled by swapping in new table_info entries for each cpu then summing the old values, and putting the result back onto one cpu. On a busy system it may cause sampling to occur at different times on each cpu, but no packet/byte counts are lost in the process. Signed-off-by: Stephen Hemminger Sucessfully tested on my dual quad core machine too, but iptables only (no ipv6 here) BTW, my new "tbench 8" result is 2450 MB/s, (it was 2150 MB/s not so long ago) Acked-by: Eric Dumazet Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 9fac88fc0e7..e8e08d03675 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -353,7 +353,7 @@ struct xt_table unsigned int valid_hooks; /* Lock for the curtain */ - rwlock_t lock; + struct mutex lock; /* Man behind the curtain... */ struct xt_table_info *private; @@ -385,7 +385,7 @@ struct xt_table_info /* ipt_entry tables: one per CPU */ /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */ - char *entries[1]; + void *entries[1]; }; #define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \ @@ -432,6 +432,8 @@ extern void xt_proto_fini(struct net *net, u_int8_t af); extern struct xt_table_info *xt_alloc_table_info(unsigned int size); extern void xt_free_table_info(struct xt_table_info *info); +extern void xt_table_entry_swap_rcu(struct xt_table_info *old, + struct xt_table_info *new); #ifdef CONFIG_COMPAT #include -- cgit v1.2.3 From e478075c6f07a383c378fb400edc1a7407a941b0 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Fri, 20 Feb 2009 10:47:09 +0100 Subject: netfilter: nf_conntrack: table max size should hold at least table size Table size is defined as unsigned, wheres the table maximum size is defined as a signed integer. The calculation of max is 8 or 4, multiplied the table size. Therefore the max value is aligned to unsigned. Signed-off-by: Hagen Paul Pfeifer Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 2e0c53641cb..4dfb793c3f1 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -287,7 +287,7 @@ static inline int nf_ct_is_untracked(const struct sk_buff *skb) extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); extern unsigned int nf_conntrack_htable_size; -extern int nf_conntrack_max; +extern unsigned int nf_conntrack_max; #define NF_CT_STAT_INC(net, count) \ (per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++) -- cgit v1.2.3 From 268cb38e1802db560c73167e643f14a3dcb4b07c Mon Sep 17 00:00:00 2001 From: Adam Nielsen Date: Fri, 20 Feb 2009 10:55:14 +0100 Subject: netfilter: x_tables: add LED trigger target Kernel module providing implementation of LED netfilter target. Each instance of the target appears as a led-trigger device, which can be associated with one or more LEDs in /sys/class/leds/ Signed-off-by: Adam Nielsen Acked-by: Richard Purdie Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_LED.h | 13 +++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 include/linux/netfilter/xt_LED.h (limited to 'include') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 5a8af875bce..deeaee5c83f 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -7,6 +7,7 @@ header-y += xt_CLASSIFY.h header-y += xt_CONNMARK.h header-y += xt_CONNSECMARK.h header-y += xt_DSCP.h +header-y += xt_LED.h header-y += xt_MARK.h header-y += xt_NFLOG.h header-y += xt_NFQUEUE.h diff --git a/include/linux/netfilter/xt_LED.h b/include/linux/netfilter/xt_LED.h new file mode 100644 index 00000000000..4c91a0d770d --- /dev/null +++ b/include/linux/netfilter/xt_LED.h @@ -0,0 +1,13 @@ +#ifndef _XT_LED_H +#define _XT_LED_H + +struct xt_led_info { + char id[27]; /* Unique ID for this trigger in the LED class */ + __u8 always_blink; /* Blink even if the LED is already on */ + __u32 delay; /* Delay until LED is switched off after trigger */ + + /* Kernel data used in the module */ + void *internal_data __attribute__((aligned(8))); +}; + +#endif /* _XT_LED_H */ -- cgit v1.2.3 From d060ffc1840e37100628f520e66600c5ae483b44 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 24 Feb 2009 15:23:58 +0100 Subject: netfilter: install missing headers iptables imports headers from (the unifdefed headers of a) kernel tree, but some headers happened to not be installed. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 6 ++++++ include/linux/netfilter_ipv6/Kbuild | 1 + 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index deeaee5c83f..947b47d7f6c 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -14,8 +14,11 @@ header-y += xt_NFQUEUE.h header-y += xt_RATEEST.h header-y += xt_SECMARK.h header-y += xt_TCPMSS.h +header-y += xt_TCPOPTSTRIP.h +header-y += xt_TPROXY.h header-y += xt_comment.h header-y += xt_connbytes.h +header-y += xt_connlimit.h header-y += xt_connmark.h header-y += xt_conntrack.h header-y += xt_dccp.h @@ -31,6 +34,7 @@ header-y += xt_mark.h header-y += xt_multiport.h header-y += xt_owner.h header-y += xt_pkttype.h +header-y += xt_quota.h header-y += xt_rateest.h header-y += xt_realm.h header-y += xt_recent.h @@ -40,6 +44,8 @@ header-y += xt_statistic.h header-y += xt_string.h header-y += xt_tcpmss.h header-y += xt_tcpudp.h +header-y += xt_time.h +header-y += xt_u32.h unifdef-y += nf_conntrack_common.h unifdef-y += nf_conntrack_ftp.h diff --git a/include/linux/netfilter_ipv6/Kbuild b/include/linux/netfilter_ipv6/Kbuild index 8887a5fcd1d..aca4bd1f6d7 100644 --- a/include/linux/netfilter_ipv6/Kbuild +++ b/include/linux/netfilter_ipv6/Kbuild @@ -11,6 +11,7 @@ header-y += ip6t_length.h header-y += ip6t_limit.h header-y += ip6t_mac.h header-y += ip6t_mark.h +header-y += ip6t_mh.h header-y += ip6t_multiport.h header-y += ip6t_opts.h header-y += ip6t_owner.h -- cgit v1.2.3 From ca735b3aaa945626ba65a3e51145bfe4ecd9e222 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Mon, 16 Mar 2009 14:54:21 +0100 Subject: netfilter: use a linked list of loggers This patch modifies nf_log to use a linked list of loggers for each protocol. This list of loggers is read and write protected with a mutex. This patch separates registration and binding. To be used as logging module, a module has to register calling nf_log_register() and to bind to a protocol it has to call nf_log_bind_pf(). This patch also converts the logging modules to the new API. For nfnetlink_log, it simply switchs call to register functions to call to bind function and adds a call to nf_log_register() during init. For other modules, it just remove a const flag from the logger structure and replace it with a __read_mostly. Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_log.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 7182c06974f..920997f1aff 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -1,6 +1,8 @@ #ifndef _NF_LOG_H #define _NF_LOG_H +#include + /* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will * disappear once iptables is replaced with pkttables. Please DO NOT use them * for any new code! */ @@ -40,12 +42,15 @@ struct nf_logger { struct module *me; nf_logfn *logfn; char *name; + struct list_head list[NFPROTO_NUMPROTO]; }; /* Function to register/unregister log function. */ -int nf_log_register(u_int8_t pf, const struct nf_logger *logger); -void nf_log_unregister(const struct nf_logger *logger); -void nf_log_unregister_pf(u_int8_t pf); +int nf_log_register(u_int8_t pf, struct nf_logger *logger); +void nf_log_unregister(struct nf_logger *logger); + +int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger); +void nf_log_unbind_pf(u_int8_t pf); /* Calls the registered backend logging function */ void nf_log_packet(u_int8_t pf, -- cgit v1.2.3 From 9d2493f88f846b391a15a736efc7f4b97d6c4046 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 16 Mar 2009 15:15:35 +0100 Subject: netfilter: remove IPvX specific parts from nf_conntrack_l4proto.h Moving the structure definitions to the corresponding IPvX specific header files. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_l4proto.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index debdaf75cec..16ab604659e 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -90,10 +90,7 @@ struct nf_conntrack_l4proto struct module *me; }; -/* Existing built-in protocols */ -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; +/* Existing built-in generic protocol */ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; #define MAX_NF_CT_PROTO 256 -- cgit v1.2.3 From acc738fec03bdaa5b77340c32a82fbfedaaabef0 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 16 Mar 2009 15:35:29 +0100 Subject: netfilter: xtables: avoid pointer to self Commit 784544739a25c30637397ace5489eeb6e15d7d49 (netfilter: iptables: lock free counters) broke a number of modules whose rule data referenced itself. A reallocation would not reestablish the correct references, so it is best to use a separate struct that does not fall under RCU. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_limit.h | 9 +++++---- include/linux/netfilter/xt_quota.h | 4 +++- include/linux/netfilter/xt_statistic.h | 7 ++++--- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/xt_limit.h b/include/linux/netfilter/xt_limit.h index b3ce65375ec..fda222c7953 100644 --- a/include/linux/netfilter/xt_limit.h +++ b/include/linux/netfilter/xt_limit.h @@ -4,6 +4,8 @@ /* timings are in milliseconds. */ #define XT_LIMIT_SCALE 10000 +struct xt_limit_priv; + /* 1/10,000 sec period => max of 10,000/sec. Min rate is then 429490 seconds, or one every 59 hours. */ struct xt_rateinfo { @@ -11,11 +13,10 @@ struct xt_rateinfo { u_int32_t burst; /* Period multiplier for upper limit. */ /* Used internally by the kernel */ - unsigned long prev; - u_int32_t credit; + unsigned long prev; /* moved to xt_limit_priv */ + u_int32_t credit; /* moved to xt_limit_priv */ u_int32_t credit_cap, cost; - /* Ugly, ugly fucker. */ - struct xt_rateinfo *master; + struct xt_limit_priv *master; }; #endif /*_XT_RATE_H*/ diff --git a/include/linux/netfilter/xt_quota.h b/include/linux/netfilter/xt_quota.h index 4c8368d781e..8dc89dfc136 100644 --- a/include/linux/netfilter/xt_quota.h +++ b/include/linux/netfilter/xt_quota.h @@ -6,13 +6,15 @@ enum xt_quota_flags { }; #define XT_QUOTA_MASK 0x1 +struct xt_quota_priv; + struct xt_quota_info { u_int32_t flags; u_int32_t pad; /* Used internally by the kernel */ aligned_u64 quota; - struct xt_quota_info *master; + struct xt_quota_priv *master; }; #endif /* _XT_QUOTA_H */ diff --git a/include/linux/netfilter/xt_statistic.h b/include/linux/netfilter/xt_statistic.h index 3d38bc97504..8f521ab49ef 100644 --- a/include/linux/netfilter/xt_statistic.h +++ b/include/linux/netfilter/xt_statistic.h @@ -13,6 +13,8 @@ enum xt_statistic_flags { }; #define XT_STATISTIC_MASK 0x1 +struct xt_statistic_priv; + struct xt_statistic_info { u_int16_t mode; u_int16_t flags; @@ -23,11 +25,10 @@ struct xt_statistic_info { struct { u_int32_t every; u_int32_t packet; - /* Used internally by the kernel */ - u_int32_t count; + u_int32_t count; /* unused */ } nth; } u; - struct xt_statistic_info *master __attribute__((aligned(8))); + struct xt_statistic_priv *master __attribute__((aligned(8))); }; #endif /* _XT_STATISTIC_H */ -- cgit v1.2.3 From 0269ea4937343536ec7e85649932bc8c9686ea78 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 16 Mar 2009 17:10:36 +0100 Subject: netfilter: xtables: add cluster match This patch adds the iptables cluster match. This match can be used to deploy gateway and back-end load-sharing clusters. The cluster can be composed of 32 nodes maximum (although I have only tested this with two nodes, so I cannot tell what is the real scalability limit of this solution in terms of cluster nodes). Assuming that all the nodes see all packets (see below for an example on how to do that if your switch does not allow this), the cluster match decides if this node has to handle a packet given: (jhash(source IP) % total_nodes) & node_mask For related connections, the master conntrack is used. The following is an example of its use to deploy a gateway cluster composed of two nodes (where this is the node 1): iptables -I PREROUTING -t mangle -i eth1 -m cluster \ --cluster-total-nodes 2 --cluster-local-node 1 \ --cluster-proc-name eth1 -j MARK --set-mark 0xffff iptables -A PREROUTING -t mangle -i eth1 \ -m mark ! --mark 0xffff -j DROP iptables -A PREROUTING -t mangle -i eth2 -m cluster \ --cluster-total-nodes 2 --cluster-local-node 1 \ --cluster-proc-name eth2 -j MARK --set-mark 0xffff iptables -A PREROUTING -t mangle -i eth2 \ -m mark ! --mark 0xffff -j DROP And the following commands to make all nodes see the same packets: ip maddr add 01:00:5e:00:01:01 dev eth1 ip maddr add 01:00:5e:00:01:02 dev eth2 arptables -I OUTPUT -o eth1 --h-length 6 \ -j mangle --mangle-mac-s 01:00:5e:00:01:01 arptables -I INPUT -i eth1 --h-length 6 \ --destination-mac 01:00:5e:00:01:01 \ -j mangle --mangle-mac-d 00:zz:yy:xx:5a:27 arptables -I OUTPUT -o eth2 --h-length 6 \ -j mangle --mangle-mac-s 01:00:5e:00:01:02 arptables -I INPUT -i eth2 --h-length 6 \ --destination-mac 01:00:5e:00:01:02 \ -j mangle --mangle-mac-d 00:zz:yy:xx:5a:27 In the case of TCP connections, pickup facility has to be disabled to avoid marking TCP ACK packets coming in the reply direction as valid. echo 0 > /proc/sys/net/netfilter/nf_conntrack_tcp_loose BTW, some final notes: * This match mangles the skbuff pkt_type in case that it detects PACKET_MULTICAST for a non-multicast address. This may be done in a PKTTYPE target for this sole purpose. * This match supersedes the CLUSTERIP target. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_cluster.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 include/linux/netfilter/xt_cluster.h (limited to 'include') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 947b47d7f6c..af9d2fb9721 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -21,6 +21,7 @@ header-y += xt_connbytes.h header-y += xt_connlimit.h header-y += xt_connmark.h header-y += xt_conntrack.h +header-y += xt_cluster.h header-y += xt_dccp.h header-y += xt_dscp.h header-y += xt_esp.h diff --git a/include/linux/netfilter/xt_cluster.h b/include/linux/netfilter/xt_cluster.h new file mode 100644 index 00000000000..5e0a0d07b52 --- /dev/null +++ b/include/linux/netfilter/xt_cluster.h @@ -0,0 +1,15 @@ +#ifndef _XT_CLUSTER_MATCH_H +#define _XT_CLUSTER_MATCH_H + +enum xt_cluster_flags { + XT_CLUSTER_F_INV = (1 << 0) +}; + +struct xt_cluster_match_info { + u_int32_t total_nodes; + u_int32_t node_mask; + u_int32_t hash_seed; + u_int32_t flags; +}; + +#endif /* _XT_CLUSTER_MATCH_H */ -- cgit v1.2.3 From 711d60a9e7f88e394ccca10f5fc83f95f0cea5b1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Mar 2009 17:30:50 +0100 Subject: netfilter: remove nf_ct_l4proto_find_get/nf_ct_l4proto_put users have been moved to __nf_ct_l4proto_find. Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_l4proto.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 16ab604659e..b01070bf2f8 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -98,11 +98,6 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; extern struct nf_conntrack_l4proto * __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto); -extern struct nf_conntrack_l4proto * -nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t protocol); - -extern void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p); - /* Protocol registration. */ extern int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *proto); extern void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *proto); -- cgit v1.2.3 From dd5b6ce6fd465eab90357711c8e8124dc3a31ff0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 23 Mar 2009 13:21:06 +0100 Subject: nefilter: nfnetlink: add nfnetlink_set_err and use it in ctnetlink This patch adds nfnetlink_set_err() to propagate the error to netlink broadcast listener in case of memory allocation errors in the message building. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 7d8e0455cca..135e5cfe68a 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -76,6 +76,7 @@ extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); extern int nfnetlink_has_listeners(unsigned int group); extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo); +extern void nfnetlink_set_err(u32 pid, u32 group, int error); extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags); extern void nfnl_lock(void); -- cgit v1.2.3