From 6f7edb4881bf51300060e89915926e070ace8c4d Mon Sep 17 00:00:00 2001 From: "Catalin(ux) M. BOIE" Date: Tue, 5 Jan 2010 05:50:24 +0100 Subject: IPVS: Allow boot time change of hash size I was very frustrated about the fact that I have to recompile the kernel to change the hash size. So, I created this patch. If IPVS is built-in you can append ip_vs.conn_tab_bits=?? to kernel command line, or, if you built IPVS as modules, you can add options ip_vs conn_tab_bits=??. To keep everything backward compatible, you still can select the size at compile time, and that will be used as default. It has been about a year since this patch was originally posted and subsequently dropped on the basis of insufficient test data. Mark Bergsma has provided the following test results which seem to strongly support the need for larger hash table sizes: We do however run into the same problem with the default setting (212 = 4096 entries), as most of our LVS balancers handle around a million connections/SLAB entries at any point in time (around 100-150 kpps load). With only 4096 hash table entries this implies that each entry consists of a linked list of 256 connections *on average*. To provide some statistics, I did an oprofile run on an 2.6.31 kernel, with both the default 4096 table size, and the same kernel recompiled with IP_VS_CONN_TAB_BITS set to 18 (218 = 262144 entries). I built a quick test setup with a part of Wikimedia/Wikipedia's live traffic mirrored by the switch to the test host. With the default setting, at ~ 120 kpps packet load we saw a typical %si CPU usage of around 30-35%, and oprofile reported a hot spot in ip_vs_conn_in_get: samples % image name app name symbol name 1719761 42.3741 ip_vs.ko ip_vs.ko ip_vs_conn_in_get 302577 7.4554 bnx2 bnx2 /bnx2 181984 4.4840 vmlinux vmlinux __ticket_spin_lock 128636 3.1695 vmlinux vmlinux ip_route_input 74345 1.8318 ip_vs.ko ip_vs.ko ip_vs_conn_out_get 68482 1.6874 vmlinux vmlinux mwait_idle After loading the recompiled kernel with 218 entries, %si CPU usage dropped in half to around 12-18%, and oprofile looks much healthier, with only 7% spent in ip_vs_conn_in_get: samples % image name app name symbol name 265641 14.4616 bnx2 bnx2 /bnx2 143251 7.7986 vmlinux vmlinux __ticket_spin_lock 140661 7.6576 ip_vs.ko ip_vs.ko ip_vs_conn_in_get 94364 5.1372 vmlinux vmlinux mwait_idle 86267 4.6964 vmlinux vmlinux ip_route_input [ horms@verge.net.au: trivial up-port and minor style fixes ] Signed-off-by: Catalin(ux) M. BOIE Cc: Mark Bergsma Signed-off-by: Simon Horman Signed-off-by: Patrick McHardy --- include/net/ip_vs.h | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 8dc3296b7be..a816c37417b 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -26,6 +26,11 @@ #include /* for struct ipv6hdr */ #include /* for ipv6_addr_copy */ + +/* Connections' size value needed by ip_vs_ctl.c */ +extern int ip_vs_conn_tab_size; + + struct ip_vs_iphdr { int len; __u8 protocol; @@ -592,17 +597,6 @@ extern void ip_vs_init_hash_table(struct list_head *table, int rows); * (from ip_vs_conn.c) */ -/* - * IPVS connection entry hash table - */ -#ifndef CONFIG_IP_VS_TAB_BITS -#define CONFIG_IP_VS_TAB_BITS 12 -#endif - -#define IP_VS_CONN_TAB_BITS CONFIG_IP_VS_TAB_BITS -#define IP_VS_CONN_TAB_SIZE (1 << IP_VS_CONN_TAB_BITS) -#define IP_VS_CONN_TAB_MASK (IP_VS_CONN_TAB_SIZE - 1) - enum { IP_VS_DIR_INPUT = 0, IP_VS_DIR_OUTPUT, -- cgit v1.2.3 From cd8c20b650f49354722b8cc1f03320b004815a0a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 13 Jan 2010 16:02:14 +0100 Subject: netfilter: nfnetlink: netns support Make nfnl socket per-petns. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/net_namespace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index f307e133d14..82b7be4db89 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -81,6 +81,8 @@ struct net { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; #endif + struct sock *nfnl; + struct sock *nfnl_stash; #endif #ifdef CONFIG_XFRM struct netns_xfrm xfrm; -- cgit v1.2.3 From e9d3897cc2205eec8b7afcc022e4730914b4f48c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 18 Jan 2010 08:08:37 +0100 Subject: netfilter: netns: #ifdef ->iptable_security, ->ip6table_security 'security' tables depend on SECURITY, so ifdef them. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netns/ipv4.h | 2 ++ include/net/netns/ipv6.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2eb3814d625..8098f6b8319 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -37,7 +37,9 @@ struct netns_ipv4 { struct xt_table *iptable_mangle; struct xt_table *iptable_raw; struct xt_table *arptable_filter; +#ifdef CONFIG_SECURITY struct xt_table *iptable_security; +#endif struct xt_table *nat_table; struct hlist_head *nat_bysource; int nat_vmalloced; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index dfeb2d7c425..1f11ebc2215 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -36,7 +36,9 @@ struct netns_ipv6 { struct xt_table *ip6table_filter; struct xt_table *ip6table_mangle; struct xt_table *ip6table_raw; +#ifdef CONFIG_SECURITY struct xt_table *ip6table_security; +#endif #endif struct rt6_info *ip6_null_entry; struct rt6_statistics *rt6_stats; -- cgit v1.2.3 From 7c070aa947d1a4105742378579c267f6e7fd08a1 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Wed, 20 Jan 2010 10:42:41 +0100 Subject: IPv6: reassembly: replace magic number with macro definitions Use macro to define high/low thresh value, refer to IPV6_FRAG_TIMEOUT. Signed-off-by: Shan Wei Acked-by: David S. Miller Signed-off-by: Patrick McHardy --- include/net/ipv6.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ccab5946c83..299bbf5adfb 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -246,6 +246,8 @@ extern int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb); int ip6_frag_nqueues(struct net *net); int ip6_frag_mem(struct net *net); +#define IPV6_FRAG_HIGH_THRESH 262144 /* == 256*1024 */ +#define IPV6_FRAG_LOW_THRESH 196608 /* == 192*1024 */ #define IPV6_FRAG_TIMEOUT (60*HZ) /* 60 seconds */ extern int __ipv6_addr_type(const struct in6_addr *addr); -- cgit v1.2.3 From 794e68716bab578ae8f8912dc934496d7c7abc90 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 13:41:29 +0100 Subject: netfilter: ctnetlink: only assign helpers for matching protocols Make sure not to assign a helper for a different network or transport layer protocol to a connection. Additionally change expectation deletion by helper to compare the name directly - there might be multiple helper registrations using the same name, currently one of them is chosen in an unpredictable manner and only those expectations are removed. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index d015de92e03..86be7c4816d 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -40,7 +40,7 @@ struct nf_conntrack_helper { }; extern struct nf_conntrack_helper * -__nf_conntrack_helper_find_byname(const char *name); +__nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum); extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); -- cgit v1.2.3 From 858b31330054a9ad259feceea0ad1ce5385c47f0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 13:48:53 +0100 Subject: netfilter: nf_conntrack: split up IPCT_STATUS event Split up the IPCT_STATUS event into an IPCT_REPLY event, which is generated when the IPS_SEEN_REPLY bit is set, and an IPCT_ASSURED event, which is generated when the IPS_ASSURED bit is set. In combination with a following patch to support selective event delivery, this can be used for "sparse" conntrack replication: start replicating the conntrack entry after it reached the ASSURED state and that way it's SYN-flood resistant. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_ecache.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 475facc3051..5e05fb883ab 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -14,19 +14,20 @@ /* Connection tracking event types */ enum ip_conntrack_events { - IPCT_NEW = 0, /* new conntrack */ - IPCT_RELATED = 1, /* related conntrack */ - IPCT_DESTROY = 2, /* destroyed conntrack */ - IPCT_STATUS = 3, /* status has changed */ - IPCT_PROTOINFO = 4, /* protocol information has changed */ - IPCT_HELPER = 5, /* new helper has been set */ - IPCT_MARK = 6, /* new mark has been set */ - IPCT_NATSEQADJ = 7, /* NAT is doing sequence adjustment */ - IPCT_SECMARK = 8, /* new security mark has been set */ + IPCT_NEW, /* new conntrack */ + IPCT_RELATED, /* related conntrack */ + IPCT_DESTROY, /* destroyed conntrack */ + IPCT_REPLY, /* connection has seen two-way traffic */ + IPCT_ASSURED, /* connection status has changed to assured */ + IPCT_PROTOINFO, /* protocol information has changed */ + IPCT_HELPER, /* new helper has been set */ + IPCT_MARK, /* new mark has been set */ + IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */ + IPCT_SECMARK, /* new security mark has been set */ }; enum ip_conntrack_expect_events { - IPEXP_NEW = 0, /* new expectation */ + IPEXP_NEW, /* new expectation */ }; struct nf_conntrack_ecache { -- cgit v1.2.3 From 0cebe4b4163b6373c9d24c1a192939777bc27e55 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 13:51:51 +0100 Subject: netfilter: ctnetlink: support selective event delivery Add two masks for conntrack end expectation events to struct nf_conntrack_ecache and use them to filter events. Their default value is "all events" when the event sysctl is on and "no events" when it is off. A following patch will add specific initializations. Expectation events depend on the ecache struct of their master conntrack. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_ecache.h | 59 ++++++++++++++--------------- 1 file changed, 28 insertions(+), 31 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 5e05fb883ab..96ba5f7dcab 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -12,28 +12,12 @@ #include #include -/* Connection tracking event types */ -enum ip_conntrack_events { - IPCT_NEW, /* new conntrack */ - IPCT_RELATED, /* related conntrack */ - IPCT_DESTROY, /* destroyed conntrack */ - IPCT_REPLY, /* connection has seen two-way traffic */ - IPCT_ASSURED, /* connection status has changed to assured */ - IPCT_PROTOINFO, /* protocol information has changed */ - IPCT_HELPER, /* new helper has been set */ - IPCT_MARK, /* new mark has been set */ - IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */ - IPCT_SECMARK, /* new security mark has been set */ -}; - -enum ip_conntrack_expect_events { - IPEXP_NEW, /* new expectation */ -}; - struct nf_conntrack_ecache { - unsigned long cache; /* bitops want long */ - unsigned long missed; /* missed events */ - u32 pid; /* netlink pid of destroyer */ + unsigned long cache; /* bitops want long */ + unsigned long missed; /* missed events */ + u16 ctmask; /* bitmask of ct events to be delivered */ + u16 expmask; /* bitmask of expect events to be delivered */ + u32 pid; /* netlink pid of destroyer */ }; static inline struct nf_conntrack_ecache * @@ -43,14 +27,24 @@ nf_ct_ecache_find(const struct nf_conn *ct) } static inline struct nf_conntrack_ecache * -nf_ct_ecache_ext_add(struct nf_conn *ct, gfp_t gfp) +nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) { struct net *net = nf_ct_net(ct); + struct nf_conntrack_ecache *e; - if (!net->ct.sysctl_events) + if (!ctmask && !expmask && net->ct.sysctl_events) { + ctmask = ~0; + expmask = ~0; + } + if (!ctmask && !expmask) return NULL; - return nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp); + e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp); + if (e) { + e->ctmask = ctmask; + e->expmask = expmask; + } + return e; }; #ifdef CONFIG_NF_CONNTRACK_EVENTS @@ -83,6 +77,9 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) if (e == NULL) return; + if (!(e->ctmask & (1 << event))) + return; + set_bit(event, &e->cache); } @@ -93,7 +90,6 @@ nf_conntrack_eventmask_report(unsigned int eventmask, int report) { int ret = 0; - struct net *net = nf_ct_net(ct); struct nf_ct_event_notifier *notify; struct nf_conntrack_ecache *e; @@ -102,9 +98,6 @@ nf_conntrack_eventmask_report(unsigned int eventmask, if (notify == NULL) goto out_unlock; - if (!net->ct.sysctl_events) - goto out_unlock; - e = nf_ct_ecache_find(ct); if (e == NULL) goto out_unlock; @@ -118,6 +111,9 @@ nf_conntrack_eventmask_report(unsigned int eventmask, /* This is a resent of a destroy event? If so, skip missed */ unsigned long missed = e->pid ? 0 : e->missed; + if (!((eventmask | missed) & e->ctmask)) + goto out_unlock; + ret = notify->fcn(eventmask | missed, &item); if (unlikely(ret < 0 || missed)) { spin_lock_bh(&ct->lock); @@ -173,18 +169,19 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event, u32 pid, int report) { - struct net *net = nf_ct_exp_net(exp); struct nf_exp_event_notifier *notify; + struct nf_conntrack_ecache *e; rcu_read_lock(); notify = rcu_dereference(nf_expect_event_cb); if (notify == NULL) goto out_unlock; - if (!net->ct.sysctl_events) + e = nf_ct_ecache_find(exp->master); + if (e == NULL) goto out_unlock; - { + if (e->expmask & (1 << event)) { struct nf_exp_event item = { .exp = exp, .pid = pid, -- cgit v1.2.3 From b2a15a604d379af323645e330638e2cfcc696aff Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 14:13:03 +0100 Subject: netfilter: nf_conntrack: support conntrack templates Support initializing selected parameters of new conntrack entries from a "conntrack template", which is a specially marked conntrack entry attached to the skb. Currently the helper and the event delivery masks can be initialized this way. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 5 +++++ include/net/netfilter/nf_conntrack_helper.h | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index a0904adfb8f..5043d61c99a 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -272,6 +272,11 @@ nf_conntrack_alloc(struct net *net, const struct nf_conntrack_tuple *repl, gfp_t gfp); +static inline int nf_ct_is_template(const struct nf_conn *ct) +{ + return test_bit(IPS_TEMPLATE_BIT, &ct->status); +} + /* It's confirmed if it is, or has been in the hash table. */ static inline int nf_ct_is_confirmed(struct nf_conn *ct) { diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 86be7c4816d..e17aaa3e19f 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -47,7 +47,8 @@ extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp); -extern int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags); +extern int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, + gfp_t flags); extern void nf_ct_helper_destroy(struct nf_conn *ct); -- cgit v1.2.3 From 84f3bb9ae9db90f7fb15d98b55279a58ab1b2363 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 17:17:06 +0100 Subject: netfilter: xtables: add CT target Add a new target for the raw table, which can be used to specify conntrack parameters for specific connections, f.i. the conntrack helper. The target attaches a "template" connection tracking entry to the skb, which is used by the conntrack core when initializing a new conntrack. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_helper.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index e17aaa3e19f..32c305dbdab 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -42,6 +42,9 @@ struct nf_conntrack_helper { extern struct nf_conntrack_helper * __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum); +extern struct nf_conntrack_helper * +nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum); + extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); -- cgit v1.2.3 From b87921bdf25485afd8f5a5f25e86b5acef32a9cf Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:22:48 +0100 Subject: netfilter: nf_conntrack: show helper and class in /proc/net/nf_conntrack_expect Make the output a bit more informative by showing the helper an expectation belongs to and the expectation class. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_expect.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 9a2b9cb5227..917e170fa75 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -66,6 +66,7 @@ static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) struct nf_conntrack_expect_policy { unsigned int max_expected; unsigned int timeout; + const char *name; }; #define NF_CT_EXPECT_CLASS_DEFAULT 0 -- cgit v1.2.3 From 010c0b9f34a4c567b431f8b49a58b7332ed42e47 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:27:09 +0100 Subject: netfilter: nf_nat: support mangling a single TCP packet multiple times nf_nat_mangle_tcp_packet() can currently only handle a single mangling per window because it only maintains two sequence adjustment positions: the one before the last adjustment and the one after. This patch makes sequence number adjustment tracking in nf_nat_mangle_tcp_packet() optional and allows a helper to manually update the offsets after the packet has been fully handled. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_nat_helper.h | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h index 4222220920a..02bb6c29dc3 100644 --- a/include/net/netfilter/nf_nat_helper.h +++ b/include/net/netfilter/nf_nat_helper.h @@ -7,13 +7,27 @@ struct sk_buff; /* These return true or false. */ -extern int nf_nat_mangle_tcp_packet(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len); +extern int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len, bool adjust); + +static inline int nf_nat_mangle_tcp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len) +{ + return __nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + match_offset, match_len, + rep_buffer, rep_len, true); +} + extern int nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -21,6 +35,10 @@ extern int nf_nat_mangle_udp_packet(struct sk_buff *skb, unsigned int match_len, const char *rep_buffer, unsigned int rep_len); + +extern void nf_nat_set_seq_adjust(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + __be32 seq, s16 off); extern int nf_nat_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo); -- cgit v1.2.3 From 9d288dffe3a276e1f06ba556845c456d696c5a4f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:30:21 +0100 Subject: netfilter: nf_conntrack_sip: add T.38 FAX support Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5043d61c99a..5b7d8835523 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -70,7 +70,7 @@ union nf_conntrack_help { struct nf_conntrack_helper; /* Must be kept in sync with the classes defined by helpers */ -#define NF_CT_MAX_EXPECT_CLASSES 3 +#define NF_CT_MAX_EXPECT_CLASSES 4 /* nf_conn feature for connections that have a helper */ struct nf_conn_help { -- cgit v1.2.3 From 857b409a48bdc33e824dff2d730e271b964e78bd Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 12 Feb 2010 06:24:46 +0100 Subject: netfilter: nf_conntrack: elegantly simplify nf_ct_exp_net() Remove #ifdef at nf_ct_exp_net() by using nf_ct_net(). Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_expect.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 917e170fa75..4b47ec19ef3 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -56,11 +56,7 @@ struct nf_conntrack_expect { static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) { -#ifdef CONFIG_NET_NS - return exp->master->ct_net; /* by definition */ -#else - return &init_net; -#endif + return nf_ct_net(exp->master); } struct nf_conntrack_expect_policy { -- cgit v1.2.3 From 8fea97ec1772bbf553d89187340ef624d548e115 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 15 Feb 2010 17:45:08 +0100 Subject: netfilter: nf_conntrack: pass template to l4proto ->error() handler The error handlers might need the template to get the conntrack zone introduced in the next patches to perform a conntrack lookup. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_l4proto.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index ca6dcf3445a..e3d3ee3c06a 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -49,8 +49,8 @@ struct nf_conntrack_l4proto { /* Called when a conntrack entry is destroyed */ void (*destroy)(struct nf_conn *ct); - int (*error)(struct net *net, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, + int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum); /* Print out the per-protocol part of the tuple. Return like seq_* */ -- cgit v1.2.3 From 5d0aa2ccd4699a01cfdf14886191c249d7b45a01 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 15 Feb 2010 18:13:33 +0100 Subject: netfilter: nf_conntrack: add support for "conntrack zones" Normally, each connection needs a unique identity. Conntrack zones allow to specify a numerical zone using the CT target, connections in different zones can use the same identity. Example: iptables -t raw -A PREROUTING -i veth0 -j CT --zone 1 iptables -t raw -A OUTPUT -o veth1 -j CT --zone 1 Signed-off-by: Patrick McHardy --- include/net/ip.h | 3 +++ include/net/ipv6.h | 3 +++ include/net/netfilter/nf_conntrack.h | 5 +++-- include/net/netfilter/nf_conntrack_core.h | 3 ++- include/net/netfilter/nf_conntrack_expect.h | 9 ++++++--- include/net/netfilter/nf_conntrack_extend.h | 2 ++ include/net/netfilter/nf_conntrack_zones.h | 23 +++++++++++++++++++++++ 7 files changed, 42 insertions(+), 6 deletions(-) create mode 100644 include/net/netfilter/nf_conntrack_zones.h (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index fb63371c07a..7bc47873e3f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -352,8 +352,11 @@ enum ip_defrag_users { IP_DEFRAG_LOCAL_DELIVER, IP_DEFRAG_CALL_RA_CHAIN, IP_DEFRAG_CONNTRACK_IN, + __IP_DEFRAG_CONNTRACK_IN_END = IP_DEFRAG_CONNTRACK_IN + USHORT_MAX, IP_DEFRAG_CONNTRACK_OUT, + __IP_DEFRAG_CONNTRACK_OUT_END = IP_DEFRAG_CONNTRACK_OUT + USHORT_MAX, IP_DEFRAG_CONNTRACK_BRIDGE_IN, + __IP_DEFRAG_CONNTRACK_BRIDGE_IN = IP_DEFRAG_CONNTRACK_BRIDGE_IN + USHORT_MAX, IP_DEFRAG_VS_IN, IP_DEFRAG_VS_OUT, IP_DEFRAG_VS_FWD diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 299bbf5adfb..639ec53ea08 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -355,8 +355,11 @@ struct inet_frag_queue; enum ip6_defrag_users { IP6_DEFRAG_LOCAL_DELIVER, IP6_DEFRAG_CONNTRACK_IN, + __IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHORT_MAX, IP6_DEFRAG_CONNTRACK_OUT, + __IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHORT_MAX, IP6_DEFRAG_CONNTRACK_BRIDGE_IN, + __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHORT_MAX, }; struct ip6_create_arg { diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5b7d8835523..bde095f7e84 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -198,7 +198,8 @@ extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int null extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size); extern struct nf_conntrack_tuple_hash * -__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple); +__nf_conntrack_find(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); extern void nf_conntrack_hash_insert(struct nf_conn *ct); extern void nf_ct_delete_from_lists(struct nf_conn *ct); @@ -267,7 +268,7 @@ extern void nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data); extern void nf_conntrack_free(struct nf_conn *ct); extern struct nf_conn * -nf_conntrack_alloc(struct net *net, +nf_conntrack_alloc(struct net *net, u16 zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp); diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 5a449b44ba3..dffde8e6920 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -49,7 +49,8 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, /* Find a connection corresponding to a tuple. */ extern struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple); +nf_conntrack_find_get(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); extern int __nf_conntrack_confirm(struct sk_buff *skb); diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 4b47ec19ef3..11e815084fc 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -74,13 +74,16 @@ int nf_conntrack_expect_init(struct net *net); void nf_conntrack_expect_fini(struct net *net); struct nf_conntrack_expect * -__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple); +__nf_ct_expect_find(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple); +nf_ct_expect_find_get(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple); +nf_ct_find_expectation(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); void nf_ct_unlink_expect(struct nf_conntrack_expect *exp); void nf_ct_remove_expectations(struct nf_conn *ct); diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index e192dc17c58..2d2a1f9a61d 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -8,6 +8,7 @@ enum nf_ct_ext_id { NF_CT_EXT_NAT, NF_CT_EXT_ACCT, NF_CT_EXT_ECACHE, + NF_CT_EXT_ZONE, NF_CT_EXT_NUM, }; @@ -15,6 +16,7 @@ enum nf_ct_ext_id { #define NF_CT_EXT_NAT_TYPE struct nf_conn_nat #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache +#define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h new file mode 100644 index 00000000000..0bbb2bd51e8 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -0,0 +1,23 @@ +#ifndef _NF_CONNTRACK_ZONES_H +#define _NF_CONNTRACK_ZONES_H + +#include + +#define NF_CT_DEFAULT_ZONE 0 + +struct nf_conntrack_zone { + u16 id; +}; + +static inline u16 nf_ct_zone(const struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_ZONES + struct nf_conntrack_zone *nf_ct_zone; + nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE); + if (nf_ct_zone) + return nf_ct_zone->id; +#endif + return NF_CT_DEFAULT_ZONE; +} + +#endif /* _NF_CONNTRACK_ZONES_H */ -- cgit v1.2.3