From 892c141e62982272b9c738b5520ad0e5e1ad7b42 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Fri, 4 Aug 2006 23:08:56 -0700 Subject: [MLSXFRM]: Add security sid to sock This adds security for IP sockets at the sock level. Security at the sock level is needed to enforce the SELinux security policy for security associations even when a sock is orphaned (such as in the TCP LAST_ACK state). This will also be used to enforce SELinux controls over data arriving at or leaving a child socket while it's still waiting to be accepted. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 6bc2aad494f..4d7fb59996b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -812,6 +812,8 @@ struct swap_info_struct; * which is used to copy security attributes between local stream sockets. * @sk_free_security: * Deallocate security structure. + * @sk_clone_security: + * Clone/copy security structure. * @sk_getsid: * Retrieve the LSM-specific sid for the sock to enable caching of network * authorizations. 
@@ -1332,6 +1334,7 @@ struct security_operations { int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid); int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority); void (*sk_free_security) (struct sock *sk); + void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir); #endif /* CONFIG_SECURITY_NETWORK */ @@ -2885,6 +2888,11 @@ static inline void security_sk_free(struct sock *sk) return security_ops->sk_free_security(sk); } +static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) +{ + return security_ops->sk_clone_security(sk, newsk); +} + static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) { return security_ops->sk_getsid(sk, fl, dir); @@ -3011,6 +3019,10 @@ static inline void security_sk_free(struct sock *sk) { } +static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) +{ +} + static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) { return 0; -- cgit v1.2.3 From e0d1caa7b0d5f02e4f34aa09c695d04251310c6c Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:29:07 -0700 Subject: [MLSXFRM]: Flow based matching of xfrm policy and state This implements a seamless mechanism for xfrm policy selection and state matching based on the flow sid. This also includes the necessary SELinux enforcement pieces. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. 
Miller --- include/linux/security.h | 106 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 90 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 4d7fb59996b..2c4921d79d1 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -31,6 +31,7 @@ #include #include #include +#include struct ctl_table; @@ -825,9 +826,8 @@ struct swap_info_struct; * used by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level policy update program (e.g., setkey). - * Allocate a security structure to the xp->security field. - * The security field is initialized to NULL when the xfrm_policy is - * allocated. + * Allocate a security structure to the xp->security field; the security + * field is initialized to NULL when the xfrm_policy is allocated. * Return 0 if operation was successful (memory to allocate, legal context) * @xfrm_policy_clone_security: * @old contains an existing xfrm_policy in the SPD. @@ -846,9 +846,14 @@ struct swap_info_struct; * Database by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level SA generation program (e.g., setkey or racoon). - * Allocate a security structure to the x->security field. The - * security field is initialized to NULL when the xfrm_state is - * allocated. + * @polsec contains the security context information associated with a xfrm + * policy rule from which to take the base context. polsec must be NULL + * when sec_ctx is specified. + * @secid contains the secid from which to take the mls portion of the context. + * Allocate a security structure to the x->security field; the security + * field is initialized to NULL when the xfrm_state is allocated. Set the + * context to correspond to either sec_ctx or polsec, with the mls portion + * taken from secid in the latter case. 
* Return 0 if operation was successful (memory to allocate, legal context). * @xfrm_state_free_security: * @x contains the xfrm_state. @@ -859,13 +864,26 @@ struct swap_info_struct; * @xfrm_policy_lookup: * @xp contains the xfrm_policy for which the access control is being * checked. - * @sk_sid contains the sock security label that is used to authorize + * @fl_secid contains the flow security label that is used to authorize * access to the policy xp. * @dir contains the direction of the flow (input or output). - * Check permission when a sock selects a xfrm_policy for processing + * Check permission when a flow selects a xfrm_policy for processing * XFRMs on a packet. The hook is called when selecting either a * per-socket policy or a generic xfrm policy. * Return 0 if permission is granted. + * @xfrm_state_pol_flow_match: + * @x contains the state to match. + * @xp contains the policy to check for a match. + * @fl contains the flow to check for a match. + * Return 1 if there is a match. + * @xfrm_flow_state_match: + * @fl contains the flow key to match. + * @xfrm points to the xfrm_state to match. + * Return 1 if there is a match. + * @xfrm_decode_session: + * @skb points to skb to decode. + * @fl points to the flow key to set. + * Return 0 if successful decoding. 
* * Security hooks affecting all Key Management operations * @@ -1343,10 +1361,16 @@ struct security_operations { int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new); void (*xfrm_policy_free_security) (struct xfrm_policy *xp); int (*xfrm_policy_delete_security) (struct xfrm_policy *xp); - int (*xfrm_state_alloc_security) (struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx); + int (*xfrm_state_alloc_security) (struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *polsec, + u32 secid); void (*xfrm_state_free_security) (struct xfrm_state *x); int (*xfrm_state_delete_security) (struct xfrm_state *x); - int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 sk_sid, u8 dir); + int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 fl_secid, u8 dir); + int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, + struct xfrm_policy *xp, struct flowi *fl); + int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm); + int (*xfrm_decode_session)(struct sk_buff *skb, struct flowi *fl); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ /* key management security hooks */ @@ -3050,9 +3074,18 @@ static inline int security_xfrm_policy_delete(struct xfrm_policy *xp) return security_ops->xfrm_policy_delete_security(xp); } -static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) +static inline int security_xfrm_state_alloc(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx) +{ + return security_ops->xfrm_state_alloc_security(x, sec_ctx, NULL, 0); +} + +static inline int security_xfrm_state_alloc_acquire(struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, u32 secid) { - return security_ops->xfrm_state_alloc_security(x, sec_ctx); + if (!polsec) + return 0; + return security_ops->xfrm_state_alloc_security(x, NULL, polsec, secid); } static inline int security_xfrm_state_delete(struct xfrm_state *x) @@ -3065,9 +3098,25 @@ static inline void security_xfrm_state_free(struct 
xfrm_state *x) security_ops->xfrm_state_free_security(x); } -static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) +static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) +{ + return security_ops->xfrm_policy_lookup(xp, fl_secid, dir); +} + +static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, + struct xfrm_policy *xp, struct flowi *fl) +{ + return security_ops->xfrm_state_pol_flow_match(x, xp, fl); +} + +static inline int security_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) +{ + return security_ops->xfrm_flow_state_match(fl, xfrm); +} + +static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) { - return security_ops->xfrm_policy_lookup(xp, sk_sid, dir); + return security_ops->xfrm_decode_session(skb, fl); } #else /* CONFIG_SECURITY_NETWORK_XFRM */ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) @@ -3089,7 +3138,14 @@ static inline int security_xfrm_policy_delete(struct xfrm_policy *xp) return 0; } -static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) +static inline int security_xfrm_state_alloc(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx) +{ + return 0; +} + +static inline int security_xfrm_state_alloc_acquire(struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, u32 secid) { return 0; } @@ -3103,10 +3159,28 @@ static inline int security_xfrm_state_delete(struct xfrm_state *x) return 0; } -static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) +static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) { return 0; } + +static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, + struct xfrm_policy *xp, struct flowi *fl) +{ + return 1; +} + +static inline int security_xfrm_flow_state_match(struct flowi *fl, + struct 
xfrm_state *xfrm) +{ + return 1; +} + +static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + return 0; +} + #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_KEYS -- cgit v1.2.3 From beb8d13bed80f8388f1a9a107d07ddd342e627e8 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Fri, 4 Aug 2006 23:12:42 -0700 Subject: [MLSXFRM]: Add flow labeling This labels the flows that could utilize IPSec xfrms at the points the flows are defined so that IPSec policy and SAs at the right label can be used. The following protos are currently not handled, but they should continue to be able to use single-labeled IPSec like they currently do. ipmr ip_gre ipip igmp sit sctp ip6_tunnel (IPv6 over IPv6 tunnel device) decnet Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 2c4921d79d1..f3909d189fe 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -32,6 +32,7 @@ #include #include #include +#include struct ctl_table; @@ -815,8 +816,8 @@ struct swap_info_struct; * Deallocate security structure. * @sk_clone_security: * Clone/copy security structure. - * @sk_getsid: - * Retrieve the LSM-specific sid for the sock to enable caching of network + * @sk_getsecid: + * Retrieve the LSM-specific secid for the sock to enable caching of network * authorizations. * * Security hooks for XFRM operations. @@ -882,8 +883,9 @@ struct swap_info_struct; * Return 1 if there is a match. * @xfrm_decode_session: * @skb points to skb to decode. - * @fl points to the flow key to set. - * Return 0 if successful decoding. + * @secid points to the flow key secid to set. + * @ckall says if all xfrms used should be checked for same secid. + * Return 0 if ckall is zero or all xfrms used have the same secid. 
* * Security hooks affecting all Key Management operations * @@ -1353,7 +1355,7 @@ struct security_operations { int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority); void (*sk_free_security) (struct sock *sk); void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); - unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir); + void (*sk_getsecid) (struct sock *sk, u32 *secid); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -1370,7 +1372,7 @@ struct security_operations { int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm); - int (*xfrm_decode_session)(struct sk_buff *skb, struct flowi *fl); + int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ /* key management security hooks */ @@ -2917,9 +2919,9 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) return security_ops->sk_clone_security(sk, newsk); } -static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) +static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { - return security_ops->sk_getsid(sk, fl, dir); + security_ops->sk_getsecid(sk, &fl->secid); } #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct socket * sock, @@ -3047,9 +3049,8 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) { } -static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) +static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { - return 0; } #endif /* CONFIG_SECURITY_NETWORK */ @@ -3114,9 +3115,16 @@ static inline int security_xfrm_flow_state_match(struct flowi *fl, struct xfrm_s return security_ops->xfrm_flow_state_match(fl, xfrm); } -static inline int 
security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) +{ + return security_ops->xfrm_decode_session(skb, secid, 1); +} + +static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) { - return security_ops->xfrm_decode_session(skb, fl); + int rc = security_ops->xfrm_decode_session(skb, &fl->secid, 0); + + BUG_ON(rc); } #else /* CONFIG_SECURITY_NETWORK_XFRM */ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) @@ -3176,11 +3184,15 @@ static inline int security_xfrm_flow_state_match(struct flowi *fl, return 1; } -static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) { return 0; } +static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) +{ +} + #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_KEYS -- cgit v1.2.3 From cb969f072b6d67770b559617f14e767f47e77ece Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:32:20 -0700 Subject: [MLSXFRM]: Default labeling of socket specific IPSec policies This defaults the label of socket-specific IPSec policies to be the same as the socket they are set on. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index f3909d189fe..8e3dc6c51a6 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -827,8 +827,10 @@ struct swap_info_struct; * used by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level policy update program (e.g., setkey). + * @sk refers to the sock from which to derive the security context. 
* Allocate a security structure to the xp->security field; the security - * field is initialized to NULL when the xfrm_policy is allocated. + * field is initialized to NULL when the xfrm_policy is allocated. Only + * one of sec_ctx or sock can be specified. * Return 0 if operation was successful (memory to allocate, legal context) * @xfrm_policy_clone_security: * @old contains an existing xfrm_policy in the SPD. @@ -1359,7 +1361,8 @@ struct security_operations { #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM - int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx); + int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, + struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk); int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new); void (*xfrm_policy_free_security) (struct xfrm_policy *xp); int (*xfrm_policy_delete_security) (struct xfrm_policy *xp); @@ -3057,7 +3060,12 @@ static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) { - return security_ops->xfrm_policy_alloc_security(xp, sec_ctx); + return security_ops->xfrm_policy_alloc_security(xp, sec_ctx, NULL); +} + +static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk) +{ + return security_ops->xfrm_policy_alloc_security(xp, NULL, sk); } static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) @@ -3132,6 +3140,11 @@ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm return 0; } +static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk) +{ + return 0; +} + static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) { return 0; -- cgit v1.2.3 From 4237c75c0a35535d7f9f2bfeeb4b4df1e068a0bf 
Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:32:50 -0700 Subject: [MLSXFRM]: Auto-labeling of child sockets This automatically labels the TCP, Unix stream, and dccp child sockets as well as openreqs to be at the same MLS level as the peer. This will result in the selection of appropriately labeled IPSec Security Associations. This also uses the sock's sid (as opposed to the isec sid) in SELinux enforcement of secmark in rcv_skb and postroute_last hooks. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 8e3dc6c51a6..bb4c80fdfe7 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -90,6 +90,7 @@ extern int cap_netlink_recv(struct sk_buff *skb, int cap); struct nfsctl_arg; struct sched_param; struct swap_info_struct; +struct request_sock; /* bprm_apply_creds unsafe reasons */ #define LSM_UNSAFE_SHARE 1 @@ -819,6 +820,14 @@ struct swap_info_struct; * @sk_getsecid: * Retrieve the LSM-specific secid for the sock to enable caching of network * authorizations. + * @sock_graft: + * Sets the socket's isec sid to the sock's sid. + * @inet_conn_request: + * Sets the openreq's sid to socket's sid with MLS portion taken from peer sid. + * @inet_csk_clone: + * Sets the new child socket's sid to the openreq sid. + * @req_classify_flow: + * Sets the flow's sid to the openreq sid. * * Security hooks for XFRM operations. 
* @@ -1358,6 +1367,11 @@ struct security_operations { void (*sk_free_security) (struct sock *sk); void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); void (*sk_getsecid) (struct sock *sk, u32 *secid); + void (*sock_graft)(struct sock* sk, struct socket *parent); + int (*inet_conn_request)(struct sock *sk, struct sk_buff *skb, + struct request_sock *req); + void (*inet_csk_clone)(struct sock *newsk, const struct request_sock *req); + void (*req_classify_flow)(const struct request_sock *req, struct flowi *fl); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -2926,6 +2940,28 @@ static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { security_ops->sk_getsecid(sk, &fl->secid); } + +static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) +{ + security_ops->req_classify_flow(req, fl); +} + +static inline void security_sock_graft(struct sock* sk, struct socket *parent) +{ + security_ops->sock_graft(sk, parent); +} + +static inline int security_inet_conn_request(struct sock *sk, + struct sk_buff *skb, struct request_sock *req) +{ + return security_ops->inet_conn_request(sk, skb, req); +} + +static inline void security_inet_csk_clone(struct sock *newsk, + const struct request_sock *req) +{ + security_ops->inet_csk_clone(newsk, req); +} #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct socket * sock, struct socket * other, @@ -3055,6 +3091,25 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { } + +static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) +{ +} + +static inline void security_sock_graft(struct sock* sk, struct socket *parent) +{ +} + +static inline int security_inet_conn_request(struct sock *sk, + struct sk_buff *skb, struct request_sock *req) +{ + return 0; +} 
+ +static inline void security_inet_csk_clone(struct sock *newsk, + const struct request_sock *req) +{ +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM -- cgit v1.2.3 From 11a03f78fbf15a866ba3bf6359a75cdfd1ced703 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 3 Aug 2006 16:46:20 -0700 Subject: [NetLabel]: core network changes Changes to the core network stack to support the NetLabel subsystem. This includes changes to the IPv4 option handling to support CIPSO labels. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/linux/ip.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ip.h b/include/linux/ip.h index 4b55cf1df73..2f4600146f8 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -57,6 +57,7 @@ #define IPOPT_SEC (2 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_LSRR (3 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_TIMESTAMP (4 |IPOPT_MEASUREMENT) +#define IPOPT_CIPSO (6 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_RR (7 |IPOPT_CONTROL) #define IPOPT_SID (8 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_SSRR (9 |IPOPT_CONTROL|IPOPT_COPY) -- cgit v1.2.3 From 446fda4f26822b2d42ab3396aafcedf38a9ff2b6 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 3 Aug 2006 16:48:06 -0700 Subject: [NetLabel]: CIPSOv4 engine Add support for the Commercial IP Security Option (CIPSO) to the IPv4 network stack. CIPSO has become a de-facto standard for trusted/labeled networking amongst existing Trusted Operating Systems such as Trusted Solaris, HP-UX CMW, etc. This implementation is designed to be used with the NetLabel subsystem to provide explicit packet labeling to LSM developers. The CIPSO/IPv4 packet labeling works by the LSM calling a NetLabel API function which attaches a CIPSO label (IPv4 option) to a given socket; this in turn attaches the CIPSO label to every packet leaving the socket without any extra processing on the outbound side. 
On the inbound side the individual packet's sk_buff is examined through a call to a NetLabel API function to determine if a CIPSO/IPv4 label is present and if so the security attributes of the CIPSO label are returned to the caller of the NetLabel API function. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/linux/sysctl.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e4b1a4d4dcf..af61d923540 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -411,6 +411,10 @@ enum NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115, NET_TCP_DMA_COPYBREAK=116, NET_TCP_SLOW_START_AFTER_IDLE=117, + NET_CIPSOV4_CACHE_ENABLE=118, + NET_CIPSOV4_CACHE_BUCKET_SIZE=119, + NET_CIPSOV4_RBM_OPTFMT=120, + NET_CIPSOV4_RBM_STRICTVALID=121, }; enum { -- cgit v1.2.3 From 7420ed23a4f77480b5b7b3245e5da30dd24b7575 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Fri, 4 Aug 2006 23:17:57 -0700 Subject: [NetLabel]: SELinux support Add NetLabel support to the SELinux LSM and modify the socket_post_create() LSM hook to return an error code. The most significant part of this patch is the addition of NetLabel hooks into the following SELinux LSM hooks: * selinux_file_permission() * selinux_socket_sendmsg() * selinux_socket_post_create() * selinux_socket_sock_rcv_skb() * selinux_socket_getpeersec_stream() * selinux_socket_getpeersec_dgram() * selinux_sock_graft() * selinux_inet_conn_request() The basic reasoning behind this patch is that outgoing packets are "NetLabel'd" by labeling their socket and the NetLabel security attributes are checked via the additional hook in selinux_socket_sock_rcv_skb(). NetLabel itself is only a labeling mechanism, similar to filesystem extended attributes, it is up to the SELinux enforcement mechanism to perform the actual access checks. 
In addition to the changes outlined above this patch also includes some changes to the extended bitmap (ebitmap) and multi-level security (mls) code to import and export SELinux TE/MLS attributes into and out of NetLabel. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/linux/security.h | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index bb4c80fdfe7..9f56fb8a4a6 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1341,8 +1341,8 @@ struct security_operations { int (*unix_may_send) (struct socket * sock, struct socket * other); int (*socket_create) (int family, int type, int protocol, int kern); - void (*socket_post_create) (struct socket * sock, int family, - int type, int protocol, int kern); + int (*socket_post_create) (struct socket * sock, int family, + int type, int protocol, int kern); int (*socket_bind) (struct socket * sock, struct sockaddr * address, int addrlen); int (*socket_connect) (struct socket * sock, @@ -2824,13 +2824,13 @@ static inline int security_socket_create (int family, int type, return security_ops->socket_create(family, type, protocol, kern); } -static inline void security_socket_post_create(struct socket * sock, - int family, - int type, - int protocol, int kern) +static inline int security_socket_post_create(struct socket * sock, + int family, + int type, + int protocol, int kern) { - security_ops->socket_post_create(sock, family, type, - protocol, kern); + return security_ops->socket_post_create(sock, family, type, + protocol, kern); } static inline int security_socket_bind(struct socket * sock, @@ -2982,11 +2982,12 @@ static inline int security_socket_create (int family, int type, return 0; } -static inline void security_socket_post_create(struct socket * sock, - int family, - int type, - int protocol, int kern) +static inline int security_socket_post_create(struct 
socket * sock, + int family, + int type, + int protocol, int kern) { + return 0; } static inline int security_socket_bind(struct socket * sock, -- cgit v1.2.3 From 14c0b97ddfc2944982d078b8e33b088840068976 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 03:38:38 -0700 Subject: [NET]: Protocol Independent Policy Routing Rules Framework Derived from net/ipv/fib_rules.c Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/fib_rules.h | 60 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 include/linux/fib_rules.h (limited to 'include/linux') diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h new file mode 100644 index 00000000000..5e503f0ca6e --- /dev/null +++ b/include/linux/fib_rules.h @@ -0,0 +1,60 @@ +#ifndef __LINUX_FIB_RULES_H +#define __LINUX_FIB_RULES_H + +#include +#include + +/* rule is permanent, and cannot be deleted */ +#define FIB_RULE_PERMANENT 1 + +struct fib_rule_hdr +{ + __u8 family; + __u8 dst_len; + __u8 src_len; + __u8 tos; + + __u8 table; + __u8 res1; /* reserved */ + __u8 res2; /* reserved */ + __u8 action; + + __u32 flags; +}; + +enum +{ + FRA_UNSPEC, + FRA_DST, /* destination address */ + FRA_SRC, /* source address */ + FRA_IFNAME, /* interface name */ + FRA_UNUSED1, + FRA_UNUSED2, + FRA_PRIORITY, /* priority/preference */ + FRA_UNUSED3, + FRA_UNUSED4, + FRA_UNUSED5, + FRA_FWMARK, /* netfilter mark (IPv4) */ + FRA_FLOW, /* flow/class id */ + __FRA_MAX +}; + +#define FRA_MAX (__FRA_MAX - 1) + +enum +{ + FR_ACT_UNSPEC, + FR_ACT_TO_TBL, /* Pass to fixed table */ + FR_ACT_RES1, + FR_ACT_RES2, + FR_ACT_RES3, + FR_ACT_RES4, + FR_ACT_BLACKHOLE, /* Drop without notification */ + FR_ACT_UNREACHABLE, /* Drop with ENETUNREACH */ + FR_ACT_PROHIBIT, /* Drop with EACCES */ + __FR_ACT_MAX, +}; + +#define FR_ACT_MAX (__FR_ACT_MAX - 1) + +#endif -- cgit v1.2.3 From 101367c2f8c464ea96643192673aa18d88e6336d Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: 
Fri, 4 Aug 2006 03:39:02 -0700 Subject: [IPV6]: Policy Routing Rules Adds support for policy routing rules including a new local table for routes with a local destination. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index facd9ee37b7..bf353538ae9 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -889,6 +889,8 @@ enum rtnetlink_groups { RTNLGRP_NOP4, RTNLGRP_IPV6_PREFIX, #define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX + RTNLGRP_IPV6_RULE, +#define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) -- cgit v1.2.3 From 1823730fbc89fadde72a7bb3b7bdf03cc7b8835c Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 23:04:54 -0700 Subject: [IPv4]: Move interface address bits to linux/if_addr.h Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/if_addr.h | 53 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/rtnetlink.h | 56 ----------------------------------------------- 2 files changed, 53 insertions(+), 56 deletions(-) create mode 100644 include/linux/if_addr.h (limited to 'include/linux') diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h new file mode 100644 index 00000000000..e1590454db5 --- /dev/null +++ b/include/linux/if_addr.h @@ -0,0 +1,53 @@ +#ifndef __LINUX_IF_ADDR_H +#define __LINUX_IF_ADDR_H + +#include + +struct ifaddrmsg +{ + __u8 ifa_family; + __u8 ifa_prefixlen; /* The prefix length */ + __u8 ifa_flags; /* Flags */ + __u8 ifa_scope; /* Address scope */ + __u32 ifa_index; /* Link index */ +}; + +/* + * Important comment: + * IFA_ADDRESS is prefix address, rather than local interface address. 
+ * It makes no difference for normally configured broadcast interfaces, + * but for point-to-point IFA_ADDRESS is DESTINATION address, + * local address is supplied in IFA_LOCAL attribute. + */ +enum +{ + IFA_UNSPEC, + IFA_ADDRESS, + IFA_LOCAL, + IFA_LABEL, + IFA_BROADCAST, + IFA_ANYCAST, + IFA_CACHEINFO, + IFA_MULTICAST, + __IFA_MAX, +}; + +#define IFA_MAX (__IFA_MAX - 1) + +/* ifa_flags */ +#define IFA_F_SECONDARY 0x01 +#define IFA_F_TEMPORARY IFA_F_SECONDARY + +#define IFA_F_DEPRECATED 0x20 +#define IFA_F_TENTATIVE 0x40 +#define IFA_F_PERMANENT 0x80 + +struct ifa_cacheinfo +{ + __u32 ifa_prefered; + __u32 ifa_valid; + __u32 cstamp; /* created timestamp, hundredths of seconds */ + __u32 tstamp; /* updated timestamp, hundredths of seconds */ +}; + +#endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index bf353538ae9..890c4d4038b 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -384,62 +384,6 @@ struct rta_session }; -/********************************************************* - * Interface address. 
- ****/ - -struct ifaddrmsg -{ - unsigned char ifa_family; - unsigned char ifa_prefixlen; /* The prefix length */ - unsigned char ifa_flags; /* Flags */ - unsigned char ifa_scope; /* See above */ - int ifa_index; /* Link index */ -}; - -enum -{ - IFA_UNSPEC, - IFA_ADDRESS, - IFA_LOCAL, - IFA_LABEL, - IFA_BROADCAST, - IFA_ANYCAST, - IFA_CACHEINFO, - IFA_MULTICAST, - __IFA_MAX -}; - -#define IFA_MAX (__IFA_MAX - 1) - -/* ifa_flags */ - -#define IFA_F_SECONDARY 0x01 -#define IFA_F_TEMPORARY IFA_F_SECONDARY - -#define IFA_F_DEPRECATED 0x20 -#define IFA_F_TENTATIVE 0x40 -#define IFA_F_PERMANENT 0x80 - -struct ifa_cacheinfo -{ - __u32 ifa_prefered; - __u32 ifa_valid; - __u32 cstamp; /* created timestamp, hundredths of seconds */ - __u32 tstamp; /* updated timestamp, hundredths of seconds */ -}; - - -#define IFA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg)))) -#define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg)) - -/* - Important comment: - IFA_ADDRESS is prefix address, rather than local interface address. - It makes no difference for normally configured broadcast interfaces, - but for point-to-point IFA_ADDRESS is DESTINATION address, - local address is supplied in IFA_LOCAL attribute. - */ /************************************************************** * Neighbour discovery. -- cgit v1.2.3 From 0844565fb8a9418f5a860aa480c1aef70319c9a2 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 23:05:56 -0700 Subject: [NET]: Move netlink interface bits to linux/if.h Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/linux/if.h | 129 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/rtnetlink.h | 133 +--------------------------------------------- 2 files changed, 130 insertions(+), 132 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if.h b/include/linux/if.h index 374e20ad8b0..cd080d76532 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -212,5 +212,134 @@ struct ifconf #define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ #define ifc_req ifc_ifcu.ifcu_req /* array of structures */ +/* The struct should be in sync with struct net_device_stats */ +struct rtnl_link_stats +{ + __u32 rx_packets; /* total packets received */ + __u32 tx_packets; /* total packets transmitted */ + __u32 rx_bytes; /* total bytes received */ + __u32 tx_bytes; /* total bytes transmitted */ + __u32 rx_errors; /* bad packets received */ + __u32 tx_errors; /* packet transmit problems */ + __u32 rx_dropped; /* no space in linux buffers */ + __u32 tx_dropped; /* no space available in linux */ + __u32 multicast; /* multicast packets received */ + __u32 collisions; + + /* detailed rx_errors: */ + __u32 rx_length_errors; + __u32 rx_over_errors; /* receiver ring buff overflow */ + __u32 rx_crc_errors; /* recved pkt with crc error */ + __u32 rx_frame_errors; /* recv'd frame alignment error */ + __u32 rx_fifo_errors; /* recv'r fifo overrun */ + __u32 rx_missed_errors; /* receiver missed packet */ + + /* detailed tx_errors */ + __u32 tx_aborted_errors; + __u32 tx_carrier_errors; + __u32 tx_fifo_errors; + __u32 tx_heartbeat_errors; + __u32 tx_window_errors; + + /* for cslip etc */ + __u32 rx_compressed; + __u32 tx_compressed; +}; + +/* The struct should be in sync with struct ifmap */ +struct rtnl_link_ifmap +{ + __u64 mem_start; + __u64 mem_end; + __u64 base_addr; + __u16 irq; + __u8 dma; + __u8 port; +}; + +enum +{ + IFLA_UNSPEC, + IFLA_ADDRESS, + IFLA_BROADCAST, + IFLA_IFNAME, + IFLA_MTU, + IFLA_LINK, + IFLA_QDISC, + IFLA_STATS, + IFLA_COST, +#define 
IFLA_COST IFLA_COST + IFLA_PRIORITY, +#define IFLA_PRIORITY IFLA_PRIORITY + IFLA_MASTER, +#define IFLA_MASTER IFLA_MASTER + IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */ +#define IFLA_WIRELESS IFLA_WIRELESS + IFLA_PROTINFO, /* Protocol specific information for a link */ +#define IFLA_PROTINFO IFLA_PROTINFO + IFLA_TXQLEN, +#define IFLA_TXQLEN IFLA_TXQLEN + IFLA_MAP, +#define IFLA_MAP IFLA_MAP + IFLA_WEIGHT, +#define IFLA_WEIGHT IFLA_WEIGHT + IFLA_OPERSTATE, + IFLA_LINKMODE, + __IFLA_MAX +}; + + +#define IFLA_MAX (__IFLA_MAX - 1) + +/* ifi_flags. + + IFF_* flags. + + The only change is: + IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are + more not changeable by user. They describe link media + characteristics and set by device driver. + + Comments: + - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid + - If neither of these three flags are set; + the interface is NBMA. + + - IFF_MULTICAST does not mean anything special: + multicasts can be used on all not-NBMA links. + IFF_MULTICAST means that this media uses special encapsulation + for multicast frames. Apparently, all IFF_POINTOPOINT and + IFF_BROADCAST devices are able to use multicasts too. + */ + +/* IFLA_LINK. + For usual devices it is equal ifi_index. + If it is a "virtual interface" (f.e. tunnel), ifi_link + can point to real physical interface (f.e. for bandwidth calculations), + or maybe 0, what means, that real media is unknown (usual + for IPIP tunnels, when route to endpoint is allowed to change) + */ + +/* Subtype attributes for IFLA_PROTINFO */ +enum +{ + IFLA_INET6_UNSPEC, + IFLA_INET6_FLAGS, /* link flags */ + IFLA_INET6_CONF, /* sysctl parameters */ + IFLA_INET6_STATS, /* statistics */ + IFLA_INET6_MCAST, /* MC things. What of them? 
*/ + IFLA_INET6_CACHEINFO, /* time values and max reasm size */ + __IFLA_INET6_MAX +}; + +#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) + +struct ifla_cacheinfo +{ + __u32 max_reasm_len; + __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ + __u32 reachable_time; + __u32 retrans_time; +}; #endif /* _LINUX_IF_H */ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 890c4d4038b..84f3eb426da 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -2,6 +2,7 @@ #define __LINUX_RTNETLINK_H #include +#include /**** * Routing/neighbour discovery messages. @@ -607,138 +608,6 @@ struct prefix_cacheinfo __u32 valid_time; }; -/* The struct should be in sync with struct net_device_stats */ -struct rtnl_link_stats -{ - __u32 rx_packets; /* total packets received */ - __u32 tx_packets; /* total packets transmitted */ - __u32 rx_bytes; /* total bytes received */ - __u32 tx_bytes; /* total bytes transmitted */ - __u32 rx_errors; /* bad packets received */ - __u32 tx_errors; /* packet transmit problems */ - __u32 rx_dropped; /* no space in linux buffers */ - __u32 tx_dropped; /* no space available in linux */ - __u32 multicast; /* multicast packets received */ - __u32 collisions; - - /* detailed rx_errors: */ - __u32 rx_length_errors; - __u32 rx_over_errors; /* receiver ring buff overflow */ - __u32 rx_crc_errors; /* recved pkt with crc error */ - __u32 rx_frame_errors; /* recv'd frame alignment error */ - __u32 rx_fifo_errors; /* recv'r fifo overrun */ - __u32 rx_missed_errors; /* receiver missed packet */ - - /* detailed tx_errors */ - __u32 tx_aborted_errors; - __u32 tx_carrier_errors; - __u32 tx_fifo_errors; - __u32 tx_heartbeat_errors; - __u32 tx_window_errors; - - /* for cslip etc */ - __u32 rx_compressed; - __u32 tx_compressed; -}; - -/* The struct should be in sync with struct ifmap */ -struct rtnl_link_ifmap -{ - __u64 mem_start; - __u64 mem_end; - __u64 base_addr; - __u16 irq; - __u8 dma; - __u8 port; -}; - -enum -{ - 
IFLA_UNSPEC, - IFLA_ADDRESS, - IFLA_BROADCAST, - IFLA_IFNAME, - IFLA_MTU, - IFLA_LINK, - IFLA_QDISC, - IFLA_STATS, - IFLA_COST, -#define IFLA_COST IFLA_COST - IFLA_PRIORITY, -#define IFLA_PRIORITY IFLA_PRIORITY - IFLA_MASTER, -#define IFLA_MASTER IFLA_MASTER - IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */ -#define IFLA_WIRELESS IFLA_WIRELESS - IFLA_PROTINFO, /* Protocol specific information for a link */ -#define IFLA_PROTINFO IFLA_PROTINFO - IFLA_TXQLEN, -#define IFLA_TXQLEN IFLA_TXQLEN - IFLA_MAP, -#define IFLA_MAP IFLA_MAP - IFLA_WEIGHT, -#define IFLA_WEIGHT IFLA_WEIGHT - IFLA_OPERSTATE, - IFLA_LINKMODE, - __IFLA_MAX -}; - - -#define IFLA_MAX (__IFLA_MAX - 1) - -#define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) -#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg)) - -/* ifi_flags. - - IFF_* flags. - - The only change is: - IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are - more not changeable by user. They describe link media - characteristics and set by device driver. - - Comments: - - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid - - If neither of these three flags are set; - the interface is NBMA. - - - IFF_MULTICAST does not mean anything special: - multicasts can be used on all not-NBMA links. - IFF_MULTICAST means that this media uses special encapsulation - for multicast frames. Apparently, all IFF_POINTOPOINT and - IFF_BROADCAST devices are able to use multicasts too. - */ - -/* IFLA_LINK. - For usual devices it is equal ifi_index. - If it is a "virtual interface" (f.e. tunnel), ifi_link - can point to real physical interface (f.e. 
for bandwidth calculations), - or maybe 0, what means, that real media is unknown (usual - for IPIP tunnels, when route to endpoint is allowed to change) - */ - -/* Subtype attributes for IFLA_PROTINFO */ -enum -{ - IFLA_INET6_UNSPEC, - IFLA_INET6_FLAGS, /* link flags */ - IFLA_INET6_CONF, /* sysctl parameters */ - IFLA_INET6_STATS, /* statistics */ - IFLA_INET6_MCAST, /* MC things. What of them? */ - IFLA_INET6_CACHEINFO, /* time values and max reasm size */ - __IFLA_INET6_MAX -}; - -#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) - -struct ifla_cacheinfo -{ - __u32 max_reasm_len; - __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ - __u32 reachable_time; - __u32 retrans_time; -}; /***************************************************************** * Traffic control messages. -- cgit v1.2.3 From 84fa7933a33f806bbbaae6775e87459b1ec584c0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 29 Aug 2006 16:44:56 -0700 Subject: [NET]: Replace CHECKSUM_HW by CHECKSUM_PARTIAL/CHECKSUM_COMPLETE Replace CHECKSUM_HW by CHECKSUM_PARTIAL (for outgoing packets, whose checksum still needs to be completed) and CHECKSUM_COMPLETE (for incoming packets, device supplied full checksum). Patch originally from Herbert Xu, updated by myself for 2.6.18-rc3. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 ++-- include/linux/skbuff.h | 17 +++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 50a4719512e..4f2c2b6beb5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -976,7 +976,7 @@ extern void dev_mcast_init(void); extern int netdev_max_backlog; extern int weight_p; extern int netdev_set_master(struct net_device *dev, struct net_device *master); -extern int skb_checksum_help(struct sk_buff *skb, int inward); +extern int skb_checksum_help(struct sk_buff *skb); extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features); #ifdef CONFIG_BUG extern void netdev_rx_csum_fault(struct net_device *dev); @@ -1012,7 +1012,7 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) { return skb_is_gso(skb) && (!skb_gso_ok(skb, dev->features) || - unlikely(skb->ip_summed != CHECKSUM_HW)); + unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); } /* On bonding slaves other than the currently active slave, suppress diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 755e9cddac4..85577a4ffa6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -34,8 +34,9 @@ #define HAVE_ALIGNABLE_SKB /* Ditto 8) */ #define CHECKSUM_NONE 0 -#define CHECKSUM_HW 1 +#define CHECKSUM_PARTIAL 1 #define CHECKSUM_UNNECESSARY 2 +#define CHECKSUM_COMPLETE 3 #define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \ ~(SMP_CACHE_BYTES - 1)) @@ -56,17 +57,17 @@ * Apparently with secret goal to sell you new device, when you * will add new protocol to your host. F.e. IPv6. 8) * - * HW: the most generic way. Device supplied checksum of _all_ + * COMPLETE: the most generic way. Device supplied checksum of _all_ * the packet as seen by netif_rx in skb->csum. 
* NOTE: Even if device supports only some protocols, but - * is able to produce some skb->csum, it MUST use HW, + * is able to produce some skb->csum, it MUST use COMPLETE, * not UNNECESSARY. * * B. Checksumming on output. * * NONE: skb is checksummed by protocol or csum is not required. * - * HW: device is required to csum packet as seen by hard_start_xmit + * PARTIAL: device is required to csum packet as seen by hard_start_xmit * from skb->h.raw to the end and to record the checksum * at skb->h.raw+skb->csum. * @@ -1261,14 +1262,14 @@ static inline int skb_linearize_cow(struct sk_buff *skb) * @len: length of data pulled * * After doing a pull on a received packet, you need to call this to - * update the CHECKSUM_HW checksum, or set ip_summed to CHECKSUM_NONE - * so that it can be recomputed from scratch. + * update the CHECKSUM_COMPLETE checksum, or set ip_summed to + * CHECKSUM_NONE so that it can be recomputed from scratch. */ static inline void skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); } @@ -1287,7 +1288,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) { if (likely(len >= skb->len)) return 0; - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; return __pskb_trim(skb, len); } -- cgit v1.2.3 From 4cf411de49c65140b3c259748629b561c0d3340f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 5 Aug 2006 00:58:33 -0700 Subject: [NETFILTER]: Get rid of HW checksum invalidation Update hardware checksums incrementally to avoid breaking GSO. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter.h | 6 ++++++ include/linux/netfilter_ipv4/ip_nat.h | 4 ---- include/linux/netfilter_ipv4/ip_nat_core.h | 8 ++++---- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 10168e26a84..b7e67d1d438 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -282,6 +282,12 @@ extern void nf_invalidate_cache(int pf); Returns true or false. */ extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len); +extern u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval, + u_int32_t csum); +extern u_int16_t nf_proto_csum_update(struct sk_buff *skb, + u_int32_t oldval, u_int32_t newval, + u_int16_t csum, int pseudohdr); + struct nf_afinfo { unsigned short family; unsigned int (*checksum)(struct sk_buff *skb, unsigned int hook, diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h index e9f5ed1d9f6..98f8407e4cb 100644 --- a/include/linux/netfilter_ipv4/ip_nat.h +++ b/include/linux/netfilter_ipv4/ip_nat.h @@ -72,10 +72,6 @@ extern unsigned int ip_nat_setup_info(struct ip_conntrack *conntrack, extern int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, const struct ip_conntrack *ignored_conntrack); -/* Calculate relative checksum. */ -extern u_int16_t ip_nat_cheat_check(u_int32_t oldvalinv, - u_int32_t newval, - u_int16_t oldcheck); #else /* !__KERNEL__: iptables wants this to compile. 
*/ #define ip_nat_multi_range ip_nat_multi_range_compat #endif /*__KERNEL__*/ diff --git a/include/linux/netfilter_ipv4/ip_nat_core.h b/include/linux/netfilter_ipv4/ip_nat_core.h index 30db23f06b0..60566f9fd7b 100644 --- a/include/linux/netfilter_ipv4/ip_nat_core.h +++ b/include/linux/netfilter_ipv4/ip_nat_core.h @@ -11,8 +11,8 @@ extern unsigned int ip_nat_packet(struct ip_conntrack *ct, unsigned int hooknum, struct sk_buff **pskb); -extern int ip_nat_icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_nat_manip_type manip, - enum ip_conntrack_dir dir); +extern int ip_nat_icmp_reply_translation(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb); #endif /* _IP_NAT_CORE_H */ -- cgit v1.2.3 From 9067c722cf6930adf1df2d169de9094dd90b0c33 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 7 Aug 2006 17:57:44 -0700 Subject: [NEIGH]: Move netlink neighbour bits to linux/neighbour.h Moves netlink neighbour bits to linux/neighbour.h. Also moves bits to be exported to userspace from net/neighbour.h to linux/neighbour.h and removes __KERNEL__ guards, userspace is not supposed to be using it. rtnetlink_rcv_msg() is no longer required to parse attributes for the neighbour layer, remove dependency on obsolete and buggy rta_buf. Signed-off-by: Thomas Graf Signed-off-by: David S.
Miller --- include/linux/neighbour.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/rtnetlink.h | 63 --------------------------------------------- 2 files changed, 65 insertions(+), 63 deletions(-) create mode 100644 include/linux/neighbour.h (limited to 'include/linux') diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h new file mode 100644 index 00000000000..8e8293d86fb --- /dev/null +++ b/include/linux/neighbour.h @@ -0,0 +1,65 @@ +#ifndef __LINUX_NEIGHBOUR_H +#define __LINUX_NEIGHBOUR_H + +#include + +struct ndmsg +{ + __u8 ndm_family; + __u8 ndm_pad1; + __u16 ndm_pad2; + __s32 ndm_ifindex; + __u16 ndm_state; + __u8 ndm_flags; + __u8 ndm_type; +}; + +enum +{ + NDA_UNSPEC, + NDA_DST, + NDA_LLADDR, + NDA_CACHEINFO, + NDA_PROBES, + __NDA_MAX +}; + +#define NDA_MAX (__NDA_MAX - 1) + +/* + * Neighbor Cache Entry Flags + */ + +#define NTF_PROXY 0x08 /* == ATF_PUBL */ +#define NTF_ROUTER 0x80 + +/* + * Neighbor Cache Entry States. + */ + +#define NUD_INCOMPLETE 0x01 +#define NUD_REACHABLE 0x02 +#define NUD_STALE 0x04 +#define NUD_DELAY 0x08 +#define NUD_PROBE 0x10 +#define NUD_FAILED 0x20 + +/* Dummy states */ +#define NUD_NOARP 0x40 +#define NUD_PERMANENT 0x80 +#define NUD_NONE 0x00 + +/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change + and make no address resolution or NUD. + NUD_PERMANENT is also cannot be deleted by garbage collectors. + */ + +struct nda_cacheinfo +{ + __u32 ndm_confirmed; + __u32 ndm_used; + __u32 ndm_updated; + __u32 ndm_refcnt; +}; + +#endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 84f3eb426da..9750f0214c2 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -386,69 +386,6 @@ struct rta_session -/************************************************************** - * Neighbour discovery. 
- ****/ - -struct ndmsg -{ - unsigned char ndm_family; - unsigned char ndm_pad1; - unsigned short ndm_pad2; - int ndm_ifindex; /* Link index */ - __u16 ndm_state; - __u8 ndm_flags; - __u8 ndm_type; -}; - -enum -{ - NDA_UNSPEC, - NDA_DST, - NDA_LLADDR, - NDA_CACHEINFO, - NDA_PROBES, - __NDA_MAX -}; - -#define NDA_MAX (__NDA_MAX - 1) - -#define NDA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) -#define NDA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndmsg)) - -/* - * Neighbor Cache Entry Flags - */ - -#define NTF_PROXY 0x08 /* == ATF_PUBL */ -#define NTF_ROUTER 0x80 - -/* - * Neighbor Cache Entry States. - */ - -#define NUD_INCOMPLETE 0x01 -#define NUD_REACHABLE 0x02 -#define NUD_STALE 0x04 -#define NUD_DELAY 0x08 -#define NUD_PROBE 0x10 -#define NUD_FAILED 0x20 - -/* Dummy states */ -#define NUD_NOARP 0x40 -#define NUD_PERMANENT 0x80 -#define NUD_NONE 0x00 - - -struct nda_cacheinfo -{ - __u32 ndm_confirmed; - __u32 ndm_used; - __u32 ndm_updated; - __u32 ndm_refcnt; -}; - - /***************************************************************** * Neighbour tables specific messages. * -- cgit v1.2.3 From b63bbc5006a0a62fabc81c4f77e95f16ff16f340 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 7 Aug 2006 18:00:57 -0700 Subject: [NEIGH]: Move netlink neighbour table bits to linux/neighbour.h rtnetlink_rcv_msg() is not longer required to parse attributes for the neighbour tables layer, remove dependency on obsolete and buggy rta_buf. Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/linux/neighbour.h | 94 ++++++++++++++++++++++++++++++++++++++++++ include/linux/rtnetlink.h | 101 ---------------------------------------------- 2 files changed, 94 insertions(+), 101 deletions(-) (limited to 'include/linux') diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h index 8e8293d86fb..bd3bbf668cd 100644 --- a/include/linux/neighbour.h +++ b/include/linux/neighbour.h @@ -62,4 +62,98 @@ struct nda_cacheinfo __u32 ndm_refcnt; }; +/***************************************************************** + * Neighbour tables specific messages. + * + * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the + * NLM_F_DUMP flag set. Every neighbour table configuration is + * spread over multiple messages to avoid running into message + * size limits on systems with many interfaces. The first message + * in the sequence transports all not device specific data such as + * statistics, configuration, and the default parameter set. + * This message is followed by 0..n messages carrying device + * specific parameter sets. + * Although the ordering should be sufficient, NDTA_NAME can be + * used to identify sequences. The initial message can be identified + * by checking for NDTA_CONFIG. The device specific messages do + * not contain this TLV but have NDTPA_IFINDEX set to the + * corresponding interface index. + * + * To change neighbour table attributes, send RTM_SETNEIGHTBL + * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], + * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked + * otherwise. Device specific parameter sets can be changed by + * setting NDTPA_IFINDEX to the interface index of the corresponding + * device. 
+ ****/ + +struct ndt_stats +{ + __u64 ndts_allocs; + __u64 ndts_destroys; + __u64 ndts_hash_grows; + __u64 ndts_res_failed; + __u64 ndts_lookups; + __u64 ndts_hits; + __u64 ndts_rcv_probes_mcast; + __u64 ndts_rcv_probes_ucast; + __u64 ndts_periodic_gc_runs; + __u64 ndts_forced_gc_runs; +}; + +enum { + NDTPA_UNSPEC, + NDTPA_IFINDEX, /* u32, unchangeable */ + NDTPA_REFCNT, /* u32, read-only */ + NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */ + NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */ + NDTPA_RETRANS_TIME, /* u64, msecs */ + NDTPA_GC_STALETIME, /* u64, msecs */ + NDTPA_DELAY_PROBE_TIME, /* u64, msecs */ + NDTPA_QUEUE_LEN, /* u32 */ + NDTPA_APP_PROBES, /* u32 */ + NDTPA_UCAST_PROBES, /* u32 */ + NDTPA_MCAST_PROBES, /* u32 */ + NDTPA_ANYCAST_DELAY, /* u64, msecs */ + NDTPA_PROXY_DELAY, /* u64, msecs */ + NDTPA_PROXY_QLEN, /* u32 */ + NDTPA_LOCKTIME, /* u64, msecs */ + __NDTPA_MAX +}; +#define NDTPA_MAX (__NDTPA_MAX - 1) + +struct ndtmsg +{ + __u8 ndtm_family; + __u8 ndtm_pad1; + __u16 ndtm_pad2; +}; + +struct ndt_config +{ + __u16 ndtc_key_len; + __u16 ndtc_entry_size; + __u32 ndtc_entries; + __u32 ndtc_last_flush; /* delta to now in msecs */ + __u32 ndtc_last_rand; /* delta to now in msecs */ + __u32 ndtc_hash_rnd; + __u32 ndtc_hash_mask; + __u32 ndtc_hash_chain_gc; + __u32 ndtc_proxy_qlen; +}; + +enum { + NDTA_UNSPEC, + NDTA_NAME, /* char *, unchangeable */ + NDTA_THRESH1, /* u32 */ + NDTA_THRESH2, /* u32 */ + NDTA_THRESH3, /* u32 */ + NDTA_CONFIG, /* struct ndt_config, read-only */ + NDTA_PARMS, /* nested TLV NDTPA_* */ + NDTA_STATS, /* struct ndt_stats, read-only */ + NDTA_GC_INTERVAL, /* u64, msecs */ + __NDTA_MAX +}; +#define NDTA_MAX (__NDTA_MAX - 1) + #endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 9750f0214c2..784a1a29490 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -384,107 +384,6 @@ struct rta_session } u; }; - - -/***************************************************************** - * 
Neighbour tables specific messages. - * - * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the - * NLM_F_DUMP flag set. Every neighbour table configuration is - * spread over multiple messages to avoid running into message - * size limits on systems with many interfaces. The first message - * in the sequence transports all not device specific data such as - * statistics, configuration, and the default parameter set. - * This message is followed by 0..n messages carrying device - * specific parameter sets. - * Although the ordering should be sufficient, NDTA_NAME can be - * used to identify sequences. The initial message can be identified - * by checking for NDTA_CONFIG. The device specific messages do - * not contain this TLV but have NDTPA_IFINDEX set to the - * corresponding interface index. - * - * To change neighbour table attributes, send RTM_SETNEIGHTBL - * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], - * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked - * otherwise. Device specific parameter sets can be changed by - * setting NDTPA_IFINDEX to the interface index of the corresponding - * device. 
- ****/ - -struct ndt_stats -{ - __u64 ndts_allocs; - __u64 ndts_destroys; - __u64 ndts_hash_grows; - __u64 ndts_res_failed; - __u64 ndts_lookups; - __u64 ndts_hits; - __u64 ndts_rcv_probes_mcast; - __u64 ndts_rcv_probes_ucast; - __u64 ndts_periodic_gc_runs; - __u64 ndts_forced_gc_runs; -}; - -enum { - NDTPA_UNSPEC, - NDTPA_IFINDEX, /* u32, unchangeable */ - NDTPA_REFCNT, /* u32, read-only */ - NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */ - NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */ - NDTPA_RETRANS_TIME, /* u64, msecs */ - NDTPA_GC_STALETIME, /* u64, msecs */ - NDTPA_DELAY_PROBE_TIME, /* u64, msecs */ - NDTPA_QUEUE_LEN, /* u32 */ - NDTPA_APP_PROBES, /* u32 */ - NDTPA_UCAST_PROBES, /* u32 */ - NDTPA_MCAST_PROBES, /* u32 */ - NDTPA_ANYCAST_DELAY, /* u64, msecs */ - NDTPA_PROXY_DELAY, /* u64, msecs */ - NDTPA_PROXY_QLEN, /* u32 */ - NDTPA_LOCKTIME, /* u64, msecs */ - __NDTPA_MAX -}; -#define NDTPA_MAX (__NDTPA_MAX - 1) - -struct ndtmsg -{ - __u8 ndtm_family; - __u8 ndtm_pad1; - __u16 ndtm_pad2; -}; - -struct ndt_config -{ - __u16 ndtc_key_len; - __u16 ndtc_entry_size; - __u32 ndtc_entries; - __u32 ndtc_last_flush; /* delta to now in msecs */ - __u32 ndtc_last_rand; /* delta to now in msecs */ - __u32 ndtc_hash_rnd; - __u32 ndtc_hash_mask; - __u32 ndtc_hash_chain_gc; - __u32 ndtc_proxy_qlen; -}; - -enum { - NDTA_UNSPEC, - NDTA_NAME, /* char *, unchangeable */ - NDTA_THRESH1, /* u32 */ - NDTA_THRESH2, /* u32 */ - NDTA_THRESH3, /* u32 */ - NDTA_CONFIG, /* struct ndt_config, read-only */ - NDTA_PARMS, /* nested TLV NDTPA_* */ - NDTA_STATS, /* struct ndt_stats, read-only */ - NDTA_GC_INTERVAL, /* u64, msecs */ - __NDTA_MAX -}; -#define NDTA_MAX (__NDTA_MAX - 1) - -#define NDTA_RTA(r) ((struct rtattr*)(((char*)(r)) + \ - NLMSG_ALIGN(sizeof(struct ndtmsg)))) -#define NDTA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndtmsg)) - - /**** * General form of address family dependent message. 
****/ -- cgit v1.2.3 From ac5a488ef252ed673cb067843e411f8cc43f7ab9 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Mon, 7 Aug 2006 20:57:31 -0700 Subject: [NET]: Round out in-kernel sockets API This patch implements wrapper functions that provide a convenient way to access the sockets API for in-kernel users like sunrpc, cifs & ocfs2 etc and any future users. Signed-off-by: Sridhar Samudrala Acked-by: James Morris Signed-off-by: David S. Miller --- include/linux/net.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index b20c53c7441..19da2c08d7b 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -208,6 +208,25 @@ extern int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len, int flags); +extern int kernel_bind(struct socket *sock, struct sockaddr *addr, + int addrlen); +extern int kernel_listen(struct socket *sock, int backlog); +extern int kernel_accept(struct socket *sock, struct socket **newsock, + int flags); +extern int kernel_connect(struct socket *sock, struct sockaddr *addr, + int addrlen, int flags); +extern int kernel_getsockname(struct socket *sock, struct sockaddr *addr, + int *addrlen); +extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr, + int *addrlen); +extern int kernel_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen); +extern int kernel_setsockopt(struct socket *sock, int level, int optname, + char *optval, int optlen); +extern int kernel_sendpage(struct socket *sock, struct page *page, int offset, + size_t size, int flags); +extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); + #ifndef CONFIG_SMP #define SOCKOPS_WRAPPED(name) name #define SOCKOPS_WRAP(name, fam) -- cgit v1.2.3 From a8731cbf61c8768ea129780b70dc7dfc6795aad4 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 9 Aug 2006 15:56:46 -0700 
Subject: [DECNET]: Convert rules to use generic code This patch converts the DECnet rules code to use the generic rules system created by Thomas Graf. Signed-off-by: Steven Whitehouse Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 784a1a29490..0aaffa2ae66 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -534,7 +534,8 @@ enum rtnetlink_groups { RTNLGRP_NOP2, RTNLGRP_DECnet_ROUTE, #define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE - RTNLGRP_NOP3, + RTNLGRP_DECnet_RULE, +#define RTNLGRP_DECnet_RULE RTNLGRP_DECnet_RULE RTNLGRP_NOP4, RTNLGRP_IPV6_PREFIX, #define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX -- cgit v1.2.3 From 757dbb494be3309fe41ce4c62f8057d8b41d8897 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 9 Aug 2006 20:50:00 -0700 Subject: [NET]: drop unused elements from net_proto_family Three values in net_proto_family are defined but never used. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/net.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 19da2c08d7b..1bd76327ee2 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -169,11 +169,6 @@ struct proto_ops { struct net_proto_family { int family; int (*create)(struct socket *sock, int protocol); - /* These are counters for the number of different methods of - each we support */ - short authentication; - short encryption; - short encrypt_net; struct module *owner; }; -- cgit v1.2.3 From f0fd27d42e39b91f85e1840ec49b072fd6c545b8 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 9 Aug 2006 21:03:17 -0700 Subject: [NET]: sock_register interface changes The sock_register() doesn't change the family, so the protocols can define it read-only.
No caller ever checks return value from sock_unregister() Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/net.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 1bd76327ee2..c257f716e00 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -176,8 +176,8 @@ struct iovec; struct kvec; extern int sock_wake_async(struct socket *sk, int how, int band); -extern int sock_register(struct net_proto_family *fam); -extern int sock_unregister(int family); +extern int sock_register(const struct net_proto_family *fam); +extern void sock_unregister(int family); extern int sock_create(int family, int type, int proto, struct socket **res); extern int sock_create_kern(int family, int type, int proto, -- cgit v1.2.3 From 9e762a4a89b302cb3b26a1f9bb33eff459eaeca9 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Aug 2006 23:09:48 -0700 Subject: [NET]: Introduce RTA_TABLE/FRA_TABLE attributes Introduce RTA_TABLE route attribute and FRA_TABLE routing rule attribute to hold 32 bit routing table IDs. Userspace compatibility is provided by continuing to accept and send the rtm_table field, but because of its limited size it can only carry the low 8 bits of the table ID. This implies that if larger IDs are used, _all_ userspace programs using them need to use RTA_TABLE. Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- include/linux/fib_rules.h | 4 ++++ include/linux/rtnetlink.h | 8 ++++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 5e503f0ca6e..19a82b6c1c1 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -36,6 +36,10 @@ enum FRA_UNUSED5, FRA_FWMARK, /* netfilter mark (IPv4) */ FRA_FLOW, /* flow/class id */ + FRA_UNUSED6, + FRA_UNUSED7, + FRA_UNUSED8, + FRA_TABLE, /* Extended table id */ __FRA_MAX }; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 0aaffa2ae66..ea422a539a0 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -264,6 +264,7 @@ enum rtattr_type_t RTA_CACHEINFO, RTA_SESSION, RTA_MP_ALGO, + RTA_TABLE, __RTA_MAX }; @@ -717,6 +718,13 @@ extern void __rtnl_unlock(void); } \ } while(0) +static inline u32 rtm_get_table(struct rtattr **rta, u8 table) +{ + return RTA_GET_U32(rta[RTA_TABLE-1]); +rtattr_failure: + return table; +} + #endif /* __KERNEL__ */ -- cgit v1.2.3 From b801f54917b7c6e8540f877ee562cd0725e62ebd Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Aug 2006 23:12:34 -0700 Subject: [NET]: Increase RT_TABLE_MAX to 2^32 Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- include/linux/rtnetlink.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index ea422a539a0..7e4aa48680a 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -239,10 +239,8 @@ enum rt_class_t RT_TABLE_DEFAULT=253, RT_TABLE_MAIN=254, RT_TABLE_LOCAL=255, - __RT_TABLE_MAX + RT_TABLE_MAX=0xFFFFFFFF }; -#define RT_TABLE_MAX (__RT_TABLE_MAX - 1) - /* Routing message attributes */ -- cgit v1.2.3 From 81aa646cc4df3779bcbf9d18cc2c0813ee9b3262 Mon Sep 17 00:00:00 2001 From: Martin Bligh Date: Mon, 14 Aug 2006 23:57:10 -0700 Subject: [IPV4]: add the UdpSndbufErrors and UdpRcvbufErrors MIBs Signed-off-by: Martin Bligh Signed-off-by: Andrew Morton --- include/linux/snmp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 4db25d5c7cd..30156556f78 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -155,6 +155,8 @@ enum UDP_MIB_NOPORTS, /* NoPorts */ UDP_MIB_INERRORS, /* InErrors */ UDP_MIB_OUTDATAGRAMS, /* OutDatagrams */ + UDP_MIB_RCVBUFERRORS, /* RcvbufErrors */ + UDP_MIB_SNDBUFERRORS, /* SndbufErrors */ __UDP_MIB_MAX }; -- cgit v1.2.3 From 2942e90050569525628a9f34e0daaa9b661b49cc Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:30:25 -0700 Subject: [RTNETLINK]: Use rtnl_unicast() for rtnetlink unicasts Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/linux/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 7e4aa48680a..0e4f478e2cb 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -584,6 +584,7 @@ struct rtnetlink_link extern struct rtnetlink_link * rtnetlink_links[NPROTO]; extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo); +extern int rtnl_unicast(struct sk_buff *skb, u32 pid); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data); -- cgit v1.2.3 From 97676b6b5538b3e059d33b8338e7d5cc41c5f1f1 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:31:41 -0700 Subject: [RTNETLINK]: Add rtnetlink notification interface Adds rtnl_notify() to send rtnetlink notification messages and rtnl_set_sk_err() to report notification errors as socket errors in order to indicate the need of a resync due to loss of events. nlmsg_report() is added to properly document the meaning of NLM_F_ECHO. Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/linux/rtnetlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 0e4f478e2cb..ecbe0349060 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -585,6 +585,9 @@ struct rtnetlink_link extern struct rtnetlink_link * rtnetlink_links[NPROTO]; extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo); extern int rtnl_unicast(struct sk_buff *skb, u32 pid); +extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, + struct nlmsghdr *nlh, gfp_t flags); +extern void rtnl_set_sk_err(u32 group, int error); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data); -- cgit v1.2.3 From 56fc85ac961e2c20dcb5ef07e2628b3f93de2e49 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:37:29 -0700 Subject: [RTNETLINK]: Unexport rtnl socket Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index ecbe0349060..9c92dc8b9a0 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -574,8 +574,6 @@ extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, in #define rtattr_parse_nested(tb, max, rta) \ rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta))) -extern struct sock *rtnl; - struct rtnetlink_link { int (*doit)(struct sk_buff *, struct nlmsghdr*, void *attr); -- cgit v1.2.3 From ac0b04627269ff16c3c7ab854a65fe6780c6e3e5 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Tue, 22 Aug 2006 00:15:33 -0700 Subject: [SCTP]: Extend /proc/net/sctp/snmp to provide more statistics. This patch adds more statistics info under /proc/net/sctp/snmp that should be useful for debugging. 
The additional events that are counted now include timer expirations, retransmits, packet and data chunk discards. The Data chunk discards include all the cases where a data chunk is discarded including high tsn, bad stream, dup tsn and the most useful one(out of receive buffer/rwnd). Also moved the SCTP MIB data structures from the generic include directories to include/sctp/sctp.h. Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/linux/snmp.h | 33 --------------------------------- 1 file changed, 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 30156556f78..854aa6b543f 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -160,39 +160,6 @@ enum __UDP_MIB_MAX }; -/* sctp mib definitions */ -/* - * draft-ietf-sigtran-sctp-mib-07.txt - */ -enum -{ - SCTP_MIB_NUM = 0, - SCTP_MIB_CURRESTAB, /* CurrEstab */ - SCTP_MIB_ACTIVEESTABS, /* ActiveEstabs */ - SCTP_MIB_PASSIVEESTABS, /* PassiveEstabs */ - SCTP_MIB_ABORTEDS, /* Aborteds */ - SCTP_MIB_SHUTDOWNS, /* Shutdowns */ - SCTP_MIB_OUTOFBLUES, /* OutOfBlues */ - SCTP_MIB_CHECKSUMERRORS, /* ChecksumErrors */ - SCTP_MIB_OUTCTRLCHUNKS, /* OutCtrlChunks */ - SCTP_MIB_OUTORDERCHUNKS, /* OutOrderChunks */ - SCTP_MIB_OUTUNORDERCHUNKS, /* OutUnorderChunks */ - SCTP_MIB_INCTRLCHUNKS, /* InCtrlChunks */ - SCTP_MIB_INORDERCHUNKS, /* InOrderChunks */ - SCTP_MIB_INUNORDERCHUNKS, /* InUnorderChunks */ - SCTP_MIB_FRAGUSRMSGS, /* FragUsrMsgs */ - SCTP_MIB_REASMUSRMSGS, /* ReasmUsrMsgs */ - SCTP_MIB_OUTSCTPPACKS, /* OutSCTPPacks */ - SCTP_MIB_INSCTPPACKS, /* InSCTPPacks */ - SCTP_MIB_RTOALGORITHM, /* RtoAlgorithm */ - SCTP_MIB_RTOMIN, /* RtoMin */ - SCTP_MIB_RTOMAX, /* RtoMax */ - SCTP_MIB_RTOINITIAL, /* RtoInitial */ - SCTP_MIB_VALCOOKIELIFE, /* ValCookieLife */ - SCTP_MIB_MAXINITRETR, /* MaxInitRetr */ - __SCTP_MIB_MAX -}; - /* linux mib definitions */ enum { -- cgit v1.2.3 From 9ba1627617d396135a4d679542a3623d5819e628 Mon Sep 17 00:00:00 
2001 From: Yasuyuki Kozakai Date: Tue, 22 Aug 2006 00:29:37 -0700 Subject: [NETFILTER]: x_tables: replace IPv4 dscp match by address family independent version This replaces IPv4 dscp match by address family independent version. This also - utilizes dsfield.h to get the DS field in IPv4/IPv6 header, and - checks for the DSCP value from user space. - fixes Kconfig help text. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/xt_dscp.h | 23 +++++++++++++++++++++++ include/linux/netfilter_ipv4/ipt_dscp.h | 14 ++++++-------- 2 files changed, 29 insertions(+), 8 deletions(-) create mode 100644 include/linux/netfilter/xt_dscp.h (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_dscp.h b/include/linux/netfilter/xt_dscp.h new file mode 100644 index 00000000000..1da61e6acaf --- /dev/null +++ b/include/linux/netfilter/xt_dscp.h @@ -0,0 +1,23 @@ +/* x_tables module for matching the IPv4/IPv6 DSCP field + * + * (C) 2002 Harald Welte + * This software is distributed under GNU GPL v2, 1991 + * + * See RFC2474 for a description of the DSCP field within the IP Header. 
+ * + * xt_dscp.h,v 1.3 2002/08/05 19:00:21 laforge Exp +*/ +#ifndef _XT_DSCP_H +#define _XT_DSCP_H + +#define XT_DSCP_MASK 0xfc /* 11111100 */ +#define XT_DSCP_SHIFT 2 +#define XT_DSCP_MAX 0x3f /* 00111111 */ + +/* match info */ +struct xt_dscp_info { + u_int8_t dscp; + u_int8_t invert; +}; + +#endif /* _XT_DSCP_H */ diff --git a/include/linux/netfilter_ipv4/ipt_dscp.h b/include/linux/netfilter_ipv4/ipt_dscp.h index 2fa6dfe9289..4b82ca912b0 100644 --- a/include/linux/netfilter_ipv4/ipt_dscp.h +++ b/include/linux/netfilter_ipv4/ipt_dscp.h @@ -10,14 +10,12 @@ #ifndef _IPT_DSCP_H #define _IPT_DSCP_H -#define IPT_DSCP_MASK 0xfc /* 11111100 */ -#define IPT_DSCP_SHIFT 2 -#define IPT_DSCP_MAX 0x3f /* 00111111 */ +#include -/* match info */ -struct ipt_dscp_info { - u_int8_t dscp; - u_int8_t invert; -}; +#define IPT_DSCP_MASK XT_DSCP_MASK +#define IPT_DSCP_SHIFT XT_DSCP_SHIFT +#define IPT_DSCP_MAX XT_DSCP_MAX + +#define ipt_dscp_info xt_dscp_info #endif /* _IPT_DSCP_H */ -- cgit v1.2.3 From a468701db58a8b3e08e3f55fa6ac66db42014922 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Tue, 22 Aug 2006 00:30:26 -0700 Subject: [NETFILTER]: x_tables: replace IPv4 DSCP target by address family independent version This replaces IPv4 DSCP target by address family independent version. This also - utilizes dsfield.h to get/mangle DS field in IPv4/IPv6 header - fixes Kconfig help text. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter/xt_DSCP.h | 20 ++++++++++++++++++++ include/linux/netfilter_ipv4/ipt_DSCP.h | 6 ++---- 2 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 include/linux/netfilter/xt_DSCP.h (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_DSCP.h b/include/linux/netfilter/xt_DSCP.h new file mode 100644 index 00000000000..3c7c963997b --- /dev/null +++ b/include/linux/netfilter/xt_DSCP.h @@ -0,0 +1,20 @@ +/* x_tables module for setting the IPv4/IPv6 DSCP field + * + * (C) 2002 Harald Welte + * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh + * This software is distributed under GNU GPL v2, 1991 + * + * See RFC2474 for a description of the DSCP field within the IP Header. + * + * xt_DSCP.h,v 1.7 2002/03/14 12:03:13 laforge Exp +*/ +#ifndef _XT_DSCP_TARGET_H +#define _XT_DSCP_TARGET_H +#include + +/* target info */ +struct xt_DSCP_info { + u_int8_t dscp; +}; + +#endif /* _XT_DSCP_TARGET_H */ diff --git a/include/linux/netfilter_ipv4/ipt_DSCP.h b/include/linux/netfilter_ipv4/ipt_DSCP.h index b30f510b5be..3491e524d5e 100644 --- a/include/linux/netfilter_ipv4/ipt_DSCP.h +++ b/include/linux/netfilter_ipv4/ipt_DSCP.h @@ -11,10 +11,8 @@ #ifndef _IPT_DSCP_TARGET_H #define _IPT_DSCP_TARGET_H #include +#include -/* target info */ -struct ipt_DSCP_info { - u_int8_t dscp; -}; +#define ipt_DSCP_info xt_DSCP_info #endif /* _IPT_DSCP_TARGET_H */ -- cgit v1.2.3 From 2521c12cf1a29f6c380b13ca32a38175f6beed08 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 22 Aug 2006 00:31:24 -0700 Subject: [NETFILTER]: conntrack: introduce connection mark event This patch introduces the mark event. ctnetlink can use this to know if the mark needs to be dumped. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter/nf_conntrack_common.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index d2e4bd7a7a1..9e0dae07861 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -125,6 +125,10 @@ enum ip_conntrack_events /* Counter highest bit has been set */ IPCT_COUNTER_FILLING_BIT = 11, IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT), + + /* Mark is set */ + IPCT_MARK_BIT = 12, + IPCT_MARK = (1 << IPCT_MARK_BIT), }; enum ip_conntrack_expect_events { -- cgit v1.2.3 From 52d9c42ef2563d2c420eb23b96bf5a4cae9e167b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:33:45 -0700 Subject: [NETFILTER]: x_tables: add helpers for mass match/target registration Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 48cc32d83f7..9a9912430e3 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -290,8 +290,13 @@ struct xt_table_info extern int xt_register_target(struct xt_target *target); extern void xt_unregister_target(struct xt_target *target); +extern int xt_register_targets(struct xt_target *target, unsigned int n); +extern void xt_unregister_targets(struct xt_target *target, unsigned int n); + extern int xt_register_match(struct xt_match *target); extern void xt_unregister_match(struct xt_match *target); +extern int xt_register_matches(struct xt_match *match, unsigned int n); +extern void xt_unregister_matches(struct xt_match *match, unsigned int n); extern int xt_check_match(const struct xt_match *match, unsigned short family, unsigned int size, const char *table, unsigned int hook, -- cgit v1.2.3 From 
fe1cb10873b44cf89082465823ee6d4d4ac63ad7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:35:47 -0700 Subject: [NETFILTER]: x_tables: remove unused argument to target functions Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 3 +-- include/linux/netfilter_arp/arp_tables.h | 3 +-- include/linux/netfilter_ipv4/ip_tables.h | 3 +-- include/linux/netfilter_ipv6/ip6_tables.h | 3 +-- 4 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 9a9912430e3..9cef0e91542 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -211,8 +211,7 @@ struct xt_target const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userdata); + const void *targinfo); /* Called when user tries to insert an entry of this type: hook_mask is a bitmask of hooks from which it can be diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 62cc27daca4..149e87c9ab1 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -248,8 +248,7 @@ extern unsigned int arpt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct arpt_table *table, - void *userdata); + struct arpt_table *table); #define ARPT_ALIGN(s) (((s) + (__alignof__(struct arpt_entry)-1)) & ~(__alignof__(struct arpt_entry)-1)) #endif /*__KERNEL__*/ diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index c0dac16e190..a536bbdef14 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -312,8 +312,7 @@ extern unsigned int ipt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct 
net_device *out, - struct ipt_table *table, - void *userdata); + struct ipt_table *table); #define IPT_ALIGN(s) XT_ALIGN(s) diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index d0d5d1ee4be..d7a8e9c0dad 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -300,8 +300,7 @@ extern unsigned int ip6t_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct ip6t_table *table, - void *userdata); + struct ip6t_table *table); /* Check for an extension */ extern int ip6t_ext_hdr(u8 nexthdr); -- cgit v1.2.3 From efa741656e9ebf5fd6e0432b0d1b3c7f156392d3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:36:37 -0700 Subject: [NETFILTER]: x_tables: remove unused size argument to check/destroy functions The size is verified by x_tables and isn't needed by the modules anymore. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 9cef0e91542..9d97102a934 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -174,12 +174,10 @@ struct xt_match const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask); /* Called when entry of this type deleted. 
*/ - void (*destroy)(const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize); + void (*destroy)(const struct xt_match *match, void *matchinfo); /* Called when userspace align differs from kernel space one */ int (*compat)(void *match, void **dstptr, int *size, int convert); @@ -221,12 +219,10 @@ struct xt_target const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask); /* Called when entry of this type deleted. */ - void (*destroy)(const struct xt_target *target, void *targinfo, - unsigned int targinfosize); + void (*destroy)(const struct xt_target *target, void *targinfo); /* Called when userspace align differs from kernel space one */ int (*compat)(void *target, void **dstptr, int *size, int convert); -- cgit v1.2.3 From 53e26658282373b84ba85a0c9807cb762f7738a6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:43:20 -0700 Subject: [NETFILTER]: nfnetlink: remove unnecessary packed attributes Remove unnecessary packed attributes in nfnetlink structures. Unfortunately in a few cases they have to stay to avoid changing structure sizes. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter/nfnetlink.h | 4 ++-- include/linux/netfilter/nfnetlink_log.h | 6 +++--- include/linux/netfilter/nfnetlink_queue.h | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 9f5b12cf489..6d8e3e5a80e 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -43,7 +43,7 @@ struct nfattr u_int16_t nfa_len; u_int16_t nfa_type; /* we use 15 bits for the type, and the highest * bit to indicate whether the payload is nested */ -} __attribute__ ((packed)); +}; /* FIXME: Apart from NFNL_NFA_NESTED shamelessly copy and pasted from * rtnetlink.h, it's time to put this in a generic file */ @@ -79,7 +79,7 @@ struct nfgenmsg { u_int8_t nfgen_family; /* AF_xxx */ u_int8_t version; /* nfnetlink version */ u_int16_t res_id; /* resource id */ -} __attribute__ ((packed)); +}; #define NFNETLINK_V0 0 diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h index a7497c7436d..87b92f8b988 100644 --- a/include/linux/netfilter/nfnetlink_log.h +++ b/include/linux/netfilter/nfnetlink_log.h @@ -19,18 +19,18 @@ struct nfulnl_msg_packet_hdr { u_int16_t hw_protocol; /* hw protocol (network order) */ u_int8_t hook; /* netfilter hook */ u_int8_t _pad; -} __attribute__ ((packed)); +}; struct nfulnl_msg_packet_hw { u_int16_t hw_addrlen; u_int16_t _pad; u_int8_t hw_addr[8]; -} __attribute__ ((packed)); +}; struct nfulnl_msg_packet_timestamp { aligned_u64 sec; aligned_u64 usec; -} __attribute__ ((packed)); +}; #define NFULNL_PREFIXLEN 30 /* just like old log target */ diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h index 9e774373244..36af0360b56 100644 --- a/include/linux/netfilter/nfnetlink_queue.h +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -22,12 +22,12 @@ struct nfqnl_msg_packet_hw { u_int16_t hw_addrlen; 
u_int16_t _pad; u_int8_t hw_addr[8]; -} __attribute__ ((packed)); +}; struct nfqnl_msg_packet_timestamp { aligned_u64 sec; aligned_u64 usec; -} __attribute__ ((packed)); +}; enum nfqnl_attr_type { NFQA_UNSPEC, @@ -49,7 +49,7 @@ enum nfqnl_attr_type { struct nfqnl_msg_verdict_hdr { u_int32_t verdict; u_int32_t id; -} __attribute__ ((packed)); +}; enum nfqnl_msg_config_cmds { @@ -64,7 +64,7 @@ struct nfqnl_msg_config_cmd { u_int8_t command; /* nfqnl_msg_config_cmds */ u_int8_t _pad; u_int16_t pf; /* AF_xxx for PF_[UN]BIND */ -} __attribute__ ((packed)); +}; enum nfqnl_config_mode { NFQNL_COPY_NONE, -- cgit v1.2.3 From 91270cf81765152f6e77953440beb4d3b34a71b5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:43:38 -0700 Subject: [NETFILTER]: x_tables: add data member to struct xt_match Shared match functions can use this to make runtime decisions based on the used match. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 9d97102a934..03d1027fb0e 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -185,6 +185,9 @@ struct xt_match /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; + /* Free to use by each match */ + unsigned long data; + char *table; unsigned int matchsize; unsigned int hooks; -- cgit v1.2.3 From 8e1ef0a95b87e8b4292b2ba733e8cb854ea2d2fe Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 29 Aug 2006 17:15:09 -0700 Subject: [IPV6]: Cache source address as well in ipv6_pinfo{}. Based on MIPL2 kernel patch. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Ville Nuorvala Signed-off-by: David S.
Miller --- include/linux/ipv6.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 297853c841b..02d14a3ff2a 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -242,6 +242,9 @@ struct ipv6_pinfo { struct in6_addr rcv_saddr; struct in6_addr daddr; struct in6_addr *daddr_cache; +#ifdef CONFIG_IPV6_SUBTREES + struct in6_addr *saddr_cache; +#endif __u32 flow_label; __u32 frag_size; -- cgit v1.2.3 From 7e49e6de30efa716614e280d97963c570f3acf29 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Fri, 22 Sep 2006 15:05:15 -0700 Subject: [XFRM]: Add XFRM_MODE_xxx for future use. Transformation mode is used as either IPsec transport or tunnel. It is required to add two more items, route optimization and inbound trigger for Mobile IPv6. Based on MIPL2 kernel patch. This patch was also written by: Ville Nuorvala Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 46a15c7a1a1..5154064b6d9 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -120,7 +120,9 @@ enum #define XFRM_MODE_TRANSPORT 0 #define XFRM_MODE_TUNNEL 1 -#define XFRM_MODE_MAX 2 +#define XFRM_MODE_ROUTEOPTIMIZATION 2 +#define XFRM_MODE_IN_TRIGGER 3 +#define XFRM_MODE_MAX 4 /* Netlink configuration messages. */ enum { @@ -247,7 +249,7 @@ struct xfrm_usersa_info { __u32 seq; __u32 reqid; __u16 family; - __u8 mode; /* 0=transport,1=tunnel */ + __u8 mode; /* XFRM_MODE_xxx */ __u8 replay_window; __u8 flags; #define XFRM_STATE_NOECN 1 -- cgit v1.2.3 From eb2971b68a7d17a7d0fa2c7fc6fbc4bfe41cd694 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 17:56:04 -0700 Subject: [XFRM] STATE: Search by address using source address list. 
This is a support to search transformation states by its addresses by using source address list for Mobile IPv6 usage. To use it from user-space, it is also added a message type for source address as a xfrm state option. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 5154064b6d9..66343d3d4b9 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -234,6 +234,7 @@ enum xfrm_attr_type_t { XFRMA_REPLAY_VAL, XFRMA_REPLAY_THRESH, XFRMA_ETIMER_THRESH, + XFRMA_SRCADDR, /* xfrm_address_t */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) -- cgit v1.2.3 From fbd9a5b47ee9c319ff0cae584391241ce78ffd6b Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 18:08:21 -0700 Subject: [XFRM] STATE: Common receive function for route optimization extension headers. XFRM_STATE_WILDRECV flag is introduced; the last resort state is set it and receives packet which is not route optimized but uses such extension headers i.e. Mobile IPv6 signaling (binding update and acknowledgement). A node enabled Mobile IPv6 adds the state. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- include/linux/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 66343d3d4b9..a7c9e4cfb15 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -256,6 +256,7 @@ struct xfrm_usersa_info { #define XFRM_STATE_NOECN 1 #define XFRM_STATE_DECAP_DSCP 2 #define XFRM_STATE_NOPMTUDISC 4 +#define XFRM_STATE_WILDRECV 8 }; struct xfrm_usersa_id { -- cgit v1.2.3 From 060f02a3bdd4d9ba8aa3c48e9b470672b1f3a585 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Wed, 23 Aug 2006 18:18:55 -0700 Subject: [XFRM] STATE: Introduce care-of address. Care-of address is carried by state as a transformation option like IPsec encryption/authentication algorithm. Based on MIPL2 kernel patch. Signed-off-by: Noriaki TAKAMIYA Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- include/linux/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index a7c9e4cfb15..b53f799189a 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -235,6 +235,7 @@ enum xfrm_attr_type_t { XFRMA_REPLAY_THRESH, XFRMA_ETIMER_THRESH, XFRMA_SRCADDR, /* xfrm_address_t */ + XFRMA_COADDR, /* xfrm_address_t */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) -- cgit v1.2.3 From 9afaca057980c02771f4657c455cc7592fcd7373 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 18:20:16 -0700 Subject: [XFRM] IPV6: Update outbound state timestamp for each sending. With this patch transformation state is updated last used time for each sending. Xtime is used for it like other state lifetime expiration. Mobile IPv6 enabled nodes will want to know traffic status of each binding (e.g. judgement to request binding refresh by correspondent node, or to keep home/care-of nonce alive by mobile node). The last used timestamp is an important hint about it. Based on MIPL2 kernel patch. 
This patch was also written by: Henrik Petander Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index b53f799189a..1d8c1f22c12 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -236,6 +236,7 @@ enum xfrm_attr_type_t { XFRMA_ETIMER_THRESH, XFRMA_SRCADDR, /* xfrm_address_t */ XFRMA_COADDR, /* xfrm_address_t */ + XFRMA_LASTUSED, __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) -- cgit v1.2.3 From 642ec62eee5bdc158e01029220c8a23c685778fb Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Wed, 23 Aug 2006 19:15:07 -0700 Subject: [IPV6] MIP6: Add routing header type 2 definition. Add routing header type 2 definition for Mobile IPv6. Based on MIPL2 kernel patch. Signed-off-by: Noriaki TAKAMIYA Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- include/linux/ipv6.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 02d14a3ff2a..d995662e94c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -29,6 +29,7 @@ struct in6_ifreq { #define IPV6_SRCRT_STRICT 0x01 /* this hop must be a neighbor */ #define IPV6_SRCRT_TYPE_0 0 /* IPv6 type 0 Routing Header */ +#define IPV6_SRCRT_TYPE_2 2 /* IPv6 type 2 Routing Header */ /* * routing header @@ -73,6 +74,18 @@ struct rt0_hdr { #define rt0_type rt_hdr.type }; +/* + * routing header type 2 + */ + +struct rt2_hdr { + struct ipv6_rt_hdr rt_hdr; + __u32 reserved; + struct in6_addr addr; + +#define rt2_type rt_hdr.type +}; + struct ipv6_auth_hdr { __u8 nexthdr; __u8 hdrlen; /* This one is measured in 32 bit units! 
*/ -- cgit v1.2.3 From 842426e719f86cd5709617208efae93ff1a1e2d8 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Wed, 23 Aug 2006 19:21:34 -0700 Subject: [IPV6] MIP6: Add home address option definition. Add home address option definition for Mobile IPv6. Based on MIPL2 kernel patch. Signed-off-by: Noriaki TAKAMIYA Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/in6.h | 1 + include/linux/ipv6.h | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/in6.h b/include/linux/in6.h index 304aaedea30..086ec2ac8c5 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -142,6 +142,7 @@ struct in6_flowlabel_req #define IPV6_TLV_PADN 1 #define IPV6_TLV_ROUTERALERT 5 #define IPV6_TLV_JUMBO 194 +#define IPV6_TLV_HAO 201 /* home address option */ /* * IPV6 socket options diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index d995662e94c..5bf4406e26d 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -86,6 +86,16 @@ struct rt2_hdr { #define rt2_type rt_hdr.type }; +/* + * home address option in destination options header + */ + +struct ipv6_destopt_hao { + __u8 type; + __u8 length; + struct in6_addr addr; +} __attribute__ ((__packed__)); + struct ipv6_auth_hdr { __u8 nexthdr; __u8 hdrlen; /* This one is measured in 32 bit units! */ -- cgit v1.2.3 From a831f5bbc89a9978795504be9e1ff412043f8f77 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 19:24:48 -0700 Subject: [IPV6] MIP6: Add inbound interface of home address option. Add inbound function of home address option by registering it to TLV table for destination options header. Based on MIPL2 kernel patch. This patch was also written by: Ville Nuorvala Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5bf4406e26d..db3b2ba0f4f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -226,6 +226,9 @@ struct inet6_skb_parm { __u16 dst0; __u16 srcrt; __u16 dst1; +#ifdef CONFIG_IPV6_MIP6 + __u16 dsthao; +#endif __u16 lastopt; __u32 nhoff; __u16 flags; -- cgit v1.2.3 From 8dd7368dd97def967bbb3aec67b882e8dfd1a528 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 23 Aug 2006 19:25:55 -0700 Subject: [IPV6]: Put dsthao after flags in order to pack inet6_skb_parm better. Signed-off-by: David S. Miller --- include/linux/ipv6.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index db3b2ba0f4f..1d6d3ccc941 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -226,12 +226,12 @@ struct inet6_skb_parm { __u16 dst0; __u16 srcrt; __u16 dst1; -#ifdef CONFIG_IPV6_MIP6 - __u16 dsthao; -#endif __u16 lastopt; __u32 nhoff; __u16 flags; +#ifdef CONFIG_IPV6_MIP6 + __u16 dsthao; +#endif #define IP6SKB_XFRM_TRANSFORMED 1 }; -- cgit v1.2.3 From 2b741653b6c824fe7520ee92b6795f11c5f24b24 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:34:26 -0700 Subject: [IPV6] MIP6: Add Mobility header definition. Add Mobility header definition for Mobile IPv6. Based on MIPL2 kernel patch. This patch was also written by: Antti Tuominen Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- include/linux/in6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/in6.h b/include/linux/in6.h index 086ec2ac8c5..d776829b443 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -134,6 +134,7 @@ struct in6_flowlabel_req #define IPPROTO_ICMPV6 58 /* ICMPv6 */ #define IPPROTO_NONE 59 /* IPv6 no next header */ #define IPPROTO_DSTOPTS 60 /* IPv6 destination options */ +#define IPPROTO_MH 135 /* IPv6 mobility header */ /* * IPv6 TLV options. -- cgit v1.2.3 From 97a64b4577ae2bc5599dbd008a3cd9e25de9b9f5 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:44:06 -0700 Subject: [XFRM]: Introduce XFRM_MSG_REPORT. XFRM_MSG_REPORT is a message that notifies user-space of the state protocol and selector from the kernel. Mobile IPv6 will use it when an inbound reject occurs during route optimization, to let user-space know that a binding error is required. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- include/linux/xfrm.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 1d8c1f22c12..4009f4445fa 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -166,6 +166,10 @@ enum { #define XFRM_MSG_NEWAE XFRM_MSG_NEWAE XFRM_MSG_GETAE, #define XFRM_MSG_GETAE XFRM_MSG_GETAE + + XFRM_MSG_REPORT, +#define XFRM_MSG_REPORT XFRM_MSG_REPORT + __XFRM_MSG_MAX }; #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) @@ -325,12 +329,18 @@ struct xfrm_usersa_flush { __u8 proto; }; +struct xfrm_user_report { + __u8 proto; + struct xfrm_selector sel; +}; + #ifndef __KERNEL__ /* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 #define XFRMGRP_EXPIRE 2 #define XFRMGRP_SA 4 #define XFRMGRP_POLICY 8 +#define XFRMGRP_REPORT 0x10 #endif enum xfrm_nlgroups { @@ -346,6 +356,8 @@ enum xfrm_nlgroups { #define XFRMNLGRP_POLICY XFRMNLGRP_POLICY XFRMNLGRP_AEVENTS, #define XFRMNLGRP_AEVENTS XFRMNLGRP_AEVENTS + XFRMNLGRP_REPORT, +#define XFRMNLGRP_REPORT XFRMNLGRP_REPORT __XFRMNLGRP_MAX }; #define XFRMNLGRP_MAX (__XFRMNLGRP_MAX - 1) -- cgit v1.2.3 From 4e81bb8336a0ac50289d4d4c7a55e559b994ee8f Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 22:43:30 -0700 Subject: [XFRM] POLICY: sub policy support. Sub policy is introduced. Main and sub policy are applied the same flow. (Policy that current kernel uses is named as main.) It is required another transformation policy management to keep IPsec and Mobile IPv6 lives separate. Policy which lives shorter time in kernel should be a sub i.e. normally main is for IPsec and sub is for Mobile IPv6. (Such usage as two IPsec policies on different database can be used, too.) Limitation or TODOs: - Sub policy is not supported for per socket one (it is always inserted as main). - Current kernel makes cached outbound with flowi to skip searching database. 
However this patch makes it disabled only when "two policies are used and the first matched one is bypass case" because neither flowi nor bundle information knows about transformation template size. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- include/linux/xfrm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 4009f4445fa..492fb981874 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -102,6 +102,13 @@ struct xfrm_stats { __u32 integrity_failed; }; +enum +{ + XFRM_POLICY_TYPE_MAIN = 0, + XFRM_POLICY_TYPE_SUB = 1, + XFRM_POLICY_TYPE_MAX = 2 +}; + enum { XFRM_POLICY_IN = 0, -- cgit v1.2.3 From f7b6983f0feeefcd2a594138adcffe640593d8de Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 22:49:28 -0700 Subject: [XFRM] POLICY: Support netlink socket interface for sub policy. Sub policy can be used through netlink socket. PF_KEY uses main only and it is TODO to support sub. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 492fb981874..14ecd19f4cd 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -230,6 +230,12 @@ enum xfrm_ae_ftype_t { #define XFRM_AE_MAX (__XFRM_AE_MAX - 1) }; +struct xfrm_userpolicy_type { + __u8 type; + __u16 reserved1; + __u8 reserved2; +}; + /* Netlink message attributes. */ enum xfrm_attr_type_t { XFRMA_UNSPEC, @@ -248,6 +254,7 @@ enum xfrm_attr_type_t { XFRMA_SRCADDR, /* xfrm_address_t */ XFRMA_COADDR, /* xfrm_address_t */ XFRMA_LASTUSED, + XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) -- cgit v1.2.3 From f034b5d4efdfe0fb9e2a1ce1d95fa7914f24de49 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Thu, 24 Aug 2006 03:08:07 -0700 Subject: [XFRM]: Dynamic xfrm_state hash table sizing. The grow algorithm is simple, we grow if: 1) we see a hash chain collision at insert, and 2) we haven't hit the hash size limit (currently 1*1024*1024 slots), and 3) the number of xfrm_state objects is > the current hash mask All of this needs some tweaking. Remove __initdata from "hashdist" so we can use it safely at run time. Signed-off-by: David S. Miller --- include/linux/bootmem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 1021f508d82..e319c649e4f 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -114,7 +114,7 @@ extern void *__init alloc_large_system_hash(const char *tablename, #else #define HASHDIST_DEFAULT 0 #endif -extern int __initdata hashdist; /* Distribute hashes across NUMA nodes? */ +extern int hashdist; /* Distribute hashes across NUMA nodes? */ #endif /* _LINUX_BOOTMEM_H */ -- cgit v1.2.3 From 75bff8f023e02b045a8f68f36fa7da98dca124b8 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 21 Aug 2006 19:22:01 +0900 Subject: [IPV6] ROUTE: Routing by FWMARK. Based on patch by Jean Lorchat . 
Signed-off-by: YOSHIFUJI Hideaki --- include/linux/fib_rules.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 19a82b6c1c1..2987549d604 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -34,7 +34,7 @@ enum FRA_UNUSED3, FRA_UNUSED4, FRA_UNUSED5, - FRA_FWMARK, /* netfilter mark (IPv4) */ + FRA_FWMARK, /* netfilter mark (IPv4/IPv6) */ FRA_FLOW, /* flow/class id */ FRA_UNUSED6, FRA_UNUSED7, -- cgit v1.2.3 From 1aaec67f9335a17856dfacdd3e5cc6f4c18faeec Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Sun, 25 Jun 2006 23:54:55 +0900 Subject: [NET]: Add common helper functions to convert IPv6/IPv4 address string to network address structure. These helpers can be used in netfilter, cifs etc. Signed-off-by: YOSHIFUJI Hideaki --- include/linux/inet.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/inet.h b/include/linux/inet.h index 6c5587af118..b7c6da7d6d3 100644 --- a/include/linux/inet.h +++ b/include/linux/inet.h @@ -46,5 +46,7 @@ #include extern __be32 in_aton(const char *str); +extern int in4_pton(const char *src, int srclen, u8 *dst, char delim, const char **end); +extern int in6_pton(const char *src, int srclen, u8 *dst, char delim, const char **end); #endif #endif /* _LINUX_INET_H */ -- cgit v1.2.3 From bbfb39cbf63829d1db607aa90cbdca557a3a131d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 25 Aug 2006 16:10:14 -0700 Subject: [IPV4]: Add support for fwmark masks in routing rules Add a FRA_FWMASK attribute for fwmark masks. For compatibility a mask of 0xFFFFFFFF is used when a mark value != 0 is sent without a mask. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/fib_rules.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 2987549d604..4418c8d9d47 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -34,12 +34,13 @@ enum FRA_UNUSED3, FRA_UNUSED4, FRA_UNUSED5, - FRA_FWMARK, /* netfilter mark (IPv4/IPv6) */ + FRA_FWMARK, /* netfilter mark */ FRA_FLOW, /* flow/class id */ FRA_UNUSED6, FRA_UNUSED7, FRA_UNUSED8, FRA_TABLE, /* Extended table id */ + FRA_FWMASK, /* mask for netfilter mark */ __FRA_MAX }; -- cgit v1.2.3 From b4e9b520ca5d07a37ea59648e7f50f478e7487a3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 25 Aug 2006 16:11:42 -0700 Subject: [NET_SCHED]: Add mask support to fwmark classifier Support masking the nfmark value before the search. The mask value is global for all filters contained in one instance. It can only be set when a new instance is created, all filters must specify the same mask. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/pkt_cls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index bd2c5a2bbbf..c3f01b3085a 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -305,6 +305,7 @@ enum TCA_FW_POLICE, TCA_FW_INDEV, /* used by CONFIG_NET_CLS_IND */ TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */ + TCA_FW_MASK, __TCA_FW_MAX }; -- cgit v1.2.3 From 97e5848dd39e7e76bd6077735ebb5473763ab9c5 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Sat, 26 Aug 2006 19:16:45 -0700 Subject: [DCCP]: Introduce tx buffering This adds transmit buffering to DCCP. I have tested with CCID2/3 and with loss and rate limiting. Signed off by: Ian McDonald Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 676333b9fad..2d7671c92c0 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -438,6 +438,7 @@ struct dccp_ackvec; * @dccps_role - Role of this sock, one of %dccp_role * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_hc_rx_ackvec - rx half connection ack vector + * @dccps_xmit_timer - timer for when CCID is not ready to send */ struct dccp_sock { /* inet_connection_sock has to be the first member of dccp_sock */ @@ -470,6 +471,7 @@ struct dccp_sock { enum dccp_role dccps_role:2; __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; + struct timer_list dccps_xmit_timer; }; static inline struct dccp_sock *dccp_sk(const struct sock *sk) -- cgit v1.2.3 From def42ff4dd6f54ebcf78192579a8ff1f81d8e2e8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 28 Aug 2006 23:57:56 -0700 Subject: [IPV4]: Make struct in_addr::s_addr __be32 There will be relatively small increase in sparse endian warnings, but this (and sin_port) patch is a first step to make networking code endian clean. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/in.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/in.h b/include/linux/in.h index 94f557fa463..9a9d5dd32e7 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -52,7 +52,7 @@ enum { /* Internet address. */ struct in_addr { - __u32 s_addr; + __be32 s_addr; }; #define IP_TOS 1 -- cgit v1.2.3 From cd360007a0eb8cbf17c006cca42aa884d33f96be Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 28 Aug 2006 23:58:32 -0700 Subject: [IPV4]: Make struct sockaddr_in::sin_port __be16 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- include/linux/in.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/in.h b/include/linux/in.h index 9a9d5dd32e7..bcaca8399ae 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -177,7 +177,7 @@ struct in_pktinfo #define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */ struct sockaddr_in { sa_family_t sin_family; /* Address family */ - unsigned short int sin_port; /* Port number */ + __be16 sin_port; /* Port number */ struct in_addr sin_addr; /* Internet address */ /* Pad to size of `struct sockaddr'. */ -- cgit v1.2.3 From 07317621d004e8e6967f2dac8562825267e56135 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 29 Aug 2006 17:48:17 -0700 Subject: [NETFILTER] bridge: code rearrangement for clarity Cleanup and rearrangement for better style and clarity: Split the function nf_bridge_maybe_copy_header into two pieces Move copy portion out of line. Use Ethernet header size macros. Use header file to handle CONFIG_NETFILTER_BRIDGE differences Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/netfilter_bridge.h | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 427c67ff89e..274fe4b3315 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -47,26 +47,12 @@ enum nf_br_hook_priorities { /* Only used in br_forward.c */ -static inline -int nf_bridge_maybe_copy_header(struct sk_buff *skb) +extern int nf_bridge_copy_header(struct sk_buff *skb); +static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb) { - int err; - - if (skb->nf_bridge) { - if (skb->protocol == __constant_htons(ETH_P_8021Q)) { - err = skb_cow(skb, 18); - if (err) - return err; - memcpy(skb->data - 18, skb->nf_bridge->data, 18); - skb_push(skb, 4); - } else { - err = skb_cow(skb, 16); - if (err) - return err; - memcpy(skb->data - 16, skb->nf_bridge->data, 16); - } - } - return 0; + if (skb->nf_bridge) + return nf_bridge_copy_header(skb); + return 0; } /* This is called by the IP fragmenting code and it ensures there is @@ -90,6 +76,8 @@ struct bridge_skb_cb { }; extern int brnf_deferred_hooks; +#else +#define nf_bridge_maybe_copy_header(skb) (0) #endif /* CONFIG_BRIDGE_NETFILTER */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From 9bcfcaf5e9cc887eb39236e43bdbe4b4b2572229 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 29 Aug 2006 17:48:57 -0700 Subject: [NETFILTER] bridge: simplify nf_bridge_pad Do some simple optimization on the nf_bridge_pad() function and don't use magic constants. Eliminate a double call and the #ifdef'd code for CONFIG_BRIDGE_NETFILTER. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/netfilter_bridge.h | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 274fe4b3315..9a4dd11af86 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -5,9 +5,8 @@ */ #include -#if defined(__KERNEL__) && defined(CONFIG_BRIDGE_NETFILTER) #include -#endif +#include /* Bridge Hooks */ /* After promisc drops, checksum checks. */ @@ -57,16 +56,10 @@ static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb) /* This is called by the IP fragmenting code and it ensures there is * enough room for the encapsulating header (if there is one). */ -static inline -int nf_bridge_pad(struct sk_buff *skb) +static inline int nf_bridge_pad(const struct sk_buff *skb) { - if (skb->protocol == __constant_htons(ETH_P_IP)) - return 0; - if (skb->nf_bridge) { - if (skb->protocol == __constant_htons(ETH_P_8021Q)) - return 4; - } - return 0; + return (skb->nf_bridge && skb->protocol == htons(ETH_P_8021Q)) + ? VLAN_HLEN : 0; } struct bridge_skb_cb { @@ -78,6 +71,7 @@ struct bridge_skb_cb { extern int brnf_deferred_hooks; #else #define nf_bridge_maybe_copy_header(skb) (0) +#define nf_bridge_pad(skb) (0) #endif /* CONFIG_BRIDGE_NETFILTER */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From fda9ef5d679b07c9d9097aaf6ef7f069d794a8f9 Mon Sep 17 00:00:00 2001 From: Dmitry Mishin Date: Thu, 31 Aug 2006 15:28:39 -0700 Subject: [NET]: Fix sk->sk_filter field access Function sk_filter() is called from tcp_v{4,6}_rcv() functions with arg needlock = 0, while socket is not locked at that moment. In order to avoid this and similar issues in the future, use rcu for sk->sk_filter field read protection. 
Signed-off-by: Dmitry Mishin Signed-off-by: Alexey Kuznetsov Signed-off-by: Kirill Korotaev --- include/linux/filter.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index c6cb8f09508..91b2e3b9251 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -25,10 +25,10 @@ struct sock_filter /* Filter block */ { - __u16 code; /* Actual filter code */ - __u8 jt; /* Jump true */ - __u8 jf; /* Jump false */ - __u32 k; /* Generic multiuse field */ + __u16 code; /* Actual filter code */ + __u8 jt; /* Jump true */ + __u8 jf; /* Jump false */ + __u32 k; /* Generic multiuse field */ }; struct sock_fprog /* Required for SO_ATTACH_FILTER. */ @@ -41,8 +41,9 @@ struct sock_fprog /* Required for SO_ATTACH_FILTER. */ struct sk_filter { atomic_t refcnt; - unsigned int len; /* Number of filter blocks */ - struct sock_filter insns[0]; + unsigned int len; /* Number of filter blocks */ + struct rcu_head rcu; + struct sock_filter insns[0]; }; static inline unsigned int sk_filter_len(struct sk_filter *fp) -- cgit v1.2.3 From eb328111efde7bca782f340fe805756039ec6a0c Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 18 Sep 2006 00:01:59 -0700 Subject: [GENL]: Provide more information to userspace about registered genl families Additionally exports the following information when providing the list of registered generic netlink families: - protocol version - header size - maximum number of attributes - list of available operations including - id - flags - availability of policy and doit/dumpit function libnl HEAD provides a utility to read this new information: 0x0010 nlctrl version 1 hdrsize 0 maxattr 6 op GETFAMILY (0x03) [POLICY,DOIT,DUMPIT] 0x0011 NLBL_MGMT version 1 hdrsize 0 maxattr 0 op unknown (0x02) [DOIT] op unknown (0x03) [DOIT] .... Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/linux/genetlink.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index 84f12a41dc0..9049dc65ae5 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -16,6 +16,8 @@ struct genlmsghdr { #define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr)) +#define GENL_ADMIN_PERM 0x01 + /* * List of reserved static generic netlink identifiers: */ @@ -43,9 +45,25 @@ enum { CTRL_ATTR_UNSPEC, CTRL_ATTR_FAMILY_ID, CTRL_ATTR_FAMILY_NAME, + CTRL_ATTR_VERSION, + CTRL_ATTR_HDRSIZE, + CTRL_ATTR_MAXATTR, + CTRL_ATTR_OPS, __CTRL_ATTR_MAX, }; #define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1) +enum { + CTRL_ATTR_OP_UNSPEC, + CTRL_ATTR_OP_ID, + CTRL_ATTR_OP_FLAGS, + CTRL_ATTR_OP_POLICY, + CTRL_ATTR_OP_DOIT, + CTRL_ATTR_OP_DUMPIT, + __CTRL_ATTR_OP_MAX, +}; + +#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1) + #endif /* __LINUX_GENERIC_NETLINK_H */ -- cgit v1.2.3 From 1bf38a36b6a0e810dafae048fdbb999e587f0f2f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 11:57:09 -0700 Subject: [NETFILTER]: remove unused include file Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter_logging.h | 33 --------------------------------- 1 file changed, 33 deletions(-) delete mode 100644 include/linux/netfilter_logging.h (limited to 'include/linux') diff --git a/include/linux/netfilter_logging.h b/include/linux/netfilter_logging.h deleted file mode 100644 index 562bb6aad4e..00000000000 --- a/include/linux/netfilter_logging.h +++ /dev/null @@ -1,33 +0,0 @@ -/* Internal logging interface, which relies on the real - LOG target modules */ -#ifndef __LINUX_NETFILTER_LOGGING_H -#define __LINUX_NETFILTER_LOGGING_H - -#ifdef __KERNEL__ -#include - -struct nf_logging_t { - void (*nf_log_packet)(struct sk_buff **pskb, - unsigned int hooknum, - const struct net_device *in, - const struct net_device *out, - const char *prefix); - void (*nf_log)(char *pfh, size_t len, - const char *prefix); -}; - -extern void nf_log_register(int pf, const struct nf_logging_t *logging); -extern void nf_log_unregister(int pf, const struct nf_logging_t *logging); - -extern void nf_log_packet(int pf, - struct sk_buff **pskb, - unsigned int hooknum, - const struct net_device *in, - const struct net_device *out, - const char *fmt, ...); -extern void nf_log(int pf, - char *pfh, size_t len, - const char *fmt, ...); -#endif /*__KERNEL__*/ - -#endif /*__LINUX_NETFILTER_LOGGING_H*/ -- cgit v1.2.3 From df0933dcb027e156cb5253570ad694b81bd52b69 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 11:57:53 -0700 Subject: [NETFILTER]: kill listhelp.h Kill listhelp.h and use the list.h functions instead. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter/x_tables.h | 4 -- include/linux/netfilter_ipv4/listhelp.h | 123 -------------------------------- 2 files changed, 127 deletions(-) delete mode 100644 include/linux/netfilter_ipv4/listhelp.h (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 03d1027fb0e..c832295dbf6 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -138,10 +138,6 @@ struct xt_counters_info #include -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) -#include - #ifdef CONFIG_COMPAT #define COMPAT_TO_USER 1 #define COMPAT_FROM_USER -1 diff --git a/include/linux/netfilter_ipv4/listhelp.h b/include/linux/netfilter_ipv4/listhelp.h deleted file mode 100644 index 5d92cf044d9..00000000000 --- a/include/linux/netfilter_ipv4/listhelp.h +++ /dev/null @@ -1,123 +0,0 @@ -#ifndef _LISTHELP_H -#define _LISTHELP_H -#include - -/* Header to do more comprehensive job than linux/list.h; assume list - is first entry in structure. */ - -/* Return pointer to first true entry, if any, or NULL. A macro - required to allow inlining of cmpfn. */ -#define LIST_FIND(head, cmpfn, type, args...) \ -({ \ - const struct list_head *__i, *__j = NULL; \ - \ - ASSERT_READ_LOCK(head); \ - list_for_each(__i, (head)) \ - if (cmpfn((const type)__i , ## args)) { \ - __j = __i; \ - break; \ - } \ - (type)__j; \ -}) - -#define LIST_FIND_W(head, cmpfn, type, args...) \ -({ \ - const struct list_head *__i, *__j = NULL; \ - \ - ASSERT_WRITE_LOCK(head); \ - list_for_each(__i, (head)) \ - if (cmpfn((type)__i , ## args)) { \ - __j = __i; \ - break; \ - } \ - (type)__j; \ -}) - -/* Just like LIST_FIND but we search backwards */ -#define LIST_FIND_B(head, cmpfn, type, args...) 
\ -({ \ - const struct list_head *__i, *__j = NULL; \ - \ - ASSERT_READ_LOCK(head); \ - list_for_each_prev(__i, (head)) \ - if (cmpfn((const type)__i , ## args)) { \ - __j = __i; \ - break; \ - } \ - (type)__j; \ -}) - -static inline int -__list_cmp_same(const void *p1, const void *p2) { return p1 == p2; } - -/* Is this entry in the list? */ -static inline int -list_inlist(struct list_head *head, const void *entry) -{ - return LIST_FIND(head, __list_cmp_same, void *, entry) != NULL; -} - -/* Delete from list. */ -#ifdef CONFIG_NETFILTER_DEBUG -#define LIST_DELETE(head, oldentry) \ -do { \ - ASSERT_WRITE_LOCK(head); \ - if (!list_inlist(head, oldentry)) \ - printk("LIST_DELETE: %s:%u `%s'(%p) not in %s.\n", \ - __FILE__, __LINE__, #oldentry, oldentry, #head); \ - else list_del((struct list_head *)oldentry); \ -} while(0) -#else -#define LIST_DELETE(head, oldentry) list_del((struct list_head *)oldentry) -#endif - -/* Append. */ -static inline void -list_append(struct list_head *head, void *new) -{ - ASSERT_WRITE_LOCK(head); - list_add((new), (head)->prev); -} - -/* Prepend. */ -static inline void -list_prepend(struct list_head *head, void *new) -{ - ASSERT_WRITE_LOCK(head); - list_add(new, head); -} - -/* Insert according to ordering function; insert before first true. */ -#define LIST_INSERT(head, new, cmpfn) \ -do { \ - struct list_head *__i; \ - ASSERT_WRITE_LOCK(head); \ - list_for_each(__i, (head)) \ - if ((new), (typeof (new))__i) \ - break; \ - list_add((struct list_head *)(new), __i->prev); \ -} while(0) - -/* If the field after the list_head is a nul-terminated string, you - can use these functions. */ -static inline int __list_cmp_name(const void *i, const char *name) -{ - return strcmp(name, i+sizeof(struct list_head)) == 0; -} - -/* Returns false if same name already in list, otherwise does insert. 
*/ -static inline int -list_named_insert(struct list_head *head, void *new) -{ - if (LIST_FIND(head, __list_cmp_name, void *, - new + sizeof(struct list_head))) - return 0; - list_prepend(head, new); - return 1; -} - -/* Find this named element in the list. */ -#define list_named_find(head, name) \ -LIST_FIND(head, __list_cmp_name, void *, name) - -#endif /*_LISTHELP_H*/ -- cgit v1.2.3 From 9123de2c043996050bacf77031cad845f5976f5d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 11:59:42 -0700 Subject: [NETFILTER]: ip6table_mangle: reroute when nfmark changes in NF_IP6_LOCAL_OUT Now that IPv6 supports policy routing we need to reroute in NF_IP6_LOCAL_OUT when the mark value changes. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 52a7b9e7642..d97e268cdfe 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -73,6 +73,7 @@ enum nf_ip6_hook_priorities { }; #ifdef CONFIG_NETFILTER +extern int ip6_route_me_harder(struct sk_buff *skb); extern unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); -- cgit v1.2.3 From c1fe3ca5106d9568791433fa6c7f27e71ac69e1b Mon Sep 17 00:00:00 2001 From: George Hansper Date: Wed, 20 Sep 2006 12:03:23 -0700 Subject: [NETFILTER]: TCP conntrack: improve dead connection detection Don't count window updates as retransmissions. 
Signed-off-by: George Hansper Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h index b2feeffde38..6b01ba29772 100644 --- a/include/linux/netfilter/nf_conntrack_tcp.h +++ b/include/linux/netfilter/nf_conntrack_tcp.h @@ -49,6 +49,7 @@ struct ip_ct_tcp u_int32_t last_seq; /* Last sequence number seen in dir */ u_int32_t last_ack; /* Last sequence number seen in opposite dir */ u_int32_t last_end; /* Last seq + len */ + u_int16_t last_win; /* Last window advertisement seen in dir */ }; #endif /* __KERNEL__ */ -- cgit v1.2.3 From 9fa492cdc160cd27ce1046cb36f47d3b2b1efa21 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:05:37 -0700 Subject: [NETFILTER]: x_tables: simplify compat API Split the xt_compat_match/xt_compat_target into smaller type-safe functions performing just one operation. Handle all alignment and size-related conversions centrally in these functions instead of requiring each module to implement a full-blown conversion function. Replace ->compat callback by ->compat_from_user and ->compat_to_user callbacks, responsible for converting just a single private structure. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter/x_tables.h | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index c832295dbf6..739a98eebe2 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -138,12 +138,6 @@ struct xt_counters_info #include -#ifdef CONFIG_COMPAT -#define COMPAT_TO_USER 1 -#define COMPAT_FROM_USER -1 -#define COMPAT_CALC_SIZE 0 -#endif - struct xt_match { struct list_head list; @@ -176,7 +170,8 @@ struct xt_match void (*destroy)(const struct xt_match *match, void *matchinfo); /* Called when userspace align differs from kernel space one */ - int (*compat)(void *match, void **dstptr, int *size, int convert); + void (*compat_from_user)(void *dst, void *src); + int (*compat_to_user)(void __user *dst, void *src); /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; @@ -186,6 +181,7 @@ struct xt_match char *table; unsigned int matchsize; + unsigned int compatsize; unsigned int hooks; unsigned short proto; @@ -224,13 +220,15 @@ struct xt_target void (*destroy)(const struct xt_target *target, void *targinfo); /* Called when userspace align differs from kernel space one */ - int (*compat)(void *target, void **dstptr, int *size, int convert); + void (*compat_from_user)(void *dst, void *src); + int (*compat_to_user)(void __user *dst, void *src); /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; char *table; unsigned int targetsize; + unsigned int compatsize; unsigned int hooks; unsigned short proto; @@ -387,9 +385,18 @@ struct compat_xt_counters_info extern void xt_compat_lock(int af); extern void xt_compat_unlock(int af); -extern int xt_compat_match(void *match, void **dstptr, int *size, int convert); -extern int xt_compat_target(void *target, void **dstptr, int *size, - int convert); + +extern int 
xt_compat_match_offset(struct xt_match *match); +extern void xt_compat_match_from_user(struct xt_entry_match *m, + void **dstptr, int *size); +extern int xt_compat_match_to_user(struct xt_entry_match *m, + void * __user *dstptr, int *size); + +extern int xt_compat_target_offset(struct xt_target *target); +extern void xt_compat_target_from_user(struct xt_entry_target *t, + void **dstptr, int *size); +extern int xt_compat_target_to_user(struct xt_entry_target *t, + void * __user *dstptr, int *size); #endif /* CONFIG_COMPAT */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From edd5a329cf69c112882e03c8ab55e985062a5d2a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:07:39 -0700 Subject: [NETFILTER]: PPTP conntrack: fix whitespace errors Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_pptp.h | 26 ++++++++++++------------ 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h index 816144c75de..88f66d3c876 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -285,19 +285,19 @@ struct PptpSetLinkInfo { }; union pptp_ctrl_union { - struct PptpStartSessionRequest sreq; - struct PptpStartSessionReply srep; - struct PptpStopSessionRequest streq; - struct PptpStopSessionReply strep; - struct PptpOutCallRequest ocreq; - struct PptpOutCallReply ocack; - struct PptpInCallRequest icreq; - struct PptpInCallReply icack; - struct PptpInCallConnected iccon; - struct PptpClearCallRequest clrreq; - struct PptpCallDisconnectNotify disc; - struct PptpWanErrorNotify wanerr; - struct PptpSetLinkInfo setlink; + struct PptpStartSessionRequest sreq; + struct PptpStartSessionReply srep; + struct PptpStopSessionRequest streq; + struct PptpStopSessionReply strep; + struct PptpOutCallRequest ocreq; + struct 
PptpOutCallReply ocack; + struct PptpInCallRequest icreq; + struct PptpInCallReply icack; + struct PptpInCallConnected iccon; + struct PptpClearCallRequest clrreq; + struct PptpCallDisconnectNotify disc; + struct PptpWanErrorNotify wanerr; + struct PptpSetLinkInfo setlink; }; extern int -- cgit v1.2.3 From 955b944293dd4c931ec866ebe19a6b2463b8f9a0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:08:03 -0700 Subject: [NETFILTER]: PPTP conntrack: get rid of unnecessary byte order conversions The conntrack structure contains the call ID in host byte order for no reason, get rid of back and forth conversions. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_pptp.h | 8 ++++---- .../linux/netfilter_ipv4/ip_conntrack_proto_gre.h | 22 +++++++++++----------- include/linux/netfilter_ipv4/ip_nat_pptp.h | 4 ++-- 3 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h index 88f66d3c876..0d35623f945 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -31,8 +31,8 @@ struct ip_ct_pptp_master { /* everything below is going to be per-expectation in newnat, * since there could be more than one call within one session */ enum pptp_ctrlcall_state cstate; /* call state */ - u_int16_t pac_call_id; /* call id of PAC, host byte order */ - u_int16_t pns_call_id; /* call id of PNS, host byte order */ + __be16 pac_call_id; /* call id of PAC, host byte order */ + __be16 pns_call_id; /* call id of PNS, host byte order */ /* in pre-2.6.11 this used to be per-expect. 
Now it is per-conntrack * and therefore imposes a fixed limit on the number of maps */ @@ -42,8 +42,8 @@ struct ip_ct_pptp_master { /* conntrack_expect private member */ struct ip_ct_pptp_expect { enum pptp_ctrlcall_state cstate; /* call state */ - u_int16_t pac_call_id; /* call id of PAC */ - u_int16_t pns_call_id; /* call id of PNS */ + __be16 pac_call_id; /* call id of PAC */ + __be16 pns_call_id; /* call id of PNS */ }; diff --git a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h index 8d090ef82f5..1d853aa873e 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h @@ -49,18 +49,18 @@ struct gre_hdr { #else #error "Adjust your defines" #endif - __u16 protocol; + __be16 protocol; }; /* modified GRE header for PPTP */ struct gre_hdr_pptp { - __u8 flags; /* bitfield */ - __u8 version; /* should be GRE_VERSION_PPTP */ - __u16 protocol; /* should be GRE_PROTOCOL_PPTP */ - __u16 payload_len; /* size of ppp payload, not inc. gre header */ - __u16 call_id; /* peer's call_id for this session */ - __u32 seq; /* sequence number. Present if S==1 */ - __u32 ack; /* seq number of highest packet recieved by */ + __u8 flags; /* bitfield */ + __u8 version; /* should be GRE_VERSION_PPTP */ + __be16 protocol; /* should be GRE_PROTOCOL_PPTP */ + __be16 payload_len; /* size of ppp payload, not inc. gre header */ + __be16 call_id; /* peer's call_id for this session */ + __be32 seq; /* sequence number. 
Present if S==1 */ + __be32 ack; /* seq number of highest packet recieved by */ /* sender in this session */ }; @@ -92,13 +92,13 @@ void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct); /* get pointer to gre key, if present */ -static inline u_int32_t *gre_key(struct gre_hdr *greh) +static inline __be32 *gre_key(struct gre_hdr *greh) { if (!greh->key) return NULL; if (greh->csum || greh->routing) - return (u_int32_t *) (greh+sizeof(*greh)+4); - return (u_int32_t *) (greh+sizeof(*greh)); + return (__be32 *) (greh+sizeof(*greh)+4); + return (__be32 *) (greh+sizeof(*greh)); } /* get pointer ot gre csum, if present */ diff --git a/include/linux/netfilter_ipv4/ip_nat_pptp.h b/include/linux/netfilter_ipv4/ip_nat_pptp.h index eaf66c2e8f9..36668bf0f37 100644 --- a/include/linux/netfilter_ipv4/ip_nat_pptp.h +++ b/include/linux/netfilter_ipv4/ip_nat_pptp.h @@ -4,8 +4,8 @@ /* conntrack private data */ struct ip_nat_pptp { - u_int16_t pns_call_id; /* NAT'ed PNS call id */ - u_int16_t pac_call_id; /* NAT'ed PAC call id */ + __be16 pns_call_id; /* NAT'ed PNS call id */ + __be16 pac_call_id; /* NAT'ed PAC call id */ }; #endif /* _NAT_PPTP_H */ -- cgit v1.2.3 From 6013c0a13e335674a783215e182c367406294392 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:08:56 -0700 Subject: [NETFILTER]: PPTP conntrack: fix header definitions Fix a few header definitions to match RFC2637. Most importantly the PptpOutCallRequest header included an invalid padding field and a size check was disabled because of this. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ip_conntrack_pptp.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h index 0d35623f945..620bf06fabc 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -107,8 +107,7 @@ struct PptpControlHeader { struct PptpStartSessionRequest { __be16 protocolVersion; - __u8 reserved1; - __u8 reserved2; + __u16 reserved1; __be32 framingCapability; __be32 bearerCapability; __be16 maxChannels; @@ -143,6 +142,8 @@ struct PptpStartSessionReply { struct PptpStopSessionRequest { __u8 reason; + __u8 reserved1; + __u16 reserved2; }; /* PptpStopSessionResultCode */ @@ -152,6 +153,7 @@ struct PptpStopSessionRequest { struct PptpStopSessionReply { __u8 resultCode; __u8 generalErrorCode; + __u16 reserved1; }; struct PptpEchoRequest { @@ -188,9 +190,8 @@ struct PptpOutCallRequest { __be32 framingType; __be16 packetWindow; __be16 packetProcDelay; - __u16 reserved1; __be16 phoneNumberLength; - __u16 reserved2; + __u16 reserved1; __u8 phoneNumber[64]; __u8 subAddress[64]; }; -- cgit v1.2.3 From cf9f81523ef3e95d9f222c896d266e4562999150 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:09:34 -0700 Subject: [NETFILTER]: PPTP conntrack: simplify expectation handling Remove duplicated expectation handling in the NAT helper and simplify the remains in the conntrack helper. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ip_conntrack_pptp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h index 620bf06fabc..2644b1faddd 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -315,7 +315,7 @@ extern int struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq); -extern int +extern void (*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *exp_orig, struct ip_conntrack_expect *exp_reply); -- cgit v1.2.3 From 4c5de695cf7f71c85ad8cfff509f6475b8bd4d27 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:11:30 -0700 Subject: [NETFILTER]: PPTP conntrack: fix another GRE keymap leak When the master PPTP connection times out while still having unfulfilled expectations (and a GRE keymap entry) associated with it, the keymap entry is not destroyed. Add a destroy callback to struct ip_conntrack_helper and use it to destroy PPTP siblings when the master is destroyed. Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- include/linux/netfilter_ipv4/ip_conntrack_helper.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper.h b/include/linux/netfilter_ipv4/ip_conntrack_helper.h index 8d69279ccfe..77fe868d36f 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_helper.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_helper.h @@ -25,6 +25,8 @@ struct ip_conntrack_helper struct ip_conntrack *ct, enum ip_conntrack_info conntrackinfo); + void (*destroy)(struct ip_conntrack *ct); + int (*to_nfattr)(struct sk_buff *skb, const struct ip_conntrack *ct); }; -- cgit v1.2.3 From fbea49e1e2404baa2d88ab47e2db89e49551b53b Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 22 Sep 2006 14:43:49 -0700 Subject: [IPV6] NDISC: Add proxy_ndp sysctl. We do not always need proxy NDP functionality even when we enable forwarding. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 ++ include/linux/sysctl.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 1d6d3ccc941..caca57df0d7 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -176,6 +176,7 @@ struct ipv6_devconf { __s32 accept_ra_rt_info_max_plen; #endif #endif + __s32 proxy_ndp; void *sysctl; }; @@ -203,6 +204,7 @@ enum { DEVCONF_ACCEPT_RA_RTR_PREF, DEVCONF_RTR_PROBE_INTERVAL, DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN, + DEVCONF_PROXY_NDP, DEVCONF_MAX }; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index af61d923540..736ed917a4f 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -556,6 +556,7 @@ enum { NET_IPV6_ACCEPT_RA_RTR_PREF=20, NET_IPV6_RTR_PROBE_INTERVAL=21, NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN=22, + NET_IPV6_PROXY_NDP=23, __NET_IPV6_MAX }; -- cgit v1.2.3 From 55ebaef1d5db9c1c76ba01a87fd986db5dee550d Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Fri, 22 Sep 2006 14:45:27 -0700 Subject:
[IPV6] ADDRCONF: Allow non-DAD'able addresses. IFA_F_NODAD flag, similar to IN6_IFF_NODAD in BSDs, is introduced to skip DAD. This flag should be set to Mobile IPv6 Home Address(es) on Mobile Node because DAD would fail if we should perform DAD; our Home Agent protects our Home Address(es). Signed-off-by: Noriaki TAKAMIYA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/if_addr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h index e1590454db5..ca24b9de13f 100644 --- a/include/linux/if_addr.h +++ b/include/linux/if_addr.h @@ -38,6 +38,7 @@ enum #define IFA_F_SECONDARY 0x01 #define IFA_F_TEMPORARY IFA_F_SECONDARY +#define IFA_F_NODAD 0x02 #define IFA_F_DEPRECATED 0x20 #define IFA_F_TENTATIVE 0x40 #define IFA_F_PERMANENT 0x80 -- cgit v1.2.3 From 3b9f9a1c3903b64c38505f9fed3bb11e48dbc931 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Fri, 22 Sep 2006 14:45:56 -0700 Subject: [IPV6] ADDRCONF: Mobile IPv6 Home Address support. IFA_F_HOMEADDRESS is introduced for Mobile IPv6 Home Addresses on Mobile Node. The IFA_F_HOMEADDRESS flag should be set for Mobile IPv6 Home Addresses for 2 purposes. 1) We need to check this on receipt of Type 2 Routing Header (RFC3775 Section 6.4), 2) We prefer Home Address(es) in source address selection (RFC3484 Section 5 Rule 4). Signed-off-by: Noriaki TAKAMIYA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S.
Miller --- include/linux/if_addr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h index ca24b9de13f..dbe8f6120a4 100644 --- a/include/linux/if_addr.h +++ b/include/linux/if_addr.h @@ -39,6 +39,7 @@ enum #define IFA_F_TEMPORARY IFA_F_SECONDARY #define IFA_F_NODAD 0x02 +#define IFA_F_HOMEADDRESS 0x10 #define IFA_F_DEPRECATED 0x20 #define IFA_F_TENTATIVE 0x40 #define IFA_F_PERMANENT 0x80 -- cgit v1.2.3 From 1c3c07e9f6cc50dab2aeb8051325e317d4f6c70e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 Jul 2006 11:28:18 -0400 Subject: NFS: Add a new ACCESS rpc call cache to the linux nfs client The current access cache only allows one entry at a time to be cached for each inode. Add a per-inode red-black tree in order to allow more than one to be cached at a time. Should significantly cut down the time spent in path traversal for shared directories such as ${PATH}, /usr/share, etc. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 6c2066caeaa..cc013ed2e52 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -69,6 +70,7 @@ * NFSv3/v4 Access mode cache entry */ struct nfs_access_entry { + struct rb_node rb_node; unsigned long jiffies; struct rpc_cred * cred; int mask; @@ -145,7 +147,7 @@ struct nfs_inode { */ atomic_t data_updates; - struct nfs_access_entry cache_access; + struct rb_root access_cache; #ifdef CONFIG_NFS_V3_ACL struct posix_acl *acl_access; struct posix_acl *acl_default; @@ -297,6 +299,7 @@ extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int nfs_permission(struct inode *, int, struct nameidata *); extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *); 
extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); +extern void nfs_access_zap_cache(struct inode *inode); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); extern int nfs_attribute_timeout(struct inode *inode); -- cgit v1.2.3 From cfcea3e8c66c2dcde98d5c2693d4bff50b5cac97 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 Jul 2006 11:28:18 -0400 Subject: NFS: Add a global LRU list for the ACCESS cache ...in order to allow the addition of a memory shrinker. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index cc013ed2e52..a36e01cd632 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -71,6 +71,7 @@ */ struct nfs_access_entry { struct rb_node rb_node; + struct list_head lru; unsigned long jiffies; struct rpc_cred * cred; int mask; @@ -148,6 +149,8 @@ struct nfs_inode { atomic_t data_updates; struct rb_root access_cache; + struct list_head access_cache_entry_lru; + struct list_head access_cache_inode_lru; #ifdef CONFIG_NFS_V3_ACL struct posix_acl *acl_access; struct posix_acl *acl_default; @@ -201,6 +204,7 @@ struct nfs_inode { #define NFS_INO_REVALIDATING (0) /* revalidating attrs */ #define NFS_INO_ADVISE_RDPLUS (1) /* advise readdirplus */ #define NFS_INO_STALE (2) /* possible stale inode */ +#define NFS_INO_ACL_LRU_SET (3) /* Inode is on the LRU list */ static inline struct nfs_inode *NFS_I(struct inode *inode) { -- cgit v1.2.3 From 770bfad846ab6628444428467b11fa6773ae9ea1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:07 -0400 Subject: NFS: Add dentry materialisation op The attached patch adds a new directory cache management function that prepares a disconnected anonymous dentry to be connected into the dentry tree.
The anonymous dentry is transferred the name and parentage from another dentry. The following changes were made in [try #2]: (*) d_materialise_dentry() now switches the parentage of the two nodes around correctly when one or other of them is self-referential. The following changes were made in [try #7]: (*) d_instantiate_unique() has had the interior part split out as function __d_instantiate_unique(). Callers of this latter function must be holding the appropriate locks. (*) _d_rehash() has been added as a wrapper around __d_rehash() to call it with the most obvious hash list (the one from the name). d_rehash() now calls _d_rehash(). (*) d_materialise_dentry() is now __d_materialise_dentry() and is static. (*) d_materialise_unique() added to perform the combination of d_find_alias(), d_materialise_dentry() and d_add_unique() that the NFS client was doing twice, all within a single dcache_lock critical section. This reduces the number of times two different spinlocks were being accessed. The following further changes were made: (*) Add the dentries onto their parents d_subdirs lists. 
Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- include/linux/dcache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 471781ffeab..44605be5940 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -221,6 +221,7 @@ static inline int dname_external(struct dentry *dentry) */ extern void d_instantiate(struct dentry *, struct inode *); extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); +extern struct dentry * d_materialise_unique(struct dentry *, struct inode *); extern void d_delete(struct dentry *); /* allocate/de-allocate */ -- cgit v1.2.3 From adfa6f980bd46974e6b32b22dd0c45e3f52063f4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:08 -0400 Subject: NFS: Rename struct nfs4_client to struct nfs_client Rename struct nfs4_client to struct nfs_client so that it can become the basis for a general client record for NFS2 and NFS3 in addition to NFS4. 
Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 2 +- include/linux/nfs_idmap.h | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 6b4a13c7947..4db90df2aed 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -43,7 +43,7 @@ struct nfs_server { */ char ip_addr[16]; char * mnt_path; - struct nfs4_client * nfs4_state; /* all NFSv4 state starts here */ + struct nfs_client * nfs4_state; /* all NFSv4 state starts here */ struct list_head nfs4_siblings; /* List of other nfs_server structs * that share the same clientid */ diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 102e5609429..678fe68982e 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -62,15 +62,15 @@ struct idmap_msg { #ifdef __KERNEL__ /* Forward declaration to make this header independent of others */ -struct nfs4_client; +struct nfs_client; -void nfs_idmap_new(struct nfs4_client *); -void nfs_idmap_delete(struct nfs4_client *); +void nfs_idmap_new(struct nfs_client *); +void nfs_idmap_delete(struct nfs_client *); -int nfs_map_name_to_uid(struct nfs4_client *, const char *, size_t, __u32 *); -int nfs_map_group_to_gid(struct nfs4_client *, const char *, size_t, __u32 *); -int nfs_map_uid_to_name(struct nfs4_client *, __u32, char *); -int nfs_map_gid_to_group(struct nfs4_client *, __u32, char *); +int nfs_map_name_to_uid(struct nfs_client *, const char *, size_t, __u32 *); +int nfs_map_group_to_gid(struct nfs_client *, const char *, size_t, __u32 *); +int nfs_map_uid_to_name(struct nfs_client *, __u32, char *); +int nfs_map_gid_to_group(struct nfs_client *, __u32, char *); extern unsigned int nfs_idmap_cache_timeout; #endif /* __KERNEL__ */ -- cgit v1.2.3 From 7539bbab8062aadc1db95a22b377146843cfa88f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:09 -0400 
Subject: NFS: Rename nfs_server::nfs4_state Rename nfs_server::nfs4_state to nfs_client as it will be used to represent the client state for NFS2 and NFS3 also. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 4db90df2aed..fc20d6b934f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -43,7 +43,7 @@ struct nfs_server { */ char ip_addr[16]; char * mnt_path; - struct nfs_client * nfs4_state; /* all NFSv4 state starts here */ + struct nfs_client * nfs_client; /* all NFSv4 state starts here */ struct list_head nfs4_siblings; /* List of other nfs_server structs * that share the same clientid */ -- cgit v1.2.3 From b7162792b5c0e0f6e91b8997f8e6bbc76ec5420a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:09 -0400 Subject: NFS: Return an error when starting the idmapping pipe Return an error when starting the idmapping pipe so that we can detect it failing. 
Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- include/linux/nfs_idmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 678fe68982e..15a9f3b7289 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -64,7 +64,7 @@ struct idmap_msg { /* Forward declaration to make this header independent of others */ struct nfs_client; -void nfs_idmap_new(struct nfs_client *); +int nfs_idmap_new(struct nfs_client *); void nfs_idmap_delete(struct nfs_client *); int nfs_map_name_to_uid(struct nfs_client *, const char *, size_t, __u32 *); -- cgit v1.2.3 From 2b3de4411b3ccaeb00018c99d1bbe7203554cf7f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:09 -0400 Subject: NFS: Add a lookupfh NFS RPC op Add a lookup filehandle NFS RPC op so that a file handle can be looked up without requiring dentries and inodes and other VFS stuff when doing an NFS4 pathwalk during mounting. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 41e5a19199e..26879771831 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -770,6 +770,9 @@ struct nfs_rpc_ops { int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); + int (*lookupfh)(struct nfs_server *, struct nfs_fh *, + struct qstr *, struct nfs_fh *, + struct nfs_fattr *); int (*getattr) (struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); int (*setattr) (struct dentry *, struct nfs_fattr *, -- cgit v1.2.3 From e9326dcab413848e70ab746c7c5363da13e5f801 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:10 -0400 Subject: NFS: Add a server capabilities NFS RPC op Add a set_capabilities NFS RPC op so that the server capabilities can be set. 
Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 26879771831..dd9ae6761f7 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -809,6 +809,7 @@ struct nfs_rpc_ops { struct nfs_fsinfo *); int (*pathconf) (struct nfs_server *, struct nfs_fh *, struct nfs_pathconf *); + int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus); void (*read_setup) (struct nfs_read_data *); int (*read_done) (struct rpc_task *, struct nfs_read_data *); -- cgit v1.2.3 From 24c8dbbb5f777187d660393599641ab3307b4b97 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:10 -0400 Subject: NFS: Generalise the nfs_client structure Generalise the nfs_client structure by: (1) Moving nfs_client to a more general place (nfs_fs_sb.h). (2) Renaming its maintenance routines to be non-NFS4 specific. (3) Move those maintenance routines to a new non-NFS4 specific file (client.c) and move the declarations to internal.h. (4) Make nfs_find/get_client() take a full sockaddr_in to include the port number (will be required for NFS2/3). (5) Make nfs_find/get_client() take the NFS protocol version (again will be required to differentiate NFS2, 3 & 4 client records). Also: (6) Make nfs_client construction proceed akin to inodes, marking them as under construction and providing a function to indicate completion. (7) Make nfs_get_client() wait interruptibly if it finds a client that it can share, but that client is currently being constructed. (8) Make nfs4_create_client() use (6) and (7) instead of locking cl_sem. 
Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + include/linux/nfs_fs_sb.h | 60 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a36e01cd632..70e1dc9162e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -586,6 +586,7 @@ extern void * nfs_root_data(void); #define NFSDBG_FILE 0x0040 #define NFSDBG_ROOT 0x0080 #define NFSDBG_CALLBACK 0x0100 +#define NFSDBG_CLIENT 0x0200 #define NFSDBG_ALL 0xFFFF #ifdef __KERNEL__ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index fc20d6b934f..a727657e0ad 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -6,6 +6,66 @@ struct nfs_iostats; +/* + * The nfs_client identifies our client state to the server. + */ +struct nfs_client { + atomic_t cl_count; + int cl_cons_state; /* current construction state (-ve: init error) */ +#define NFS_CS_READY 0 /* ready to be used */ +#define NFS_CS_INITING 1 /* busy initialising */ + int cl_nfsversion; /* NFS protocol version */ + unsigned long cl_res_state; /* NFS resources state */ +#define NFS_CS_RPCIOD 0 /* - rpciod started */ +#define NFS_CS_CALLBACK 1 /* - callback started */ +#define NFS_CS_IDMAP 2 /* - idmap started */ + struct sockaddr_in cl_addr; /* server identifier */ + char * cl_hostname; /* hostname of server */ + struct list_head cl_share_link; /* link in global client list */ + struct list_head cl_superblocks; /* List of nfs_server structs */ + + struct rpc_clnt * cl_rpcclient; + +#ifdef CONFIG_NFS_V4 + u64 cl_clientid; /* constant */ + nfs4_verifier cl_confirm; + unsigned long cl_state; + + u32 cl_lockowner_id; + + /* + * The following rwsem ensures exclusive access to the server + * while we recover the state following a lease expiration. 
+ */ + struct rw_semaphore cl_sem; + + struct list_head cl_delegations; + struct list_head cl_state_owners; + struct list_head cl_unused; + int cl_nunused; + spinlock_t cl_lock; + + unsigned long cl_lease_time; + unsigned long cl_last_renewal; + struct work_struct cl_renewd; + struct work_struct cl_recoverd; + + struct rpc_wait_queue cl_rpcwaitq; + + /* used for the setclientid verifier */ + struct timespec cl_boot_time; + + /* idmapper */ + struct idmap * cl_idmap; + + /* Our own IP address, as a null-terminated string. + * This is used to generate the clientid, and the callback address. + */ + char cl_ipaddr[16]; + unsigned char cl_id_uniquifier; +#endif +}; + /* * NFS client parameters stored in the superblock. */ -- cgit v1.2.3 From 509de8111656a7d89b4a1a5f430f4460ce510f0f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:11 -0400 Subject: NFS: Add extra const qualifiers Add some extra const qualifiers into NFS. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 2 +- include/linux/nfs_xdr.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a727657e0ad..95f32d5f6e9 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -73,7 +73,7 @@ struct nfs_server { struct rpc_clnt * client; /* RPC client handle */ struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ - struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ + const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */ struct nfs_iostats * io_stats; /* I/O statistics */ struct backing_dev_info backing_dev_info; int flags; /* various flags */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index dd9ae6761f7..2426b11b6cc 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -833,9 +833,9 @@ struct nfs_rpc_ops { /* * Function 
vectors etc. for the NFS client */ -extern struct nfs_rpc_ops nfs_v2_clientops; -extern struct nfs_rpc_ops nfs_v3_clientops; -extern struct nfs_rpc_ops nfs_v4_clientops; +extern const struct nfs_rpc_ops nfs_v2_clientops; +extern const struct nfs_rpc_ops nfs_v3_clientops; +extern const struct nfs_rpc_ops nfs_v4_clientops; extern struct rpc_version nfs_version2; extern struct rpc_version nfs_version3; extern struct rpc_version nfs_version4; -- cgit v1.2.3 From 27951bd26031f6c27d38df9e94623bbe208a2464 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:11 -0400 Subject: NFS: Maintain a common server record for NFS2/3 as well as for NFS4 Maintain a common server record for NFS2/3 as well as for NFS4 so that common stuff can be moved there from struct nfs_server. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 95f32d5f6e9..e7d7662f51f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -70,6 +70,7 @@ struct nfs_client { * NFS client parameters stored in the superblock. 
*/ struct nfs_server { + struct nfs_client * nfs_client; /* shared client and NFS4 state */ struct rpc_clnt * client; /* RPC client handle */ struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ @@ -103,7 +104,6 @@ struct nfs_server { */ char ip_addr[16]; char * mnt_path; - struct nfs_client * nfs_client; /* all NFSv4 state starts here */ struct list_head nfs4_siblings; /* List of other nfs_server structs * that share the same clientid */ -- cgit v1.2.3 From 8fa5c000d7f986ef9cdc6d95f9f7fcee20e0a7d6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:12 -0400 Subject: NFS: Move rpc_ops from nfs_server to nfs_client Move the rpc_ops from the nfs_server struct to the nfs_client struct as they're common to all server records of a particular NFS protocol version. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 2 +- include/linux/nfs_fs_sb.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 70e1dc9162e..51e9bd90ded 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -215,7 +215,7 @@ static inline struct nfs_inode *NFS_I(struct inode *inode) #define NFS_FH(inode) (&NFS_I(inode)->fh) #define NFS_SERVER(inode) (NFS_SB(inode->i_sb)) #define NFS_CLIENT(inode) (NFS_SERVER(inode)->client) -#define NFS_PROTO(inode) (NFS_SERVER(inode)->rpc_ops) +#define NFS_PROTO(inode) (NFS_SERVER(inode)->nfs_client->rpc_ops) #define NFS_ADDR(inode) (RPC_PEERADDR(NFS_CLIENT(inode))) #define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf) #define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index e7d7662f51f..aae7c117597 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -25,6 +25,7 @@ struct nfs_client { struct list_head cl_superblocks; /* List of 
nfs_server structs */ struct rpc_clnt * cl_rpcclient; + const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */ #ifdef CONFIG_NFS_V4 u64 cl_clientid; /* constant */ @@ -74,7 +75,6 @@ struct nfs_server { struct rpc_clnt * client; /* RPC client handle */ struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ - const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */ struct nfs_iostats * io_stats; /* I/O statistics */ struct backing_dev_info backing_dev_info; int flags; /* various flags */ -- cgit v1.2.3 From 5006a76cca8f86c6975c16fcf67e83b8b0eee2b6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:12 -0400 Subject: NFS: Eliminate client_sys in favour of cl_rpcclient Eliminate nfs_server::client_sys in favour of nfs_client::cl_rpcclient as we only really need one per server that we're talking to since it doesn't have any security on it. The retransmission management variables are also moved to the common struct as they're required to set up the cl_rpcclient connection. The NFS2/3 client and client_acl connections are thenceforth derived by cloning the cl_rpcclient connection and post-applying the authorisation flavour. The code for setting up the initial common connection has been moved to client.c as nfs_create_rpc_client(). All the NFS program definition tables are also moved there as that's where they're now required rather than super.c. 
Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index aae7c117597..d404ceca916 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -26,6 +26,8 @@ struct nfs_client { struct rpc_clnt * cl_rpcclient; const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */ + unsigned long retrans_timeo; /* retransmit timeout */ + unsigned int retrans_count; /* number of retransmit tries */ #ifdef CONFIG_NFS_V4 u64 cl_clientid; /* constant */ @@ -73,7 +75,6 @@ struct nfs_client { struct nfs_server { struct nfs_client * nfs_client; /* shared client and NFS4 state */ struct rpc_clnt * client; /* RPC client handle */ - struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nfs_iostats * io_stats; /* I/O statistics */ struct backing_dev_info backing_dev_info; @@ -90,8 +91,6 @@ struct nfs_server { unsigned int acregmax; unsigned int acdirmin; unsigned int acdirmax; - unsigned long retrans_timeo; /* retransmit timeout */ - unsigned int retrans_count; /* number of retransmit tries */ unsigned int namelen; char * hostname; /* remote hostname */ struct nfs_fh fh; -- cgit v1.2.3 From 54ceac4515986030c2502960be620198dd8fe25b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:13 -0400 Subject: NFS: Share NFS superblocks per-protocol per-server per-FSID The attached patch makes NFS share superblocks between mounts from the same server and FSID over the same protocol. It does this by creating each superblock with a false root and returning the real root dentry in the vfsmount presented by get_sb(). The root dentry set starts off as an anonymous dentry if we don't already have the dentry for its inode, otherwise it simply returns the dentry we already have. 
We may thus end up with several trees of dentries in the superblock, and if at some later point one of the anonymous tree roots is discovered by normal filesystem activity to be located in another tree within the superblock, the anonymous root is named and materialises attached to the second tree at the appropriate point. Why do it this way? Why not pass an extra argument to the mount() syscall to indicate the subpath and then pathwalk from the server root to the desired directory? You can't guarantee this will work for two reasons: (1) The root and intervening nodes may not be accessible to the client. With NFS2 and NFS3, for instance, mountd is called on the server to get the filehandle for the tip of a path. mountd won't give us handles for anything we don't have permission to access, and so we can't set up NFS inodes for such nodes, and so can't easily set up dentries (we'd have to have ghost inodes or something). With this patch we don't actually create dentries until we get handles from the server that we can use to set up their inodes, and we don't actually bind them into the tree until we know for sure where they go. (2) Inaccessible symbolic links. If we're asked to mount two exports from the server, eg: mount warthog:/warthog/aaa/xxx /mmm mount warthog:/warthog/bbb/yyy /nnn We may not be able to access anything nearer the root than xxx and yyy, but we may find out later that /mmm/www/yyy, say, is actually the same directory as the one mounted on /nnn. What we might then find out, for example, is that /warthog/bbb was actually a symbolic link to /warthog/aaa/xxx/www, but we can't actually determine that by talking to the server until /warthog is made available by NFS. This would lead to having constructed an erroneous dentry tree which we can't easily fix. We can end up with a dentry marked as a directory when it should actually be a symlink, or we could end up with an apparently hardlinked directory.
With this patch we need not make assumptions about the type of a dentry for which we can't retrieve information, nor need we assume we know its place in the grand scheme of things until we actually see that place. This patch reduces the possibility of aliasing in the inode and page caches for inodes that may be accessed by more than one NFS export. It also reduces the number of superblocks required for NFS where there are many NFS exports being used from a server (home directory server + autofs for example). This in turn makes it simpler to do local caching of network filesystems, as it can then be guaranteed that there won't be links from multiple inodes in separate superblocks to the same cache file. Obviously, cache aliasing between different levels of NFS protocol could still be a problem, but at least that gives us another key to use when indexing the cache. This patch makes the following changes: (1) The server record construction/destruction has been abstracted out into its own set of functions to make things easier to get right. These have been moved into fs/nfs/client.c. All the code in fs/nfs/client.c has to do with the management of connections to servers, and doesn't touch superblocks in any way; the remaining code in fs/nfs/super.c has to do with VFS superblock management. (2) The sequence of events undertaken by NFS mount is now reordered: (a) A volume representation (struct nfs_server) is allocated. (b) A server representation (struct nfs_client) is acquired. This may be allocated or shared, and is keyed on server address, port and NFS version. (c) If allocated, the client representation is initialised. The state member variable of nfs_client is used to prevent a race during initialisation from two mounts. (d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find the root filehandle for the mount (fs/nfs/getroot.c). For NFS2/3 we are given the root FH in advance. (e) The volume FSID is probed for on the root FH. 
(f) The volume representation is initialised from the FSINFO record retrieved on the root FH. (g) sget() is called to acquire a superblock. This may be allocated or shared, keyed on client pointer and FSID. (h) If allocated, the superblock is initialised. (i) If the superblock is shared, then the new nfs_server record is discarded. (j) The root dentry for this mount is looked up from the root FH. (k) The root dentry for this mount is assigned to the vfsmount. (3) nfs_readdir_lookup() creates dentries for each of the entries readdir() returns; this function now attaches disconnected trees from alternate roots that happen to be discovered attached to a directory being read (in the same way nfs_lookup() is made to do for lookup ops). The new d_materialise_unique() function is now used to do this, thus permitting the whole thing to be done under one set of locks, and thus avoiding any race between mount and lookup operations on the same directory. (4) The client management code uses a new debug facility: NFSDBG_CLIENT which is set by echoing 1024 to /proc/net/sunrpc/nfs_debug. (5) Clone mounts are now called xdev mounts. (6) Use the dentry passed to the statfs() op as the handle for retrieving fs statistics rather than the root dentry of the superblock (which is now a dummy). 
Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d404ceca916..6d0be0efd1b 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -51,7 +51,6 @@ struct nfs_client { unsigned long cl_lease_time; unsigned long cl_last_renewal; struct work_struct cl_renewd; - struct work_struct cl_recoverd; struct rpc_wait_queue cl_rpcwaitq; @@ -74,6 +73,10 @@ struct nfs_client { */ struct nfs_server { struct nfs_client * nfs_client; /* shared client and NFS4 state */ + struct list_head client_link; /* List of other nfs_server structs + * that share the same client + */ + struct list_head master_link; /* link in master servers list */ struct rpc_clnt * client; /* RPC client handle */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nfs_iostats * io_stats; /* I/O statistics */ @@ -92,20 +95,13 @@ struct nfs_server { unsigned int acdirmin; unsigned int acdirmax; unsigned int namelen; - char * hostname; /* remote hostname */ - struct nfs_fh fh; - struct sockaddr_in addr; + struct nfs_fsid fsid; + __u64 maxfilesize; /* maximum file size */ unsigned long mount_time; /* when this fs was mounted */ + dev_t s_dev; /* superblock dev numbers */ + #ifdef CONFIG_NFS_V4 - /* Our own IP address, as a null-terminated string. - * This is used to generate the clientid, and the callback address. 
- */ - char ip_addr[16]; - char * mnt_path; - struct list_head nfs4_siblings; /* List of other nfs_server structs - * that share the same clientid - */ u32 attr_bitmask[2];/* V4 bitmask representing the set of attributes supported on this filesystem */ @@ -113,6 +109,7 @@ struct nfs_server { that are supported on this filesystem */ #endif + void (*destroy)(struct nfs_server *); }; /* Server capabilities */ -- cgit v1.2.3 From ec739ef03dc926d05051c8c5838971445504470a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:15 -0400 Subject: SUNRPC: Create a helper to tell whether a transport is bound Hide the contents and format of xprt->addr by eliminating direct uses of the xprt->addr.sin_port field. This change is required to support alternate RPC host address formats (eg IPv6). Test-plan: Destructive testing (unplugging the network temporarily). Repeated runs of Connectathon locking suite with UDP and TCP. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 3a0cca255b7..a71106723d7 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -269,6 +269,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to); #define XPRT_CONNECTED (1) #define XPRT_CONNECTING (2) #define XPRT_CLOSE_WAIT (3) +#define XPRT_BOUND (4) static inline void xprt_set_connected(struct rpc_xprt *xprt) { @@ -312,6 +313,21 @@ static inline int xprt_test_and_set_connecting(struct rpc_xprt *xprt) return test_and_set_bit(XPRT_CONNECTING, &xprt->state); } +static inline void xprt_set_bound(struct rpc_xprt *xprt) +{ + test_and_set_bit(XPRT_BOUND, &xprt->state); +} + +static inline int xprt_bound(struct rpc_xprt *xprt) +{ + return test_bit(XPRT_BOUND, &xprt->state); +} + +static inline void xprt_clear_bound(struct rpc_xprt *xprt) +{ + clear_bit(XPRT_BOUND, 
&xprt->state); +} + #endif /* __KERNEL__*/ #endif /* _LINUX_SUNRPC_XPRT_H */ -- cgit v1.2.3 From 4a68179d38874c37be2802442a71b847f5d1a2a9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:15 -0400 Subject: SUNRPC: Make RPC portmapper use per-transport storage Move connection and bind state that was maintained in the rpc_clnt structure to the rpc_xprt structure. This will allow the creation of a clean API for plugging in different types of bind mechanisms. This brings improvements such as the elimination of a single spin lock to control serialization for all in-kernel RPC binding. A set of per-xprt bitops is used to serialize tasks during RPC binding, just like it now works for making RPC transport connections. Test-plan: Destructive testing (unplugging the network temporarily). Connectathon with UDP and TCP. NFSv2/3 and NFSv4 mounting should be carefully checked. Probably need to rig a server where certain services aren't running, or that returns an error for some typical operation. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 23 +++-------------------- include/linux/sunrpc/xprt.h | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 8fe9f35eba3..00e9dbaec9c 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -18,18 +18,6 @@ #include #include -/* - * This defines an RPC port mapping - */ -struct rpc_portmap { - __u32 pm_prog; - __u32 pm_vers; - __u32 pm_prot; - __u16 pm_port; - unsigned char pm_binding : 1; /* doing a getport() */ - struct rpc_wait_queue pm_bindwait; /* waiting on getport() */ -}; - struct rpc_inode; /* @@ -40,7 +28,9 @@ struct rpc_clnt { atomic_t cl_users; /* number of references */ struct rpc_xprt * cl_xprt; /* transport */ struct rpc_procinfo * cl_procinfo; /* procedure info */ - u32 cl_maxproc; /* max procedure number */ + u32 cl_prog, /* RPC program number */ + cl_vers, /* RPC version number */ + cl_maxproc; /* max procedure number */ char * cl_server; /* server machine name */ char * cl_protname; /* protocol name */ @@ -55,7 +45,6 @@ struct rpc_clnt { cl_dead : 1;/* abandoned */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ - struct rpc_portmap * cl_pmap; /* port mapping */ int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; @@ -64,14 +53,8 @@ struct rpc_clnt { struct dentry * cl_dentry; /* inode */ struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; - struct rpc_portmap cl_pmap_default; char cl_inline_name[32]; }; -#define cl_timeout cl_xprt->timeout -#define cl_prog cl_pmap->pm_prog -#define cl_vers cl_pmap->pm_vers -#define cl_port cl_pmap->pm_port -#define cl_prot cl_pmap->pm_prot /* * General RPC program info diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a71106723d7..4ce82616873 100644 --- a/include/linux/sunrpc/xprt.h +++ 
b/include/linux/sunrpc/xprt.h @@ -138,6 +138,7 @@ struct rpc_xprt { unsigned int tsh_size; /* size of transport specific header */ + struct rpc_wait_queue binding; /* requests waiting on rpcbind */ struct rpc_wait_queue sending; /* requests waiting to send */ struct rpc_wait_queue resend; /* requests waiting to resend */ struct rpc_wait_queue pending; /* requests in flight */ @@ -270,6 +271,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to); #define XPRT_CONNECTING (2) #define XPRT_CLOSE_WAIT (3) #define XPRT_BOUND (4) +#define XPRT_BINDING (5) static inline void xprt_set_connected(struct rpc_xprt *xprt) { @@ -328,6 +330,18 @@ static inline void xprt_clear_bound(struct rpc_xprt *xprt) clear_bit(XPRT_BOUND, &xprt->state); } +static inline void xprt_clear_binding(struct rpc_xprt *xprt) +{ + smp_mb__before_clear_bit(); + clear_bit(XPRT_BINDING, &xprt->state); + smp_mb__after_clear_bit(); +} + +static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt) +{ + return test_and_set_bit(XPRT_BINDING, &xprt->state); +} + #endif /* __KERNEL__*/ #endif /* _LINUX_SUNRPC_XPRT_H */ -- cgit v1.2.3 From 5b1eacbcd78930d976eb50a93f1779d311b553d1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:16 -0400 Subject: SUNRPC: Support for RPC child tasks no longer needed The previous patches removed the last user of RPC child tasks, so we can remove support for child tasks from net/sunrpc/sched.c now. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 82a91bb2236..f399c138f79 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -127,7 +127,6 @@ struct rpc_call_ops { */ #define RPC_TASK_ASYNC 0x0001 /* is an async task */ #define RPC_TASK_SWAPPER 0x0002 /* is swapping in/out */ -#define RPC_TASK_CHILD 0x0008 /* is child of other task */ #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ #define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ @@ -136,7 +135,6 @@ struct rpc_call_ops { #define RPC_TASK_NOINTR 0x0400 /* uninterruptible task */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) -#define RPC_IS_CHILD(t) ((t)->tk_flags & RPC_TASK_CHILD) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) #define RPC_DO_ROOTOVERRIDE(t) ((t)->tk_flags & RPC_TASK_ROOTCREDS) #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) @@ -253,7 +251,6 @@ struct rpc_task *rpc_new_task(struct rpc_clnt *, int flags, const struct rpc_call_ops *ops, void *data); struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *ops, void *data); -struct rpc_task *rpc_new_child(struct rpc_clnt *, struct rpc_task *parent); void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *ops, void *data); @@ -261,8 +258,6 @@ void rpc_release_task(struct rpc_task *); void rpc_exit_task(struct rpc_task *); void rpc_killall_tasks(struct rpc_clnt *); int rpc_execute(struct rpc_task *); -void rpc_run_child(struct rpc_task *parent, struct rpc_task *child, - rpc_action action); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); void 
rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, -- cgit v1.2.3 From bbf7c1dd2ae2b4040b41b1065ee9b1b6905b1605 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:16 -0400 Subject: SUNRPC: Introduce transport switch callout for pluggable rpcbind Introduce a clean transport switch API for plugging in different types of rpcbind mechanisms. For instance, rpcbind can cleanly replace the existing portmapper client, or a transport can choose to implement RPC binding any way it likes. Test plan: Destructive testing (unplugging the network temporarily). Connectathon with UDP and TCP. NFSv2/3 and NFSv4 mounting should be carefully checked. Probably need to rig a server where certain services aren't running, or that returns an error for some typical operation. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 +- include/linux/sunrpc/xprt.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 00e9dbaec9c..2e68ac0aa02 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -106,7 +106,7 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); int rpc_shutdown_client(struct rpc_clnt *); int rpc_destroy_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); -void rpc_getport(struct rpc_task *, struct rpc_clnt *); +void rpc_getport(struct rpc_task *); int rpc_register(u32, u32, int, unsigned short, int *); void rpc_call_setup(struct rpc_task *, struct rpc_message *, int); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 4ce82616873..84122559fa1 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -105,6 +105,7 @@ struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); int (*reserve_xprt)(struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task 
*task); + void (*rpcbind)(struct rpc_task *task); void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*connect)(struct rpc_task *task); void * (*buf_alloc)(struct rpc_task *task, size_t size); -- cgit v1.2.3 From ed39440a2573abc926f230267000f21fa5a87822 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:17 -0400 Subject: SUNRPC: create API for getting remote peer address Provide an API for retrieving the remote peer address without allowing direct access to the rpc_xprt struct. Test-plan: Compile kernel with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 2e68ac0aa02..65196b03f0a 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -123,6 +123,7 @@ void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); size_t rpc_max_payload(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); int rpc_ping(struct rpc_clnt *clnt, int flags); +size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); /* * Helper function for NFSroot support -- cgit v1.2.3 From 39d7bbcb5ba5e9d8d658b70903dd7939400e57db Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:18 -0400 Subject: SUNRPC: remove extraneous header inclusions include/linux/sunrpc/clnt.h already includes include/linux/sunrpc/xprt.h. We can remove xprt.h from source files that already include clnt.h. Likewise include/linux/sunrpc/timer.h. Test plan: Compile kernel with CONFIG_NFS enabled. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2426b11b6cc..0f33e621892 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1,7 +1,6 @@ #ifndef _LINUX_NFS_XDR_H #define _LINUX_NFS_XDR_H -#include #include /* -- cgit v1.2.3 From edb267a688fcee5335d596752f117a30c7152e44 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:18 -0400 Subject: SUNRPC: add xprt switch API for printing formatted remote peer addresses Add a new method to the transport switch API to provide a way to convert the opaque contents of xprt->addr to a human-readable string. Test plan: Compile kernel with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 84122559fa1..8372ab8fc9b 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -51,6 +51,14 @@ struct rpc_timeout { unsigned char to_exponential; }; +enum rpc_display_format_t { + RPC_DISPLAY_ADDR = 0, + RPC_DISPLAY_PORT, + RPC_DISPLAY_PROTO, + RPC_DISPLAY_ALL, + RPC_DISPLAY_MAX, +}; + struct rpc_task; struct rpc_xprt; struct seq_file; @@ -103,6 +111,7 @@ struct rpc_rqst { struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); + char * (*print_addr)(struct rpc_xprt *xprt, enum rpc_display_format_t format); int (*reserve_xprt)(struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*rpcbind)(struct rpc_task *task); @@ -207,6 +216,8 @@ struct rpc_xprt { void (*old_data_ready)(struct sock *, int); void (*old_state_change)(struct sock *); void (*old_write_space)(struct sock *); + + char * address_strings[RPC_DISPLAY_MAX]; 
}; #define XPRT_LAST_FRAG (1 << 0) -- cgit v1.2.3 From f425eba437f0051bde979ea2eef8bc875a77cd00 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:18 -0400 Subject: SUNRPC: Create API for displaying remote peer address Provide an API for formatting the remote peer address for printing without exposing its internal structure. The address could be dynamic, so we support a function call to get the address rather than reading it straight out of a structure. Test-plan: Destructive testing (unplugging the network temporarily). Probably need to rig a server where certain services aren't running, or that returns an error for some typical operation. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 65196b03f0a..b7d47f01835 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -124,6 +124,7 @@ size_t rpc_max_payload(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); int rpc_ping(struct rpc_clnt *clnt, int flags); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); +char * rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); /* * Helper function for NFSroot support -- cgit v1.2.3 From c4efcb1d3e0bc76aeb9ca6301d19a5079893c6c9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:19 -0400 Subject: SUNRPC: Use "sockaddr_storage" for storing RPC client's remote peer address IPv6 addresses are big (128 bits). Now that all RPC client consumers treat the addr field in rpc_xprt structs as opaque, and access it only via the API calls, we can safely widen the field in the rpc_xprt struct to accommodate larger addresses. Test plan: Compile kernel with CONFIG_NFS enabled.
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 8372ab8fc9b..fc05cfbd580 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -134,7 +134,8 @@ struct rpc_xprt { struct sock * inet; /* INET layer */ struct rpc_timeout timeout; /* timeout parms */ - struct sockaddr_in addr; /* server address */ + struct sockaddr_storage addr; /* server address */ + size_t addrlen; /* size of server address */ int prot; /* IP protocol */ unsigned long cong; /* current congestion */ -- cgit v1.2.3 From 6ca948238724c945bd353f51d54ae7d285f3889f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:19 -0400 Subject: SUNRPC: Clean-up after previous patches. Remove some unused macros related to accessing an RPC peer address Test plan: Compile kernel with CONFIG_NFS option enabled. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 - include/linux/sunrpc/clnt.h | 3 --- 2 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 51e9bd90ded..3b5b04193fe 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -216,7 +216,6 @@ static inline struct nfs_inode *NFS_I(struct inode *inode) #define NFS_SERVER(inode) (NFS_SB(inode->i_sb)) #define NFS_CLIENT(inode) (NFS_SERVER(inode)->client) #define NFS_PROTO(inode) (NFS_SERVER(inode)->nfs_client->rpc_ops) -#define NFS_ADDR(inode) (RPC_PEERADDR(NFS_CLIENT(inode))) #define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf) #define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies) #define NFS_CHANGE_ATTR(inode) (NFS_I(inode)->change_attr) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b7d47f01835..a26d69583c7 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -89,9 +89,6 @@ struct rpc_procinfo { char * p_name; /* name of procedure */ }; -#define RPC_CONGESTED(clnt) (RPCXPRT_CONGESTED((clnt)->cl_xprt)) -#define RPC_PEERADDR(clnt) (&(clnt)->cl_xprt->addr) - #ifdef __KERNEL__ struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, -- cgit v1.2.3 From c2866763b4029411d166040306691773c12d4caf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:20 -0400 Subject: SUNRPC: use sockaddr + size when creating remote transport endpoints Prepare for more generic transport endpoint handling needed by transports that might use different forms of addressing, such as IPv6. Introduce a single function call to replace the two-call xprt_create_proto/rpc_create_client API. Define a new rpc_create_args structure that allows callers to pass in remote endpoint addresses of varying length. Test-plan: Compile kernel with CONFIG_NFS enabled. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 22 ++++++++++++++++++++++ include/linux/sunrpc/xprt.h | 1 + 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index a26d69583c7..7817ba82f1b 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -97,6 +97,28 @@ struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, struct rpc_clnt *rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *info, u32 version, rpc_authflavor_t authflavor); + +struct rpc_create_args { + int protocol; + struct sockaddr *address; + size_t addrsize; + struct rpc_timeout *timeout; + char *servername; + struct rpc_program *program; + u32 version; + rpc_authflavor_t authflavor; + unsigned long flags; +}; + +/* Values for "flags" field */ +#define RPC_CLNT_CREATE_HARDRTRY (1UL << 0) +#define RPC_CLNT_CREATE_INTR (1UL << 1) +#define RPC_CLNT_CREATE_AUTOBIND (1UL << 2) +#define RPC_CLNT_CREATE_ONESHOT (1UL << 3) +#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 4) +#define RPC_CLNT_CREATE_NOPING (1UL << 5) + +struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, struct rpc_program *, int); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index fc05cfbd580..bc80fcfdd89 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -237,6 +237,7 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long /* * Generic internal transport functions */ +struct rpc_xprt * xprt_create_transport(int proto, struct sockaddr *addr, size_t size, struct rpc_timeout *toparms); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_task *task); -- cgit v1.2.3 From 
ff9aa5e56df60cc8565a93cc868fe25ae3f20e49 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:21 -0400 Subject: SUNRPC: Eliminate xprt_create_proto and rpc_create_client The two function call API for creating a new RPC client is now obsolete. Remove it. Also, remove an unnecessary check to see whether the caller is capable of using privileged network services. The kernel RPC client always uses a privileged ephemeral port by default; callers are responsible for checking the authority of users to make use of any RPC service, or for specifying that a nonprivileged port is acceptable. Test plan: Repeated runs of Connectathon locking suite. Check network trace to ensure correctness of NLM requests and replies. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 7 ------- include/linux/sunrpc/xprt.h | 1 - 2 files changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 7817ba82f1b..f6d1d646ce0 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -91,13 +91,6 @@ struct rpc_procinfo { #ifdef __KERNEL__ -struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, - struct rpc_program *info, - u32 version, rpc_authflavor_t authflavor); -struct rpc_clnt *rpc_new_client(struct rpc_xprt *xprt, char *servname, - struct rpc_program *info, - u32 version, rpc_authflavor_t authflavor); - struct rpc_create_args { int protocol; struct sockaddr *address; diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index bc80fcfdd89..de4efea7c85 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -231,7 +231,6 @@ struct rpc_xprt { /* * Transport operations used by ULPs */ -struct rpc_xprt * xprt_create_proto(int proto, struct sockaddr_in *addr, struct rpc_timeout *to); void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr); /* -- cgit v1.2.3 From 
4f390c152bc87165da4b1f5b7d870b46fb106d4e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:22 -0400 Subject: NFS: Fix double d_drop in nfs_instantiate() error path If the LOOKUP or GETATTR in nfs_instantiate fail, nfs_instantiate will do a d_drop before returning. But some callers already do a d_drop in the case of an error return. Make certain we do only one d_drop in all error paths. This issue was introduced because over time, the symlink proc API diverged slightly from the create/mkdir/mknod proc API. To prevent other coding mistakes of this type, change the symlink proc API to be more like create/mkdir/mknod and move the nfs_instantiate call into the symlink proc routines so it is used in exactly the same way for create, mkdir, mknod, and symlink. Test plan: Connectathon, all versions of NFS. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0f33e621892..ddf5d75e97a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -793,9 +793,8 @@ struct nfs_rpc_ops { int (*rename) (struct inode *, struct qstr *, struct inode *, struct qstr *); int (*link) (struct inode *, struct inode *, struct qstr *); - int (*symlink) (struct inode *, struct qstr *, struct qstr *, - struct iattr *, struct nfs_fh *, - struct nfs_fattr *); + int (*symlink) (struct inode *, struct dentry *, struct qstr *, + struct iattr *); int (*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, struct qstr *); int (*readdir) (struct dentry *, struct rpc_cred *, -- cgit v1.2.3 From 94a6d75320b3681e6e728b70e18bd186cb55e682 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:23 -0400 Subject: NFS: Use cached page as buffer for NFS symlink requests Now that we have a copy of the symlink path in the page cache, we can pass a 
struct page down to the XDR routines instead of a string buffer. Test plan: Connectathon, all NFS versions. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ddf5d75e97a..dc5397d9d23 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -358,8 +358,8 @@ struct nfs_symlinkargs { struct nfs_fh * fromfh; const char * fromname; unsigned int fromlen; - const char * topath; - unsigned int tolen; + struct page ** pages; + unsigned int pathlen; struct iattr * sattr; }; @@ -434,8 +434,8 @@ struct nfs3_symlinkargs { struct nfs_fh * fromfh; const char * fromname; unsigned int fromlen; - const char * topath; - unsigned int tolen; + struct page ** pages; + unsigned int pathlen; struct iattr * sattr; }; @@ -533,7 +533,10 @@ struct nfs4_accessres { struct nfs4_create_arg { u32 ftype; union { - struct qstr * symlink; /* NF4LNK */ + struct { + struct page ** pages; + unsigned int len; + } symlink; /* NF4LNK */ struct { u32 specdata1; u32 specdata2; @@ -793,8 +796,8 @@ struct nfs_rpc_ops { int (*rename) (struct inode *, struct qstr *, struct inode *, struct qstr *); int (*link) (struct inode *, struct inode *, struct qstr *); - int (*symlink) (struct inode *, struct dentry *, struct qstr *, - struct iattr *); + int (*symlink) (struct inode *, struct dentry *, struct page *, + unsigned int, struct iattr *); int (*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, struct qstr *); int (*readdir) (struct dentry *, struct rpc_cred *, -- cgit v1.2.3 From 275a082fe9308e710324e26ccb5363c53d8fd45f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Aug 2006 20:06:24 -0400 Subject: Add a real API for dealing with blk_congestion_wait() Signed-off-by: Trond Myklebust --- include/linux/blkdev.h | 1 + include/linux/writeback.h | 1 + 
2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aafe82788b4..96c9040c00a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -746,6 +746,7 @@ extern void blk_queue_free_tags(request_queue_t *); extern int blk_queue_resize_tags(request_queue_t *, int); extern void blk_queue_invalidate_tags(request_queue_t *); extern long blk_congestion_wait(int rw, long timeout); +extern void blk_congestion_end(int rw); extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *); extern int blkdev_issue_flush(struct block_device *, sector_t *); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 9e38b566d0e..0422036af4e 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -85,6 +85,7 @@ int wakeup_pdflush(long nr_pages); void laptop_io_completion(void); void laptop_sync_completion(void); void throttle_vm_writeout(void); +void writeback_congestion_end(void); /* These are exported to sysctl. */ extern int dirty_background_ratio; -- cgit v1.2.3 From 5dd3177ae5012c1e2ad7a9ffdbd0e0d0de2f60e4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 24 Aug 2006 01:03:05 -0400 Subject: NFSv4: Fix a use-after-free issue with the nfs server. 
Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 6d0be0efd1b..7ccfc7ef0a8 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -19,6 +19,7 @@ struct nfs_client { #define NFS_CS_RPCIOD 0 /* - rpciod started */ #define NFS_CS_CALLBACK 1 /* - callback started */ #define NFS_CS_IDMAP 2 /* - idmap started */ +#define NFS_CS_RENEWD 3 /* - renewd started */ struct sockaddr_in cl_addr; /* server identifier */ char * cl_hostname; /* hostname of server */ struct list_head cl_share_link; /* link in global client list */ -- cgit v1.2.3 From 158998b6fe36f6acef087f574c96d44713499cc9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 24 Aug 2006 01:03:17 -0400 Subject: SUNRPC: Make rpc_mkpipe() take the parent dentry as an argument Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index a481472c948..a2eb9b4a9de 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -43,7 +43,7 @@ extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *); extern int rpc_rmdir(struct dentry *); -extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags); +extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, struct rpc_pipe_ops *, int flags); extern int rpc_unlink(struct dentry *); extern struct vfsmount *rpc_get_mount(void); extern void rpc_put_mount(void); -- cgit v1.2.3 From 6b6ca86b77b62b798cf9ca2599036420abce7796 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Sep 2006 12:55:57 -0400 Subject: SUNRPC: Add refcounting to the struct rpc_xprt In a subsequent patch, this will allow 
the portmapper to take a reference to the rpc_xprt for which it is updating the port number, fixing an Oops. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index de4efea7c85..bdeba8538c7 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -129,6 +130,7 @@ struct rpc_xprt_ops { }; struct rpc_xprt { + struct kref kref; /* Reference count */ struct rpc_xprt_ops * ops; /* transport methods */ struct socket * sock; /* BSD socket layer */ struct sock * inet; /* INET layer */ @@ -248,7 +250,8 @@ int xprt_adjust_timeout(struct rpc_rqst *req); void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release(struct rpc_task *task); -int xprt_destroy(struct rpc_xprt *xprt); +struct rpc_xprt * xprt_get(struct rpc_xprt *xprt); +void xprt_put(struct rpc_xprt *xprt); static inline u32 *xprt_skip_transport_header(struct rpc_xprt *xprt, u32 *p) { -- cgit v1.2.3 From 4c8bd7eeee4c8f157fb61fb64b57500990b42e0e Mon Sep 17 00:00:00 2001 From: David Miller Date: Fri, 22 Sep 2006 22:31:36 -0700 Subject: [KERNEL] Do not truncate to 'int' in ALIGN() macro. Signed-off-by: David S. 
Miller Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 851aa1bcfc1..2b2ae4fdce8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -31,7 +31,7 @@ extern const char linux_banner[]; #define STACK_MAGIC 0xdeadbeef #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) +#define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL)) #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) -- cgit v1.2.3