From 3a765aa528401c7aec2208f7ed1276b232b24c57 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 26 Feb 2007 14:52:21 -0800 Subject: [XFRM] xfrm_user: Fix return values of xfrm_add_sa_expire. As noted by Kent Yoder, this function will always return an error. Make sure it returns zero on success. Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 25674532161..924a2fefcd9 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1557,14 +1557,13 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_usersa_info *p = &ue->state; x = xfrm_state_lookup(&p->id.daddr, p->id.spi, p->id.proto, p->family); - err = -ENOENT; + err = -ENOENT; if (x == NULL) return err; - err = -EINVAL; - spin_lock_bh(&x->lock); + err = -EINVAL; if (x->km.state != XFRM_STATE_VALID) goto out; km_state_expired(x, ue->hard, current->pid); @@ -1574,6 +1573,7 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, AUDIT_MAC_IPSEC_DELSA, 1, NULL, x); } + err = 0; out: spin_unlock_bh(&x->lock); xfrm_state_put(x); -- cgit v1.2.3 From 2c12a74cc4aeaebc378aa40ee11c7761a8ed05e0 Mon Sep 17 00:00:00 2001 From: Michal Wrobel Date: Mon, 26 Feb 2007 15:36:10 -0800 Subject: [IPV6]: anycast refcnt fix This patch fixes a bug in the Linux IPv6 stack which caused an anycast address to be added to a device before DAD had completed. This led to an incorrect reference count, which resulted in an infinite wait for unregister_netdevice completion on interface removal. Signed-off-by: Michal Wrobel Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f6ac65d3655..e16f1bba5df 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -400,6 +400,8 @@ static void dev_forward_change(struct inet6_dev *idev) ipv6_dev_mc_dec(dev, &addr); } for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) { + if (ifa->flags&IFA_F_TENTATIVE) + continue; if (idev->cnf.forwarding) addrconf_join_anycast(ifa); else -- cgit v1.2.3 From 304c209c9b02b0386024d037fa49b273caa0575b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 26 Feb 2007 15:45:15 -0800 Subject: [NET]: Revert socket.h/stat.h ifdef hacks. This reverts 57a87bb0720a5cf7a9ece49a8c8ed288398fd1bb. As H. Peter Anvin states, this change broke klibc and it's not very easy to fix things up without duplicating everything into userspace. In the longer term we should have a better solution to this problem, but for now let's unbreak things. Signed-off-by: David S. Miller --- include/linux/socket.h | 2 +- include/linux/stat.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index 28157a36e6c..fcd35a210e7 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -16,7 +16,7 @@ struct __kernel_sockaddr_storage { /* _SS_MAXSIZE value minus size of ss_family */ } __attribute__ ((aligned(_K_SS_ALIGNSIZE))); /* force desired alignment */ -#ifdef __KERNEL__ +#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) #include /* arch-dependent defines */ #include /* the SIOCxxx I/O controls */ diff --git a/include/linux/stat.h b/include/linux/stat.h index 4f8539ccff6..679ef0d70b6 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -7,7 +7,7 @@ #endif -#ifdef __KERNEL__ +#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) #define S_IFMT 00170000 #define S_IFSOCK 0140000 -- cgit v1.2.3 From ff49f26b453ac77788d3a3c652d8cb0f578214da Mon Sep 17 00:00:00 
2001 From: Rolf Eike Beer Date: Mon, 26 Feb 2007 15:47:16 -0800 Subject: [IPX]: Remove outdated information from Kconfig SPX was removed in early 2.5. How to connect to a Mac or the other OS isn't hard to find out these days. Signed-off-by: Rolf Eike Beer Signed-off-by: David S. Miller --- net/ipx/Kconfig | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/ipx/Kconfig b/net/ipx/Kconfig index 980a826f5d0..e9ad0062fbb 100644 --- a/net/ipx/Kconfig +++ b/net/ipx/Kconfig @@ -16,8 +16,7 @@ config IPX support", below. IPX is similar in scope to IP, while SPX, which runs on top of IPX, - is similar to TCP. There is also experimental support for SPX in - Linux (see "SPX networking", below). + is similar to TCP. To turn your Linux box into a fully featured NetWare file server and IPX router, say Y here and fetch either lwared from @@ -26,9 +25,6 @@ config IPX information, read the IPX-HOWTO available from . - General information about how to connect Linux, Windows machines and - Macs is on the WWW at . - The IPX driver would enlarge your kernel by about 16 KB. To compile this driver as a module, choose M here: the module will be called ipx. Unless you want to integrate your Linux box with a local Novell -- cgit v1.2.3 From 1267cd766c712644b45662572df05f28be5a6e89 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Mon, 26 Feb 2007 15:48:03 -0800 Subject: [IPX]: Remove ancient changelog Signed-off-by: Rolf Eike Beer Signed-off-by: David S. Miller --- net/ipx/ChangeLog | 101 ------------------------------------------------------ 1 file changed, 101 deletions(-) delete mode 100644 net/ipx/ChangeLog diff --git a/net/ipx/ChangeLog b/net/ipx/ChangeLog deleted file mode 100644 index 3b29763751a..00000000000 --- a/net/ipx/ChangeLog +++ /dev/null @@ -1,101 +0,0 @@ - Revision 0.21: Uses the new generic socket option code. - - Revision 0.22: Gcc clean ups and drop out device registration. 
Use the - new multi-protocol edition of hard_header - - Revision 0.23: IPX /proc by Mark Evans. Adding a route will - will overwrite any existing route to the same network. - - Revision 0.24: Supports new /proc with no 4K limit - - Revision 0.25: Add ephemeral sockets, passive local network - identification, support for local net 0 and - multiple datalinks - - Revision 0.26: Device drop kills IPX routes via it. (needed for module) - - Revision 0.27: Autobind - - Revision 0.28: Small fix for multiple local networks - - Revision 0.29: Assorted major errors removed - Small correction to promisc mode error fix - Asynchronous I/O support. Changed to use notifiers - and the newer packet_type stuff. Assorted major - fixes - - Revision 0.30: Moved to net/ipx/... - Don't set address length on recvfrom that errors. - Incorrect verify_area. - - Revision 0.31: New sk_buffs. This still needs a lot of - testing. - - Revision 0.32: Using sock_alloc_send_skb, firewall hooks. - Supports sendmsg/recvmsg - - Revision 0.33: Internal network support, routing changes, uses a - protocol private area for ipx data. - - Revision 0.34: Module support. - - Revision 0.35: Checksum support. , hooked in by - Handles WIN95 discovery packets - - Revision 0.36: Internal bump up for 2.1 - - Revision 0.37: Began adding POSIXisms. - - Revision 0.38: Asynchronous socket stuff made current. - - Revision 0.39: SPX interfaces - - Revision 0.40: Tiny SIOCGSTAMP fix (chris@cybernet.co.nz) - - Revision 0.41: 802.2TR removed (p.norton@computer.org) - Fixed connecting to primary net, - Automatic binding on send & receive, - Martijn van Oosterhout - - Revision 042: Multithreading - use spinlocks and refcounting to - protect some structures: ipx_interface sock list, list - of ipx interfaces, etc. - Bugfixes - do refcounting on net_devices, check function - results, etc. Thanks to davem and freitag for - suggestions and guidance. 
- Arnaldo Carvalho de Melo , - November, 2000 - - Revision 043: Shared SKBs, don't mangle packets, some cleanups - Arnaldo Carvalho de Melo , - December, 2000 - - Revision 044: Call ipxitf_hold on NETDEV_UP - acme - - Revision 045: fix PPROP routing bug - acme - - Revision 046: Further fixes to PPROP, ipxitf_create_internal was - doing an unneeded MOD_INC_USE_COUNT, implement - sysctl for ipx_pprop_broacasting, fix the ipx sysctl - handling, making it dynamic, some cleanups, thanks to - Petr Vandrovec for review and good suggestions. (acme) - - Revision 047: Cleanups, CodingStyle changes, move the ncp connection - hack out of line - acme - - Revision 048: Use sk->protinfo to store the pointer to IPX private - area, remove af_ipx from sk->protinfo and move ipx_opt - to include/net/ipx.h, use IPX_SK like DecNET, etc - acme - - Revision 049: SPX support dropped, see comment in ipx_create - acme - - Revision 050: Use seq_file for proc stuff, moving it to ipx_proc.c - acme - -Other fixes: - - Protect the module by a MOD_INC_USE_COUNT/MOD_DEC_USE_COUNT pair. Also, now - usage count is managed this way: - -Count one if the auto_interface mode is on - -Count one per configured interface - - Jacques Gelinas (jacques@solucorp.qc.ca) -- cgit v1.2.3 From aa6e4a96e7589948fe770744f7bb4f0f743dddaa Mon Sep 17 00:00:00 2001 From: David Stevens Date: Mon, 26 Feb 2007 16:28:56 -0800 Subject: [IPV6]: /proc/net/anycast6 unbalanced inet6_dev refcnt Reading /proc/net/anycast6 when there is no anycast address on an interface results in an ever-increasing inet6_dev reference count, as well as a reference to the netdevice you can't get rid of. Signed-off-by: David S. 
Miller --- net/ipv6/anycast.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index e5ef5979ade..09117d63256 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -461,6 +461,7 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) break; } read_unlock_bh(&idev->lock); + in6_dev_put(idev); } return im; } -- cgit v1.2.3 From 6548cda289b549ed60c35f16a0051609aeee2fd0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 27 Feb 2007 09:55:07 -0800 Subject: [BRIDGE]: Fix locking of set path cost. This change goes with earlier change to get rid of work queue for path cost. Now br_stp_set_path_cost does its own locking. This is to allow it to call br_path_cost() which calls ethtool interfaces (might sleep). Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_ioctl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 3ab153d3c50..147015fe5c7 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -291,12 +291,11 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!capable(CAP_NET_ADMIN)) return -EPERM; - spin_lock_bh(&br->lock); if ((p = br_get_port(br, args[1])) == NULL) ret = -EINVAL; else br_stp_set_path_cost(p, args[2]); - spin_unlock_bh(&br->lock); + return ret; } -- cgit v1.2.3 From 4498121ca3acbf928681b71261227d28dc29b6f6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 27 Feb 2007 09:56:42 -0800 Subject: [NET]: Handle disabled preemption in gfp_any() ctnetlink uses netlink_unicast from an atomic_notifier_chain (which is called within a RCU read side critical section) without holding further locks. netlink_unicast calls netlink_trim with the result of gfp_any() for the gfp flags, which are passed down to pskb_expand_head. 
gfp_any() only checks for softirq context and returns GFP_KERNEL, resulting in this warning: BUG: sleeping function called from invalid context at mm/slab.c:3032 in_atomic():1, irqs_disabled():0 no locks held by rmmod/7010. Call Trace: [] debug_show_held_locks+0x9/0xb [] __might_sleep+0xd9/0xdb [] __kmalloc+0x68/0x110 [] pskb_expand_head+0x4d/0x13b [] netlink_broadcast+0xa5/0x2e0 [] :nfnetlink:nfnetlink_send+0x83/0x8a [] :nf_conntrack_netlink:ctnetlink_conntrack_event+0x94c/0x96a [] notifier_call_chain+0x29/0x3e [] atomic_notifier_call_chain+0x32/0x60 [] :nf_conntrack:destroy_conntrack+0xa5/0x1d3 [] :nf_conntrack:nf_ct_cleanup+0x8c/0x12c [] :nf_conntrack:kill_l3proto+0x0/0x13 [] :nf_conntrack:nf_conntrack_l3proto_unregister+0x90/0x94 [] :nf_conntrack_ipv4:nf_conntrack_l3proto_ipv4_fini+0x2b/0x5d [] sys_delete_module+0x1b5/0x1e6 [] trace_hardirqs_on_thunk+0x35/0x37 [] system_call+0x7e/0x83 Since netlink_unicast is supposed to be callable from within RCU read side critical sections, make gfp_any() check for in_atomic() instead of in_softirq(). Additionally nfnetlink_send needs to use gfp_any() as well for the call to netlink_broadcast. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- net/netfilter/nfnetlink.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 03684e702d1..2c7d60ca354 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1278,7 +1278,7 @@ static inline int sock_writeable(const struct sock *sk) static inline gfp_t gfp_any(void) { - return in_softirq() ? GFP_ATOMIC : GFP_KERNEL; + return in_atomic() ? 
GFP_ATOMIC : GFP_KERNEL; } static inline long sock_rcvtimeo(const struct sock *sk, int noblock) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 11d504d0ac7..bf23e489e4c 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -197,13 +197,12 @@ EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) { - gfp_t allocation = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; int err = 0; NETLINK_CB(skb).dst_group = group; if (echo) atomic_inc(&skb->users); - netlink_broadcast(nfnl, skb, pid, group, allocation); + netlink_broadcast(nfnl, skb, pid, group, gfp_any()); if (echo) err = netlink_unicast(nfnl, skb, pid, MSG_DONTWAIT); -- cgit v1.2.3 From b08d5840d2c5a6ac0bce172f4c861974d718e34b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 27 Feb 2007 09:57:37 -0800 Subject: [NET]: Fix kfree(skb) Signed-off-by: Patrick McHardy Acked-by: Paul Moore Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- net/dccp/output.c | 2 +- net/netlabel/netlabel_unlabeled.c | 2 +- net/xfrm/xfrm_user.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f89ff151cfa..820761f9eee 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2037,7 +2037,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) err: while ((skb = segs)) { segs = skb->next; - kfree(skb); + kfree_skb(skb); } return ERR_PTR(err); } diff --git a/net/dccp/output.c b/net/dccp/output.c index f5c6aca1dfa..3282f2f2291 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -269,7 +269,7 @@ void dccp_write_xmit(struct sock *sk, int block) err); } else { dccp_pr_debug("packet discarded\n"); - kfree(skb); + kfree_skb(skb); } } } diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 5bc37181662..b931edee4b8 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -173,7 +173,7 
@@ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info) return 0; list_failure: - kfree(ans_skb); + kfree_skb(ans_skb); return ret_val; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 924a2fefcd9..956cfe0ff7f 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1401,7 +1401,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, x = xfrm_state_lookup(&id->daddr, id->spi, id->proto, id->family); if (x == NULL) { - kfree(r_skb); + kfree_skb(r_skb); return -ESRCH; } -- cgit v1.2.3 From 71599cd1c381d1b5f58c35653ac1d3627c6276db Mon Sep 17 00:00:00 2001 From: John Heffner Date: Tue, 27 Feb 2007 10:03:56 -0800 Subject: [TCP]: Document several sysctls. This adds documentation for tcp_moderate_rcvbuf, tcp_no_metrics_save, tcp_base_mss, and tcp_mtu_probing. Signed-off-by: John Heffner Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index a0f6842368c..d3aae1f9b4c 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -147,6 +147,11 @@ tcp_available_congestion_control - STRING More congestion control algorithms may be available as modules, but not loaded. +tcp_base_mss - INTEGER + The initial value of search_low to be used by Packetization Layer + Path MTU Discovery (MTU probing). If MTU probing is enabled, + this is the initial MSS used by the connection. + tcp_congestion_control - STRING Set the congestion control algorithm to be used for new connections. The algorithm "reno" is always available, but @@ -243,6 +248,27 @@ tcp_mem - vector of 3 INTEGERs: min, pressure, max Defaults are calculated at boot time from amount of available memory. 
+tcp_moderate_rcvbuf - BOOLEAN + If set, TCP performs receive buffer autotuning, attempting to + automatically size the buffer (no greater than tcp_rmem[2]) to + match the size required by the path for full throughput. Enabled by + default. + +tcp_mtu_probing - INTEGER + Controls TCP Packetization-Layer Path MTU Discovery. Takes three + values: + 0 - Disabled + 1 - Disabled by default, enabled when an ICMP black hole is detected + 2 - Always enabled, use initial MSS of tcp_base_mss. + +tcp_no_metrics_save - BOOLEAN + By default, TCP saves various connection metrics in the route cache + when the connection closes, so that connections established in the + near future can use these to set initial conditions. Usually, this + increases overall performance, but may sometimes cause performance + degradation. If set, TCP will not cache metrics on closing + connections. + tcp_orphan_retries - INTEGER How may times to retry before killing TCP connection, closed by our side. Default value 7 corresponds to ~50sec-16min -- cgit v1.2.3 From a9948a7e15015e7f2cb602190322b8ebb00c54c8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 28 Feb 2007 11:05:56 -0800 Subject: [TCP]: Fix minisock tcp_create_openreq_child() typo. On 2/28/07, KOVACS Krisztian wrote: > > Hi, > > While reading TCP minisock code I've found this suspiciously looking > code fragment: > > - 8< - > struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb) > { > struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); > > if (newsk != NULL) { > const struct inet_request_sock *ireq = inet_rsk(req); > struct tcp_request_sock *treq = tcp_rsk(req); > struct inet_connection_sock *newicsk = inet_csk(sk); > struct tcp_sock *newtp; > - 8< - > > The above code initializes newicsk to inet_csk(sk), isn't that supposed > to be inet_csk(newsk)? As far as I can tell this might leave > icsk_ack.last_seg_size zero even if we do have received data. Good catch! 
David, please apply the attached patch. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 30b1e520ad9..6b5c64f3c92 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -381,7 +381,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, if (newsk != NULL) { const struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); - struct inet_connection_sock *newicsk = inet_csk(sk); + struct inet_connection_sock *newicsk = inet_csk(newsk); struct tcp_sock *newtp; /* Now setup tcp_sock */ -- cgit v1.2.3