From 82524746c27fa418c250a56dd7606b9d3fc79826 Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcu: split list.h and move rcu-protected lists into rculist.h Move rcu-protected lists from list.h into a new header file rculist.h. This is done because lists are a very widely used primitive structure all over the kernel and it's currently impossible to include other header files in this list.h without creating some circular dependencies. For example, list.h implements rcu-protected list and uses rcu_dereference() without including rcupdate.h. It actually compiles because users of rcu_dereference() are macros. Other RCU functions could be used too, but probably aren't because of this. Therefore this patch creates rculist.h which includes rcupdate.h without too many changes/troubles. Signed-off-by: Franck Bui-Huu Acked-by: Paul E. McKenney Acked-by: Josh Triplett Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- net/802/psnap.c | 1 + net/8021q/vlan.c | 1 + net/bridge/br_fdb.c | 1 + net/bridge/br_stp.c | 1 + net/netlabel/netlabel_domainhash.c | 3 +-- 5 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/802/psnap.c b/net/802/psnap.c index 31128cb92a2..ea464393144 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -20,6 +20,7 @@ #include #include #include +#include static LIST_HEAD(snap_list); static DEFINE_SPINLOCK(snap_lock); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 2a739adaa92..e7ddbfa0e02 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 72c5976a5ce..142060f0205 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index e38034aa56f..9e96ffcd29a 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c 
@@ -13,6 +13,7 @@ * 2 of the License, or (at your option) any later version. */ #include +#include #include "br_private.h" #include "br_private_stp.h" diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 02c2f7c0b25..643c032a3a5 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -30,8 +30,7 @@ */ #include -#include -#include +#include #include #include #include -- cgit v1.2.3 From 711bbdd659b685b45d3f28b29a00f17be6484f38 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 17 May 2008 08:26:25 +0200 Subject: rculist.h: fix include in net/netfilter/nf_conntrack_netlink.c this file has rculist dependency but did not explicitly include it, which broke the build. Signed-off-by: Ingo Molnar --- net/netfilter/nf_conntrack_netlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 0edefcfc594..077bcd22879 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 2ba4cc319ab26c56205d4f23724c4748a553c845 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 13 May 2008 15:37:05 +0200 Subject: rcu: fix nf_conntrack_helper.c build bug Signed-off-by: Ingo Molnar --- net/netfilter/nf_conntrack_helper.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 7d1b1170374..8e0b4c8f62a 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 962cf36c5bf6d2840b8d66ee9a606fae2f540bbd Mon Sep 17 00:00:00 2001 From: "Carlos R. 
Mafra" Date: Thu, 15 May 2008 11:15:37 -0300 Subject: Remove argument from open_softirq which is always NULL As git-grep shows, open_softirq() is always called with the last argument being NULL block/blk-core.c: open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); kernel/hrtimer.c: open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL); kernel/rcuclassic.c: open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); kernel/rcupreempt.c: open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); kernel/sched.c: open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); kernel/softirq.c: open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL); kernel/softirq.c: open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL); kernel/timer.c: open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); net/core/dev.c: open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); net/core/dev.c: open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); This observation has already been made by Matthew Wilcox in June 2002 (http://www.cs.helsinki.fi/linux/linux-kernel/2002-25/0687.html) "I notice that none of the current softirq routines use the data element passed to them." and the situation hasn't changed since then. So it appears we can safely remove that extra argument to save 128 (54) bytes of kernel data (text). Signed-off-by: Carlos R. 
Mafra Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 58296307787..cf0e16731dc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4563,8 +4563,8 @@ static int __init net_dev_init(void) dev_boot_phase = 0; - open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); - open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); + open_softirq(NET_TX_SOFTIRQ, net_tx_action); + open_softirq(NET_RX_SOFTIRQ, net_rx_action); hotcpu_notifier(dev_cpu_callback, 0); dst_init(); -- cgit v1.2.3 From 6079a463cf95fafcc704a4e5e92a4da12444bd3c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 27 May 2008 06:22:38 -0700 Subject: dccp: Fix to handle short sequence numbers packet correctly RFC4340 said: 8.5. Pseudocode ... If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet has short sequence numbers), drop packet and return But DCCP has some mistake to handle short sequence numbers packet, now it drop packet only if P.type is Data, Ack, or DataAck and P.X == 0. Signed-off-by: Wei Yongjun Acked-by: Gerrit Renker Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/ipv4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b348dd70c68..c22a3780c14 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -739,8 +739,8 @@ int dccp_invalid_packet(struct sk_buff *skb) * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet * has short sequence numbers), drop packet and return */ - if (dh->dccph_type >= DCCP_PKT_DATA && - dh->dccph_type <= DCCP_PKT_DATAACK && dh->dccph_x == 0) { + if ((dh->dccph_type < DCCP_PKT_DATA || + dh->dccph_type > DCCP_PKT_DATAACK) && dh->dccph_x == 0) { DCCP_WARN("P.type (%s) not Data || [Data]Ack, while P.X == 0\n", dccp_packet_name(dh->dccph_type)); return 1; -- cgit v1.2.3 From 825de27d9e40b3117b29a79d412b7a4b78c5d815 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 27 May 2008 06:33:54 -0700 Subject: dccp ccid-3: Fix "t_ipi explosion" bug The identification of this bug is thanks to Cheng Wei and Tomasz Grobelny. To avoid divide-by-zero, the implementation previously ignored RTTs smaller than 4 microseconds when performing integer division RTT/4. When the RTT reached a value less than 4 microseconds (as observed on loopback), this prevented the Window Counter CCVal value from advancing. As a result, the receiver stopped sending feedback. This in turn caused non-ending expiries of the nofeedback timer at the sender, so that the sending rate was progressively reduced until reaching the minimum of one packet per 64 seconds. The patch fixes this bug by handling integer division more intelligently. Due to consistent use of dccp_sample_rtt(), divide-by-zero-RTT is avoided. Signed-off-by: Gerrit Renker Signed-off-by: David S. 
Miller --- net/dccp/ccids/ccid3.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index cd61dea2eea..f813077234b 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -193,22 +193,17 @@ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len) /* * Update Window Counter using the algorithm from [RFC 4342, 8.1]. - * The algorithm is not applicable if RTT < 4 microseconds. + * As elsewhere, RTT > 0 is assumed by using dccp_sample_rtt(). */ static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx, ktime_t now) { - u32 quarter_rtts; - - if (unlikely(hctx->ccid3hctx_rtt < 4)) /* avoid divide-by-zero */ - return; - - quarter_rtts = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count); - quarter_rtts /= hctx->ccid3hctx_rtt / 4; + u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count), + quarter_rtts = (4 * delta) / hctx->ccid3hctx_rtt; if (quarter_rtts > 0) { hctx->ccid3hctx_t_last_win_count = now; - hctx->ccid3hctx_last_win_count += min_t(u32, quarter_rtts, 5); + hctx->ccid3hctx_last_win_count += min(quarter_rtts, 5U); hctx->ccid3hctx_last_win_count &= 0xF; /* mod 16 */ } } -- cgit v1.2.3 From 679fda1aa49fddf938bb699df7867c01988371ab Mon Sep 17 00:00:00 2001 From: Nicolas Kaiser Date: Tue, 20 May 2008 18:42:54 +0200 Subject: net/mac80211: always true conditionals Correct always true conditionals. Signed-off-by: Nicolas Kaiser Signed-off-by: John W. 
Linville --- net/mac80211/cfg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 699d97b8de5..a9fce4afdf2 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -672,7 +672,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (params->vlan) { sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); - if (sdata->vif.type != IEEE80211_IF_TYPE_VLAN || + if (sdata->vif.type != IEEE80211_IF_TYPE_VLAN && sdata->vif.type != IEEE80211_IF_TYPE_AP) return -EINVAL; } else @@ -760,7 +760,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, if (params->vlan && params->vlan != sta->sdata->dev) { vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); - if (vlansdata->vif.type != IEEE80211_IF_TYPE_VLAN || + if (vlansdata->vif.type != IEEE80211_IF_TYPE_VLAN && vlansdata->vif.type != IEEE80211_IF_TYPE_AP) { rcu_read_unlock(); return -EINVAL; -- cgit v1.2.3 From 167ad6f7a2b2ae58dfaa46620b9b3212594f38e6 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 21 May 2008 18:17:05 +0300 Subject: mac80211: fix ieee80211_rx_bss_put/get imbalance This patch fixes iee80211_rx_bss_put/get imbalance introduced by 'mac80211: enable IBSS merging' patch. Signed-off-by: Tomas Winkler Signed-off-by: John W. 
Linville --- net/mac80211/mlme.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7cfd12e0d1e..0ef5993e785 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2479,8 +2479,6 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, ifsta->state = IEEE80211_IBSS_JOINED; mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); - ieee80211_rx_bss_put(dev, bss); - return res; } @@ -3523,6 +3521,7 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, struct ieee80211_supported_band *sband; u8 bssid[ETH_ALEN], *pos; int i; + int ret; DECLARE_MAC_BUF(mac); #if 0 @@ -3567,7 +3566,9 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, *pos++ = (u8) (rate / 5); } - return ieee80211_sta_join_ibss(dev, ifsta, bss); + ret = ieee80211_sta_join_ibss(dev, ifsta, bss); + ieee80211_rx_bss_put(dev, bss); + return ret; } @@ -3615,10 +3616,13 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, (bss = ieee80211_rx_bss_get(dev, bssid, local->hw.conf.channel->center_freq, ifsta->ssid, ifsta->ssid_len))) { + int ret; printk(KERN_DEBUG "%s: Selected IBSS BSSID %s" " based on configured SSID\n", dev->name, print_mac(mac, bssid)); - return ieee80211_sta_join_ibss(dev, ifsta, bss); + ret = ieee80211_sta_join_ibss(dev, ifsta, bss); + ieee80211_rx_bss_put(dev, bss); + return ret; } #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG " did not try to join ibss\n"); -- cgit v1.2.3 From 9381be059bf5831d259e8735005cfa35b7488543 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Fri, 23 May 2008 01:36:36 +0300 Subject: mac80211: reorder channel and freq reporting in wext scan report This patch switch order of channel and freq (SIOCGIWFREQ) reports in scan results in order to overcome wpa_supplicant inability to handle channel numbers in 5.2Ghz band. Wext reporting channel number is ambiguous as channels 7-12 (802.11j) exist on both bands. 
Signed-off-by: Tomas Winkler Signed-off-by: Emmanuel Grumbach Acked-by: Dan Williams Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0ef5993e785..c29927c4977 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4099,18 +4099,17 @@ ieee80211_sta_scan_result(struct net_device *dev, memset(&iwe, 0, sizeof(iwe)); iwe.cmd = SIOCGIWFREQ; - iwe.u.freq.m = bss->freq; - iwe.u.freq.e = 6; + iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq); + iwe.u.freq.e = 0; current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, IW_EV_FREQ_LEN); memset(&iwe, 0, sizeof(iwe)); iwe.cmd = SIOCGIWFREQ; - iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq); - iwe.u.freq.e = 0; + iwe.u.freq.m = bss->freq; + iwe.u.freq.e = 6; current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, IW_EV_FREQ_LEN); - memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVQUAL; iwe.u.qual.qual = bss->signal; -- cgit v1.2.3 From d4231ca3e162387a2b6964dacaa83604e065c4e9 Mon Sep 17 00:00:00 2001 From: Abhijeet Kolekar Date: Fri, 23 May 2008 10:15:26 -0700 Subject: mac80211 : Fixes the status message for iwconfig iwconfig was showing incorrect status messages when disassociated. Patch fixes this by always checking for association status in ioctl calls for getting ap address. Signed-off-by: Abhijeet Kolekar Acked-by: Dan Williams Signed-off-by: John W. 
Linville --- net/mac80211/wext.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 457ebf9e85a..8311bb24f9f 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -489,9 +489,14 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type == IEEE80211_IF_TYPE_STA || sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { - ap_addr->sa_family = ARPHRD_ETHER; - memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN); - return 0; + if (sdata->u.sta.state == IEEE80211_ASSOCIATED) { + ap_addr->sa_family = ARPHRD_ETHER; + memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN); + return 0; + } else { + memset(&ap_addr->sa_data, 0, ETH_ALEN); + return 0; + } } else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) { ap_addr->sa_family = ARPHRD_ETHER; memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN); -- cgit v1.2.3 From f6d97104890203ba9c2cf8e34894c4c8e64cb880 Mon Sep 17 00:00:00 2001 From: Yi Zhu Date: Tue, 27 May 2008 17:50:50 +0300 Subject: mac80211: fix a typo in ieee80211_handle_filtered_frame comment fix a typo in ieee80211_handle_filtered_frame comment Signed-off-by: Yi Zhu Signed-off-by: John W. Linville --- net/mac80211/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 915afadb060..5c876450b14 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1313,7 +1313,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, /* * Clear the TX filter mask for this STA when sending the next * packet. If the STA went to power save mode, this will happen - * happen when it wakes up for the next time. + * when it wakes up for the next time. 
*/ sta->flags |= WLAN_STA_CLEAR_PS_FILT; -- cgit v1.2.3 From 70d251b24c44ab2fcba1807a5206e844cf10eb38 Mon Sep 17 00:00:00 2001 From: Senthil Balasubramanian Date: Wed, 28 May 2008 20:08:12 +0530 Subject: mac80211: Fix for NULL pointer dereference in sta_info_get() This addresses a NULL pointer dereference in sta_info_get(). TID and sta_info are extracted in ADDBA Timer expiry function through the timer handler's argument. The problem is extracting the TID (which was stored in timer_to_tid[] array of type "u8") through "int *" typecast which may also yield unwanted bytes for the MSB of TID that results in incorrect sta_info and ieee80211_local pointers. ieee80211_local pointer is NULL as illustrated below, it crashes in sta_info_get(). The problem started when extracting ieee80211_local pointer out of sta_info itself and eventually crashed in sta_info_get(). The proper way to fix is to change the data type of TID to u8 instead of u16. However changing all the occurrences requires some prototype changes as well. We should fix this in upcoming patches. Signed-off-by: Senthil Balasubramanian Signed-off-by: Luis Rodriguez Signed-off-by: John W. 
Linville --- net/mac80211/mlme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c29927c4977..33a356e7b66 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1614,7 +1614,7 @@ void sta_addba_resp_timer_expired(unsigned long data) * only one argument, and both sta_info and TID are needed, so init * flow in sta_info_create gives the TID as data, while the timer_to_id * array gives the sta through container_of */ - u16 tid = *(int *)data; + u16 tid = *(u8 *)data; struct sta_info *temp_sta = container_of((void *)data, struct sta_info, timer_to_tid[tid]); @@ -1662,7 +1662,7 @@ timer_expired_exit: void sta_rx_agg_session_timer_expired(unsigned long data) { /* not an elegant detour, but there is no choice as the timer passes - * only one argument, and verious sta_info are needed here, so init + * only one argument, and various sta_info are needed here, so init * flow in sta_info_create gives the TID as data, while the timer_to_id * array gives the sta through container_of */ u8 *ptid = (u8 *)data; -- cgit v1.2.3 From c97c23e38625f59e3e9869664eeeb0cab1822948 Mon Sep 17 00:00:00 2001 From: Senthil Balasubramanian Date: Wed, 28 May 2008 23:15:32 +0530 Subject: mac80211: fix alignment issue with compare_ether_addr() This addresses an alignment issue with compare_ether_addr(). The addresses passed to compare_ether_addr should be two bytes aligned. It may function properly on x86 platforms; however, it may not work properly on IA-64 or ARM processors. This also fixes a typo in mlme.c where the sk_buff struct name is incorrect. Though sizeof() works for any incorrect structure pointer name as it's just a pointer length that we want, let's just fix it. Signed-off-by: Senthil Balasubramanian Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. 
Linville --- net/mac80211/mlme.c | 4 ++-- net/mac80211/rx.c | 4 ++-- net/mac80211/util.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 33a356e7b66..841278f1df8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1325,7 +1325,7 @@ static void ieee80211_sta_process_addba_request(struct net_device *dev, /* prepare reordering buffer */ tid_agg_rx->reorder_buf = - kmalloc(buf_size * sizeof(struct sk_buf *), GFP_ATOMIC); + kmalloc(buf_size * sizeof(struct sk_buff *), GFP_ATOMIC); if (!tid_agg_rx->reorder_buf) { if (net_ratelimit()) printk(KERN_ERR "can not allocate reordering buffer " @@ -1334,7 +1334,7 @@ static void ieee80211_sta_process_addba_request(struct net_device *dev, goto end; } memset(tid_agg_rx->reorder_buf, 0, - buf_size * sizeof(struct sk_buf *)); + buf_size * sizeof(struct sk_buff *)); if (local->ops->ampdu_action) ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1958bfb361c..0941e5d6a52 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1091,7 +1091,7 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) u16 fc, hdrlen, ethertype; u8 *payload; u8 dst[ETH_ALEN]; - u8 src[ETH_ALEN]; + u8 src[ETH_ALEN] __aligned(2); struct sk_buff *skb = rx->skb; struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); DECLARE_MAC_BUF(mac); @@ -1234,7 +1234,7 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) */ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx) { - static const u8 pae_group_addr[ETH_ALEN] + static const u8 pae_group_addr[ETH_ALEN] __aligned(2) = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x03 }; struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 131e9e6c8a3..4e97b266f90 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -34,11 +34,11 @@ void *mac80211_wiphy_privid = 
&mac80211_wiphy_privid; /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ /* Ethernet-II snap header (RFC1042 for most EtherTypes) */ -const unsigned char rfc1042_header[] = +const unsigned char rfc1042_header[] __aligned(2) = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; /* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ -const unsigned char bridge_tunnel_header[] = +const unsigned char bridge_tunnel_header[] __aligned(2) = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; -- cgit v1.2.3 From 4c8411f8c115def968820a4df6658ccfd55d7f1a Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 29 May 2008 01:32:47 -0700 Subject: bluetooth: fix locking bug in the rfcomm socket cleanup handling In net/bluetooth/rfcomm/sock.c, rfcomm_sk_state_change() does the following operation: if (parent && sock_flag(sk, SOCK_ZAPPED)) { /* We have to drop DLC lock here, otherwise * rfcomm_sock_destruct() will dead lock. */ rfcomm_dlc_unlock(d); rfcomm_sock_kill(sk); rfcomm_dlc_lock(d); } } which is fine, since rfcomm_sock_kill() will call sk_free() which will call rfcomm_sock_destruct() which takes the rfcomm_dlc_lock()... so far so good. HOWEVER, this assumes that the rfcomm_sk_state_change() function always gets called with the rfcomm_dlc_lock() taken. This is the case for all but one case, and in that case where we don't have the lock, we do a double unlock followed by an attempt to take the lock, which due to underflow isn't going anywhere fast. This patch fixes this by moving the straggling case inside the lock, like the other usages of the same call are doing in this code. This was found with the help of the www.kerneloops.org project, where this deadlock was observed 51 times at this point in time: http://www.kerneloops.org/search.php?search=rfcomm_sock_destruct Signed-off-by: Arjan van de Ven Acked-by: Marcel Holtmann Signed-off-by: David S. 
Miller --- net/bluetooth/rfcomm/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index eb62558e9b0..0c2c93735e9 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -423,8 +423,8 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err) rfcomm_dlc_lock(d); d->state = BT_CLOSED; - rfcomm_dlc_unlock(d); d->state_change(d, err); + rfcomm_dlc_unlock(d); skb_queue_purge(&d->tx_queue); rfcomm_dlc_unlink(d); -- cgit v1.2.3 From 12293bf91126ad253a25e2840b307fdc7c2754c3 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 29 May 2008 03:19:37 -0700 Subject: netfilter: nf_conntrack_expect: fix error path unwind in nf_conntrack_expect_init() Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_expect.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index e31beeb33b2..e8f0dead267 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -587,10 +587,10 @@ int __init nf_conntrack_expect_init(void) return 0; err3: + kmem_cache_destroy(nf_ct_expect_cachep); +err2: nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, nf_ct_expect_hsize); -err2: - kmem_cache_destroy(nf_ct_expect_cachep); err1: return err; } -- cgit v1.2.3 From 3446b9d57edd0b96a89715fef222879e4919a115 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 30 May 2008 02:57:29 -0700 Subject: llc: Fix double accounting of received packets llc_sap_rcv was being preceded by skb_set_owner_r, then calling llc_state_process that calls sock_queue_rcv_skb, that in turn calls skb_set_owner_r again making the space allowed to be used by the socket to be leaked, making the socket to get stuck. 
Fix it by setting skb->sk at llc_sap_rcv and leave the accounting to be done only at sock_queue_rcv_skb. Reported-by: Dmitry Petukhov Tested-by: Dmitry Petukhov Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/llc/llc_sap.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index e2ddde75501..008de1fc42c 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -286,12 +286,14 @@ void llc_build_and_send_xid_pkt(struct llc_sap *sap, struct sk_buff *skb, * * Sends received pdus to the sap state machine. */ -static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb) +static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb, + struct sock *sk) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->type = LLC_SAP_EV_TYPE_PDU; ev->reason = 0; + skb->sk = sk; llc_sap_state_process(sap, skb); } @@ -360,8 +362,7 @@ static void llc_sap_mcast(struct llc_sap *sap, break; sock_hold(sk); - skb_set_owner_r(skb1, sk); - llc_sap_rcv(sap, skb1); + llc_sap_rcv(sap, skb1, sk); sock_put(sk); } read_unlock_bh(&sap->sk_list.lock); @@ -381,8 +382,7 @@ void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb) } else { struct sock *sk = llc_lookup_dgram(sap, &laddr); if (sk) { - skb_set_owner_r(skb, sk); - llc_sap_rcv(sap, skb); + llc_sap_rcv(sap, skb, sk); sock_put(sk); } else kfree_skb(skb); -- cgit v1.2.3 From 537d59af73d894750cff14f90fe2b6d77fbab15b Mon Sep 17 00:00:00 2001 From: Dave Young Date: Sun, 1 Jun 2008 23:50:52 -0700 Subject: bluetooth: rfcomm_dev_state_change deadlock fix There's logic in __rfcomm_dlc_close: rfcomm_dlc_lock(d); d->state = BT_CLOSED; d->state_changed(d, err); rfcomm_dlc_unlock(d); In rfcomm_dev_state_change, it's possible that rfcomm_dev_put try to take the dlc lock, then we will deadlock. Here fixed it by unlock dlc before rfcomm_dev_get in rfcomm_dev_state_change. why not unlock just before rfcomm_dev_put? 
it's because there's another problem. rfcomm_dev_get/rfcomm_dev_del will take rfcomm_dev_lock, but in rfcomm_dev_add the lock order is : rfcomm_dev_lock --> dlc lock so I unlock dlc before the taken of rfcomm_dev_lock. Actually it's a regression caused by commit 1905f6c736cb618e07eca0c96e60e3c024023428 ("bluetooth : __rfcomm_dlc_close lock fix"), the dlc state_change could be two callbacks : rfcomm_sk_state_change and rfcomm_dev_state_change. I missed the rfcomm_sk_state_change that time. Thanks Arjan van de Ven for the effort in commit 4c8411f8c115def968820a4df6658ccfd55d7f1a ("bluetooth: fix locking bug in the rfcomm socket cleanup handling") but he missed the rfcomm_dev_state_change lock issue. Signed-off-by: Dave Young Acked-by: Marcel Holtmann Signed-off-by: David S. Miller --- net/bluetooth/rfcomm/tty.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index c3f749abb2d..c9191871c1e 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -566,11 +566,22 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err) if (dlc->state == BT_CLOSED) { if (!dev->tty) { if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) { - if (rfcomm_dev_get(dev->id) == NULL) + /* Drop DLC lock here to avoid deadlock + * 1. rfcomm_dev_get will take rfcomm_dev_lock + * but in rfcomm_dev_add there's lock order: + * rfcomm_dev_lock -> dlc lock + * 2. rfcomm_dev_put will deadlock if it's + * the last reference + */ + rfcomm_dlc_unlock(dlc); + if (rfcomm_dev_get(dev->id) == NULL) { + rfcomm_dlc_lock(dlc); return; + } rfcomm_dev_del(dev); rfcomm_dev_put(dev); + rfcomm_dlc_lock(dlc); } } else tty_hangup(dev->tty); -- cgit v1.2.3 From 7dccf1f4e1696c79bff064c3770867cc53cbc71c Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 3 Jun 2008 14:53:46 -0700 Subject: ax25: Fix NULL pointer dereference and lockup. 
From: Jarek Poplawski There is only one function in AX25 calling skb_append(), and it really looks suspicious: appends skb after previously enqueued one, but in the meantime this previous skb could be removed from the queue. This patch Fixes it the simple way, so this is not fully compatible with the current method, but testing hasn't shown any problems. Signed-off-by: Ralf Baechle Signed-off-by: David S. Miller --- net/ax25/ax25_subr.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index d8f21573317..034aa10a519 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -64,20 +64,15 @@ void ax25_frames_acked(ax25_cb *ax25, unsigned short nr) void ax25_requeue_frames(ax25_cb *ax25) { - struct sk_buff *skb, *skb_prev = NULL; + struct sk_buff *skb; /* * Requeue all the un-ack-ed frames on the output queue to be picked * up by ax25_kick called from the timer. This arrangement handles the * possibility of an empty output queue. */ - while ((skb = skb_dequeue(&ax25->ack_queue)) != NULL) { - if (skb_prev == NULL) - skb_queue_head(&ax25->write_queue, skb); - else - skb_append(skb_prev, skb, &ax25->write_queue); - skb_prev = skb; - } + while ((skb = skb_dequeue_tail(&ax25->ack_queue)) != NULL) + skb_queue_head(&ax25->write_queue, skb); } /* -- cgit v1.2.3 From 9ecad877948deb2871d29e03786a7d7911687009 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 3 Jun 2008 15:18:36 -0700 Subject: irda: Sock leak on error path in irda_create. Bad type/protocol specified result in sk leak. Fix is simple - release the sk if bad values are given, but to make it possible just to call sk_free(), I move some sk initialization a bit lower. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/irda/af_irda.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index ae54b20d047..3eb5bcc75f9 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1093,11 +1093,6 @@ static int irda_create(struct net *net, struct socket *sock, int protocol) init_waitqueue_head(&self->query_wait); - /* Initialise networking socket struct */ - sock_init_data(sock, sk); /* Note : set sk->sk_refcnt to 1 */ - sk->sk_family = PF_IRDA; - sk->sk_protocol = protocol; - switch (sock->type) { case SOCK_STREAM: sock->ops = &irda_stream_ops; @@ -1124,13 +1119,20 @@ static int irda_create(struct net *net, struct socket *sock, int protocol) self->max_sdu_size_rx = TTP_SAR_UNBOUND; break; default: + sk_free(sk); return -ESOCKTNOSUPPORT; } break; default: + sk_free(sk); return -ESOCKTNOSUPPORT; } + /* Initialise networking socket struct */ + sock_init_data(sock, sk); /* Note : set sk->sk_refcnt to 1 */ + sk->sk_family = PF_IRDA; + sk->sk_protocol = protocol; + /* Register as a client with IrLMP */ self->ckey = irlmp_register_client(0, NULL, NULL, NULL); self->mask.word = 0xffff; -- cgit v1.2.3 From b9f5f52cca3e94f1e7509f366aa250ebbe1ed0b5 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 3 Jun 2008 16:03:15 -0700 Subject: net: neighbour table ABI problem The neighbor table time of last use information is returned in the incorrect unit. Kernel to user space ABI's need to use USER_HZ (or milliseconds), otherwise the application has to try and discover the real system HZ value which is problematic. Linux has standardized on keeping USER_HZ consistent (100hz) even when kernel is running internally at some other value. This change is small, but it breaks the ABI for older version of iproute2 utilities. But these utilities are already broken since they are looking at the psched_hz values which are completely different. So let's just go ahead and fix both kernel and user space. 
Older utilities will just print wrong values. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/neighbour.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5d9d7130bd6..3896de79dfb 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2057,9 +2057,9 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, goto nla_put_failure; } - ci.ndm_used = now - neigh->used; - ci.ndm_confirmed = now - neigh->confirmed; - ci.ndm_updated = now - neigh->updated; + ci.ndm_used = jiffies_to_clock_t(now - neigh->used); + ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed); + ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated); ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1; read_unlock_bh(&neigh->lock); -- cgit v1.2.3 From 7557af25155a82ac2dad73eec6b0166868bf8ea2 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Tue, 3 Jun 2008 16:07:45 -0700 Subject: net_dma: remove duplicate assignment in dma_skb_copy_datagram_iovec No need to compute copy twice in the frags loop in dma_skb_copy_datagram_iovec(). Signed-off-by: Brice Goglin Acked-by: Shannon Nelson Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams Signed-off-by: David S. 
Miller --- net/core/user_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/user_dma.c b/net/core/user_dma.c index 0ad1cd57bc3..c77aff9c6eb 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -75,7 +75,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, end = start + skb_shinfo(skb)->frags[i].size; copy = end - offset; - if ((copy = end - offset) > 0) { + if (copy > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; struct page *page = frag->page; -- cgit v1.2.3 From 51b77cae0d5aa8e1546fca855dcfe48ddfadfa9c Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 Jun 2008 16:36:01 -0700 Subject: route: Mark unused route cache flags as such. Also removes an obsolete check for the unused flag RTCF_MASQ. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index df41026b60d..96be336064f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1792,7 +1792,7 @@ static int __mkroute_input(struct sk_buff *skb, if (err) flags |= RTCF_DIRECTSRC; - if (out_dev == in_dev && err && !(flags & RTCF_MASQ) && + if (out_dev == in_dev && err && (IN_DEV_SHARED_MEDIA(out_dev) || inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) flags |= RTCF_DOREDIRECT; -- cgit v1.2.3 From 1f9d11c7c99da706e33646c3a9080dd5a8ef9a0b Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 Jun 2008 16:36:27 -0700 Subject: route: Mark unused routing attributes as such Also removes an unused policy entry for an attribute which is only used in kernel->user direction. Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- net/ipv4/fib_frontend.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 0f1557a4ac7..0b2ac6a3d90 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -506,7 +506,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { [RTA_PREFSRC] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, - [RTA_PROTOINFO] = { .type = NLA_U32 }, [RTA_FLOW] = { .type = NLA_U32 }, }; -- cgit v1.2.3 From bc3ed28caaef55e7e3a9316464256353c5f9b1df Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 Jun 2008 16:36:54 -0700 Subject: netlink: Improve returned error codes Make nlmsg_trim(), nlmsg_cancel(), genlmsg_cancel(), and nla_nest_cancel() void functions. Return -EMSGSIZE instead of -1 if the provided message buffer is not big enough. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/neighbour.c | 3 ++- net/core/rtnetlink.c | 3 ++- net/netlink/attr.c | 12 ++++++------ net/netlink/genetlink.c | 6 ++++-- net/sched/sch_dsmark.c | 6 ++++-- net/sched/sch_gred.c | 3 ++- net/sched/sch_hfsc.c | 2 +- net/sched/sch_red.c | 3 ++- net/wireless/nl80211.c | 12 ++++++++---- 9 files changed, 31 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3896de79dfb..65f01f71b3f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1714,7 +1714,8 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) return nla_nest_end(skb, nest); nla_put_failure: - return nla_nest_cancel(skb, nest); + nla_nest_cancel(skb, nest); + return -EMSGSIZE; } static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index cf857c4dc7b..a9a77216310 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -498,7 +498,8 @@ int rtnetlink_put_metrics(struct sk_buff *skb, 
u32 *metrics) return nla_nest_end(skb, mx); nla_put_failure: - return nla_nest_cancel(skb, mx); + nla_nest_cancel(skb, mx); + return -EMSGSIZE; } int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, diff --git a/net/netlink/attr.c b/net/netlink/attr.c index feb326f4a75..47bbf45ae5d 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -400,13 +400,13 @@ void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) * @attrlen: length of attribute payload * @data: head of attribute payload * - * Returns -1 if the tailroom of the skb is insufficient to store + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute header and payload. */ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) - return -1; + return -EMSGSIZE; __nla_put(skb, attrtype, attrlen, data); return 0; @@ -418,13 +418,13 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) * @attrlen: length of attribute payload * @data: head of attribute payload * - * Returns -1 if the tailroom of the skb is insufficient to store + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute payload. */ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return -1; + return -EMSGSIZE; __nla_put_nohdr(skb, attrlen, data); return 0; @@ -436,13 +436,13 @@ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) * @attrlen: length of attribute payload * @data: head of attribute payload * - * Returns -1 if the tailroom of the skb is insufficient to store + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute payload. 
*/ int nla_append(struct sk_buff *skb, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return -1; + return -EMSGSIZE; memcpy(skb_put(skb, attrlen), data, attrlen); return 0; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index d16929c9b4b..f5aa23c3e88 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -554,7 +554,8 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, return genlmsg_end(skb, hdr); nla_put_failure: - return genlmsg_cancel(skb, hdr); + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; } static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, @@ -590,7 +591,8 @@ static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, return genlmsg_end(skb, hdr); nla_put_failure: - return genlmsg_cancel(skb, hdr); + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; } static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 0df911fd67b..64465bacbe7 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -444,7 +444,8 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, return nla_nest_end(skb, opts); nla_put_failure: - return nla_nest_cancel(skb, opts); + nla_nest_cancel(skb, opts); + return -EMSGSIZE; } static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -466,7 +467,8 @@ static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) return nla_nest_end(skb, opts); nla_put_failure: - return nla_nest_cancel(skb, opts); + nla_nest_cancel(skb, opts); + return -EMSGSIZE; } static const struct Qdisc_class_ops dsmark_class_ops = { diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 3a9d226ff1e..c89fba56db5 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -582,7 +582,8 @@ append_opt: return nla_nest_end(skb, opts); nla_put_failure: - return nla_nest_cancel(skb, opts); + nla_nest_cancel(skb, opts); + 
return -EMSGSIZE; } static void gred_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 87293d0db1d..fdfaa3fcc16 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1360,7 +1360,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, nla_put_failure: nla_nest_cancel(skb, nest); - return -1; + return -EMSGSIZE; } static int diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 3dcd493f4f4..5c569853b9c 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -281,7 +281,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) return nla_nest_end(skb, opts); nla_put_failure: - return nla_nest_cancel(skb, opts); + nla_nest_cancel(skb, opts); + return -EMSGSIZE; } static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2bdd4dddc0e..fb75f265b39 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -187,7 +187,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, return genlmsg_end(msg, hdr); nla_put_failure: - return genlmsg_cancel(msg, hdr); + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; } static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) @@ -273,7 +274,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, return genlmsg_end(msg, hdr); nla_put_failure: - return genlmsg_cancel(msg, hdr); + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; } static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *cb) @@ -928,7 +930,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, return genlmsg_end(msg, hdr); nla_put_failure: - return genlmsg_cancel(msg, hdr); + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; } static int nl80211_dump_station(struct sk_buff *skb, @@ -1267,7 +1270,8 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, return 
genlmsg_end(msg, hdr); nla_put_failure: - return genlmsg_cancel(msg, hdr); + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; } static int nl80211_dump_mpath(struct sk_buff *skb, -- cgit v1.2.3 From ab32cd793dca21eec846a8204390d9594ed994d5 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 Jun 2008 16:37:33 -0700 Subject: route: Remove unused ifa_anycast field The field was supposed to allow the creation of an anycast route by assigning an anycast address to an address prefix. It was never implemented so this field is unused and serves no purpose. Remove it. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 6848e4760f3..79a7ef6209f 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -90,7 +90,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_LOCAL] = { .type = NLA_U32 }, [IFA_ADDRESS] = { .type = NLA_U32 }, [IFA_BROADCAST] = { .type = NLA_U32 }, - [IFA_ANYCAST] = { .type = NLA_U32 }, [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, }; @@ -536,9 +535,6 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) if (tb[IFA_BROADCAST]) ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]); - if (tb[IFA_ANYCAST]) - ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]); - if (tb[IFA_LABEL]) nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); else @@ -745,7 +741,6 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) break; inet_del_ifa(in_dev, ifap, 0); ifa->ifa_broadcast = 0; - ifa->ifa_anycast = 0; ifa->ifa_scope = 0; } @@ -1113,7 +1108,6 @@ static inline size_t inet_nlmsg_size(void) + nla_total_size(4) /* IFA_ADDRESS */ + nla_total_size(4) /* IFA_LOCAL */ + nla_total_size(4) /* IFA_BROADCAST */ - + nla_total_size(4) /* IFA_ANYCAST */ + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ } @@ -1143,9 +1137,6 @@ static int inet_fill_ifaddr(struct 
sk_buff *skb, struct in_ifaddr *ifa, if (ifa->ifa_broadcast) NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast); - if (ifa->ifa_anycast) - NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast); - if (ifa->ifa_label[0]) NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); -- cgit v1.2.3 From d430a227d272fa514bade388bf511dba4ec2962a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Jun 2008 10:59:02 +0100 Subject: bogus format in ip6mr ptrdiff_t is %t..., not %Z... Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- net/ipv6/ip6mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 2de3c464fe7..14796181e8b 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -197,7 +197,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) const char *name = vif->dev ? vif->dev->name : "none"; seq_printf(seq, - "%2Zd %-10s %8ld %7ld %8ld %7ld %05X\n", + "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", vif - vif6_table, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, -- cgit v1.2.3 From d2ee3f2c4b1db1320c1efb4dcaceeaf6c7e6c2d3 Mon Sep 17 00:00:00 2001 From: Dong Wei Date: Wed, 4 Jun 2008 09:57:51 -0700 Subject: netfilter: xt_connlimit: fix accouning when receive RST packet in ESTABLISHED state In the xt_connlimit match module, the counter of an IP is decreased when the TCP packet goes through the chain with ip_conntrack state TW. Well, it's very natural that the server and client close the socket with a FIN packet. But when the client/server closes the socket with an RST packet (using so_linger), the counter for this connection still exists. The following patch, which is based on linux-2.6.25.4, can fix it. Signed-off-by: Dong Wei Acked-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/netfilter/xt_connlimit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 2e89a00df92..70907f6baac 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -73,7 +73,8 @@ connlimit_iphash6(const union nf_inet_addr *addr, static inline bool already_closed(const struct nf_conn *conn) { if (nf_ct_protonum(conn) == IPPROTO_TCP) - return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT; + return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT || + conn->proto.tcp.state == TCP_CONNTRACK_CLOSE; else return 0; } -- cgit v1.2.3 From b9c698964614f71b9c8afeca163a945b4c2e2d20 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 4 Jun 2008 09:58:27 -0700 Subject: netfilter: nf_conntrack_ipv6: fix inconsistent lock state in nf_ct_frag6_gather() [ 63.531438] ================================= [ 63.531520] [ INFO: inconsistent lock state ] [ 63.531520] 2.6.26-rc4 #7 [ 63.531520] --------------------------------- [ 63.531520] inconsistent {softirq-on-W} -> {in-softirq-W} usage. [ 63.531520] tcpsic6/3864 [HC0[0]:SC1[1]:HE1:SE0] takes: [ 63.531520] (&q->lock#2){-+..}, at: [] ipv6_frag_rcv+0xd0/0xbd0 [ 63.531520] {softirq-on-W} state was registered at: [ 63.531520] [] __lock_acquire+0x3aa/0x1080 [ 63.531520] [] lock_acquire+0x76/0xa0 [ 63.531520] [] _spin_lock+0x2b/0x40 [ 63.531520] [] nf_ct_frag6_gather+0x3f6/0x910 ... According to this and another similar lockdep report inet_fragment locks are taken from nf_ct_frag6_gather() with softirqs enabled, but these locks are mainly used in softirq context, so disabling BHs is necessary. Reported-and-tested-by: Eric Sesterhenn Signed-off-by: Jarek Poplawski Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 2dccad48058..e65e26e210e 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -209,7 +209,9 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) arg.dst = dst; hash = ip6qhashfn(id, src, dst); + local_bh_disable(); q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); + local_bh_enable(); if (q == NULL) goto oom; @@ -638,10 +640,10 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) goto ret_orig; } - spin_lock(&fq->q.lock); + spin_lock_bh(&fq->q.lock); if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { - spin_unlock(&fq->q.lock); + spin_unlock_bh(&fq->q.lock); pr_debug("Can't insert skb to queue\n"); fq_put(fq); goto ret_orig; @@ -653,7 +655,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) if (ret_skb == NULL) pr_debug("Can't reassemble fragmented packets\n"); } - spin_unlock(&fq->q.lock); + spin_unlock_bh(&fq->q.lock); fq_put(fq); return ret_skb; -- cgit v1.2.3 From 8aca6cb1179ed9bef9351028c8d8af852903eae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 4 Jun 2008 11:34:22 -0700 Subject: tcp: Fix inconsistency source (CA_Open only when !tcp_left_out(tp)) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is possible that this skip path causes TCP to end up into an invalid state where ca_state was left to CA_Open while some segments already came into sacked_out. If next valid ACK doesn't contain new SACK information TCP fails to enter into tcp_fastretrans_alert(). Thus at least high_seq is set incorrectly to a too high seqno because some new data segments could be sent in between (and also, limited transmit is not being correctly invoked there). 
Reordering in both directions can easily cause this situation to occur. I guess we would want to use tcp_moderate_cwnd(tp) there as well as it may be possible to use this to trigger oversized burst to network by sending an old ACK with huge amount of SACK info, but I'm a bit unsure about its effects (mainly to FlightSize), so to be on the safe side I just currently fixed it minimally to keep TCP's state consistent (obviously, such nasty ACKs have been possible thus far). Though it seems that FlightSize is already underestimated by some amount, so probably in the long term we might want to trigger recovery there too, if appropriate, to make FlightSize calculation to resemble reality at the time when the losses were discovered (but such change scares me too much now and requires some more thinking anyway how to do that as it likely involves some code shuffling). This bug was found by Brian Vowell while running my TCP debug patch to find cause of another TCP issue (fackets_out miscount). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b54d9d37b63..54a0b741278 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2483,6 +2483,20 @@ static inline void tcp_complete_cwr(struct sock *sk) tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); } +static void tcp_try_keep_open(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + int state = TCP_CA_Open; + + if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker) + state = TCP_CA_Disorder; + + if (inet_csk(sk)->icsk_ca_state != state) { + tcp_set_ca_state(sk, state); + tp->high_seq = tp->snd_nxt; + } +} + static void tcp_try_to_open(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk); @@ -2496,15 +2510,7 @@ static void tcp_try_to_open(struct sock *sk, int flag) tcp_enter_cwr(sk, 1); if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { - int state = TCP_CA_Open; - - if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker) - state = TCP_CA_Disorder; - - if (inet_csk(sk)->icsk_ca_state != state) { - tcp_set_ca_state(sk, state); - tp->high_seq = tp->snd_nxt; - } + tcp_try_keep_open(sk); tcp_moderate_cwnd(tp); } else { tcp_cwnd_down(sk, flag); @@ -3310,8 +3316,11 @@ no_queue: return 1; old_ack: - if (TCP_SKB_CB(skb)->sacked) + if (TCP_SKB_CB(skb)->sacked) { tcp_sacktag_write_queue(sk, skb, prior_snd_una); + if (icsk->icsk_ca_state == TCP_CA_Open) + tcp_try_keep_open(sk); + } uninteresting_ack: SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt); -- cgit v1.2.3 From e51171019bb0e1f9fb57c25bd2e38ce652eaea27 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 29 May 2008 19:55:05 +0900 Subject: [SCTP]: Fix NULL dereference of asoc. Commit 7cbca67c073263c179f605bdbbdc565ab29d801d ("[IPV6]: Support Source Address Selection API (RFC5014)") introduced NULL dereference of asoc to sctp_v6_get_saddr in net/sctp/ipv6.c. 
Pointed out by Johann Felix Soden . Signed-off-by: YOSHIFUJI Hideaki --- net/sctp/ipv6.c | 5 +++-- net/sctp/protocol.c | 3 ++- net/sctp/transport.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e45e44c6063..e4aac3266fc 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -299,7 +299,8 @@ static inline int sctp_v6_addr_match_len(union sctp_addr *s1, /* Fills in the source address(saddr) based on the destination address(daddr) * and asoc's bind address list. */ -static void sctp_v6_get_saddr(struct sctp_association *asoc, +static void sctp_v6_get_saddr(struct sctp_sock *sk, + struct sctp_association *asoc, struct dst_entry *dst, union sctp_addr *daddr, union sctp_addr *saddr) @@ -318,7 +319,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc, if (!asoc) { ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, &daddr->v6.sin6_addr, - inet6_sk(asoc->base.sk)->srcprefs, + inet6_sk(&sk->inet.sk)->srcprefs, &saddr->v6.sin6_addr); SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: " NIP6_FMT "\n", NIP6(saddr->v6.sin6_addr)); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 0ec234b762c..13ee7fa92e0 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -519,7 +519,8 @@ out: /* For v4, the source address is cached in the route entry(dst). So no need * to cache it separately and hence this is an empty routine. 
*/ -static void sctp_v4_get_saddr(struct sctp_association *asoc, +static void sctp_v4_get_saddr(struct sctp_sock *sk, + struct sctp_association *asoc, struct dst_entry *dst, union sctp_addr *daddr, union sctp_addr *saddr) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index f4938f6c5ab..62082e7b797 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -291,7 +291,7 @@ void sctp_transport_route(struct sctp_transport *transport, if (saddr) memcpy(&transport->saddr, saddr, sizeof(union sctp_addr)); else - af->get_saddr(asoc, dst, daddr, &transport->saddr); + af->get_saddr(opt, asoc, dst, daddr, &transport->saddr); transport->dst = dst; if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) { -- cgit v1.2.3 From a3c960899e042bc1c2b730a2115fa32da7802039 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 4 Jun 2008 01:30:25 +0900 Subject: [IPV6] UDP: Possible dst leak in udpv6_sendmsg. ip6_sk_dst_lookup returns held dst entry. It should be released on all paths beyond this point. Add missed release when up->pending is set. Bug report and initial patch by Denis V. Lunev . Signed-off-by: YOSHIFUJI Hideaki Acked-by: Denis V. Lunev --- net/ipv6/udp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1fd784f3e2e..47123bf5eb0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -848,12 +848,14 @@ do_append_data: } else { dst_release(dst); } + dst = NULL; } if (err > 0) err = np->recverr ? net_xmit_errno(err) : 0; release_sock(sk); out: + dst_release(dst); fl6_sock_release(flowlabel); if (!err) return len; -- cgit v1.2.3 From 24ef0da7b864435f221f668bc8a324160d063e78 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 28 May 2008 16:54:22 +0200 Subject: [IPV6] ADDRCONF: Check range of prefix length As of now, the prefix length is not validated when adding or deleting addresses. 
The value is passed directly into the inet6_ifaddr structure and later passed on to memcmp() as a length indicator which relies on the value never to exceed 128 (bits). Due to the missing check, the current code allows for any 8 bit value to be passed on as prefix length while using the netlink interface, and any 32 bit value while using the ioctl interface. [Use unsigned int instead to generate better code - yoshfuji] Signed-off-by: Thomas Graf Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/addrconf.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3a835578fd1..c3b20c5afa3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2027,7 +2027,7 @@ err_exit: * Manual configuration of address on an interface */ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, - int plen, __u8 ifa_flags, __u32 prefered_lft, + unsigned int plen, __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) { struct inet6_ifaddr *ifp; @@ -2039,6 +2039,9 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, ASSERT_RTNL(); + if (plen > 128) + return -EINVAL; + /* check the lifetime */ if (!valid_lft || prefered_lft > valid_lft) return -EINVAL; @@ -2095,12 +2098,15 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, } static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx, - int plen) + unsigned int plen) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; + if (plen > 128) + return -EINVAL; + dev = __dev_get_by_index(net, ifindex); if (!dev) return -ENODEV; -- cgit v1.2.3 From 82836372311a5cbf9cc5f4f47f9b56cb9edfe90d Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 27 May 2008 00:04:43 +0800 Subject: [IPV6] TUNNEL6: Fix incoming packet length check for inter-protocol tunnel. I discovered a strange behavior in [ipv4 in ipv6] tunnel. 
When IPv6 tunnel payload is less than 40(0x28), packet can be sent to network, received in physical interface, but not seen in IP tunnel interface. No counter increase in tunnel interface. Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/tunnel6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 6323921b40b..669f280989c 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -109,7 +109,7 @@ static int tunnel46_rcv(struct sk_buff *skb) { struct xfrm6_tunnel *handler; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto drop; for (handler = tunnel46_handlers; handler; handler = handler->next) -- cgit v1.2.3 From baa2bfb8aef24bb7fe1875b256918724b3884662 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 30 May 2008 11:35:03 +0900 Subject: [IPV4] TUNNEL4: Fix incoming packet length check for inter-protocol tunnel. Signed-off-by: YOSHIFUJI Hideaki --- net/ipv4/tunnel4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index d3b709a6f26..cb1f0e83830 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -97,7 +97,7 @@ static int tunnel64_rcv(struct sk_buff *skb) { struct xfrm_tunnel *handler; - if (!pskb_may_pull(skb, sizeof(struct iphdr))) + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto drop; for (handler = tunnel64_handlers; handler; handler = handler->next) -- cgit v1.2.3 From 4bed72e4f5502ea3322f0a00794815fa58951abe Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 27 May 2008 17:37:49 +0900 Subject: [IPV6] ADDRCONF: Allow longer lifetime on 64bit archs. - Allow longer lifetimes (>= 0x7fffffff/HZ) on 64bit archs by using unsigned long. - Shadow this arithmetic overflow workaround by introducing helper functions: addrconf_timeout_fixup() and addrconf_finite_timeout(). 
Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/addrconf.c | 97 +++++++++++++++++++++++++++-------------------------- net/ipv6/route.c | 12 ++----- 2 files changed, 53 insertions(+), 56 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c3b20c5afa3..147588f4c7c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -731,8 +731,13 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) onlink = -1; spin_lock(&ifa->lock); - lifetime = min_t(unsigned long, - ifa->valid_lft, 0x7fffffffUL/HZ); + + lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ); + /* + * Note: Because this address is + * not permanent, lifetime < + * LONG_MAX / HZ here. + */ if (time_before(expires, ifa->tstamp + lifetime * HZ)) expires = ifa->tstamp + lifetime * HZ; @@ -1722,7 +1727,6 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) __u32 valid_lft; __u32 prefered_lft; int addr_type; - unsigned long rt_expires; struct inet6_dev *in6_dev; pinfo = (struct prefix_info *) opt; @@ -1764,28 +1768,23 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) * 2) Configure prefixes with the auto flag set */ - if (valid_lft == INFINITY_LIFE_TIME) - rt_expires = ~0UL; - else if (valid_lft >= 0x7FFFFFFF/HZ) { + if (pinfo->onlink) { + struct rt6_info *rt; + unsigned long rt_expires; + /* Avoid arithmetic overflow. Really, we could * save rt_expires in seconds, likely valid_lft, * but it would require division in fib gc, that it * not good. */ - rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ); - } else - rt_expires = valid_lft * HZ; + if (HZ > USER_HZ) + rt_expires = addrconf_timeout_fixup(valid_lft, HZ); + else + rt_expires = addrconf_timeout_fixup(valid_lft, USER_HZ); - /* - * We convert this (in jiffies) to clock_t later. - * Avoid arithmetic overflow there as well. - * Overflow can happen only if HZ < USER_HZ. 
- */ - if (HZ < USER_HZ && ~rt_expires && rt_expires > 0x7FFFFFFF / USER_HZ) - rt_expires = 0x7FFFFFFF / USER_HZ; + if (addrconf_finite_timeout(rt_expires)) + rt_expires *= HZ; - if (pinfo->onlink) { - struct rt6_info *rt; rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL, dev->ifindex, 1); @@ -1794,7 +1793,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) if (valid_lft == 0) { ip6_del_rt(rt); rt = NULL; - } else if (~rt_expires) { + } else if (addrconf_finite_timeout(rt_expires)) { /* not infinity */ rt->rt6i_expires = jiffies + rt_expires; rt->rt6i_flags |= RTF_EXPIRES; @@ -1803,9 +1802,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) rt->rt6i_expires = 0; } } else if (valid_lft) { - int flags = RTF_ADDRCONF | RTF_PREFIX_RT; clock_t expires = 0; - if (~rt_expires) { + int flags = RTF_ADDRCONF | RTF_PREFIX_RT; + if (addrconf_finite_timeout(rt_expires)) { /* not infinity */ flags |= RTF_EXPIRES; expires = jiffies_to_clock_t(rt_expires); @@ -2036,6 +2035,7 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, int scope; u32 flags; clock_t expires; + unsigned long timeout; ASSERT_RTNL(); @@ -2055,22 +2055,23 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, scope = ipv6_addr_scope(pfx); - if (valid_lft == INFINITY_LIFE_TIME) { - ifa_flags |= IFA_F_PERMANENT; - flags = 0; - expires = 0; - } else { - if (valid_lft >= 0x7FFFFFFF/HZ) - valid_lft = 0x7FFFFFFF/HZ; + timeout = addrconf_timeout_fixup(valid_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + expires = jiffies_to_clock_t(timeout * HZ); + valid_lft = timeout; flags = RTF_EXPIRES; - expires = jiffies_to_clock_t(valid_lft * HZ); + } else { + expires = 0; + flags = 0; + ifa_flags |= IFA_F_PERMANENT; } - if (prefered_lft == 0) - ifa_flags |= IFA_F_DEPRECATED; - else if ((prefered_lft >= 0x7FFFFFFF/HZ) && - (prefered_lft != INFINITY_LIFE_TIME)) - prefered_lft = 0x7FFFFFFF/HZ; + timeout = 
addrconf_timeout_fixup(prefered_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + if (timeout == 0) + ifa_flags |= IFA_F_DEPRECATED; + prefered_lft = timeout; + } ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); @@ -3175,26 +3176,28 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, { u32 flags; clock_t expires; + unsigned long timeout; if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; - if (valid_lft == INFINITY_LIFE_TIME) { - ifa_flags |= IFA_F_PERMANENT; - flags = 0; - expires = 0; - } else { - if (valid_lft >= 0x7FFFFFFF/HZ) - valid_lft = 0x7FFFFFFF/HZ; + timeout = addrconf_timeout_fixup(valid_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + expires = jiffies_to_clock_t(timeout * HZ); + valid_lft = timeout; flags = RTF_EXPIRES; - expires = jiffies_to_clock_t(valid_lft * HZ); + } else { + expires = 0; + flags = 0; + ifa_flags |= IFA_F_PERMANENT; } - if (prefered_lft == 0) - ifa_flags |= IFA_F_DEPRECATED; - else if ((prefered_lft >= 0x7FFFFFFF/HZ) && - (prefered_lft != INFINITY_LIFE_TIME)) - prefered_lft = 0x7FFFFFFF/HZ; + timeout = addrconf_timeout_fixup(prefered_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + if (timeout == 0) + ifa_flags |= IFA_F_DEPRECATED; + prefered_lft = timeout; + } spin_lock_bh(&ifp->lock); ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 48534c6c073..220cffe9e63 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -446,7 +446,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, struct route_info *rinfo = (struct route_info *) opt; struct in6_addr prefix_buf, *prefix; unsigned int pref; - u32 lifetime; + unsigned long lifetime; struct rt6_info *rt; if (len < sizeof(struct route_info)) { @@ -472,13 +472,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, if (pref == ICMPV6_ROUTER_PREF_INVALID) pref = ICMPV6_ROUTER_PREF_MEDIUM; - lifetime = 
ntohl(rinfo->lifetime); - if (lifetime == 0xffffffff) { - /* infinity */ - } else if (lifetime > 0x7fffffff/HZ - 1) { - /* Avoid arithmetic overflow */ - lifetime = 0x7fffffff/HZ - 1; - } + lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); if (rinfo->length == 3) prefix = (struct in6_addr *)rinfo->prefix; @@ -506,7 +500,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); if (rt) { - if (lifetime == 0xffffffff) { + if (!addrconf_finite_timeout(lifetime)) { rt->rt6i_flags &= ~RTF_EXPIRES; } else { rt->rt6i_expires = jiffies + HZ * lifetime; -- cgit v1.2.3 From 05335c2220c4911b69cb1bdd79e603ab08088372 Mon Sep 17 00:00:00 2001 From: Yang Hongyang Date: Wed, 28 May 2008 16:23:47 +0800 Subject: [IPV6]: Fix the return value of get destination options with NULL data pointer If we pass a NULL data buffer to getsockopt(), it will return 0, and the option length is set to -EFAULT: getsockopt(sk, IPPROTO_IPV6, IPV6_DSTOPTS, NULL, &len); This is because ipv6_getsockopt_sticky() will return -EFAULT or -EINVAL if some error occurs. This patch fixes this problem.
Signed-off-by: Yang Hongyang Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ipv6_sockglue.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 56d55fecf8e..aa7bedf780e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -975,6 +975,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, len = ipv6_getsockopt_sticky(sk, np->opt, optname, optval, len); release_sock(sk); + /* check if ipv6_getsockopt_sticky() returns err code */ + if (len < 0) + return len; return put_user(len, optlen); } -- cgit v1.2.3 From 95b496b66615d8c43f77702049b1bd01e2f06595 Mon Sep 17 00:00:00 2001 From: Yang Hongyang Date: Wed, 28 May 2008 16:27:28 +0800 Subject: [IPV6]: Fix the data length of get destination options with short length If the destination options are requested with a length that is too short for the option, getsockopt() will still return the real length of the option, which is larger than the buffer space. This is because ipv6_getsockopt_sticky() returns the real length of the option. This patch fixes this problem. Signed-off-by: Yang Hongyang Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ipv6_sockglue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index aa7bedf780e..9293b9f0ac2 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -832,7 +832,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, len = min_t(unsigned int, len, ipv6_optlen(hdr)); if (copy_to_user(optval, hdr, len)) return -EFAULT; - return ipv6_optlen(hdr); + return len; } static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, -- cgit v1.2.3 From 187e38384c4abfbbb1b880fab234d16c2df23a25 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 4 Jun 2008 13:01:37 +0900 Subject: [IPV6]: Check outgoing interface even if source address is unspecified.
The outgoing interface index (ipi6_ifindex) in IPV6_PKTINFO ancillary data, is not checked if the source address (ipi6_addr) is unspecified. If the ipi6_ifindex is the not-exist interface, it should be fail. Based on patch from Shan Wei and Brian Haley . Signed-off-by: Shan Wei Signed-off-by: Brian Haley Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/datagram.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 94fa6ae77cf..53e3883f766 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -509,7 +509,6 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { int addr_type; - struct net_device *dev = NULL; if (!CMSG_OK(msg, cmsg)) { err = -EINVAL; @@ -522,6 +521,9 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, switch (cmsg->cmsg_type) { case IPV6_PKTINFO: case IPV6_2292PKTINFO: + { + struct net_device *dev = NULL; + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) { err = -EINVAL; goto exit_f; @@ -535,32 +537,32 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, fl->oif = src_info->ipi6_ifindex; } - addr_type = ipv6_addr_type(&src_info->ipi6_addr); + addr_type = __ipv6_addr_type(&src_info->ipi6_addr); - if (addr_type == IPV6_ADDR_ANY) - break; + if (fl->oif) { + dev = dev_get_by_index(&init_net, fl->oif); + if (!dev) + return -ENODEV; + } else if (addr_type & IPV6_ADDR_LINKLOCAL) + return -EINVAL; - if (addr_type & IPV6_ADDR_LINKLOCAL) { - if (!src_info->ipi6_ifindex) - return -EINVAL; - else { - dev = dev_get_by_index(&init_net, src_info->ipi6_ifindex); - if (!dev) - return -ENODEV; - } - } - if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr, - dev, 0)) { - if (dev) - dev_put(dev); - err = -EINVAL; - goto exit_f; + if (addr_type != IPV6_ADDR_ANY) { + int strict = __ipv6_addr_src_scope(addr_type) <= 
IPV6_ADDR_SCOPE_LINKLOCAL; + if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr, + strict ? dev : NULL, 0)) + err = -EINVAL; + else + ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr); } + if (dev) dev_put(dev); - ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr); + if (err) + goto exit_f; + break; + } case IPV6_FLOWINFO: if (cmsg->cmsg_len < CMSG_LEN(4)) { -- cgit v1.2.3 From 91e1908f569dd96a25a3947de8771e6cc93999dd Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 4 Jun 2008 13:02:49 +0900 Subject: [IPV6] NETNS: Handle ancillary data in appropriate namespace. Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/datagram.c | 7 ++++--- net/ipv6/ip6_flowlabel.c | 2 +- net/ipv6/ipv6_sockglue.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/udp.c | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 53e3883f766..b9c2de84a8a 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -496,7 +496,8 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) return 0; } -int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, +int datagram_send_ctl(struct net *net, + struct msghdr *msg, struct flowi *fl, struct ipv6_txoptions *opt, int *hlimit, int *tclass) { @@ -540,7 +541,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, addr_type = __ipv6_addr_type(&src_info->ipi6_addr); if (fl->oif) { - dev = dev_get_by_index(&init_net, fl->oif); + dev = dev_get_by_index(net, fl->oif); if (!dev) return -ENODEV; } else if (addr_type & IPV6_ADDR_LINKLOCAL) @@ -548,7 +549,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, if (addr_type != IPV6_ADDR_ANY) { int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; - if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr, + if (!ipv6_chk_addr(net, &src_info->ipi6_addr, strict ? 
dev : NULL, 0)) err = -EINVAL; else diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index eb7a940310f..37a4e777e34 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -354,7 +354,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, msg.msg_control = (void*)(fl->opt+1); flowi.oif = 0; - err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk); + err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, &junk); if (err) goto done; err = -EINVAL; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 9293b9f0ac2..3eef8e5b363 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -416,7 +416,7 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(&msg, &fl, opt, &junk, &junk); + retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk); if (retv) goto done; update: diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 232e0dc45bf..603df76e052 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -813,7 +813,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 47123bf5eb0..1b35c472200 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -731,7 +731,7 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; -- cgit v1.2.3 From 49d074f4009a7b5ce9c17b040f978abcb4d7f6f6 Mon Sep 17 00:00:00 2001 From: "Denis V. 
Lunev" Date: Wed, 4 Jun 2008 15:49:06 +0400 Subject: [IPV6]: Do not change protocol for raw IPv6 sockets. It is not allowed to change underlying protocol for int fd = socket(PF_INET6, SOCK_RAW, IPPROTO_UDP); Signed-off-by: Denis V. Lunev Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ipv6_sockglue.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 3eef8e5b363..1afe210d628 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -161,6 +161,9 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, struct ipv6_txoptions *opt; struct sk_buff *pktopt; + if (sk->sk_type == SOCK_RAW) + break; + if (sk->sk_protocol != IPPROTO_UDP && sk->sk_protocol != IPPROTO_UDPLITE && sk->sk_protocol != IPPROTO_TCP) -- cgit v1.2.3 From 36d926b94a9908937593e5669162305a071b9cc3 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 4 Jun 2008 15:49:07 +0400 Subject: [IPV6]: inet_sk(sk)->cork.opt leak IPv6 UDP sockets wth IPv4 mapped address use udp_sendmsg to send the data actually. In this case ip_flush_pending_frames should be called instead of ip6_flush_pending_frames. Signed-off-by: Denis V. Lunev Signed-off-by: YOSHIFUJI Hideaki --- net/ipv4/udp.c | 3 ++- net/ipv6/udp.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index db1cb7c96d6..56fcda3694b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -420,7 +420,7 @@ void udp_err(struct sk_buff *skb, u32 info) /* * Throw away all pending data and cancel the corking. Socket is locked. 
*/ -static void udp_flush_pending_frames(struct sock *sk) +void udp_flush_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); @@ -430,6 +430,7 @@ static void udp_flush_pending_frames(struct sock *sk) ip_flush_pending_frames(sk); } } +EXPORT_SYMBOL(udp_flush_pending_frames); /** * udp4_hwcsum_outgoing - handle outgoing HW checksumming diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1b35c472200..dd309626ae9 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -534,7 +534,9 @@ static void udp_v6_flush_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); - if (up->pending) { + if (up->pending == AF_INET) + udp_flush_pending_frames(sk); + else if (up->pending) { up->len = 0; up->pending = 0; ip6_flush_pending_frames(sk); -- cgit v1.2.3 From 9596cc826e2e52bfc318ca37a6c52fe3d72990a3 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 4 Jun 2008 15:49:08 +0400 Subject: [IPV6]: Do not change protocol for UDPv6 sockets with pending sent data. Signed-off-by: Denis V. 
Lunev Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ipv6_sockglue.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 1afe210d628..26b83e512a0 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -164,9 +164,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (sk->sk_type == SOCK_RAW) break; - if (sk->sk_protocol != IPPROTO_UDP && - sk->sk_protocol != IPPROTO_UDPLITE && - sk->sk_protocol != IPPROTO_TCP) + if (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_UDPLITE) { + struct udp_sock *up = udp_sk(sk); + if (up->pending == AF_INET6) { + retv = -EBUSY; + break; + } + } else if (sk->sk_protocol != IPPROTO_TCP) break; if (sk->sk_state != TCP_ESTABLISHED) { -- cgit v1.2.3 From a13366c632132bb9f8f2950a79773d8f68f4871e Mon Sep 17 00:00:00 2001 From: Adrian-Ken Rueegsegger Date: Wed, 4 Jun 2008 12:04:55 -0700 Subject: xfrm: xfrm_algo: correct usage of RIPEMD-160 This patch fixes the usage of RIPEMD-160 in xfrm_algo which in turn allows hmac(rmd160) to be used as authentication mechanism in IPsec ESP and AH (see RFC 2857). Signed-off-by: Adrian-Ken Rueegsegger Acked-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_algo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index ac765dd9c7f..23a2cc04b8c 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -200,8 +200,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "hmac(ripemd160)", - .compat = "ripemd160", + .name = "hmac(rmd160)", + .compat = "rmd160", .uinfo = { .auth = { -- cgit v1.2.3 From a6604471db5e7a33474a7f16c64d6b118fae3e74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 4 Jun 2008 12:07:44 -0700 Subject: tcp: fix skb vs fack_count out-of-sync condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This bug is able to corrupt fackets_out in very rare cases. In order for this to cause corruption: 1) DSACK in the middle of previous SACK block must be generated. 2) In order to take that particular branch, part or all of the DSACKed segment must already be SACKed so that we have that in cache in the first place. 3) The new info must be top enough so that fackets_out will be updated on this iteration. ...then fack_count is updated while skb wasn't, then we walk again that particular segment thus updating fack_count twice for a single skb and finally that value is assigned to fackets_out by tcp_sacktag_one. It is safe to call tcp_sacktag_one just once for a segment (at DSACK), no need to call again for plain SACK. Potential problem of the miscount are limited to premature entry to recovery and to inflated reordering metric (which could even cancel each other out in the most the luckiest scenarios :-)). Both are quite insignificant in worst case too and there exists also code to reset them (fackets_out once sacked_out becomes zero and reordering metric on RTO). This has been reported by a number of people, because it occurred quite rarely, it has been very evasive. 
Andy Furniss was able to get it to occur a couple of times so that a bit more info was collected about the problem using a debug patch, though it still required a lot of checking around. Thanks also to others who have tried to help here. This is listed as Bugzilla #10346. The bug was introduced by me in commit 68f8353b48 ([TCP]: Rewrite SACK block processing & sack_recv_cache use). I probably thought back then that there was a need to scan that entry twice or didn't dare to make it go through it just once there. Going through twice would have required restoring fack_count after the walk but as noted above, I chose to drop the additional walk step altogether here. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 54a0b741278..eba873e9b56 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1392,9 +1392,9 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, if (before(next_dup->start_seq, skip_to_seq)) { skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count); - tcp_sacktag_walk(skb, sk, NULL, - next_dup->start_seq, next_dup->end_seq, - 1, fack_count, reord, flag); + skb = tcp_sacktag_walk(skb, sk, NULL, + next_dup->start_seq, next_dup->end_seq, + 1, fack_count, reord, flag); } return skb; -- cgit v1.2.3 From 4141ddc02a92a6e3e5793601554c6033e83c25b9 Mon Sep 17 00:00:00 2001 From: Gui Jianfeng Date: Wed, 4 Jun 2008 12:37:33 -0700 Subject: sctp: retran_path update bug fix If the current retran_path is the only active one, it should update it to the next inactive one. Signed-off-by: Gui Jianfeng Signed-off-by: Vlad Yasevich Signed-off-by: David S.
Miller --- net/sctp/associola.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index b4cd2b71953..532634861db 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1203,6 +1203,9 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) struct list_head *head = &asoc->peer.transport_addr_list; struct list_head *pos; + if (asoc->peer.transport_count == 1) + return; + /* Find the next transport in a round-robin fashion. */ t = asoc->peer.retran_path; pos = &t->transports; @@ -1217,6 +1220,15 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) t = list_entry(pos, struct sctp_transport, transports); + /* We have exhausted the list, but didn't find any + * other active transports. If so, use the next + * transport. + */ + if (t == asoc->peer.retran_path) { + t = next; + break; + } + /* Try to find an active transport. */ if ((t->state == SCTP_ACTIVE) || @@ -1229,15 +1241,6 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) if (!next) next = t; } - - /* We have exhausted the list, but didn't find any - * other active transports. If so, use the next - * transport. - */ - if (t == asoc->peer.retran_path) { - t = next; - break; - } } asoc->peer.retran_path = t; -- cgit v1.2.3 From 159c6bea37c54dfae44409467e0f17600722d541 Mon Sep 17 00:00:00 2001 From: Gui Jianfeng Date: Wed, 4 Jun 2008 12:38:07 -0700 Subject: sctp: Move sctp_v4_dst_saddr out of loop There's no need to execute sctp_v4_dst_saddr() for each iteration, just move it out of loop. Signed-off-by: Gui Jianfeng Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 13ee7fa92e0..56bdaf7fc42 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -470,11 +470,11 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, /* Walk through the bind address list and look for a bind * address that matches the source address of the returned dst. */ + sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port)); rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC)) continue; - sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port)); if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) goto out_unlock; } -- cgit v1.2.3 From a6465234814efda9ed1dccdba852953f7508e827 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 4 Jun 2008 12:38:43 -0700 Subject: sctp: Correctly implement Fast Recovery cwnd manipulations. Correctly keep track of Fast Recovery state and do not reduce the congestion window multiple times during such a state. Signed-off-by: Vlad Yasevich Tested-by: Wei Yongjun Signed-off-by: David S.
Miller --- net/sctp/transport.c | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 62082e7b797..9647fb27722 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -79,6 +79,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->rttvar = 0; peer->srtt = 0; peer->rto_pending = 0; + peer->fast_recovery = 0; peer->last_time_heard = jiffies; peer->last_time_used = jiffies; @@ -403,11 +404,16 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, cwnd = transport->cwnd; flight_size = transport->flight_size; + /* See if we need to exit Fast Recovery first */ + if (transport->fast_recovery && + TSN_lte(transport->fast_recovery_exit, sack_ctsn)) + transport->fast_recovery = 0; + /* The appropriate cwnd increase algorithm is performed if, and only - * if the cumulative TSN has advanced and the congestion window is + * if the cumulative TSN whould advanced and the congestion window is * being fully utilized. */ - if ((transport->asoc->ctsn_ack_point >= sack_ctsn) || + if (TSN_lte(sack_ctsn, transport->asoc->ctsn_ack_point) || (flight_size < cwnd)) return; @@ -416,17 +422,23 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, pmtu = transport->asoc->pathmtu; if (cwnd <= ssthresh) { - /* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less - * than or equal to ssthresh an SCTP endpoint MUST use the - * slow start algorithm to increase cwnd only if the current - * congestion window is being fully utilized and an incoming - * SACK advances the Cumulative TSN Ack Point. Only when these - * two conditions are met can the cwnd be increased otherwise - * the cwnd MUST not be increased. 
If these conditions are met - * then cwnd MUST be increased by at most the lesser of - * 1) the total size of the previously outstanding DATA - * chunk(s) acknowledged, and 2) the destination's path MTU. + /* RFC 4960 7.2.1 + * o When cwnd is less than or equal to ssthresh, an SCTP + * endpoint MUST use the slow-start algorithm to increase + * cwnd only if the current congestion window is being fully + * utilized, an incoming SACK advances the Cumulative TSN + * Ack Point, and the data sender is not in Fast Recovery. + * Only when these three conditions are met can the cwnd be + * increased; otherwise, the cwnd MUST not be increased. + * If these conditions are met, then cwnd MUST be increased + * by, at most, the lesser of 1) the total size of the + * previously outstanding DATA chunk(s) acknowledged, and + * 2) the destination's path MTU. This upper bound protects + * against the ACK-Splitting attack outlined in [SAVAGE99]. */ + if (transport->fast_recovery) + return; + if (bytes_acked > pmtu) cwnd += pmtu; else @@ -502,6 +514,13 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * cwnd = ssthresh * partial_bytes_acked = 0 */ + if (transport->fast_recovery) + return; + + /* Mark Fast recovery */ + transport->fast_recovery = 1; + transport->fast_recovery_exit = transport->asoc->next_tsn - 1; + transport->ssthresh = max(transport->cwnd/2, 4*transport->asoc->pathmtu); transport->cwnd = transport->ssthresh; @@ -586,6 +605,7 @@ void sctp_transport_reset(struct sctp_transport *t) t->flight_size = 0; t->error_count = 0; t->rto_pending = 0; + t->fast_recovery = 0; /* Initialize the state information for SFR-CACC */ t->cacc.changeover_active = 0; -- cgit v1.2.3 From 62aeaff5ccd96462b7077046357a6d7886175a57 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 4 Jun 2008 12:39:11 -0700 Subject: sctp: Start T3-RTX timer when fast retransmitting lowest TSN When we are trying to fast retransmit the lowest outstanding TSN, we need to restart the T3-RTX 
timer, so that subsequent timeouts will correctly tag all the packets necessary for retransmissions. Signed-off-by: Vlad Yasevich Tested-by: Wei Yongjun Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 42 +++++++++++++++++++++++++++++++----------- net/sctp/transport.c | 4 ++-- 2 files changed, 33 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 59edfd25a19..5d3c441e84d 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -208,6 +208,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) INIT_LIST_HEAD(&q->sacked); INIT_LIST_HEAD(&q->abandoned); + q->fast_rtx = 0; q->outstanding_bytes = 0; q->empty = 1; q->cork = 0; @@ -500,6 +501,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, case SCTP_RTXR_FAST_RTX: SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); + q->fast_rtx = 1; break; case SCTP_RTXR_PMTUD: SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS); @@ -543,10 +545,13 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, sctp_xmit_t status; struct sctp_chunk *chunk, *chunk1; struct sctp_association *asoc; + int fast_rtx; int error = 0; + int timer = 0; asoc = q->asoc; lqueue = &q->retransmit; + fast_rtx = q->fast_rtx; /* RFC 2960 6.3.3 Handle T3-rtx Expiration * @@ -587,13 +592,12 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, switch (status) { case SCTP_XMIT_PMTU_FULL: /* Send this packet. */ - if ((error = sctp_packet_transmit(pkt)) == 0) - *start_timer = 1; + error = sctp_packet_transmit(pkt); /* If we are retransmitting, we should only * send a single packet. */ - if (rtx_timeout) { + if (rtx_timeout || fast_rtx) { list_add(lchunk, lqueue); lchunk = NULL; } @@ -603,8 +607,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, case SCTP_XMIT_RWND_FULL: /* Send this packet. 
*/ - if ((error = sctp_packet_transmit(pkt)) == 0) - *start_timer = 1; + error = sctp_packet_transmit(pkt); /* Stop sending DATA as there is no more room * at the receiver. @@ -615,8 +618,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, case SCTP_XMIT_NAGLE_DELAY: /* Send this packet. */ - if ((error = sctp_packet_transmit(pkt)) == 0) - *start_timer = 1; + error = sctp_packet_transmit(pkt); /* Stop sending DATA because of nagle delay. */ list_add(lchunk, lqueue); @@ -635,7 +637,14 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, if (chunk->fast_retransmit > 0) chunk->fast_retransmit = -1; - *start_timer = 1; + /* Force start T3-rtx timer when fast retransmitting + * the earliest outstanding TSN + */ + if (!timer && fast_rtx && + ntohl(chunk->subh.data_hdr->tsn) == + asoc->ctsn_ack_point + 1) + timer = 2; + q->empty = 0; /* Retrieve a new chunk to bundle. */ @@ -643,12 +652,16 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, break; } + /* Set the timer if there were no errors */ + if (!error && !timer) + timer = 1; + /* If we are here due to a retransmit timeout or a fast * retransmit and if there are any chunks left in the retransmit * queue that could not fit in the PMTU sized packet, they need * to be marked as ineligible for a subsequent fast retransmit. 
*/ - if (rtx_timeout && !lchunk) { + if (rtx_timeout && fast_rtx) { list_for_each_entry(chunk1, lqueue, transmitted_list) { if (chunk1->fast_retransmit > 0) chunk1->fast_retransmit = -1; @@ -656,6 +669,12 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, } } + *start_timer = timer; + + /* Clear fast retransmit hint */ + if (fast_rtx) + q->fast_rtx = 0; + return error; } @@ -862,7 +881,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) rtx_timeout, &start_timer); if (start_timer) - sctp_transport_reset_timers(transport); + sctp_transport_reset_timers(transport, + start_timer-1); /* This can happen on COOKIE-ECHO resend. Only * one chunk can get bundled with a COOKIE-ECHO. @@ -977,7 +997,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) list_add_tail(&chunk->transmitted_list, &transport->transmitted); - sctp_transport_reset_timers(transport); + sctp_transport_reset_timers(transport, start_timer-1); q->empty = 0; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 9647fb27722..3f34f61221e 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -191,7 +191,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport) /* Start T3_rtx timer if it is not already running and update the heartbeat * timer. This routine is called every time a DATA chunk is sent. */ -void sctp_transport_reset_timers(struct sctp_transport *transport) +void sctp_transport_reset_timers(struct sctp_transport *transport, int force) { /* RFC 2960 6.3.2 Retransmission Timer Rules * @@ -201,7 +201,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport) * address. 
*/ - if (!timer_pending(&transport->T3_rtx_timer)) + if (force || !timer_pending(&transport->T3_rtx_timer)) if (!mod_timer(&transport->T3_rtx_timer, jiffies + transport->rto)) sctp_transport_hold(transport); -- cgit v1.2.3 From 8b750ce54bd8ab5f75d519ee450e1b0c5226ebe9 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 4 Jun 2008 12:39:36 -0700 Subject: sctp: Flush the queue only once during fast retransmit. When fast retransmit is triggered by a sack, we should flush the queue only once so that only 1 retransmit happens. Also, since we could potentially have non-fast-rtx chunks on the retransmit queue, we need make sure any chunks eligable for fast retransmit are sent first during fast retransmission. Signed-off-by: Vlad Yasevich Tested-by: Wei Yongjun Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 82 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 5d3c441e84d..ace6770e904 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -520,9 +520,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, * the sender SHOULD try to advance the "Advanced.Peer.Ack.Point" by * following the procedures outlined in C1 - C5. */ - sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point); + if (reason == SCTP_RTXR_T3_RTX) + sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point); - error = sctp_outq_flush(q, /* rtx_timeout */ 1); + /* Flush the queues only on timeout, since fast_rtx is only + * triggered during sack processing and the queue + * will be flushed at the end. 
+ */ + if (reason != SCTP_RTXR_FAST_RTX) + error = sctp_outq_flush(q, /* rtx_timeout */ 1); if (error) q->asoc->base.sk->sk_err = -error; @@ -540,7 +546,6 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, int rtx_timeout, int *start_timer) { struct list_head *lqueue; - struct list_head *lchunk; struct sctp_transport *transport = pkt->transport; sctp_xmit_t status; struct sctp_chunk *chunk, *chunk1; @@ -548,12 +553,16 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, int fast_rtx; int error = 0; int timer = 0; + int done = 0; asoc = q->asoc; lqueue = &q->retransmit; fast_rtx = q->fast_rtx; - /* RFC 2960 6.3.3 Handle T3-rtx Expiration + /* This loop handles time-out retransmissions, fast retransmissions, + * and retransmissions due to opening of whindow. + * + * RFC 2960 6.3.3 Handle T3-rtx Expiration * * E3) Determine how many of the earliest (i.e., lowest TSN) * outstanding DATA chunks for the address for which the @@ -568,12 +577,12 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, * [Just to be painfully clear, if we are retransmitting * because a timeout just happened, we should send only ONE * packet of retransmitted data.] + * + * For fast retransmissions we also send only ONE packet. However, + * if we are just flushing the queue due to open window, we'll + * try to send as much as possible. */ - lchunk = sctp_list_dequeue(lqueue); - - while (lchunk) { - chunk = list_entry(lchunk, struct sctp_chunk, - transmitted_list); + list_for_each_entry_safe(chunk, chunk1, lqueue, transmitted_list) { /* Make sure that Gap Acked TSNs are not retransmitted. A * simple approach is just to move such TSNs out of the @@ -581,11 +590,18 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, * next chunk. 
*/ if (chunk->tsn_gap_acked) { - list_add_tail(lchunk, &transport->transmitted); - lchunk = sctp_list_dequeue(lqueue); + list_del(&chunk->transmitted_list); + list_add_tail(&chunk->transmitted_list, + &transport->transmitted); continue; } + /* If we are doing fast retransmit, ignore non-fast_rtransmit + * chunks + */ + if (fast_rtx && !chunk->fast_retransmit) + continue; + /* Attempt to append this chunk to the packet. */ status = sctp_packet_append_chunk(pkt, chunk); @@ -597,12 +613,10 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, /* If we are retransmitting, we should only * send a single packet. */ - if (rtx_timeout || fast_rtx) { - list_add(lchunk, lqueue); - lchunk = NULL; - } + if (rtx_timeout || fast_rtx) + done = 1; - /* Bundle lchunk in the next round. */ + /* Bundle next chunk in the next round. */ break; case SCTP_XMIT_RWND_FULL: @@ -612,8 +626,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, /* Stop sending DATA as there is no more room * at the receiver. */ - list_add(lchunk, lqueue); - lchunk = NULL; + done = 1; break; case SCTP_XMIT_NAGLE_DELAY: @@ -621,15 +634,16 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, error = sctp_packet_transmit(pkt); /* Stop sending DATA because of nagle delay. */ - list_add(lchunk, lqueue); - lchunk = NULL; + done = 1; break; default: /* The append was successful, so add this chunk to * the transmitted list. */ - list_add_tail(lchunk, &transport->transmitted); + list_del(&chunk->transmitted_list); + list_add_tail(&chunk->transmitted_list, + &transport->transmitted); /* Mark the chunk as ineligible for fast retransmit * after it is retransmitted. @@ -646,9 +660,6 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, timer = 2; q->empty = 0; - - /* Retrieve a new chunk to bundle. 
*/ - lchunk = sctp_list_dequeue(lqueue); break; } @@ -656,16 +667,19 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, if (!error && !timer) timer = 1; - /* If we are here due to a retransmit timeout or a fast - * retransmit and if there are any chunks left in the retransmit - * queue that could not fit in the PMTU sized packet, they need - * to be marked as ineligible for a subsequent fast retransmit. - */ - if (rtx_timeout && fast_rtx) { - list_for_each_entry(chunk1, lqueue, transmitted_list) { - if (chunk1->fast_retransmit > 0) - chunk1->fast_retransmit = -1; - } + if (done) + break; + } + + /* If we are here due to a retransmit timeout or a fast + * retransmit and if there are any chunks left in the retransmit + * queue that could not fit in the PMTU sized packet, they need + * to be marked as ineligible for a subsequent fast retransmit. + */ + if (rtx_timeout || fast_rtx) { + list_for_each_entry(chunk1, lqueue, transmitted_list) { + if (chunk1->fast_retransmit > 0) + chunk1->fast_retransmit = -1; } } -- cgit v1.2.3 From b9031d9d87b24e24cd32ea15b5f4220a1e8da909 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 4 Jun 2008 12:40:15 -0700 Subject: sctp: Fix ECN markings for IPv6 Commit e9df2e8fd8fbc95c57dbd1d33dada66c4627b44c ("[IPV6]: Use appropriate sock tclass setting for routing lookup.") also changed the way that ECN capable transports mark this capability in IPv6. As a result, SCTP was not marking ECN capability because the traffic class was never set. This patch brings back the markings for IPv6 traffic. Signed-off-by: Vlad Yasevich Signed-off-by: David S.
Miller --- net/sctp/ipv6.c | 6 ++++++ net/sctp/output.c | 2 +- net/sctp/protocol.c | 6 ++++++ 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e4aac3266fc..a2f4d4d5159 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -727,6 +727,11 @@ static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) seq_printf(seq, NIP6_FMT " ", NIP6(addr->v6.sin6_addr)); } +static void sctp_v6_ecn_capable(struct sock *sk) +{ + inet6_sk(sk)->tclass |= INET_ECN_ECT_0; +} + /* Initialize a PF_INET6 socket msg_name. */ static void sctp_inet6_msgname(char *msgname, int *addr_len) { @@ -997,6 +1002,7 @@ static struct sctp_af sctp_af_inet6 = { .skb_iif = sctp_v6_skb_iif, .is_ce = sctp_v6_is_ce, .seq_dump_addr = sctp_v6_seq_dump_addr, + .ecn_capable = sctp_v6_ecn_capable, .net_header_len = sizeof(struct ipv6hdr), .sockaddr_len = sizeof(struct sockaddr_in6), #ifdef CONFIG_COMPAT diff --git a/net/sctp/output.c b/net/sctp/output.c index cf4f9fb6819..6d45bae93b4 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -548,7 +548,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) * Note: The works for IPv6 layer checks this bit too later * in transmission. See IP6_ECN_flow_xmit(). */ - INET_ECN_xmit(nskb->sk); + (*tp->af_specific->ecn_capable)(nskb->sk); /* Set up the IP options. */ /* BUG: not implemented diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 56bdaf7fc42..b435a193c5d 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -617,6 +617,11 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr)); } +static void sctp_v4_ecn_capable(struct sock *sk) +{ + INET_ECN_xmit(sk); +} + /* Event handler for inet address addition/deletion events. 
* The sctp_local_addr_list needs to be protocted by a spin lock since * multiple notifiers (say IPv4 and IPv6) may be running at the same @@ -935,6 +940,7 @@ static struct sctp_af sctp_af_inet = { .skb_iif = sctp_v4_skb_iif, .is_ce = sctp_v4_is_ce, .seq_dump_addr = sctp_v4_seq_dump_addr, + .ecn_capable = sctp_v4_ecn_capable, .net_header_len = sizeof(struct iphdr), .sockaddr_len = sizeof(struct sockaddr_in), #ifdef CONFIG_COMPAT -- cgit v1.2.3 From 22dd485022f3d0b162ceb5e67d85de7c3806aa20 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 4 Jun 2008 15:16:12 -0700 Subject: raw: Raw socket leak. The program below just leaks the raw kernel socket int main() { int fd = socket(PF_INET, SOCK_RAW, IPPROTO_UDP); struct sockaddr_in addr; memset(&addr, 0, sizeof(addr)); inet_aton("127.0.0.1", &addr.sin_addr); addr.sin_family = AF_INET; addr.sin_port = htons(2048); sendto(fd, "a", 1, MSG_MORE, &addr, sizeof(addr)); return 0; } Corked packet is allocated via sock_wmalloc which holds the owner socket, so one should uncork it and flush all pending data on close. Do this in the same way as in UDP. Signed-off-by: Denis V. Lunev Acked-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- net/ipv4/raw.c | 9 +++++++++ net/ipv6/raw.c | 9 +++++++++ 2 files changed, 18 insertions(+) (limited to 'net') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index fead049daf4..e7e091d365f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -608,6 +608,14 @@ static void raw_close(struct sock *sk, long timeout) sk_common_release(sk); } +static int raw_destroy(struct sock *sk) +{ + lock_sock(sk); + ip_flush_pending_frames(sk); + release_sock(sk); + return 0; +} + /* This gets rid of all the nasties in af_inet. 
-DaveM */ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -820,6 +828,7 @@ struct proto raw_prot = { .name = "RAW", .owner = THIS_MODULE, .close = raw_close, + .destroy = raw_destroy, .connect = ip4_datagram_connect, .disconnect = udp_disconnect, .ioctl = raw_ioctl, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 603df76e052..8fee9a15b2d 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1164,6 +1164,14 @@ static void rawv6_close(struct sock *sk, long timeout) sk_common_release(sk); } +static int raw6_destroy(struct sock *sk) +{ + lock_sock(sk); + ip6_flush_pending_frames(sk); + release_sock(sk); + return 0; +} + static int rawv6_init_sk(struct sock *sk) { struct raw6_sock *rp = raw6_sk(sk); @@ -1187,6 +1195,7 @@ struct proto rawv6_prot = { .name = "RAWv6", .owner = THIS_MODULE, .close = rawv6_close, + .destroy = raw6_destroy, .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = rawv6_ioctl, -- cgit v1.2.3 From 26af65cbeb2467a486ae4fc7242c94e470c67c50 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Wed, 4 Jun 2008 15:19:35 -0700 Subject: tcp: Increment OUTRSTS in tcp_send_active_reset() TCP "resets sent" counter is not incremented when a TCP Reset is sent via tcp_send_active_reset(). Signed-off-by: Sridhar Samudrala Signed-off-by: David S. 
Miller --- net/ipv4/tcp_output.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e399bde7813..ad993ecb481 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2131,6 +2131,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb, 0, priority)) NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); + + TCP_INC_STATS(TCP_MIB_OUTRSTS); } /* WARNING: This routine must only be called when we have already sent -- cgit v1.2.3 From 293ad60401da621b8b329abbe8c388edb25f658a Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 4 Jun 2008 15:45:58 -0700 Subject: tcp: Fix for race due to temporary drop of the socket lock in skb_splice_bits. skb_splice_bits temporarily drops the socket lock while iterating over the socket queue in order to break a reverse locking condition which happens with sendfile. This, however, opens a window of opportunity for tcp_collapse() to aggregate skbs and thus potentially free the current skb used in skb_splice_bits and tcp_read_sock. This patch fixes the problem by (re-)getting the same "logical skb" after the lock has been temporarily dropped. Based on an idea and initial patch from Evgeniy Polyakov. Signed-off-by: Octavian Purdila Acked-by: Evgeniy Polyakov Signed-off-by: David S. Miller --- net/core/skbuff.c | 5 +++-- net/ipv4/tcp.c | 9 ++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5c459f2b798..1e556d31211 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1445,6 +1445,7 @@ done: if (spd.nr_pages) { int ret; + struct sock *sk = __skb->sk; /* * Drop the socket lock, otherwise we have reverse @@ -1455,9 +1456,9 @@ done: * we call into ->sendpage() with the i_mutex lock held * and networking will grab the socket lock.
*/ - release_sock(__skb->sk); + release_sock(sk); ret = splice_to_pipe(pipe, &spd); - lock_sock(__skb->sk); + lock_sock(sk); return ret; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f8865313862..ab66683b804 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1227,7 +1227,14 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, copied += used; offset += used; } - if (offset != skb->len) + /* + * If recv_actor drops the lock (e.g. TCP splice + * receive) the skb pointer might be invalid when + * getting here: tcp_collapse might have deleted it + * while aggregating skbs from the socket queue. + */ + skb = tcp_recv_skb(sk, seq-1, &offset); + if (!skb || (offset+1 != skb->len)) break; } if (tcp_hdr(skb)->fin) { -- cgit v1.2.3 From ddb2c43594f22843e9f3153da151deaba1a834c5 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Wed, 4 Jun 2008 09:16:33 -0700 Subject: asn1: additional sanity checking during BER decoding - Don't trust a length which is greater than the working buffer. An invalid length could cause overflow when calculating buffer size for decoding oid. - An oid length of zero is invalid and allows for an off-by-one error when decoding oid because the first subid actually encodes first 2 subids. - A primitive encoding may not have an indefinite length. Thanks to Wei Wang from McAfee for report. 
Cc: Steven French Cc: stable@kernel.org Acked-by: Patrick McHardy Signed-off-by: Chris Wright Signed-off-by: Linus Torvalds --- net/ipv4/netfilter/nf_nat_snmp_basic.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 5daefad3d19..7750c97fde7 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -232,6 +232,11 @@ static unsigned char asn1_length_decode(struct asn1_ctx *ctx, } } } + + /* don't trust len bigger than ctx buffer */ + if (*len > ctx->end - ctx->pointer) + return 0; + return 1; } @@ -250,6 +255,10 @@ static unsigned char asn1_header_decode(struct asn1_ctx *ctx, if (!asn1_length_decode(ctx, &def, &len)) return 0; + /* primitive shall be definite, indefinite shall be constructed */ + if (*con == ASN1_PRI && !def) + return 0; + if (def) *eoc = ctx->pointer + len; else @@ -434,6 +443,11 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, unsigned long *optr; size = eoc - ctx->pointer + 1; + + /* first subid actually encodes first two subids */ + if (size < 2 || size > ULONG_MAX/sizeof(unsigned long)) + return 0; + *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); if (*oid == NULL) { if (net_ratelimit()) -- cgit v1.2.3 From 507b06d0622480f8026d49a94f86068bb0fd6ed6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 3 Jun 2008 23:39:55 -0400 Subject: mac80211: send association event on IBSS create Otherwise userspace has no idea the IBSS creation succeeded. Signed-off-by: Dan Williams Signed-off-by: John W. 
Linville --- net/mac80211/mlme.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 841278f1df8..af375da9df2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2336,6 +2336,7 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, u8 *pos; struct ieee80211_sub_if_data *sdata; struct ieee80211_supported_band *sband; + union iwreq_data wrqu; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; @@ -2479,6 +2480,10 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, ifsta->state = IEEE80211_IBSS_JOINED; mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); + memset(&wrqu, 0, sizeof(wrqu)); + memcpy(wrqu.ap_addr.sa_data, bss->bssid, ETH_ALEN); + wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); + return res; } -- cgit v1.2.3 From ad81b2f97d42e13ef78bb3798e046cd5f0492980 Mon Sep 17 00:00:00 2001 From: Assaf Krauss Date: Wed, 4 Jun 2008 20:27:59 +0300 Subject: mac80211: Fixing slow IBSS rejoin This patch fixes the issue of slow reconnection to an IBSS cell after disconnection from it. Now the interface's bssid is reset upon ifdown. ieee80211_sta_find_ibss: if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0 && (bss = ieee80211_rx_bss_get(dev, bssid, local->hw.conf.channel->center_freq, ifsta->ssid, ifsta->ssid_len))) Note: In general disconnection is still not handled properly in mac80211 Signed-off-by: Assaf Krauss Signed-off-by: Tomas Winkler Signed-off-by: John W. 
Linville --- net/mac80211/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 5c876450b14..98c0b5e56ec 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -511,6 +511,7 @@ static int ieee80211_stop(struct net_device *dev) case IEEE80211_IF_TYPE_STA: case IEEE80211_IF_TYPE_IBSS: sdata->u.sta.state = IEEE80211_DISABLED; + memset(sdata->u.sta.bssid, 0, ETH_ALEN); del_timer_sync(&sdata->u.sta.timer); /* * When we get here, the interface is marked down. -- cgit v1.2.3 From 872ba53395b2a8be08c3ea2d39e225e5b4a8cb40 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 4 Jun 2008 13:59:34 -0400 Subject: mac80211: decrease IBSS creation latency Sufficient scans (at least 2 or 3) should have been done within 7 seconds to find an existing IBSS to join. This should improve IBSS creation latency; and since IBSS merging is still in effect, shouldn't have detrimental effects on eventual IBSS convergence. Signed-off-by: Dan Williams Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index af375da9df2..affe42f8484 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -44,7 +44,7 @@ #define IEEE80211_RETRY_AUTH_INTERVAL (1 * HZ) #define IEEE80211_SCAN_INTERVAL (2 * HZ) #define IEEE80211_SCAN_INTERVAL_SLOW (15 * HZ) -#define IEEE80211_IBSS_JOIN_TIMEOUT (20 * HZ) +#define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ) #define IEEE80211_PROBE_DELAY (HZ / 33) #define IEEE80211_CHANNEL_TIME (HZ / 33) -- cgit v1.2.3 From be038b376465953c358d675cb38a611898a49dc2 Mon Sep 17 00:00:00 2001 From: Assaf Krauss Date: Thu, 5 Jun 2008 19:55:21 +0300 Subject: mac80211: Checking IBSS support while changing channel in ad-hoc mode This patch adds a check to the set_channel flow. 
When attempting to change the channel while in IBSS mode, and the new channel does not support IBSS mode, the flow returns an error value without affecting the mac80211 and driver state. Signed-off-by: Assaf Krauss Signed-off-by: Emmanuel Grumbach Signed-off-by: Tomas Winkler Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mlme.c | 11 ++++------- net/mac80211/wext.c | 15 +++++++++++---- 3 files changed, 16 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c7314bf4bec..006486b2672 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -899,7 +899,7 @@ extern const struct iw_handler_def ieee80211_iw_handler_def; /* ieee80211_ioctl.c */ -int ieee80211_set_freq(struct ieee80211_local *local, int freq); +int ieee80211_set_freq(struct net_device *dev, int freq); /* ieee80211_sta.c */ void ieee80211_sta_timer(unsigned long data); void ieee80211_sta_work(struct work_struct *work); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index affe42f8484..4d2b582dd05 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2359,13 +2359,10 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, sdata->drop_unencrypted = bss->capability & WLAN_CAPABILITY_PRIVACY ?
1 : 0; - res = ieee80211_set_freq(local, bss->freq); + res = ieee80211_set_freq(dev, bss->freq); - if (local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS) { - printk(KERN_DEBUG "%s: IBSS not allowed on frequency " - "%d MHz\n", dev->name, local->oper_channel->center_freq); - return -1; - } + if (res) + return res; /* Set beacon template */ skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); @@ -3491,7 +3488,7 @@ static int ieee80211_sta_config_auth(struct net_device *dev, spin_unlock_bh(&local->sta_bss_lock); if (selected) { - ieee80211_set_freq(local, selected->freq); + ieee80211_set_freq(dev, selected->freq); if (!(ifsta->flags & IEEE80211_STA_SSID_SET)) ieee80211_sta_set_ssid(dev, selected->ssid, selected->ssid_len); diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 8311bb24f9f..a8bb8e31b1e 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -290,14 +290,22 @@ static int ieee80211_ioctl_giwmode(struct net_device *dev, return 0; } -int ieee80211_set_freq(struct ieee80211_local *local, int freqMHz) +int ieee80211_set_freq(struct net_device *dev, int freqMHz) { int ret = -EINVAL; struct ieee80211_channel *chan; + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); chan = ieee80211_get_channel(local->hw.wiphy, freqMHz); if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) { + if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && + chan->flags & IEEE80211_CHAN_NO_IBSS) { + printk(KERN_DEBUG "%s: IBSS not allowed on frequency " + "%d MHz\n", dev->name, chan->center_freq); + return ret; + } local->oper_channel = chan; if (local->sta_sw_scanning || local->sta_hw_scanning) @@ -315,7 +323,6 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *freq, char *extra) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type == 
IEEE80211_IF_TYPE_STA) @@ -329,14 +336,14 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, IEEE80211_STA_AUTO_CHANNEL_SEL; return 0; } else - return ieee80211_set_freq(local, + return ieee80211_set_freq(dev, ieee80211_channel_to_frequency(freq->m)); } else { int i, div = 1000000; for (i = 0; i < freq->e; i++) div /= 10; if (div > 0) - return ieee80211_set_freq(local, freq->m / div); + return ieee80211_set_freq(dev, freq->m / div); else return -EINVAL; } -- cgit v1.2.3 From 2e761e0532a784816e7e822dbaaece8c5d4be14d Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 9 Jun 2008 15:53:30 -0700 Subject: ipv6 netns: init net is used to set bindv6only for new sock The bindv6only is tuned via sysctl. It is already on a struct net and per-net sysctls allow for its modification (ipv6_sysctl_net_init). Despite this the value configured in the init net is used for the rest of them. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3c6aafb0218..e84b3fd17fb 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -191,7 +191,7 @@ lookup_protocol: np->mcast_hops = -1; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; - np->ipv6only = init_net.ipv6.sysctl.bindv6only; + np->ipv6only = net->ipv6.sysctl.bindv6only; /* Init the ipv4 part of the socket since we can have sockets * using v6 API for ipv4. -- cgit v1.2.3 From ce4a7d0d48bbaed78ccbb0bafb9229651a40303a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 Jun 2008 12:39:35 -0700 Subject: inet{6}_request_sock: Init ->opt and ->pktopts in the constructor Wei Yongjun noticed that we may call reqsk_free on request sock objects where the opt fields may not be initialized, fix it by introducing inet_reqsk_alloc where we initialize ->opt to NULL and set ->pktopts to NULL in inet6_reqsk_alloc. 
Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 3 +-- net/dccp/ipv6.c | 1 - net/ipv4/syncookies.c | 3 +-- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/syncookies.c | 1 - net/ipv6/tcp_ipv6.c | 1 - 6 files changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index c22a3780c14..37d27bcb361 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -589,7 +589,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = reqsk_alloc(&dccp_request_sock_ops); + req = inet_reqsk_alloc(&dccp_request_sock_ops); if (req == NULL) goto drop; @@ -605,7 +605,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; - ireq->opt = NULL; /* * Step 3: Process LISTEN state diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9b1129bb7ec..f7fe2a572d7 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -421,7 +421,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) ireq6 = inet6_rsk(req); ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr); - ireq6->pktopts = NULL; if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 73ba98921d6..d182a2a2629 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -285,7 +285,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, cookie_check_timestamp(&tcp_opt); ret = NULL; - req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */ + req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ if (!req) goto out; @@ -301,7 +301,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; 
ireq->rmt_addr = ip_hdr(skb)->saddr; - ireq->opt = NULL; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->rcv_wscale = tcp_opt.rcv_wscale; ireq->sack_ok = tcp_opt.sack_ok; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cd601a866c2..4f8485c67d1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1285,7 +1285,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = reqsk_alloc(&tcp_request_sock_ops); + req = inet_reqsk_alloc(&tcp_request_sock_ops); if (!req) goto drop; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 938ce4ecde5..3ecc1157994 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -198,7 +198,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq6 = inet6_rsk(req); treq = tcp_rsk(req); - ireq6->pktopts = NULL; if (security_inet_conn_request(sk, skb, req)) { reqsk_free(req); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 715965f0fac..cb46749d4c3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1299,7 +1299,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) treq = inet6_rsk(req); ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr); - treq->pktopts = NULL; if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); -- cgit v1.2.3 From 99c6f60e72f112b57ddb07abb2e5f771ee211f43 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Tue, 10 Jun 2008 14:25:34 -0700 Subject: ipsec: pfkey should ignore events when no listeners When pfkey has no km listeners, it still does a lot of work before finding out there aint nobody out there. If a tree falls in a forest and no one is around to hear it, does it make a sound? In this case it makes a lot of noise: With this short-circuit adding 10s of thousands of SAs using netlink improves performance by ~10%. 
Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/key/af_key.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 9bba7ac5fee..7470e367272 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3030,6 +3030,9 @@ static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c) static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) { + if (atomic_read(&pfkey_socks_nr) == 0) + return 0; + switch (c->event) { case XFRM_MSG_EXPIRE: return key_notify_sa_expire(x, c); -- cgit v1.2.3 From 709772e6e06564ed94ba740de70185ac3d792773 Mon Sep 17 00:00:00 2001 From: Krzysztof Piotr Oledzki Date: Tue, 10 Jun 2008 15:44:49 -0700 Subject: net: Fix routing tables with id > 255 for legacy software Most legacy software does not like tables > 255 as rtm_table is u8 so tb_id is sent &0xff and it is possible to mismatch for example table 510 with table 254 (main). This patch introduces RT_TABLE_COMPAT=252 so the code uses it if tb_id > 255. It makes such old applications happy, while new ones are still able to use RTA_TABLE to get a proper table id. Signed-off-by: Krzysztof Piotr Oledzki Acked-by: Patrick McHardy Signed-off-by: David S.
Miller --- net/ipv4/fib_semantics.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 3b83c34019f..0d4d72827e4 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -960,7 +960,10 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, rtm->rtm_dst_len = dst_len; rtm->rtm_src_len = 0; rtm->rtm_tos = tos; - rtm->rtm_table = tb_id; + if (tb_id < 256) + rtm->rtm_table = tb_id; + else + rtm->rtm_table = RT_TABLE_COMPAT; NLA_PUT_U32(skb, RTA_TABLE, tb_id); rtm->rtm_type = type; rtm->rtm_flags = fi->fib_flags; -- cgit v1.2.3 From 3294f202dc1acd82223e83ef59f272bd87bb06b2 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 11 Jun 2008 11:19:09 +0100 Subject: dccp ccid-3: Bug-Fix - Zero RTT is possible In commit $(825de27d9e40b3117b29a79d412b7a4b78c5d815) (from 27th May, commit message `dccp ccid-3: Fix "t_ipi explosion" bug'), the CCID-3 window counter computation was fixed to cope with RTTs < 4 microseconds. Such RTTs can be found e.g. when running CCID-3 over loopback. The fix removed a check against RTT < 4, but introduced a divide-by-zero bug. All steady-state RTTs in DCCP are filtered using dccp_sample_rtt(), which ensures non-zero samples. However, a zero RTT is possible on initialisation, when there is no RTT sample from the Request/Response exchange. The fix is to use the fallback-RTT from RFC 4340, 3.4. This is also better than just fixing update_win_count() since it allows other parts of the code to always assume that the RTT is non-zero during the time that the CCID is used. 
Signed-off-by: Gerrit Renker --- net/dccp/ccids/ccid3.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index f813077234b..0474f4c5707 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -329,8 +329,14 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) hctx->ccid3hctx_x = rfc3390_initial_rate(sk); hctx->ccid3hctx_t_ld = now; } else { - /* Sender does not have RTT sample: X_pps = 1 pkt/sec */ - hctx->ccid3hctx_x = hctx->ccid3hctx_s; + /* + * Sender does not have RTT sample: + * - set fallback RTT (RFC 4340, 3.4) since a RTT value + * is needed in several parts (e.g. window counter); + * - set sending rate X_pps = 1pps as per RFC 3448, 4.2. + */ + hctx->ccid3hctx_rtt = DCCP_FALLBACK_RTT; + hctx->ccid3hctx_x = hctx->ccid3hctx_s; hctx->ccid3hctx_x <<= 6; } ccid3_update_send_interval(hctx); -- cgit v1.2.3 From 1e2f0e5e8376f2a0ada8760fc9d3104e1a81382b Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 11 Jun 2008 11:19:09 +0100 Subject: dccp: Fix sparse warnings This patch fixes the following sparse warnings: * nested min(max()) expression: net/dccp/ccids/ccid3.c:91:21: warning: symbol '__x' shadows an earlier one net/dccp/ccids/ccid3.c:91:21: warning: symbol '__y' shadows an earlier one * Declaration of function prototypes in .c instead of .h file, resulting in "should it be static?" warnings. * Declared "struct dccpw" static (local to dccp_probe). * Disabled dccp_delayed_ack() - not fully removed due to RFC 4340, 11.3 ("Receivers SHOULD implement delayed acknowledgement timers ..."). * Used a different local variable name to avoid net/dccp/ackvec.c:293:13: warning: symbol 'state' shadows an earlier one net/dccp/ackvec.c:238:33: originally declared here * Removed unused functions `dccp_ackvector_print' and `dccp_ackvec_print'. 
Signed-off-by: Gerrit Renker --- net/dccp/ackvec.c | 29 ++--------------------------- net/dccp/ccids/ccid3.c | 4 ++-- net/dccp/ccids/lib/tfrc.c | 8 -------- net/dccp/ccids/lib/tfrc.h | 11 +++++++++-- net/dccp/output.c | 2 ++ net/dccp/probe.c | 2 +- 6 files changed, 16 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 6de4bd195d2..1e8be246ad1 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -290,12 +290,12 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, while (1) { const u8 len = dccp_ackvec_len(av, index); - const u8 state = dccp_ackvec_state(av, index); + const u8 av_state = dccp_ackvec_state(av, index); /* * valid packets not yet in av_buf have a reserved * entry, with a len equal to 0. */ - if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED && + if (av_state == DCCP_ACKVEC_STATE_NOT_RECEIVED && len == 0 && delta == 0) { /* Found our reserved seat! */ dccp_pr_debug("Found %llu reserved seat!\n", @@ -325,31 +325,6 @@ out_duplicate: return -EILSEQ; } -#ifdef CONFIG_IP_DCCP_DEBUG -void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) -{ - dccp_pr_debug_cat("ACK vector len=%d, ackno=%llu |", len, - (unsigned long long)ackno); - - while (len--) { - const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6; - const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; - - dccp_pr_debug_cat("%d,%d|", state, rl); - ++vector; - } - - dccp_pr_debug_cat("\n"); -} - -void dccp_ackvec_print(const struct dccp_ackvec *av) -{ - dccp_ackvector_print(av->av_buf_ackno, - av->av_buf + av->av_buf_head, - av->av_vec_len); -} -#endif - static void dccp_ackvec_throw_record(struct dccp_ackvec *av, struct dccp_ackvec_record *avr) { diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 0474f4c5707..a1929f33d70 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -159,8 +159,8 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) } else if 
(ktime_us_delta(now, hctx->ccid3hctx_t_ld) - (s64)hctx->ccid3hctx_rtt >= 0) { - hctx->ccid3hctx_x = - max(min(2 * hctx->ccid3hctx_x, min_rate), + hctx->ccid3hctx_x = min(2 * hctx->ccid3hctx_x, min_rate); + hctx->ccid3hctx_x = max(hctx->ccid3hctx_x, scaled_div(((__u64)hctx->ccid3hctx_s) << 6, hctx->ccid3hctx_rtt)); hctx->ccid3hctx_t_ld = now; diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index d1dfbb8de64..97ecec0a8e7 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -14,14 +14,6 @@ module_param(tfrc_debug, bool, 0444); MODULE_PARM_DESC(tfrc_debug, "Enable debug messages"); #endif -extern int tfrc_tx_packet_history_init(void); -extern void tfrc_tx_packet_history_exit(void); -extern int tfrc_rx_packet_history_init(void); -extern void tfrc_rx_packet_history_exit(void); - -extern int tfrc_li_init(void); -extern void tfrc_li_exit(void); - static int __init tfrc_module_init(void) { int rc = tfrc_li_init(); diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index 1fb1187bbf1..ddd8107b927 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -58,7 +58,14 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight) return avg ? 
(weight * avg + (10 - weight) * newval) / 10 : newval; } -extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); -extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); +extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); +extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); +extern int tfrc_tx_packet_history_init(void); +extern void tfrc_tx_packet_history_exit(void); +extern int tfrc_rx_packet_history_init(void); +extern void tfrc_rx_packet_history_exit(void); + +extern int tfrc_li_init(void); +extern void tfrc_li_exit(void); #endif /* _TFRC_H_ */ diff --git a/net/dccp/output.c b/net/dccp/output.c index 1f8a9b64c08..fe20068c5d8 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -508,6 +508,7 @@ void dccp_send_ack(struct sock *sk) EXPORT_SYMBOL_GPL(dccp_send_ack); +#if 0 /* FIXME: Is this still necessary (11.3) - currently nowhere used by DCCP. */ void dccp_send_delayed_ack(struct sock *sk) { @@ -538,6 +539,7 @@ void dccp_send_delayed_ack(struct sock *sk) icsk->icsk_ack.timeout = timeout; sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); } +#endif void dccp_send_sync(struct sock *sk, const u64 ackno, const enum dccp_pkt_type pkt_type) diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 0bcdc925027..81368a7f537 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -42,7 +42,7 @@ static int bufsize = 64 * 1024; static const char procname[] = "dccpprobe"; -struct { +static struct { struct kfifo *fifo; spinlock_t lock; wait_queue_head_t wait; -- cgit v1.2.3 From 65907a433ac0ca450c4408080f24c6e4743386b2 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 11 Jun 2008 11:19:09 +0100 Subject: dccp ccid-2: Bug-Fix - Ack Vectors need to be ignored on request sockets This fixes an oversight from an earlier patch, ensuring that Ack Vectors are not processed on request sockets. The issue is that Ack Vectors must not be parsed on request sockets, since the Ack Vector feature depends on the selection of the (TX) CCID. 
During the initial handshake the CCIDs are undefined, and so RFC 4340, 10.3 applies: "Using CCID-specific options and feature options during a negotiation for the corresponding CCID feature is NOT RECOMMENDED [...]" And it is not even possible: when the server receives the Request from the client, the CCID and Ack vector features are undefined; when the Ack finalising the 3-way handshake arrives, the request socket has not been cloned yet into a full socket. (This order is necessary, since otherwise the newly created socket would have to be destroyed whenever an option error occurred - a malicious hacker could simply send garbage options and exploit this.) Signed-off-by: Gerrit Renker --- net/dccp/options.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/dccp/options.c b/net/dccp/options.c index d2a84a2fece..43bc24e761d 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -107,9 +107,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, * * CCID-specific options are ignored during connection setup, as * negotiation may still be in progress (see RFC 4340, 10.3). + * The same applies to Ack Vectors, as these depend on the CCID. * */ - if (dreq != NULL && opt >= 128) + if (dreq != NULL && (opt >= 128 || + opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1)) goto ignore_option; switch (opt) { -- cgit v1.2.3 From 1e8a287c79f64226541f5c44aa52d4698bb84cf5 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 11 Jun 2008 11:19:10 +0100 Subject: dccp ccid-3: TFRC reverse-lookup Bug-Fix This fixes a bug in the reverse lookup of p: given a value f(p), instead of p, the function returned the smallest tabulated value f(p). The smallest tabulated value of 10^6 * f(p) = sqrt(2*p/3) + 12 * sqrt(3*p/8) * (32 * p^3 + p) for p=0.0001 is 8172. 
Since this value is scaled by 10^6, the outcome of this bug is that a loss of 8172/10^6 = 0.8172% was reported whenever the input was below the table resolution of 0.01%. This means that the value was over 80 times too high, resulting in large spikes of the initial loss interval, thus unnecessarily reducing the throughput. Also corrected the printk format (%u for u32). Signed-off-by: Gerrit Renker --- net/dccp/ccids/lib/tfrc_equation.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index e4e64b76c10..2f20a29cffe 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -661,7 +661,7 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) EXPORT_SYMBOL_GPL(tfrc_calc_x); -/* +/** * tfrc_calc_x_reverse_lookup - try to find p given f(p) * * @fvalue: function value to match, scaled by 1000000 @@ -676,11 +676,11 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue) /* Error cases. */ if (fvalue < tfrc_calc_x_lookup[0][1]) { - DCCP_WARN("fvalue %d smaller than resolution\n", fvalue); - return tfrc_calc_x_lookup[0][1]; + DCCP_WARN("fvalue %u smaller than resolution\n", fvalue); + return TFRC_SMALLEST_P; } if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0]) { - DCCP_WARN("fvalue %d exceeds bounds!\n", fvalue); + DCCP_WARN("fvalue %u exceeds bounds!\n", fvalue); return 1000000; } -- cgit v1.2.3 From 7deb0f851003287d7e259bf6b33548b144c0f2d5 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 11 Jun 2008 11:19:10 +0100 Subject: dccp ccid-3: X truncated due to type conversion This fixes a bug in computing the inter-packet-interval t_ipi = s/X: scaled_div32(a, b) uses u32 for b, but in "scaled_div32(s, X)" the type of the sending rate `X' is u64. Since X is scaled by 2^6, this truncates rates greater than 2^26 Bps (~537 Mbps). Using full 64-bit division now. 
Signed-off-by: Gerrit Renker --- net/dccp/ccids/lib/tfrc.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index ddd8107b927..ed9857527ac 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -15,7 +15,7 @@ * (at your option) any later version. */ #include -#include +#include #include "../../dccp.h" /* internal includes that this module exports: */ #include "loss_interval.h" @@ -29,21 +29,19 @@ extern int tfrc_debug; #endif /* integer-arithmetic divisions of type (a * 1000000)/b */ -static inline u64 scaled_div(u64 a, u32 b) +static inline u64 scaled_div(u64 a, u64 b) { BUG_ON(b==0); - a *= 1000000; - do_div(a, b); - return a; + return div64_u64(a * 1000000, b); } -static inline u32 scaled_div32(u64 a, u32 b) +static inline u32 scaled_div32(u64 a, u64 b) { u64 result = scaled_div(a, b); if (result > UINT_MAX) { - DCCP_CRIT("Overflow: a(%llu)/b(%u) > ~0U", - (unsigned long long)a, b); + DCCP_CRIT("Overflow: %llu/%llu > UINT_MAX", + (unsigned long long)a, (unsigned long long)b); return UINT_MAX; } return result; -- cgit v1.2.3 From be4c798a41bf626cdaacf96c382f116ed2f7dbe9 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 11 Jun 2008 11:19:10 +0100 Subject: dccp: Bug in initial acknowledgment number assignment Step 8.5 in RFC 4340 says for the newly cloned socket Initialize S.GAR := S.ISS, but what in fact the code (minisocks.c) does is Initialize S.GAR := S.ISR, which is wrong (typo?) -- fixed by the patch. 
Signed-off-by: Gerrit Renker --- net/dccp/minisocks.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 33ad48321b0..66dca5bba85 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -165,12 +165,12 @@ out_free: /* See dccp_v4_conn_request */ newdmsk->dccpms_sequence_window = req->rcv_wnd; - newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr; - dccp_update_gsr(newsk, dreq->dreq_isr); - - newdp->dccps_iss = dreq->dreq_iss; + newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss; dccp_update_gss(newsk, dreq->dreq_iss); + newdp->dccps_isr = dreq->dreq_isr; + dccp_update_gsr(newsk, dreq->dreq_isr); + /* * SWL and AWL are initially adjusted so that they are not less than * the initial Sequence Numbers received and sent, respectively: -- cgit v1.2.3 From 20c61fbd8deb2ada0ac3acecf6156a986dbfff2d Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 28 Apr 2008 14:40:55 +0900 Subject: ipv6 mcast: Check address family of gf_group in getsockopt(MS_FILTER). Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ipv6_sockglue.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 26b83e512a0..ce794d6acb7 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -874,6 +874,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, return -EINVAL; if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) return -EFAULT; + if (gsf.gf_group.ss_family != AF_INET6) + return -EADDRNOTAVAIL; lock_sock(sk); err = ip6_mc_msfget(sk, &gsf, (struct group_filter __user *)optval, optlen); -- cgit v1.2.3 From 36e3deae8ba84865fd9eb3f2f21bbc00d49b7544 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 13 May 2008 02:52:55 +0900 Subject: ipv6 route: Fix route lifetime in netlink message. 1) We may have route lifetime larger than INT_MAX. In that case we had a weird value in lifetime. 
Use INT_MAX if lifetime does not fit in s32. 2) Lifetime is valid iff RTF_EXPIRES is set. Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/route.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 220cffe9e63..d1f3e19b06c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2196,8 +2196,12 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); - expires = (rt->rt6i_flags & RTF_EXPIRES) ? - rt->rt6i_expires - jiffies : 0; + if (!(rt->rt6i_flags & RTF_EXPIRES)) + expires = 0; + else if (rt->rt6i_expires - jiffies < INT_MAX) + expires = rt->rt6i_expires - jiffies; + else + expires = INT_MAX; if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires, rt->u.dst.error) < 0) -- cgit v1.2.3 From e8766fc86b34d44a8c55a2f9d71da69e091b1ca4 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Tue, 10 Jun 2008 15:50:55 +0800 Subject: ipv6: Check the hop limit setting in ancillary data. When specifying the outgoing hop limit as ancillary data for sendmsg(), the kernel doesn't check the integer hop limit value as specified in [RFC-3542] section 6.3. Signed-off-by: Shan Wei Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/datagram.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index b9c2de84a8a..0f0f94a4033 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -705,6 +705,11 @@ int datagram_send_ctl(struct net *net, } *hlimit = *(int *)CMSG_DATA(cmsg); + if (*hlimit < -1 || *hlimit > 0xff) { + err = -EINVAL; + goto exit_f; + } + break; case IPV6_TCLASS: -- cgit v1.2.3 From 28d4488216645cd71402925cffde9528b0cfdb7e Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 12 Jun 2008 03:14:51 +0900 Subject: ipv6: Check IPV6_MULTICAST_LOOP option value. 
Only 0 and 1 are valid for IPV6_MULTICAST_LOOP socket option, and we should return an error of EINVAL otherwise, per RFC3493. Based on patch from Shan Wei . Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ipv6_sockglue.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index ce794d6acb7..9a3697172d5 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -458,6 +458,8 @@ done: case IPV6_MULTICAST_LOOP: if (optlen < sizeof(int)) goto e_inval; + if (val != valbool) + goto e_inval; np->mc_loop = valbool; retv = 0; break; -- cgit v1.2.3 From 1717699cd5130009b7cd6756e883d8582c1fe706 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 12 Jun 2008 03:27:26 +0900 Subject: ipv6: Fail with appropriate error code when setting not-applicable sockopt. IPV6_MULTICAST_HOPS, for example, is not valid for stream sockets. Since they are virtually unavailable for stream sockets, we should return ENOPROTOOPT instead of EINVAL. Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ipv6_sockglue.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 9a3697172d5..c042ce19bd1 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -67,7 +67,7 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) /* RA packet may be delivered ONLY to IPPROTO_RAW socket */ if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_RAW) - return -EINVAL; + return -ENOPROTOOPT; new_ra = (sel>=0) ? 
kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; @@ -446,7 +446,7 @@ done: case IPV6_MULTICAST_HOPS: if (sk->sk_type == SOCK_STREAM) - goto e_inval; + break; if (optlen < sizeof(int)) goto e_inval; if (val > 255 || val < -1) @@ -466,7 +466,7 @@ done: case IPV6_MULTICAST_IF: if (sk->sk_type == SOCK_STREAM) - goto e_inval; + break; if (optlen < sizeof(int)) goto e_inval; @@ -862,7 +862,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, if (sk->sk_protocol != IPPROTO_UDP && sk->sk_protocol != IPPROTO_UDPLITE && sk->sk_protocol != IPPROTO_TCP) - return -EINVAL; + return -ENOPROTOOPT; if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; val = sk->sk_family; -- cgit v1.2.3 From b66985b11b8b00e1ec65b89a3112510ac9a9ec6e Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Wed, 11 Jun 2008 17:50:27 -0700 Subject: netfilter: Make nflog quiet when no one listen in userspace. The message "nf_log_packet: can't log since no backend logging module loaded in! Please either load one, or disable logging explicitly" was displayed for each logged packet when no userspace application is listening to nflog events. The message seems to warn for a problem with a kernel module missing but as said before this is not the case. I thus propose to suppress the message (I don't see any reason to flood the log because a user application has crashed.) Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_log.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index bc11d709203..9fda6ee95a3 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -92,10 +92,6 @@ void nf_log_packet(int pf, vsnprintf(prefix, sizeof(prefix), fmt, args); va_end(args); logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix); - } else if (net_ratelimit()) { - printk(KERN_WARNING "nf_log_packet: can\'t log since " - "no backend logging module loaded in! 
Please either " - "load one, or disable logging explicitly\n"); } rcu_read_unlock(); } -- cgit v1.2.3 From ceeff7541e5a4ba8e8d97ffbae32b3f283cb7a3f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 11 Jun 2008 17:51:10 -0700 Subject: netfilter: nf_conntrack: fix ctnetlink related crash in nf_nat_setup_info() When creation of a new conntrack entry in ctnetlink fails after having set up the NAT mappings, the conntrack has an extension area allocated that is not getting properly destroyed when freeing the conntrack again. This means the NAT extension is still in the bysource hash, causing a crash when walking over the hash chain the next time: BUG: unable to handle kernel paging request at 00120fbd IP: [] nf_nat_setup_info+0x221/0x58a *pde = 00000000 Oops: 0000 [#1] PREEMPT SMP Pid: 2795, comm: conntrackd Not tainted (2.6.26-rc5 #1) EIP: 0060:[] EFLAGS: 00010206 CPU: 1 EIP is at nf_nat_setup_info+0x221/0x58a EAX: 00120fbd EBX: 00120fbd ECX: 00000001 EDX: 00000000 ESI: 0000019e EDI: e853bbb4 EBP: e853bbc8 ESP: e853bb78 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 Process conntrackd (pid: 2795, ti=e853a000 task=f7de10f0 task.ti=e853a000) Stack: 00000000 e853bc2c e85672ec 00000008 c0561084 63c1db4a 00000000 00000000 00000000 0002e109 61d2b1c3 00000000 00000000 00000000 01114e22 61d2b1c3 00000000 00000000 f7444674 e853bc04 00000008 c038e728 0000000a f7444674 Call Trace: [] nla_parse+0x5c/0xb0 [] ctnetlink_change_status+0x190/0x1c6 [] ctnetlink_new_conntrack+0x189/0x61f [] update_curr+0x3d/0x52 [] nfnetlink_rcv_msg+0xc1/0xd8 [] nfnetlink_rcv_msg+0x18/0xd8 [] nfnetlink_rcv_msg+0x0/0xd8 [] netlink_rcv_skb+0x2d/0x71 [] nfnetlink_rcv+0x19/0x24 [] netlink_unicast+0x1b3/0x216 ... Move invocation of the extension destructors to nf_conntrack_free() to fix this problem. Fixes http://bugzilla.kernel.org/show_bug.cgi?id=10875 Reported-and-Tested-by: Krzysztof Piotr Oledzki Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/netfilter/nf_conntrack_core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c4b1799da5d..662c1ccfee2 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -196,8 +196,6 @@ destroy_conntrack(struct nf_conntrack *nfct) if (l4proto && l4proto->destroy) l4proto->destroy(ct); - nf_ct_ext_destroy(ct); - rcu_read_unlock(); spin_lock_bh(&nf_conntrack_lock); @@ -520,6 +518,7 @@ static void nf_conntrack_free_rcu(struct rcu_head *head) void nf_conntrack_free(struct nf_conn *ct) { + nf_ct_ext_destroy(ct); call_rcu(&ct->rcu, nf_conntrack_free_rcu); } EXPORT_SYMBOL_GPL(nf_conntrack_free); -- cgit v1.2.3 From f23d60de719e639690b2dc5c2d0e4243ff614b7a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jun 2008 14:47:58 -0700 Subject: ipv6: Fix duplicate initialization of rawv6_prot.destroy In changeset 22dd485022f3d0b162ceb5e67d85de7c3806aa20 ("raw: Raw socket leak.") code was added so that we flush pending frames on raw sockets to avoid leaks. The ipv4 part was fine, but the ipv6 part was not done correctly. Unlike the ipv4 side, the ipv6 code already has a .destroy method for rawv6_prot. So now there were two assignments to this member, and what the compiler does is use the last one, effectively making the ipv6 parts of that changeset a NOP. Fix this by removing the: .destroy = inet6_destroy_sock, line, and adding an inet6_destroy_sock() call to the end of raw6_destroy(). Noticed by Al Viro. Signed-off-by: David S. 
Miller Acked-by: YOSHIFUJI Hideaki --- net/ipv6/raw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 8fee9a15b2d..3aee12310d9 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1169,7 +1169,8 @@ static int raw6_destroy(struct sock *sk) lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); - return 0; + + return inet6_destroy_sock(sk); } static int rawv6_init_sk(struct sock *sk) @@ -1200,7 +1201,6 @@ struct proto rawv6_prot = { .disconnect = udp_disconnect, .ioctl = rawv6_ioctl, .init = rawv6_init_sk, - .destroy = inet6_destroy_sock, .setsockopt = rawv6_setsockopt, .getsockopt = rawv6_getsockopt, .sendmsg = rawv6_sendmsg, -- cgit v1.2.3 From ec0a196626bd12e0ba108d7daa6d95a4fb25c2c5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jun 2008 16:31:35 -0700 Subject: tcp: Revert 'process defer accept as established' changes. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts two changesets, ec3c0982a2dd1e671bad8e9d26c28dcba0039d87 ("[TCP]: TCP_DEFER_ACCEPT updates - process as established") and the follow-on bug fix 9ae27e0adbf471c7a6b80102e38e1d5a346b3b38 ("tcp: Fix slab corruption with ipv6 and tcp6fuzz"). This change causes several problems, first reported by Ingo Molnar as a distcc-over-loopback regression where connections were getting stuck. Ilpo Järvinen first spotted the locking problems. The new function added by this code, tcp_defer_accept_check(), only has the child socket locked, yet it is modifying state of the parent listening socket. Fixing that is non-trivial at best, because we can't simply just grab the parent listening socket lock at this point, because it would create an ABBA deadlock. The normal ordering is parent listening socket --> child socket, but this code path would require the reverse lock ordering. 
Next is a problem noticed by Vitaliy Gusev, he noted: ---------------------------------------- >--- a/net/ipv4/tcp_timer.c >+++ b/net/ipv4/tcp_timer.c >@@ -481,6 +481,11 @@ static void tcp_keepalive_timer (unsigned long data) > goto death; > } > >+ if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) { >+ tcp_send_active_reset(sk, GFP_ATOMIC); >+ goto death; Here socket sk is not attached to listening socket's request queue. tcp_done() will not call inet_csk_destroy_sock() (and tcp_v4_destroy_sock() which should release this sk) as socket is not DEAD. Therefore socket sk will be lost for freeing. ---------------------------------------- Finally, Alexey Kuznetsov argues that there might not even be any real value or advantage to these new semantics even if we fix all of the bugs: ---------------------------------------- Hiding from accept() sockets with only out-of-order data only is the only thing which is impossible with old approach. Is this really so valuable? My opinion: no, this is nothing but a new loophole to consume memory without control. ---------------------------------------- So revert this thing for now. Signed-off-by: David S. 
Miller --- net/ipv4/inet_connection_sock.c | 11 +++++++--- net/ipv4/tcp.c | 18 ++++++++++------- net/ipv4/tcp_input.c | 45 ----------------------------------------- net/ipv4/tcp_ipv4.c | 8 -------- net/ipv4/tcp_minisocks.c | 32 +++++++++++------------------ net/ipv4/tcp_timer.c | 5 ----- 6 files changed, 31 insertions(+), 88 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 828ea211ff2..045e799d3e1 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -419,7 +419,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, struct inet_connection_sock *icsk = inet_csk(parent); struct request_sock_queue *queue = &icsk->icsk_accept_queue; struct listen_sock *lopt = queue->listen_opt; - int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; + int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; + int thresh = max_retries; unsigned long now = jiffies; struct request_sock **reqp, *req; int i, budget; @@ -455,6 +456,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, } } + if (queue->rskq_defer_accept) + max_retries = queue->rskq_defer_accept; + budget = 2 * (lopt->nr_table_entries / (timeout / interval)); i = lopt->clock_hand; @@ -462,8 +466,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, reqp=&lopt->syn_table[i]; while ((req = *reqp) != NULL) { if (time_after_eq(now, req->expires)) { - if (req->retrans < thresh && - !req->rsk_ops->rtx_syn_ack(parent, req)) { + if ((req->retrans < (inet_rsk(req)->acked ? 
max_retries : thresh)) && + (inet_rsk(req)->acked || + !req->rsk_ops->rtx_syn_ack(parent, req))) { unsigned long timeo; if (req->retrans++ == 0) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ab66683b804..fc54a48fde1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2112,12 +2112,15 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_DEFER_ACCEPT: - if (val < 0) { - err = -EINVAL; - } else { - if (val > MAX_TCP_ACCEPT_DEFERRED) - val = MAX_TCP_ACCEPT_DEFERRED; - icsk->icsk_accept_queue.rskq_defer_accept = val; + icsk->icsk_accept_queue.rskq_defer_accept = 0; + if (val > 0) { + /* Translate value in seconds to number of + * retransmits */ + while (icsk->icsk_accept_queue.rskq_defer_accept < 32 && + val > ((TCP_TIMEOUT_INIT / HZ) << + icsk->icsk_accept_queue.rskq_defer_accept)) + icsk->icsk_accept_queue.rskq_defer_accept++; + icsk->icsk_accept_queue.rskq_defer_accept++; } break; @@ -2299,7 +2302,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = (val ? : sysctl_tcp_fin_timeout) / HZ; break; case TCP_DEFER_ACCEPT: - val = icsk->icsk_accept_queue.rskq_defer_accept; + val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 : + ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1)); break; case TCP_WINDOW_CLAMP: val = tp->window_clamp; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eba873e9b56..cad73b7dfef 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4541,49 +4541,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th) } } -static int tcp_defer_accept_check(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (tp->defer_tcp_accept.request) { - int queued_data = tp->rcv_nxt - tp->copied_seq; - int hasfin = !skb_queue_empty(&sk->sk_receive_queue) ? 
- tcp_hdr((struct sk_buff *) - sk->sk_receive_queue.prev)->fin : 0; - - if (queued_data && hasfin) - queued_data--; - - if (queued_data && - tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) { - if (sock_flag(sk, SOCK_KEEPOPEN)) { - inet_csk_reset_keepalive_timer(sk, - keepalive_time_when(tp)); - } else { - inet_csk_delete_keepalive_timer(sk); - } - - inet_csk_reqsk_queue_add( - tp->defer_tcp_accept.listen_sk, - tp->defer_tcp_accept.request, - sk); - - tp->defer_tcp_accept.listen_sk->sk_data_ready( - tp->defer_tcp_accept.listen_sk, 0); - - sock_put(tp->defer_tcp_accept.listen_sk); - sock_put(sk); - tp->defer_tcp_accept.listen_sk = NULL; - tp->defer_tcp_accept.request = NULL; - } else if (hasfin || - tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) { - tcp_reset(sk); - return -1; - } - } - return 0; -} - static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) { struct tcp_sock *tp = tcp_sk(sk); @@ -4944,8 +4901,6 @@ step5: tcp_data_snd_check(sk); tcp_ack_snd_check(sk); - - tcp_defer_accept_check(sk); return 0; csum_error: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4f8485c67d1..97a230026e1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1918,14 +1918,6 @@ int tcp_v4_destroy_sock(struct sock *sk) sk->sk_sndmsg_page = NULL; } - if (tp->defer_tcp_accept.request) { - reqsk_free(tp->defer_tcp_accept.request); - sock_put(tp->defer_tcp_accept.listen_sk); - sock_put(sk); - tp->defer_tcp_accept.listen_sk = NULL; - tp->defer_tcp_accept.request = NULL; - } - atomic_dec(&tcp_sockets_allocated); return 0; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 019c8c16e5c..8245247a6ce 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -571,8 +571,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, does sequence test, SYN is truncated, and thus we consider it a bare ACK. 
- Both ends (listening sockets) accept the new incoming - connection and try to talk to each other. 8-) + If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this + bare ACK. Otherwise, we create an established connection. Both + ends (listening sockets) accept the new incoming connection and try + to talk to each other. 8-) Note: This case is both harmless, and rare. Possibility is about the same as us discovering intelligent life on another plant tomorrow. @@ -640,6 +642,13 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, if (!(flg & TCP_FLAG_ACK)) return NULL; + /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ + if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && + TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { + inet_rsk(req)->acked = 1; + return NULL; + } + /* OK, ACK is valid, create big socket and * feed this segment to it. It will repeat all * the tests. THIS SEGMENT MUST MOVE SOCKET TO @@ -678,24 +687,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, inet_csk_reqsk_queue_unlink(sk, req, prev); inet_csk_reqsk_queue_removed(sk, req); - if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && - TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { - - /* the accept queue handling is done is est recv slow - * path so lets make sure to start there - */ - tcp_sk(child)->pred_flags = 0; - sock_hold(sk); - sock_hold(child); - tcp_sk(child)->defer_tcp_accept.listen_sk = sk; - tcp_sk(child)->defer_tcp_accept.request = req; - - inet_csk_reset_keepalive_timer(child, - inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ); - } else { - inet_csk_reqsk_queue_add(sk, req, child); - } - + inet_csk_reqsk_queue_add(sk, req, child); return child; listen_overflow: diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 4de68cf5f2a..63ed9d6830e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -489,11 +489,6 @@ static void tcp_keepalive_timer (unsigned long data) goto death; } - if 
(tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) { - tcp_send_active_reset(sk, GFP_ATOMIC); - goto death; - } - if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) goto out; -- cgit v1.2.3 From 5c5f9664d5284d8542062fed39e1f19b80db7aa5 Mon Sep 17 00:00:00 2001 From: Abhijeet Kolekar Date: Thu, 12 Jun 2008 09:47:16 +0800 Subject: mac80211 : fix for iwconfig in ad-hoc mode The patch checks interface status, if it is in IBSS_JOINED mode show cell id it is associated with. Signed-off-by: Abhijeet Kolekar Signed-off-by: Zhu Yi Signed-off-by: John W. Linville --- net/mac80211/wext.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index a8bb8e31b1e..6106cb79060 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -496,7 +496,8 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type == IEEE80211_IF_TYPE_STA || sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { - if (sdata->u.sta.state == IEEE80211_ASSOCIATED) { + if (sdata->u.sta.state == IEEE80211_ASSOCIATED || + sdata->u.sta.state == IEEE80211_IBSS_JOINED) { ap_addr->sa_family = ARPHRD_ETHER; memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN); return 0; -- cgit v1.2.3 From 995ad6c5a415c9389d094d246ca1b305c1e31813 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 12 Jun 2008 20:08:19 +0300 Subject: mac80211: add missing new line in debug print HT_DEBUG This patch adds '\n' in debug printk (wme.c HT DEBUG) Signed-off-by: Tomas Winkler Signed-off-by: John W. 
Linville --- net/mac80211/wme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index dc1598b8600..635b996c8c3 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -673,7 +673,7 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_DEBUG "allocated aggregation queue" - " %d tid %d addr %s pool=0x%lX", + " %d tid %d addr %s pool=0x%lX\n", i, tid, print_mac(mac, sta->addr), q->qdisc_pool[0]); #endif /* CONFIG_MAC80211_HT_DEBUG */ -- cgit v1.2.3 From f9ffcedddba5b2fc5ab16ef08bca55af8be2717e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 16 Jun 2008 16:38:33 -0700 Subject: pkt_sched: HTB scheduler, change default hysteresis mode to off. The HTB hysteresis mode reduces the CPU load, but at the cost of scheduling accuracy. On ADSL links (512 kbit/s upstream), this inaccuracy introduces significant jitter, enough to disturb VoIP. For details see my master's thesis (http://www.adsl-optimizer.dk/thesis/), chapter 7, section 7.3.1, pp 69-70. Signed-off-by: Jesper Dangaard Brouer Acked-by: Martin Devera Signed-off-by: David S. 
Miller --- net/sched/sch_htb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 5bc1ed49018..9134f029ee0 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -53,7 +53,7 @@ */ #define HTB_HSIZE 16 /* classid hash size */ -#define HTB_HYSTERESIS 1 /* whether to use mode hysteresis for speedup */ +#define HTB_HYSTERESIS 0 /* whether to use mode hysteresis for speedup */ #define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ #if HTB_VER >> 16 != TC_HTB_PROTOVER -- cgit v1.2.3 From 47083fc0735f5145b72fc31236d07339dc52b908 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 16 Jun 2008 16:39:32 -0700 Subject: pkt_sched: Change HTB_HYSTERESIS to a runtime parameter htb_hysteresis. Add a htb_hysteresis parameter to htb_sch.ko and by sysfs magic make it runtime adjustable via /sys/module/sch_htb/parameters/htb_hysteresis mode 640. Signed-off-by: Jesper Dangaard Brouer Acked-by: Martin Devera Signed-off-by: David S. 
Miller --- net/sched/sch_htb.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 9134f029ee0..6807c97985a 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -28,6 +28,7 @@ * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $ */ #include +#include #include #include #include @@ -53,13 +54,17 @@ */ #define HTB_HSIZE 16 /* classid hash size */ -#define HTB_HYSTERESIS 0 /* whether to use mode hysteresis for speedup */ +static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */ #define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ #if HTB_VER >> 16 != TC_HTB_PROTOVER #error "Mismatched sch_htb.c and pkt_sch.h" #endif +/* Module parameter and sysfs export */ +module_param (htb_hysteresis, int, 0640); +MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate"); + /* used internaly to keep status of single class */ enum htb_cmode { HTB_CANT_SEND, /* class can't send and can't borrow */ @@ -462,19 +467,21 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) htb_remove_class_from_row(q, cl, mask); } -#if HTB_HYSTERESIS static inline long htb_lowater(const struct htb_class *cl) { - return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0; + if (htb_hysteresis) + return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0; + else + return 0; } static inline long htb_hiwater(const struct htb_class *cl) { - return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0; + if (htb_hysteresis) + return cl->cmode == HTB_CAN_SEND ? 
-cl->buffer : 0; + else + return 0; } -#else -#define htb_lowater(cl) (0) -#define htb_hiwater(cl) (0) -#endif + /** * htb_class_mode - computes and returns current class mode -- cgit v1.2.3 From 2b4743bd6be9fedaa560f8c6dc3997e9ec21b99b Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 16 Jun 2008 16:48:20 -0700 Subject: ipv6 sit: Avoid extra need for compat layer in PRL management. We've introduced extra need of compat layer for ip_tunnel_prl{} for PRL (Potential Router List) management. Though compat_ioctl is still missing in ipv4/ipv6, let's make the interface more straight-forward and eliminate extra need for nasty compat layer anyway since the interface is new for 2.6.26. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/sit.c | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3de6ffdaedf..32e871a6c25 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -222,15 +222,18 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) } -static int ipip6_tunnel_get_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) +static int ipip6_tunnel_get_prl(struct ip_tunnel *t, + struct ip_tunnel_prl __user *a) { - struct ip_tunnel_prl *kp; + struct ip_tunnel_prl kprl, *kp; struct ip_tunnel_prl_entry *prl; unsigned int cmax, c = 0, ca, len; int ret = 0; - cmax = a->datalen / sizeof(*a); - if (cmax > 1 && a->addr != htonl(INADDR_ANY)) + if (copy_from_user(&kprl, a, sizeof(kprl))) + return -EFAULT; + cmax = kprl.datalen / sizeof(kprl); + if (cmax > 1 && kprl.addr != htonl(INADDR_ANY)) cmax = 1; /* For simple GET or for root users, @@ -261,26 +264,25 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) for (prl = t->prl; prl; prl = prl->next) { if (c > cmax) break; - if (a->addr != htonl(INADDR_ANY) && prl->addr != a->addr) + if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr) continue; 
kp[c].addr = prl->addr; kp[c].flags = prl->flags; c++; - if (a->addr != htonl(INADDR_ANY)) + if (kprl.addr != htonl(INADDR_ANY)) break; } out: read_unlock(&ipip6_lock); len = sizeof(*kp) * c; - ret = len ? copy_to_user(a->data, kp, len) : 0; + ret = 0; + if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen)) + ret = -EFAULT; kfree(kp); - if (ret) - return -EFAULT; - a->datalen = len; - return 0; + return ret; } static int @@ -873,11 +875,20 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) break; case SIOCGETPRL: + err = -EINVAL; + if (dev == sitn->fb_tunnel_dev) + goto done; + err = -ENOENT; + if (!(t = netdev_priv(dev))) + goto done; + err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data); + break; + case SIOCADDPRL: case SIOCDELPRL: case SIOCCHGPRL: err = -EPERM; - if (cmd != SIOCGETPRL && !capable(CAP_NET_ADMIN)) + if (!capable(CAP_NET_ADMIN)) goto done; err = -EINVAL; if (dev == sitn->fb_tunnel_dev) @@ -890,12 +901,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) goto done; switch (cmd) { - case SIOCGETPRL: - err = ipip6_tunnel_get_prl(t, &prl); - if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, - &prl, sizeof(prl))) - err = -EFAULT; - break; case SIOCDELPRL: err = ipip6_tunnel_del_prl(t, &prl); break; @@ -904,8 +909,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); break; } - if (cmd != SIOCGETPRL) - netdev_state_change(dev); + netdev_state_change(dev); break; default: -- cgit v1.2.3 From 93653e0448196344d7699ccad395eaebd30359d1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 16 Jun 2008 16:57:40 -0700 Subject: tcp: Revert reset of deferred accept changes in 2.6.26 Ingo's system is still seeing strange behavior, and he reports that is goes away if the rest of the deferred accept changes are reverted too. 
Therefore this reverts e4c78840284f3f51b1896cf3936d60a6033c4d2c ("[TCP]: TCP_DEFER_ACCEPT updates - dont retxmt synack") and 539fae89bebd16ebeafd57a87169bc56eb530d76 ("[TCP]: TCP_DEFER_ACCEPT updates - defer timeout conflicts with max_thresh"). Just like the other revert, these ideas can be revisited for 2.6.27 Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 045e799d3e1..ec834480abe 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -466,9 +466,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, reqp=&lopt->syn_table[i]; while ((req = *reqp) != NULL) { if (time_after_eq(now, req->expires)) { - if ((req->retrans < (inet_rsk(req)->acked ? max_retries : thresh)) && - (inet_rsk(req)->acked || - !req->rsk_ops->rtx_syn_ack(parent, req))) { + if ((req->retrans < thresh || + (inet_rsk(req)->acked && req->retrans < max_retries)) + && !req->rsk_ops->rtx_syn_ack(parent, req)) { unsigned long timeo; if (req->retrans++ == 0) -- cgit v1.2.3 From 80896a3584bbff9ff9ad4dde735517c4de68d736 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 16 Jun 2008 16:59:55 -0700 Subject: sctp: Correctly cleanup procfs entries upon failure. This patch remove the proc fs entry which has been created if fail to set up proc fs entry for the SCTP protocol. Signed-off-by: Wei Yongjun Acked-by: Neil Horman Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/protocol.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index b435a193c5d..9258dfe784a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -108,14 +108,23 @@ static __init int sctp_proc_init(void) } if (sctp_snmp_proc_init()) - goto out_nomem; + goto out_snmp_proc_init; if (sctp_eps_proc_init()) - goto out_nomem; + goto out_eps_proc_init; if (sctp_assocs_proc_init()) - goto out_nomem; + goto out_assocs_proc_init; return 0; +out_assocs_proc_init: + sctp_eps_proc_exit(); +out_eps_proc_init: + sctp_snmp_proc_exit(); +out_snmp_proc_init: + if (proc_net_sctp) { + proc_net_sctp = NULL; + remove_proc_entry("sctp", init_net.proc_net); + } out_nomem: return -ENOMEM; } -- cgit v1.2.3 From 319fa2a24f652dc35e613360c4532b8d2a771add Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 16 Jun 2008 17:00:29 -0700 Subject: sctp: Correclty set changeover_active for SFR-CACC Right now, any time we set a primary transport we set the changeover_active flag. As a result, we invoke SFR-CACC even when there has been no changeover events. Only set changeover_active, when there is a true changeover event, i.e. we had a primary path and we are changing to another transport. Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/associola.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 532634861db..024c3ebd966 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -474,6 +474,15 @@ static void sctp_association_destroy(struct sctp_association *asoc) void sctp_assoc_set_primary(struct sctp_association *asoc, struct sctp_transport *transport) { + int changeover = 0; + + /* it's a changeover only if we already have a primary path + * that we are changing + */ + if (asoc->peer.primary_path != NULL && + asoc->peer.primary_path != transport) + changeover = 1 ; + asoc->peer.primary_path = transport; /* Set a default msg_name for events. */ @@ -499,12 +508,12 @@ void sctp_assoc_set_primary(struct sctp_association *asoc, * double switch to the same destination address. */ if (transport->cacc.changeover_active) - transport->cacc.cycling_changeover = 1; + transport->cacc.cycling_changeover = changeover; /* 2) The sender MUST set CHANGEOVER_ACTIVE to indicate that * a changeover has occurred. */ - transport->cacc.changeover_active = 1; + transport->cacc.changeover_active = changeover; /* 3) The sender MUST store the next TSN to be sent in * next_tsn_at_change. -- cgit v1.2.3 From 6de329e26caed7bbbf51229c80f3948549d3c010 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 16 Jun 2008 17:02:28 -0700 Subject: net: Fix test for VLAN TX checksum offload capability Selected device feature bits can be propagated to VLAN devices, so we can make use of TX checksum offload and TSO on VLAN-tagged packets. However, if the physical device does not do VLAN tag insertion or generic checksum offload then the test for TX checksum offload in dev_queue_xmit() will see a protocol of htons(ETH_P_8021Q) and yield false. 
This splits the checksum offload test into two functions: - can_checksum_protocol() tests a given protocol against a feature bitmask - dev_can_checksum() first tests the skb protocol against the device features; if that fails and the protocol is htons(ETH_P_8021Q) then it tests the encapsulated protocol against the effective device features for VLANs Signed-off-by: Ben Hutchings Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/dev.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 58296307787..68d8df0992a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -119,6 +119,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -1362,6 +1363,29 @@ void netif_device_attach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_attach); +static bool can_checksum_protocol(unsigned long features, __be16 protocol) +{ + return ((features & NETIF_F_GEN_CSUM) || + ((features & NETIF_F_IP_CSUM) && + protocol == htons(ETH_P_IP)) || + ((features & NETIF_F_IPV6_CSUM) && + protocol == htons(ETH_P_IPV6))); +} + +static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) +{ + if (can_checksum_protocol(dev->features, skb->protocol)) + return true; + + if (skb->protocol == htons(ETH_P_8021Q)) { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; + if (can_checksum_protocol(dev->features & dev->vlan_features, + veh->h_vlan_encapsulated_proto)) + return true; + } + + return false; +} /* * Invalidate hardware checksum when packet is to be mangled, and @@ -1640,14 +1664,8 @@ int dev_queue_xmit(struct sk_buff *skb) if (skb->ip_summed == CHECKSUM_PARTIAL) { skb_set_transport_header(skb, skb->csum_start - skb_headroom(skb)); - - if (!(dev->features & NETIF_F_GEN_CSUM) && - !((dev->features & NETIF_F_IP_CSUM) && - skb->protocol == htons(ETH_P_IP)) && - !((dev->features & NETIF_F_IPV6_CSUM) && - skb->protocol == htons(ETH_P_IPV6))) - 
if (skb_checksum_help(skb)) - goto out_kfree_skb; + if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb)) + goto out_kfree_skb; } gso: -- cgit v1.2.3 From 68be802cd5ad040fe8cfa33ce3031405df2d9117 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Jun 2008 17:03:32 -0700 Subject: raw: Restore /proc/net/raw correct behavior I just noticed "cat /proc/net/raw" was buggy, missing '\n' separators. I believe this was introduced by commit 8cd850efa4948d57a2ed836911cfd1ab299e89c6 ([RAW]: Cleanup IPv4 raw_seq_show.) This trivial patch restores correct behavior, and applies to current Linus tree (should also be applied to stable tree as well.) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/raw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index e7e091d365f..37a1ecd9d60 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -934,7 +934,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) srcp = inet->num; seq_printf(seq, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d", + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", i, src, srcp, dest, destp, sp->sk_state, atomic_read(&sp->sk_wmem_alloc), atomic_read(&sp->sk_rmem_alloc), -- cgit v1.2.3 From a9d246dbb07cf0bd32bbfc5d184ed738bf2af4f8 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Mon, 16 Jun 2008 17:07:16 -0700 Subject: ipv4: Remove unused definitions in net/ipv4/tcp_ipv4.c. 1) Remove ICMP_MIN_LENGTH, as it is unused. 2) Remove unneeded tcp_v4_send_check() declaration. Signed-off-by: Rami Rosen Signed-off-by: David S. 
Miller --- net/ipv4/tcp_ipv4.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 97a230026e1..12695be2c25 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -85,10 +85,6 @@ int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; -/* Check TCP sequence numbers in ICMP packets. */ -#define ICMP_MIN_LENGTH 8 - -void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); #ifdef CONFIG_TCP_MD5SIG static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, -- cgit v1.2.3 From 27141666b69f535a4d63d7bc6d9e84ee5032f82a Mon Sep 17 00:00:00 2001 From: "Jorge Boncompte [DTI2]" Date: Mon, 16 Jun 2008 17:15:33 -0700 Subject: atm: [br2684] Fix oops due to skb->dev being NULL It happens that if a packet arrives in a VC between the call to open it on the hardware and the call to change the backend to br2684, br2684_regvcc processes the packet and oopses dereferencing skb->dev because it is NULL before the call to br2684_push(). Signed-off-by: Jorge Boncompte [DTI2] Signed-off-by: Chas Williams --- net/atm/br2684.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 9d52ebfc196..ac6035046ad 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -518,9 +518,9 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) struct sk_buff *next = skb->next; skb->next = skb->prev = NULL; + br2684_push(atmvcc, skb); BRPRIV(skb->dev)->stats.rx_bytes -= skb->len; BRPRIV(skb->dev)->stats.rx_packets--; - br2684_push(atmvcc, skb); skb = next; } -- cgit v1.2.3 From 7e903c2ae36efb526eacab3b25d00e90424bd8a8 Mon Sep 17 00:00:00 2001 From: Eric Kinzie Date: Mon, 16 Jun 2008 17:18:18 -0700 Subject: atm: [br2864] fix routed vcmux support From: Eric Kinzie Signed-off-by: Chas Williams Signed-off-by: David S. 
Miller --- net/atm/br2684.c | 76 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/atm/br2684.c b/net/atm/br2684.c index ac6035046ad..05fafdc2eea 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -188,10 +188,13 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev, return 0; } } - } else { - skb_push(skb, 2); - if (brdev->payload == p_bridged) + } else { /* e_vc */ + if (brdev->payload == p_bridged) { + skb_push(skb, 2); memset(skb->data, 0, 2); + } else { /* p_routed */ + skb_pull(skb, ETH_HLEN); + } } skb_debug(skb); @@ -377,11 +380,8 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) (skb->data + 6, ethertype_ipv4, sizeof(ethertype_ipv4)) == 0) skb->protocol = __constant_htons(ETH_P_IP); - else { - brdev->stats.rx_errors++; - dev_kfree_skb(skb); - return; - } + else + goto error; skb_pull(skb, sizeof(llc_oui_ipv4)); skb_reset_network_header(skb); skb->pkt_type = PACKET_HOST; @@ -394,44 +394,56 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) (memcmp(skb->data, llc_oui_pid_pad, 7) == 0)) { skb_pull(skb, sizeof(llc_oui_pid_pad)); skb->protocol = eth_type_trans(skb, net_dev); - } else { - brdev->stats.rx_errors++; - dev_kfree_skb(skb); - return; - } + } else + goto error; - } else { - /* first 2 chars should be 0 */ - if (*((u16 *) (skb->data)) != 0) { - brdev->stats.rx_errors++; - dev_kfree_skb(skb); - return; + } else { /* e_vc */ + if (brdev->payload == p_routed) { + struct iphdr *iph; + + skb_reset_network_header(skb); + iph = ip_hdr(skb); + if (iph->version == 4) + skb->protocol = __constant_htons(ETH_P_IP); + else if (iph->version == 6) + skb->protocol = __constant_htons(ETH_P_IPV6); + else + goto error; + skb->pkt_type = PACKET_HOST; + } else { /* p_bridged */ + /* first 2 chars should be 0 */ + if (*((u16 *) (skb->data)) != 0) + goto error; + skb_pull(skb, BR2684_PAD_LEN); + skb->protocol = 
eth_type_trans(skb, net_dev); } - skb_pull(skb, BR2684_PAD_LEN + ETH_HLEN); /* pad, dstmac, srcmac, ethtype */ - skb->protocol = eth_type_trans(skb, net_dev); } #ifdef CONFIG_ATM_BR2684_IPFILTER - if (unlikely(packet_fails_filter(skb->protocol, brvcc, skb))) { - brdev->stats.rx_dropped++; - dev_kfree_skb(skb); - return; - } + if (unlikely(packet_fails_filter(skb->protocol, brvcc, skb))) + goto dropped; #endif /* CONFIG_ATM_BR2684_IPFILTER */ skb->dev = net_dev; ATM_SKB(skb)->vcc = atmvcc; /* needed ? */ pr_debug("received packet's protocol: %x\n", ntohs(skb->protocol)); skb_debug(skb); - if (unlikely(!(net_dev->flags & IFF_UP))) { - /* sigh, interface is down */ - brdev->stats.rx_dropped++; - dev_kfree_skb(skb); - return; - } + /* sigh, interface is down? */ + if (unlikely(!(net_dev->flags & IFF_UP))) + goto dropped; brdev->stats.rx_packets++; brdev->stats.rx_bytes += skb->len; memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data)); netif_rx(skb); + return; + +dropped: + brdev->stats.rx_dropped++; + goto free_skb; +error: + brdev->stats.rx_errors++; +free_skb: + dev_kfree_skb(skb); + return; } /* -- cgit v1.2.3 From 68b80f11380889996aa7eadba29dbbb5c29a5864 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 17 Jun 2008 15:51:47 -0700 Subject: netfilter: nf_nat: fix RCU races Fix three ct_extend/NAT extension related races: - When cleaning up the extension area and removing it from the bysource hash, the nat->ct pointer must not be set to NULL since it may still be used in a RCU read side - When replacing a NAT extension area in the bysource hash, the nat->ct pointer must be assigned before performing the replacement - When reallocating extension storage in ct_extend, the old memory must not be freed immediately since it may still be used by a RCU read side Possibly fixes https://bugzilla.redhat.com/show_bug.cgi?id=449315 and/or http://bugzilla.kernel.org/show_bug.cgi?id=10875 Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/nf_nat_core.c | 3 +-- net/netfilter/nf_conntrack_extend.c | 9 ++++++++- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 04578593e10..d2a887fc8d9 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -556,7 +556,6 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) spin_lock_bh(&nf_nat_lock); hlist_del_rcu(&nat->bysource); - nat->ct = NULL; spin_unlock_bh(&nf_nat_lock); } @@ -570,8 +569,8 @@ static void nf_nat_move_storage(void *new, void *old) return; spin_lock_bh(&nf_nat_lock); - hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); new_nat->ct = ct; + hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); spin_unlock_bh(&nf_nat_lock); } diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index bcc19fa4ed1..8a3f8b34e46 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -59,12 +59,19 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) if (!*ext) return NULL; + INIT_RCU_HEAD(&(*ext)->rcu); (*ext)->offset[id] = off; (*ext)->len = len; return (void *)(*ext) + off; } +static void __nf_ct_ext_free_rcu(struct rcu_head *head) +{ + struct nf_ct_ext *ext = container_of(head, struct nf_ct_ext, rcu); + kfree(ext); +} + void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) { struct nf_ct_ext *new; @@ -106,7 +113,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) (void *)ct->ext + ct->ext->offset[i]); rcu_read_unlock(); } - kfree(ct->ext); + call_rcu(&ct->ext->rcu, __nf_ct_ext_free_rcu); ct->ext = new; } -- cgit v1.2.3 From 8a548868db62422113104ebc658065e3fe976951 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 17 Jun 2008 15:52:07 -0700 Subject: netfilter: nf_conntrack_h323: fix memory leak in module initialization error path 
Properly free h323_buffer when helper registration fails. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_h323_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 95da1a24aab..99e385d5b70 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -1799,6 +1799,7 @@ err3: err2: nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]); err1: + kfree(h323_buffer); return ret; } -- cgit v1.2.3 From a56b8f81580761c65e4d8d0c04ac1cb7a788bdf1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 17 Jun 2008 15:52:32 -0700 Subject: netfilter: nf_conntrack_h323: fix module unload crash The H.245 helper is not registered/unregistered, but assigned to connections manually from the Q.931 helper. This means on unload existing expectations and connections using the helper are not cleaned up, leading to the following oops on module unload: CPU 0 Unable to handle kernel paging request at virtual address c00a6828, epc == 802224dc, ra == 801d4e7c Oops[#1]: Cpu 0 $ 0 : 00000000 00000000 00000004 c00a67f0 $ 4 : 802a5ad0 81657e00 00000000 00000000 $ 8 : 00000008 801461c8 00000000 80570050 $12 : 819b0280 819b04b0 00000006 00000000 $16 : 802a5a60 80000000 80b46000 80321010 $20 : 00000000 00000004 802a5ad0 00000001 $24 : 00000000 802257a8 $28 : 802a4000 802a59e8 00000004 801d4e7c Hi : 0000000b Lo : 00506320 epc : 802224dc ip_conntrack_help+0x38/0x74 Tainted: P ra : 801d4e7c nf_iterate+0xbc/0x130 Status: 1000f403 KERNEL EXL IE Cause : 00800008 BadVA : c00a6828 PrId : 00019374 Modules linked in: ip_nat_pptp ip_conntrack_pptp ath_pktlog wlan_acl wlan_wep wlan_tkip wlan_ccmp wlan_xauth ath_pci ath_dev ath_dfs ath_rate_atheros wlan ath_hal ip_nat_tftp ip_conntrack_tftp ip_nat_ftp ip_conntrack_ftp pppoe ppp_async ppp_deflate ppp_mppe pppox ppp_generic slhc Process swapper (pid: 0, 
threadinfo=802a4000, task=802a6000) Stack : 801e7d98 00000004 802a5a60 80000000 801d4e7c 801d4e7c 802a5ad0 00000004 00000000 00000000 801e7d98 00000000 00000004 802a5ad0 00000000 00000010 801e7d98 80b46000 802a5a60 80320000 80000000 801d4f8c 802a5b00 00000002 80063834 00000000 80b46000 802a5a60 801e7d98 80000000 802ba854 00000000 81a02180 80b7e260 81a021b0 819b0000 819b0000 80570056 00000000 00000001 ... Call Trace: [<801e7d98>] ip_finish_output+0x0/0x23c [<801d4e7c>] nf_iterate+0xbc/0x130 [<801d4e7c>] nf_iterate+0xbc/0x130 [<801e7d98>] ip_finish_output+0x0/0x23c [<801e7d98>] ip_finish_output+0x0/0x23c [<801d4f8c>] nf_hook_slow+0x9c/0x1a4 One way to fix this would be to split helper cleanup from the unregistration function and invoke it for the H.245 helper, but since ctnetlink needs to be able to find the helper for synchronization purposes, a better fix is to register it normally and make sure it's not assigned to connections during helper lookup. The missing l3num initialization is enough for this; this patch changes it to use AF_UNSPEC to make it more explicit though. Reported-by: liannan Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- net/netfilter/nf_conntrack_h323_main.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 99e385d5b70..2f83c158934 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -619,6 +619,7 @@ static const struct nf_conntrack_expect_policy h245_exp_policy = { static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = { .name = "H.245", .me = THIS_MODULE, + .tuple.src.l3num = AF_UNSPEC, .tuple.dst.protonum = IPPROTO_UDP, .help = h245_help, .expect_policy = &h245_exp_policy, @@ -1765,6 +1766,7 @@ static void __exit nf_conntrack_h323_fini(void) nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]); nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]); nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]); + nf_conntrack_helper_unregister(&nf_conntrack_helper_h245); kfree(h323_buffer); pr_debug("nf_ct_h323: fini\n"); } @@ -1777,27 +1779,32 @@ static int __init nf_conntrack_h323_init(void) h323_buffer = kmalloc(65536, GFP_KERNEL); if (!h323_buffer) return -ENOMEM; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[0]); + ret = nf_conntrack_helper_register(&nf_conntrack_helper_h245); if (ret < 0) goto err1; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[1]); + ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[0]); if (ret < 0) goto err2; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[0]); + ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[1]); if (ret < 0) goto err3; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]); + ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[0]); if (ret < 0) goto err4; + ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]); + if (ret < 0) + goto err5; pr_debug("nf_ct_h323: init success\n"); return 0; -err4: +err5: 
nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]); -err3: +err4: nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]); -err2: +err3: nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]); +err2: + nf_conntrack_helper_unregister(&nf_conntrack_helper_h245); err1: kfree(h323_buffer); return ret; -- cgit v1.2.3 From fe833fca2eac6b3d3ad5e35f44ad4638362f1da8 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 17 Jun 2008 16:37:13 -0700 Subject: xfrm: fix fragmentation for ipv4 xfrm tunnel When generating the ip header for the transformed packet we just copy the frag_off field of the ip header from the original packet to the ip header of the new generated packet. If we receive a packet as a chain of fragments, all but the last of the new generated packets have the IP_MF flag set. We have to mask the frag_off field to only keep the IP_DF flag from the original packet. This got lost with git commit 36cf9acf93e8561d9faec24849e57688a81eb9c5 ("[IPSEC]: Separate inner/outer mode processing on output") Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/xfrm4_mode_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 584e6d74e3a..7135279f3f8 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -52,7 +52,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) IP_ECN_clear(top_iph); top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 
- 0 : XFRM_MODE_SKB_CB(skb)->frag_off; + 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); ip_select_ident(top_iph, dst->child, NULL); top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT); -- cgit v1.2.3 From 3c73419c09a5ef73d56472dbfdade9e311496e9b Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Tue, 17 Jun 2008 22:28:05 -0700 Subject: af_unix: fix 'poll for write'/ connected DGRAM sockets The unix_dgram_sendmsg routine implements a (somewhat crude) form of receiver-imposed flow control by comparing the length of the receive queue of the 'peer socket' with the max_ack_backlog value stored in the corresponding sock structure, either blocking the thread which caused the send-routine to be called or returning EAGAIN. This routine is used by both SOCK_DGRAM and SOCK_SEQPACKET sockets. The poll-implementation for these socket types is datagram_poll from core/datagram.c. A socket is deemed to be writeable by this routine when the memory presently consumed by datagrams owned by it is less than the configured socket send buffer size. This is always wrong for connected PF_UNIX non-stream sockets when the abovementioned receive queue is currently considered to be full. 'poll' will then return, indicating that the socket is writeable, but a subsequent write results in EAGAIN, effectively causing a (usual) application to 'poll for writeability by repeated send request with O_NONBLOCK set' until it has consumed its time quantum. The change below uses a suitably modified variant of the datagram_poll routines for both types of PF_UNIX sockets, which tests if the recv-queue of the peer a socket is connected to is presently considered to be 'full' as part of the 'is this socket writeable'-checking code.
The socket being polled is additionally put onto the peer_wait wait queue associated with its peer, because the unix_dgram_sendmsg routine does a wake up on this queue after a datagram was received and the 'other wakeup call' is done implicitly as part of skb destruction, meaning, a process blocked in poll because of a full peer receive queue could otherwise sleep forever if no datagram owned by its socket was already sitting on this queue. Among this change is a small (inline) helper routine named 'unix_recvq_full', which consolidates the actual testing code (in three different places) into a single location. Signed-off-by: Rainer Weikusat Signed-off-by: David S. Miller --- net/unix/af_unix.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 70 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e18cd3628db..657835f227d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -169,6 +169,11 @@ static inline int unix_may_send(struct sock *sk, struct sock *osk) return (unix_peer(osk) == NULL || unix_our_peer(sk, osk)); } +static inline int unix_recvq_full(struct sock const *sk) +{ + return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; +} + static struct sock *unix_peer_get(struct sock *s) { struct sock *peer; @@ -482,6 +487,8 @@ static int unix_socketpair(struct socket *, struct socket *); static int unix_accept(struct socket *, struct socket *, int); static int unix_getname(struct socket *, struct sockaddr *, int *, int); static unsigned int unix_poll(struct file *, struct socket *, poll_table *); +static unsigned int unix_datagram_poll(struct file *, struct socket *, + poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); static int unix_shutdown(struct socket *, int); static int unix_stream_sendmsg(struct kiocb *, struct socket *, @@ -527,7 +534,7 @@ static const struct proto_ops unix_dgram_ops = { .socketpair = unix_socketpair, 
.accept = sock_no_accept, .getname = unix_getname, - .poll = datagram_poll, + .poll = unix_datagram_poll, .ioctl = unix_ioctl, .listen = sock_no_listen, .shutdown = unix_shutdown, @@ -548,7 +555,7 @@ static const struct proto_ops unix_seqpacket_ops = { .socketpair = unix_socketpair, .accept = unix_accept, .getname = unix_getname, - .poll = datagram_poll, + .poll = unix_datagram_poll, .ioctl = unix_ioctl, .listen = unix_listen, .shutdown = unix_shutdown, @@ -983,8 +990,7 @@ static long unix_wait_for_peer(struct sock *other, long timeo) sched = !sock_flag(other, SOCK_DEAD) && !(other->sk_shutdown & RCV_SHUTDOWN) && - (skb_queue_len(&other->sk_receive_queue) > - other->sk_max_ack_backlog); + unix_recvq_full(other); unix_state_unlock(other); @@ -1058,8 +1064,7 @@ restart: if (other->sk_state != TCP_LISTEN) goto out_unlock; - if (skb_queue_len(&other->sk_receive_queue) > - other->sk_max_ack_backlog) { + if (unix_recvq_full(other)) { err = -EAGAIN; if (!timeo) goto out_unlock; @@ -1428,9 +1433,7 @@ restart: goto out_unlock; } - if (unix_peer(other) != sk && - (skb_queue_len(&other->sk_receive_queue) > - other->sk_max_ack_backlog)) { + if (unix_peer(other) != sk && unix_recvq_full(other)) { if (!timeo) { err = -EAGAIN; goto out_unlock; @@ -1991,6 +1994,64 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl return mask; } +static unsigned int unix_datagram_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk, *peer; + unsigned int mask; + + poll_wait(file, sk->sk_sleep, wait); + + peer = unix_peer_get(sk); + if (peer) { + if (peer != sk) { + /* + * Writability of a connected socket additionally + * depends on the state of the receive queue of the + * peer. + */ + poll_wait(file, &unix_sk(peer)->peer_wait, wait); + } else { + sock_put(peer); + peer = NULL; + } + } + + mask = 0; + + /* exceptional events? 
*/ + if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + mask |= POLLERR; + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLRDHUP; + if (sk->sk_shutdown == SHUTDOWN_MASK) + mask |= POLLHUP; + + /* readable? */ + if (!skb_queue_empty(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) + mask |= POLLIN | POLLRDNORM; + + /* Connection-based need to check for termination and startup */ + if (sk->sk_type == SOCK_SEQPACKET) { + if (sk->sk_state == TCP_CLOSE) + mask |= POLLHUP; + /* connection hasn't started yet? */ + if (sk->sk_state == TCP_SYN_SENT) + return mask; + } + + /* writable? */ + if (unix_writable(sk) && !(peer && unix_recvq_full(peer))) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + else + set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + + if (peer) + sock_put(peer); + + return mask; +} #ifdef CONFIG_PROC_FS static struct sock *first_unix_socket(int *i) -- cgit v1.2.3 From 3a5be7d4b079f3a9ce1e8ce4a93ba15ae6d00111 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Jun 2008 01:19:51 -0700 Subject: Revert "mac80211: Use skb_header_cloned() on TX path." This reverts commit 608961a5eca8d3c6bd07172febc27b5559408c5d. The problem is that the mac80211 stack not only needs to be able to muck with the link-level headers, it also might need to mangle all of the packet data if doing sw wireless encryption. This fixes kernel bugzilla #10903. Thanks to Didier Raboud (for the bugzilla report), Andrew Prince (for bisecting), Johannes Berg (for bringing this bisection analysis to my attention), and Ilpo (for trying to analyze this purely from the TCP side). In 2.6.27 we can take another stab at this, by using something like skb_cow_data() when the TX path of mac80211 ends up with a non-NULL tx->key. The ESP protocol code in the IPSEC stack can be used as a model for implementation. Signed-off-by: David S. 
Miller --- net/mac80211/tx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1d7dd54aace..28d8bd53bd3 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1562,13 +1562,13 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, * be cloned. This could happen, e.g., with Linux bridge code passing * us broadcast frames. */ - if (head_need > 0 || skb_header_cloned(skb)) { + if (head_need > 0 || skb_cloned(skb)) { #if 0 printk(KERN_DEBUG "%s: need to reallocate buffer for %d bytes " "of headroom\n", dev->name, head_need); #endif - if (skb_header_cloned(skb)) + if (skb_cloned(skb)) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else I802_DEBUG_INC(local->tx_expand_skb_head); -- cgit v1.2.3 From 6d1a3fb567a728d31474636e167c324702a0c38b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 18 Jun 2008 02:07:07 -0700 Subject: netlink: genl: fix circular locking genetlink has a circular locking dependency when dumping the registered families: - dump start: genl_rcv() : take genl_mutex genl_rcv_msg() : call netlink_dump_start() while holding genl_mutex netlink_dump_start(), netlink_dump() : take nlk->cb_mutex ctrl_dumpfamily() : try to detect this case and not take genl_mutex a second time - dump continuance: netlink_rcv() : call netlink_dump netlink_dump : take nlk->cb_mutex ctrl_dumpfamily() : take genl_mutex Register genl_lock as callback mutex with netlink to fix this. This slightly widens an already existing module unload race, the genl ops used during the dump might go away when the module is unloaded. Thomas Graf is working on a separate fix for this. Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- net/netlink/genetlink.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f5aa23c3e88..3e1191cecaf 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -444,8 +444,11 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (ops->dumpit == NULL) return -EOPNOTSUPP; - return netlink_dump_start(genl_sock, skb, nlh, - ops->dumpit, ops->done); + genl_unlock(); + err = netlink_dump_start(genl_sock, skb, nlh, + ops->dumpit, ops->done); + genl_lock(); + return err; } if (ops->doit == NULL) @@ -603,9 +606,6 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) int chains_to_skip = cb->args[0]; int fams_to_skip = cb->args[1]; - if (chains_to_skip != 0) - genl_lock(); - for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { if (i < chains_to_skip) continue; @@ -623,9 +623,6 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) } errout: - if (chains_to_skip != 0) - genl_unlock(); - cb->args[0] = i; cb->args[1] = n; @@ -770,7 +767,7 @@ static int __init genl_init(void) /* we'll bump the group number right afterwards */ genl_sock = netlink_kernel_create(&init_net, NETLINK_GENERIC, 0, - genl_rcv, NULL, THIS_MODULE); + genl_rcv, &genl_mutex, THIS_MODULE); if (genl_sock == NULL) panic("GENL: Cannot initialize generic netlink\n"); -- cgit v1.2.3 From ef3a62d272f033989e83eb1f26505f93f93e3e69 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Jun 2008 15:39:48 -0700 Subject: mac80211: detect driver tx bugs When a driver rejects a frame in its ->tx() callback, it must also stop queues, otherwise mac80211 can go into a loop here. Detect this situation and abort the loop after five retries, warning about the driver bug. Signed-off-by: Johannes Berg Signed-off-by: David S.
Miller --- net/mac80211/tx.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 28d8bd53bd3..c80d5899f27 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1132,7 +1132,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, ieee80211_tx_handler *handler; struct ieee80211_tx_data tx; ieee80211_tx_result res = TX_DROP, res_prepare; - int ret, i; + int ret, i, retries = 0; WARN_ON(__ieee80211_queue_pending(local, control->queue)); @@ -1216,6 +1216,13 @@ retry: if (!__ieee80211_queue_stopped(local, control->queue)) { clear_bit(IEEE80211_LINK_STATE_PENDING, &local->state[control->queue]); + retries++; + /* + * Driver bug, it's rejecting packets but + * not stopping queues. + */ + if (WARN_ON_ONCE(retries > 5)) + goto drop; goto retry; } memcpy(&store->control, control, -- cgit v1.2.3 From aea7427f70cce5fa8f99ce447b213e9e3b49f24c Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Thu, 19 Jun 2008 16:29:39 -0700 Subject: ipv6: Remove options header when setsockopt's optlen is 0 Remove the sticky Hop-by-Hop options header by calling setsockopt() for IPV6_HOPOPTS with a zero option length, per RFC3542. Routing header and Destination options header does the same as Hop-by-Hop options header. Signed-off-by: Shan Wei Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index c042ce19bd1..86e28a75267 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -345,18 +345,21 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, case IPV6_DSTOPTS: { struct ipv6_txoptions *opt; + + /* remove any sticky options header with a zero option + * length, per RFC3542. 
+ */ if (optlen == 0) optval = NULL; + else if (optlen < sizeof(struct ipv6_opt_hdr) || + optlen & 0x7 || optlen > 8 * 255) + goto e_inval; /* hop-by-hop / destination options are privileged option */ retv = -EPERM; if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW)) break; - if (optlen < sizeof(struct ipv6_opt_hdr) || - optlen & 0x7 || optlen > 8 * 255) - goto e_inval; - opt = ipv6_renew_options(sk, np->opt, optname, (struct ipv6_opt_hdr __user *)optval, optlen); -- cgit v1.2.3 From f630e43a215a3129d0c1173cae0bce6ea4855cf7 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 19 Jun 2008 16:33:57 -0700 Subject: ipv6: Drop packets for loopback address from outside of the box. [ Based upon original report and patch by Karsten Keil. Karsten has verified that this fixes the TAHI test case "ICMPv6 test v6LC.5.1.2 Part F". -DaveM ] Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/ip6_input.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 4e5c8615832..17eb48b8e32 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -102,6 +102,15 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->version != 6) goto err; + /* + * RFC4291 2.5.3 + * A packet received on an interface with a destination address + * of loopback must be dropped. 
+ */ + if (!(dev->flags & IFF_LOOPBACK) && + ipv6_addr_loopback(&hdr->daddr)) + goto err; + skb->transport_header = skb->network_header + sizeof(*hdr); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); -- cgit v1.2.3 From cddf63d99d0d145f18b293c3d0de4af7dab2a922 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 May 2008 19:16:06 +0200 Subject: irnet_ppp: BKL pushdown Signed-off-by: Arnd Bergmann --- net/irda/irnet/irnet.h | 1 + net/irda/irnet/irnet_ppp.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'net') diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index b001c361ad3..bccf4d0059f 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -241,6 +241,7 @@ #include #include +#include #include #include #include diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index e0eab5927c4..e84a70dd346 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -479,6 +479,7 @@ dev_irnet_open(struct inode * inode, ap = kzalloc(sizeof(*ap), GFP_KERNEL); DABORT(ap == NULL, -ENOMEM, FS_ERROR, "Can't allocate struct irnet...\n"); + lock_kernel(); /* initialize the irnet structure */ ap->file = file; @@ -500,6 +501,7 @@ dev_irnet_open(struct inode * inode, { DERROR(FS_ERROR, "Can't setup IrDA link...\n"); kfree(ap); + unlock_kernel(); return err; } @@ -510,6 +512,7 @@ dev_irnet_open(struct inode * inode, file->private_data = ap; DEXIT(FS_TRACE, " - ap=0x%p\n", ap); + unlock_kernel(); return 0; } -- cgit v1.2.3 From 735ce972fbc8a65fb17788debd7bbe7b4383cc62 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 20 Jun 2008 22:04:34 -0700 Subject: sctp: Make sure N * sizeof(union sctp_addr) does not overflow. As noticed by Gabriel Campana, the kmalloc() length arg passed in by sctp_getsockopt_local_addrs_old() can overflow if ->addr_num is large enough. Therefore, enforce an appropriate limit. Signed-off-by: David S. 
Miller --- net/sctp/socket.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e7e3baf7009..0dbcde6758e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4401,7 +4401,9 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, if (copy_from_user(&getaddrs, optval, len)) return -EFAULT; - if (getaddrs.addr_num <= 0) return -EINVAL; + if (getaddrs.addr_num <= 0 || + getaddrs.addr_num >= (INT_MAX / sizeof(union sctp_addr))) + return -EINVAL; /* * For UDP-style sockets, id specifies the association to query. * If the id field is set to the value '0' then the locally bound -- cgit v1.2.3 From b9f75f45a6b46a0ab4eb0857d437a0845871f314 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Jun 2008 22:16:51 -0700 Subject: netns: Don't receive new packets in a dead network namespace. Alexey Dobriyan writes: > Subject: ICMP sockets destruction vs ICMP packets oops > After icmp_sk_exit() nuked ICMP sockets, we get an interrupt. > icmp_reply() wants ICMP socket. 
> > Steps to reproduce: > > launch shell in new netns > move real NIC to netns > setup routing > ping -i 0 > exit from shell > > BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 > IP: [] icmp_sk+0x17/0x30 > PGD 17f3cd067 PUD 17f3ce067 PMD 0 > Oops: 0000 [1] PREEMPT SMP DEBUG_PAGEALLOC > CPU 0 > Modules linked in: usblp usbcore > Pid: 0, comm: swapper Not tainted 2.6.26-rc6-netns-ct #4 > RIP: 0010:[] [] icmp_sk+0x17/0x30 > RSP: 0018:ffffffff8057fc30 EFLAGS: 00010286 > RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff81017c7db900 > RDX: 0000000000000034 RSI: ffff81017c7db900 RDI: ffff81017dc41800 > RBP: ffffffff8057fc40 R08: 0000000000000001 R09: 000000000000a815 > R10: 0000000000000000 R11: 0000000000000001 R12: ffffffff8057fd28 > R13: ffffffff8057fd00 R14: ffff81017c7db938 R15: ffff81017dc41800 > FS: 0000000000000000(0000) GS:ffffffff80525000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b > CR2: 0000000000000000 CR3: 000000017fcda000 CR4: 00000000000006e0 > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > Process swapper (pid: 0, threadinfo ffffffff8053a000, task ffffffff804fa4a0) > Stack: 0000000000000000 ffff81017c7db900 ffffffff8057fcf0 ffffffff803fcfe4 > ffffffff804faa38 0000000000000246 0000000000005a40 0000000000000246 > 000000000001ffff ffff81017dd68dc0 0000000000005a40 0000000055342436 > Call Trace: > [] icmp_reply+0x44/0x1e0 > [] ? ip_route_input+0x23a/0x1360 > [] icmp_echo+0x65/0x70 > [] icmp_rcv+0x180/0x1b0 > [] ip_local_deliver+0xf4/0x1f0 > [] ip_rcv+0x33b/0x650 > [] netif_receive_skb+0x27a/0x340 > [] process_backlog+0x9d/0x100 > [] net_rx_action+0x18d/0x250 > [] __do_softirq+0x75/0x100 > [] call_softirq+0x1c/0x30 > [] do_softirq+0x65/0xa0 > [] irq_exit+0x97/0xa0 > [] do_IRQ+0xa8/0x130 > [] ? mwait_idle+0x0/0x60 > [] ret_from_intr+0x0/0xf > [] ? mwait_idle+0x4c/0x60 > [] ? mwait_idle+0x43/0x60 > [] ? 
cpu_idle+0x57/0xa0 > [] ? rest_init+0x70/0x80 > Code: 10 5b 41 5c 41 5d 41 5e c9 c3 66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 53 > 48 83 ec 08 48 8b 9f 78 01 00 00 e8 2b c7 f1 ff 89 c0 <48> 8b 04 c3 48 83 c4 08 > 5b c9 c3 66 66 66 66 66 2e 0f 1f 84 00 > RIP [] icmp_sk+0x17/0x30 > RSP > CR2: 0000000000000000 > ---[ end trace ea161157b76b33e8 ]--- > Kernel panic - not syncing: Aiee, killing interrupt handler! Receiving packets while we are cleaning up a network namespace is a racy proposition. It is possible when the packet arrives that we have removed some but not all of the state we need to fully process it. We have the choice of either playing wack-a-mole with the cleanup routines or simply dropping packets when we don't have a network namespace to handle them. Since the check looks inexpensive in netif_receive_skb let's just drop the incoming packets. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++++ net/core/net_namespace.c | 3 +++ 2 files changed, 7 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 68d8df0992a..c421a1f8f0b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2077,6 +2077,10 @@ int netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); + /* Don't receive packets in an exiting network namespace */ + if (!net_alive(dev_net(skb->dev))) + goto out; + #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 72b4c184dd8..7c52fe277b6 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -140,6 +140,9 @@ static void cleanup_net(struct work_struct *work) struct pernet_operations *ops; struct net *net; + /* Be very certain incoming network packets will not find us */ + rcu_barrier(); + net = container_of(work, struct net, work); mutex_lock(&net_mutex); -- cgit v1.2.3 From 88a6f4ad76be425f47df7f892baf913bcd466fb3 Mon Sep 17 00:00:00 2001 From: Patrick 
McHardy Date: Tue, 24 Jun 2008 13:30:45 -0700 Subject: netfilter: ip6table_mangle: don't reroute in LOCAL_IN Rerouting should only happen in LOCAL_OUT, in INPUT its useless since the packet has already chosen its final destination. Noticed by Alexey Dobriyan . Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6table_mangle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 27a5e8b48d9..f405cea21a8 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -129,7 +129,7 @@ static struct nf_hook_ops ip6t_ops[] __read_mostly = { .priority = NF_IP6_PRI_MANGLE, }, { - .hook = ip6t_local_hook, + .hook = ip6t_route_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, -- cgit v1.2.3 From 59d393ad92f719d9ef36b96eae56d4817a7eeb10 Mon Sep 17 00:00:00 2001 From: Tony Vroon Date: Wed, 11 Jun 2008 16:23:56 -0400 Subject: mac80211: implement EU regulatory domain Implement missing EU regulatory domain for mac80211. Based on the information in IEEE 802.11-2007 (specifically pages 1142, 1143 & 1148) and ETSI 301 893 (V1.4.1). With thanks to Johannes Berg. Signed-off-by: Tony Vroon Acked-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/wireless/reg.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 185488da246..855bff4b325 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -80,6 +80,23 @@ static const struct ieee80211_channel_range ieee80211_JP_channels[] = { IEEE80211_CHAN_RADAR), }; +static const struct ieee80211_channel_range ieee80211_EU_channels[] = { + /* IEEE 802.11b/g, channels 1..13 */ + RANGE_PWR(2412, 2472, 20, 6, 0), + /* IEEE 802.11a, channel 36*/ + RANGE_PWR(5180, 5180, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN), + /* IEEE 802.11a, channel 40*/ + RANGE_PWR(5200, 5200, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN), + /* IEEE 802.11a, channel 44*/ + RANGE_PWR(5220, 5220, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN), + /* IEEE 802.11a, channels 48..64 */ + RANGE_PWR(5240, 5320, 23, 6, IEEE80211_CHAN_NO_IBSS | + IEEE80211_CHAN_RADAR), + /* IEEE 802.11a, channels 100..140 */ + RANGE_PWR(5500, 5700, 30, 6, IEEE80211_CHAN_NO_IBSS | + IEEE80211_CHAN_RADAR), +}; + #define REGDOM(_code) \ { \ .code = __stringify(_code), \ @@ -90,6 +107,7 @@ static const struct ieee80211_channel_range ieee80211_JP_channels[] = { static const struct ieee80211_regdomain ieee80211_regdoms[] = { REGDOM(US), REGDOM(JP), + REGDOM(EU), }; -- cgit v1.2.3 From 8691e5a8f691cc2a4fda0651e8d307aaba0e7d68 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 6 Jun 2008 11:18:06 +0200 Subject: smp_call_function: get rid of the unused nonatomic/retry argument It's never used and the comments refer to nonatomic and retry interchangeably. So get rid of it.
Acked-by: Jeremy Fitzhardinge Signed-off-by: Jens Axboe --- net/core/flow.c | 2 +- net/iucv/iucv.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/flow.c b/net/core/flow.c index 19991175fde..5cf81052d04 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -298,7 +298,7 @@ void flow_cache_flush(void) init_completion(&info.completion); local_bh_disable(); - smp_call_function(flow_cache_flush_per_cpu, &info, 1, 0); + smp_call_function(flow_cache_flush_per_cpu, &info, 0); flow_cache_flush_tasklet((unsigned long)&info); local_bh_enable(); diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 91897076213..94d5a45c3a5 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -480,7 +480,7 @@ static void iucv_setmask_mp(void) if (cpu_isset(cpu, iucv_buffer_cpumask) && !cpu_isset(cpu, iucv_irq_cpumask)) smp_call_function_single(cpu, iucv_allow_cpu, - NULL, 0, 1); + NULL, 1); preempt_enable(); } @@ -498,7 +498,7 @@ static void iucv_setmask_up(void) cpumask = iucv_irq_cpumask; cpu_clear(first_cpu(iucv_irq_cpumask), cpumask); for_each_cpu_mask(cpu, cpumask) - smp_call_function_single(cpu, iucv_block_cpu, NULL, 0, 1); + smp_call_function_single(cpu, iucv_block_cpu, NULL, 1); } /** @@ -523,7 +523,7 @@ static int iucv_enable(void) rc = -EIO; preempt_disable(); for_each_online_cpu(cpu) - smp_call_function_single(cpu, iucv_declare_cpu, NULL, 0, 1); + smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1); preempt_enable(); if (cpus_empty(iucv_buffer_cpumask)) /* No cpu could declare an iucv buffer. 
*/ @@ -580,7 +580,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, case CPU_ONLINE_FROZEN: case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: - smp_call_function_single(cpu, iucv_declare_cpu, NULL, 0, 1); + smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: @@ -589,10 +589,10 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, if (cpus_empty(cpumask)) /* Can't offline last IUCV enabled cpu. */ return NOTIFY_BAD; - smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 0, 1); + smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1); if (cpus_empty(iucv_irq_cpumask)) smp_call_function_single(first_cpu(iucv_buffer_cpumask), - iucv_allow_cpu, NULL, 0, 1); + iucv_allow_cpu, NULL, 1); break; } return NOTIFY_OK; @@ -652,7 +652,7 @@ static void iucv_cleanup_queue(void) * pending interrupts force them to the work queue by calling * an empty function on all cpus. */ - smp_call_function(__iucv_cleanup_queue, NULL, 0, 1); + smp_call_function(__iucv_cleanup_queue, NULL, 1); spin_lock_irq(&iucv_queue_lock); list_for_each_entry_safe(p, n, &iucv_task_queue, list) { /* Remove stale work items from the task queue. */ -- cgit v1.2.3 From 15c8b6c1aaaf1c4edd67e2f02e4d8e1bd1a51c0d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 May 2008 09:39:44 +0200 Subject: on_each_cpu(): kill unused 'retry' parameter It's not even passed on to smp_call_function() anymore, since that was removed. So kill it. Acked-by: Jeremy Fitzhardinge Reviewed-by: Paul E. 
McKenney Signed-off-by: Jens Axboe --- net/iucv/iucv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 94d5a45c3a5..a178e27e7b1 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -545,7 +545,7 @@ out: */ static void iucv_disable(void) { - on_each_cpu(iucv_retrieve_cpu, NULL, 0, 1); + on_each_cpu(iucv_retrieve_cpu, NULL, 1); kfree(iucv_path_table); } -- cgit v1.2.3 From 00eb7fe77eb455f807c396f9917f0f623d4c84bb Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 26 Jun 2008 12:13:46 +0300 Subject: mac80211: fix an oops in several failure paths in key allocation This patch fixes an oops in several failure paths in key allocation. This Oops occurs when freeing a key that has not been linked yet, so the key->sdata is not set. Signed-off-by: Emmanuel Grumbach Signed-off-by: Tomas Winkler Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/key.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 150d66dbda9..220e83be3ef 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -380,6 +380,15 @@ void ieee80211_key_free(struct ieee80211_key *key) if (!key) return; + if (!key->sdata) { + /* The key has not been linked yet, simply free it + * and don't Oops */ + if (key->conf.alg == ALG_CCMP) + ieee80211_aes_key_free(key->u.ccmp.tfm); + kfree(key); + return; + } + spin_lock_irqsave(&key->sdata->local->key_lock, flags); __ieee80211_key_free(key); spin_unlock_irqrestore(&key->sdata->local->key_lock, flags); -- cgit v1.2.3 From 57413ebc4e0f1e471a3b4db4aff9a85c083d090e Mon Sep 17 00:00:00 2001 From: Miquel van Smoorenburg Date: Fri, 27 Jun 2008 17:23:57 -0700 Subject: tcp: calculate tcp_mem based on low memory instead of all memory The tcp_mem array which contains limits on the total amount of memory used by TCP sockets is calculated based on nr_all_pages. 
On a 32 bits x86 system, we should base this on the number of lowmem pages. Signed-off-by: Miquel van Smoorenburg Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fc54a48fde1..850825dc86e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -260,6 +260,8 @@ #include #include #include +#include +#include #include #include #include @@ -2620,7 +2622,7 @@ __setup("thash_entries=", set_thash_entries); void __init tcp_init(void) { struct sk_buff *skb = NULL; - unsigned long limit; + unsigned long nr_pages, limit; int order, i, max_share; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -2689,8 +2691,9 @@ void __init tcp_init(void) * is up to 1/2 at 256 MB, decreasing toward zero with the amount of * memory, with a floor of 128 pages. */ - limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); - limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + nr_pages = totalram_pages - totalhigh_pages; + limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); + limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); limit = max(limit, 128UL); sysctl_tcp_mem[0] = limit / 4 * 3; sysctl_tcp_mem[1] = limit; -- cgit v1.2.3 From db43a282d3ec92ea45109c5551fff3dcc5afef02 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Fri, 27 Jun 2008 17:27:21 -0700 Subject: tcp: fix for splice receive when used with software LRO If an skb has nr_frags set to zero but its frag_list is not empty (as it can happen if software LRO is enabled), and a previous tcp_read_sock has consumed the linear part of the skb, then __skb_splice_bits: (a) incorrectly reports an error and (b) forgets to update the offset to account for the linear part Any of the two problems will cause the subsequent __skb_splice_bits call (the one that handles the frag_list skbs) to either skip data, or, if the unadjusted offset is greater 
than the size of the next skb in the frag_list, make tcp_splice_read loop forever. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- net/core/skbuff.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1e556d31211..366621610e7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1292,12 +1292,14 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, { unsigned int nr_pages = spd->nr_pages; unsigned int poff, plen, len, toff, tlen; - int headlen, seg; + int headlen, seg, error = 0; toff = *offset; tlen = *total_len; - if (!tlen) + if (!tlen) { + error = 1; goto err; + } /* * if the offset is greater than the linear part, go directly to @@ -1339,7 +1341,8 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, * just jump directly to update and return, no point * in going over fragments when the output is full. */ - if (spd_fill_page(spd, virt_to_page(p), plen, poff, skb)) + error = spd_fill_page(spd, virt_to_page(p), plen, poff, skb); + if (error) goto done; tlen -= plen; @@ -1369,7 +1372,8 @@ map_frag: if (!plen) break; - if (spd_fill_page(spd, f->page, plen, poff, skb)) + error = spd_fill_page(spd, f->page, plen, poff, skb); + if (error) break; tlen -= plen; @@ -1382,7 +1386,10 @@ done: return 0; } err: - return 1; + /* update the offset to reflect the linear part skip, if any */ + if (!error) + *offset = toff; + return error; } /* -- cgit v1.2.3 From ec0d215f9420564fc8286dcf93d2d068bb53a07e Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Fri, 27 Jun 2008 19:34:18 -0700 Subject: af_unix: fix 'poll for write'/connected DGRAM sockets For n:1 'datagram connections' (eg /dev/log), the unix_dgram_sendmsg routine implements a form of receiver-imposed flow control by comparing the length of the receive queue of the 'peer socket' with the max_ack_backlog value stored in the corresponding sock structure, either
blocking the thread which caused the send-routine to be called or returning EAGAIN. This routine is used by both SOCK_DGRAM and SOCK_SEQPACKET sockets. The poll-implementation for these socket types is datagram_poll from core/datagram.c. A socket is deemed to be writeable by this routine when the memory presently consumed by datagrams owned by it is less than the configured socket send buffer size. This is always wrong for PF_UNIX non-stream sockets connected to server sockets dealing with (potentially) multiple clients if the abovementioned receive queue is currently considered to be full. 'poll' will then return, indicating that the socket is writeable, but a subsequent write results in EAGAIN, effectively causing a (usual) application to 'poll for writeability by repeated send request with O_NONBLOCK set' until it has consumed its time quantum. The change below uses a suitably modified variant of the datagram_poll routines for both types of PF_UNIX sockets, which tests if the recv-queue of the peer a socket is connected to is presently considered to be 'full' as part of the 'is this socket writeable'-checking code. The socket being polled is additionally put onto the peer_wait wait queue associated with its peer, because the unix_dgram_recvmsg routine does a wake up on this queue after a datagram was received and the 'other wakeup call' is done implicitly as part of skb destruction, meaning, a process blocked in poll because of a full peer receive queue could otherwise sleep forever if no datagram owned by its socket was already sitting on this queue. Among this change is a small (inline) helper routine named 'unix_recvq_full', which consolidates the actual testing code (in three different places) into a single location. Signed-off-by: Rainer Weikusat Signed-off-by: David S.
Miller --- net/unix/af_unix.c | 52 ++++++++++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 657835f227d..783317dacd3 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -487,8 +487,8 @@ static int unix_socketpair(struct socket *, struct socket *); static int unix_accept(struct socket *, struct socket *, int); static int unix_getname(struct socket *, struct sockaddr *, int *, int); static unsigned int unix_poll(struct file *, struct socket *, poll_table *); -static unsigned int unix_datagram_poll(struct file *, struct socket *, - poll_table *); +static unsigned int unix_dgram_poll(struct file *, struct socket *, + poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); static int unix_shutdown(struct socket *, int); static int unix_stream_sendmsg(struct kiocb *, struct socket *, @@ -534,7 +534,7 @@ static const struct proto_ops unix_dgram_ops = { .socketpair = unix_socketpair, .accept = sock_no_accept, .getname = unix_getname, - .poll = unix_datagram_poll, + .poll = unix_dgram_poll, .ioctl = unix_ioctl, .listen = sock_no_listen, .shutdown = unix_shutdown, @@ -555,7 +555,7 @@ static const struct proto_ops unix_seqpacket_ops = { .socketpair = unix_socketpair, .accept = unix_accept, .getname = unix_getname, - .poll = unix_datagram_poll, + .poll = unix_dgram_poll, .ioctl = unix_ioctl, .listen = unix_listen, .shutdown = unix_shutdown, @@ -1994,29 +1994,13 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl return mask; } -static unsigned int unix_datagram_poll(struct file *file, struct socket *sock, - poll_table *wait) +static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, + poll_table *wait) { - struct sock *sk = sock->sk, *peer; - unsigned int mask; + struct sock *sk = sock->sk, *other; + unsigned int mask, writable; poll_wait(file, sk->sk_sleep, wait); - - 
peer = unix_peer_get(sk); - if (peer) { - if (peer != sk) { - /* - * Writability of a connected socket additionally - * depends on the state of the receive queue of the - * peer. - */ - poll_wait(file, &unix_sk(peer)->peer_wait, wait); - } else { - sock_put(peer); - peer = NULL; - } - } - mask = 0; /* exceptional events? */ @@ -2042,14 +2026,26 @@ static unsigned int unix_datagram_poll(struct file *file, struct socket *sock, } /* writable? */ - if (unix_writable(sk) && !(peer && unix_recvq_full(peer))) + writable = unix_writable(sk); + if (writable) { + other = unix_peer_get(sk); + if (other) { + if (unix_peer(other) != sk) { + poll_wait(file, &unix_sk(other)->peer_wait, + wait); + if (unix_recvq_full(other)) + writable = 0; + } + + sock_put(other); + } + } + + if (writable) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); - if (peer) - sock_put(peer); - return mask; } -- cgit v1.2.3 From 5dbaec5dc6a4895db8bf9765a867418481ed7311 Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Fri, 27 Jun 2008 19:35:16 -0700 Subject: netdevice: Fix typo of dev_unicast_add() comment Signed-off-by: Wang Chen Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index c421a1f8f0b..56b46579ff4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2973,7 +2973,7 @@ EXPORT_SYMBOL(dev_unicast_delete); /** * dev_unicast_add - add a secondary unicast address * @dev: device - * @addr: address to delete + * @addr: address to add * @alen: length of @addr * * Add a secondary unicast address to the device or increase -- cgit v1.2.3 From 01e123d79a23000f85c4cfb12a957908c0b2c3d8 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 27 Jun 2008 19:51:35 -0700 Subject: pkt_sched: ERR_PTR() ususally encodes an negative errno, not positive. 
Note, in the following patch, 'err' is initialized as: int err = -ENOBUFS; Signed-off-by: WANG Cong Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index d355e5e47fe..13afa721439 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -468,7 +468,7 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops) return sch; errout: - return ERR_PTR(-err); + return ERR_PTR(err); } struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops, -- cgit v1.2.3 From ede16af4cdbd21fa15d4178beb7c6fcbcccd07e9 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 27 Jun 2008 19:54:05 -0700 Subject: pkt_sched: Remove CONFIG_NET_SCH_RR Commit d62733c8e437fdb58325617c4b3331769ba82d70 ([SCHED]: Qdisc changes and sch_rr added for multiqueue) added a NET_SCH_RR option that was unused since the code went unconditionally into sch_prio. Reported-by: Robert P. J. Day Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- net/sched/Kconfig | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'net') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 82adfe6447d..9437b27ff84 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -106,17 +106,6 @@ config NET_SCH_PRIO To compile this code as a module, choose M here: the module will be called sch_prio. -config NET_SCH_RR - tristate "Multi Band Round Robin Queuing (RR)" - select NET_SCH_PRIO - ---help--- - Say Y here if you want to use an n-band round robin packet - scheduler. - - The module uses sch_prio for its framework and is aliased as - sch_rr, so it will load sch_prio, although it is referred - to using sch_rr. 
- config NET_SCH_RED tristate "Random Early Detection (RED)" ---help--- -- cgit v1.2.3 From 7be87351a1f6430426e88b4fcde353ab3330caff Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 27 Jun 2008 20:00:19 -0700 Subject: tcp: /proc/net/tcp rto,ato values not scaled properly (v2) I found another case where we are sending information to userspace in the wrong HZ scale. This should have been fixed back in 2.5 :-( This means an ABI change but as it stands there is no way for an application like ss to get the right value. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 6 +++--- net/ipv6/tcp_ipv6.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 12695be2c25..ffe869ac1bc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2291,7 +2291,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) } seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " - "%08X %5d %8d %lu %d %p %u %u %u %u %d%n", + "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n", i, src, srcp, dest, destp, sk->sk_state, tp->write_seq - tp->snd_una, sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog : @@ -2303,8 +2303,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) icsk->icsk_probes_out, sock_i_ino(sk), atomic_read(&sk->sk_refcnt), sk, - icsk->icsk_rto, - icsk->icsk_ack.ato, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, tp->snd_ssthresh >= 0xFFFF ? 
-1 : tp->snd_ssthresh, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cb46749d4c3..40ea9c36d24 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2036,7 +2036,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n", + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %lu %lu %u %u %d\n", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, @@ -2052,8 +2052,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) icsk->icsk_probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, - icsk->icsk_rto, - icsk->icsk_ack.ato, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong, tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh ); -- cgit v1.2.3 From 10b595aff138961b520bfed51d664fd99980f6e9 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Fri, 27 Jun 2008 20:02:14 -0700 Subject: netlink: Fix some doc comments in net/netlink/attr.c Fix some doc comments to match function and attribute names in net/netlink/attr.c. Signed-off-by: Julius Volz Signed-off-by: David S. Miller --- net/netlink/attr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netlink/attr.c b/net/netlink/attr.c index 47bbf45ae5d..2d106cfe1d2 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -132,6 +132,7 @@ errout: * @maxtype: maximum attribute type to be expected * @head: head of attribute stream * @len: length of attribute stream + * @policy: validation policy * * Parses a stream of attributes and stores a pointer to each attribute in * the tb array accessable via the attribute type. 
Attributes with a type @@ -194,7 +195,7 @@ struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) /** * nla_strlcpy - Copy string attribute payload into a sized buffer * @dst: where to copy the string to - * @src: attribute to copy the string from + * @nla: attribute to copy the string from * @dstsize: size of destination buffer * * Copies at most dstsize - 1 bytes into the destination buffer. @@ -340,9 +341,9 @@ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) } /** - * nla_reserve - reserve room for attribute without header + * nla_reserve_nohdr - reserve room for attribute without header * @skb: socket buffer to reserve room on - * @len: length of attribute payload + * @attrlen: length of attribute payload * * Reserves room for attribute payload without a header. * -- cgit v1.2.3 From 9a375803feaadb6c34e0807bd9325885dcca5c00 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 27 Jun 2008 20:06:08 -0700 Subject: inet fragments: fix race between inet_frag_find and inet_frag_secret_rebuild The problem is that while we work w/o the inet_frags.lock even read-locked the secret rebuild timer may occur (on another CPU, since BHs are still disabled in the inet_frag_find) and change the rnd seed for ipv4/6 fragments. It was caused by my patch fd9e63544cac30a34c951f0ec958038f0529e244 ([INET]: Omit double hash calculations in xxx_frag_intern) late in the 2.6.24 kernel, so this should probably be queued to -stable. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv4/inet_fragment.c | 16 +++++++++++----- net/ipv4/ip_fragment.c | 2 ++ net/ipv6/netfilter/nf_conntrack_reasm.c | 3 ++- net/ipv6/reassembly.c | 2 ++ 4 files changed, 17 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 4ed429bd595..0546a0bc97e 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -192,14 +192,21 @@ EXPORT_SYMBOL(inet_frag_evictor); static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, struct inet_frag_queue *qp_in, struct inet_frags *f, - unsigned int hash, void *arg) + void *arg) { struct inet_frag_queue *qp; #ifdef CONFIG_SMP struct hlist_node *n; #endif + unsigned int hash; write_lock(&f->lock); + /* + * While we stayed w/o the lock other CPU could update + * the rnd seed, so we need to re-calculate the hash + * chain. Fortunatelly the qp_in can be used to get one. + */ + hash = f->hashfn(qp_in); #ifdef CONFIG_SMP /* With SMP race we have to recheck hash table, because * such entry could be created on other cpu, while we @@ -247,7 +254,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, } static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, - struct inet_frags *f, void *arg, unsigned int hash) + struct inet_frags *f, void *arg) { struct inet_frag_queue *q; @@ -255,7 +262,7 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, if (q == NULL) return NULL; - return inet_frag_intern(nf, q, f, hash, arg); + return inet_frag_intern(nf, q, f, arg); } struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, @@ -264,7 +271,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frag_queue *q; struct hlist_node *n; - read_lock(&f->lock); hlist_for_each_entry(q, n, &f->hash[hash], list) { if (q->net == nf && f->match(q, key)) { atomic_inc(&q->refcnt); @@ -274,6 +280,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, } 
read_unlock(&f->lock); - return inet_frag_create(nf, f, key, hash); + return inet_frag_create(nf, f, key); } EXPORT_SYMBOL(inet_frag_find); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cd6ce6ac635..37221f65915 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -229,6 +229,8 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) arg.iph = iph; arg.user = user; + + read_lock(&ip4_frags.lock); hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index e65e26e210e..cf20bc4fd60 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -207,9 +207,10 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) arg.id = id; arg.src = src; arg.dst = dst; + + read_lock_bh(&nf_frags.lock); hash = ip6qhashfn(id, src, dst); - local_bh_disable(); q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); local_bh_enable(); if (q == NULL) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 798cabc7535..a60d7d12971 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -247,6 +247,8 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, arg.id = id; arg.src = src; arg.dst = dst; + + read_lock(&ip6_frags.lock); hash = ip6qhashfn(id, src, dst); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); -- cgit v1.2.3 From 251a4b320f2352598f84e4452ab538aa8064af52 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 27 Jun 2008 20:09:00 -0700 Subject: net/inet_lro: remove setting skb->ip_summed when not LRO-able When an SKB cannot be chained to a session, the current code attempts to "restore" its ip_summed field from lro_mgr->ip_summed. 
However, lro_mgr->ip_summed does not hold the original value; in fact, we'd better not touch skb->ip_summed since it is not modified by the code in the path leading to a failure to chain it. Also use a clearer comment to describe the ip_summed field of struct net_lro_mgr. Issue raised by Or Gerlitz Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- net/ipv4/inet_lro.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index 4a4d49fca1f..cfd034a2b96 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -383,8 +383,7 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, out2: /* send aggregated SKBs to stack */ lro_flush(lro_mgr, lro_desc); -out: /* Original SKB has to be posted to stack */ - skb->ip_summed = lro_mgr->ip_summed; +out: return 1; } -- cgit v1.2.3 From 59d88c00cafe5192b058abf4f3ce17c2e27d1c09 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 27 Jun 2008 20:12:32 -0700 Subject: netlabel: Fix a problem when dumping the default IPv6 static labels There is a missing "!" in a conditional statement which is causing entries to be skipped when dumping the default IPv6 static label entries. This can be demonstrated by running the following: # netlabelctl unlbl add default address:::1 \ label:system_u:object_r:unlabeled_t:s0 # netlabelctl -p unlbl list ... you will notice that the entry for the IPv6 localhost address is not displayed but does exist (works correctly, causes collisions when attempting to add duplicate entries, etc.). Signed-off-by: Paul Moore Signed-off-by: David S. 
Miller --- net/netlabel/netlabel_unlabeled.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 0099da5b259..52b2611a6eb 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1534,7 +1534,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, } } list_for_each_entry_rcu(addr6, &iface->addr6_list, list) { - if (addr6->valid || iter_addr6++ < skip_addr6) + if (!addr6->valid || iter_addr6++ < skip_addr6) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, iface, -- cgit v1.2.3 From d420895efb259a78dda50f95289571faa6e10e41 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 27 Jun 2008 20:14:54 -0700 Subject: ipv6 route: Convert rt6_device_match() to use RT6_LOOKUP_F_xxx flags. The commit 77d16f450ae0452d7d4b009f78debb1294fb435c ("[IPV6] ROUTE: Unify RT6_F_xxx and RT6_SELECT_F_xxx flags") intended to pass various routing lookup hints around RT6_LOOKUP_F_xxx flags, but conversion was missing for rt6_device_match(). Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d1f3e19b06c..7ff687020fa 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -240,7 +240,7 @@ static inline int rt6_need_strict(struct in6_addr *daddr) static inline struct rt6_info *rt6_device_match(struct net *net, struct rt6_info *rt, int oif, - int strict) + int flags) { struct rt6_info *local = NULL; struct rt6_info *sprt; @@ -253,7 +253,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net, if (dev->flags & IFF_LOOPBACK) { if (sprt->rt6i_idev == NULL || sprt->rt6i_idev->dev->ifindex != oif) { - if (strict && oif) + if (flags & RT6_LOOKUP_F_IFACE && oif) continue; if (local && (!oif || local->rt6i_idev->dev->ifindex == oif)) @@ -266,7 +266,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net, if (local) return local; - if (strict) + if (flags & RT6_LOOKUP_F_IFACE) return net->ipv6.ip6_null_entry; } return rt; -- cgit v1.2.3 From 84ebe1cdae56707b9aa1b40ae5aa7d817ba745f5 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 30 Jun 2008 12:41:30 -0700 Subject: netfilter: nf_conntrack_tcp: fixing to check the lower bound of valid ACK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lost connections was reported by Thomas Bätzler (running 2.6.25 kernel) on the netfilter mailing list (see the thread "Weird nat/conntrack Problem with PASV FTP upload"). He provided tcpdump recordings which helped to find a long lingering bug in conntrack. In TCP connection tracking, checking the lower bound of valid ACK could lead to mark valid packets as INVALID because: - We have got a "higher or equal" inequality, but the test checked the "higher" condition only; fixed. 
- If the packet contains a SACK option, it could occur that the ACK value was before the left edge of our (S)ACK "window": if a previous packet from the other party intersected the right edge of the window of the receiver, we could move forward the window parameters beyond accepting a valid ack. Therefore in this patch we check the rightmost SACK edge instead of the ACK value in the lower bound of valid (S)ACK test. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_proto_tcp.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index ba94004fe32..271cd01d57a 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -331,12 +331,13 @@ static unsigned int get_conntrack_index(const struct tcphdr *tcph) I. Upper bound for valid data: seq <= sender.td_maxend II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin - III. Upper bound for valid ack: sack <= receiver.td_end - IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW + III. Upper bound for valid (s)ack: sack <= receiver.td_end + IV. Lower bound for valid (s)ack: sack >= receiver.td_end - MAXACKWINDOW - where sack is the highest right edge of sack block found in the packet. + where sack is the highest right edge of sack block found in the packet + or ack in the case of packet without SACK option. - The upper bound limit for a valid ack is not ignored - + The upper bound limit for a valid (s)ack is not ignored - we doesn't have to deal with fragments. 
*/ @@ -606,12 +607,12 @@ static bool tcp_in_window(const struct nf_conn *ct, before(seq, sender->td_maxend + 1), after(end, sender->td_end - receiver->td_maxwin - 1), before(sack, receiver->td_end + 1), - after(ack, receiver->td_end - MAXACKWINDOW(sender))); + after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)); if (before(seq, sender->td_maxend + 1) && after(end, sender->td_end - receiver->td_maxwin - 1) && before(sack, receiver->td_end + 1) && - after(ack, receiver->td_end - MAXACKWINDOW(sender))) { + after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) { /* * Take into account window scaling (RFC 1323). */ -- cgit v1.2.3 From 23976efedd5ecb420b87455787c537eb4aed1981 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sat, 28 Jun 2008 02:50:13 +0300 Subject: mac80211: don't accept WEP keys other than WEP40 and WEP104 This patch makes mac80211 refuse a WEP key whose length is not WEP40 nor WEP104. Signed-off-by: Emmanuel Grumbach Signed-off-by: Tomas Winkler Signed-off-by: John W. Linville --- net/mac80211/wext.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 6106cb79060..e8404212ad5 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -95,6 +95,13 @@ static int ieee80211_set_encryption(struct net_device *dev, u8 *sta_addr, } } + if (alg == ALG_WEP && + key_len != LEN_WEP40 && key_len != LEN_WEP104) { + ieee80211_key_free(key); + err = -EINVAL; + goto out_unlock; + } + ieee80211_key_link(key, sdata, sta); if (set_tx_key || (!sta && !sdata->default_key && key)) -- cgit v1.2.3 From ff31ab56c0e900235f653e375fc3b01ba2d8d6a3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 1 Jul 2008 19:52:38 -0700 Subject: net-sched: change tcf_destroy_chain() to clear start of filter list Pass double tcf_proto pointers to tcf_destroy_chain() to make it clear the start of the filter list for more consistency. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/mac80211/wme.c | 3 +-- net/sched/sch_api.c | 6 +++--- net/sched/sch_atm.c | 5 ++--- net/sched/sch_cbq.c | 8 +++----- net/sched/sch_dsmark.c | 2 +- net/sched/sch_hfsc.c | 2 +- net/sched/sch_htb.c | 4 ++-- net/sched/sch_ingress.c | 2 +- net/sched/sch_prio.c | 2 +- net/sched/sch_sfq.c | 2 +- 10 files changed, 16 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 635b996c8c3..5d09e8698b5 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -323,8 +323,7 @@ static void wme_qdiscop_destroy(struct Qdisc* qd) struct ieee80211_hw *hw = &local->hw; int queue; - tcf_destroy_chain(q->filter_list); - q->filter_list = NULL; + tcf_destroy_chain(&q->filter_list); for (queue=0; queue < hw->queues; queue++) { skb_queue_purge(&q->requeued[queue]); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c40773cdbe4..10f01ad0438 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1252,12 +1252,12 @@ void tcf_destroy(struct tcf_proto *tp) kfree(tp); } -void tcf_destroy_chain(struct tcf_proto *fl) +void tcf_destroy_chain(struct tcf_proto **fl) { struct tcf_proto *tp; - while ((tp = fl) != NULL) { - fl = tp->next; + while ((tp = *fl) != NULL) { + *fl = tp->next; tcf_destroy(tp); } } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 33527341638..8e5f70ba3a1 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -160,7 +160,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl) *prev = flow->next; pr_debug("atm_tc_put: qdisc %p\n", flow->q); qdisc_destroy(flow->q); - tcf_destroy_chain(flow->filter_list); + tcf_destroy_chain(&flow->filter_list); if (flow->sock) { pr_debug("atm_tc_put: f_count %d\n", file_count(flow->sock->file)); @@ -588,8 +588,7 @@ static void atm_tc_destroy(struct Qdisc *sch) pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); /* races ? 
*/ while ((flow = p->flows)) { - tcf_destroy_chain(flow->filter_list); - flow->filter_list = NULL; + tcf_destroy_chain(&flow->filter_list); if (flow->ref > 1) printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, flow->ref); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 09969c1fbc0..2a3c97f7dc6 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1704,7 +1704,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) BUG_TRAP(!cl->filters); - tcf_destroy_chain(cl->filter_list); + tcf_destroy_chain(&cl->filter_list); qdisc_destroy(cl->q); qdisc_put_rtab(cl->R_tab); gen_kill_estimator(&cl->bstats, &cl->rate_est); @@ -1728,10 +1728,8 @@ cbq_destroy(struct Qdisc* sch) * be bound to classes which have been destroyed already. --TGR '04 */ for (h = 0; h < 16; h++) { - for (cl = q->classes[h]; cl; cl = cl->next) { - tcf_destroy_chain(cl->filter_list); - cl->filter_list = NULL; - } + for (cl = q->classes[h]; cl; cl = cl->next) + tcf_destroy_chain(&cl->filter_list); } for (h = 0; h < 16; h++) { struct cbq_class *next; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 64465bacbe7..c4c1317cd47 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -416,7 +416,7 @@ static void dsmark_destroy(struct Qdisc *sch) pr_debug("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p); - tcf_destroy_chain(p->filter_list); + tcf_destroy_chain(&p->filter_list); qdisc_destroy(p->q); kfree(p->mask); } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index fdfaa3fcc16..eca83a3be29 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1123,7 +1123,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) { struct hfsc_sched *q = qdisc_priv(sch); - tcf_destroy_chain(cl->filter_list); + tcf_destroy_chain(&cl->filter_list); qdisc_destroy(cl->qdisc); gen_kill_estimator(&cl->bstats, &cl->rate_est); if (cl != &q->root) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 6807c97985a..3fb58f428f7 100644 
--- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1238,7 +1238,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) qdisc_put_rtab(cl->rate); qdisc_put_rtab(cl->ceil); - tcf_destroy_chain(cl->filter_list); + tcf_destroy_chain(&cl->filter_list); while (!list_empty(&cl->children)) htb_destroy_class(sch, list_entry(cl->children.next, @@ -1267,7 +1267,7 @@ static void htb_destroy(struct Qdisc *sch) and surprisingly it worked in 2.4. But it must precede it because filter need its target class alive to be able to call unbind_filter on it (without Oops). */ - tcf_destroy_chain(q->filter_list); + tcf_destroy_chain(&q->filter_list); while (!list_empty(&q->root)) htb_destroy_class(sch, list_entry(q->root.next, diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 274b1ddb160..956c80ad596 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -104,7 +104,7 @@ static void ingress_destroy(struct Qdisc *sch) { struct ingress_qdisc_data *p = qdisc_priv(sch); - tcf_destroy_chain(p->filter_list); + tcf_destroy_chain(&p->filter_list); } static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 4aa2b45dad0..5532f1031ab 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -219,7 +219,7 @@ prio_destroy(struct Qdisc* sch) int prio; struct prio_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(q->filter_list); + tcf_destroy_chain(&q->filter_list); for (prio=0; priobands; prio++) qdisc_destroy(q->queues[prio]); } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index f0463d757a9..6a97afbfb95 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -520,7 +520,7 @@ static void sfq_destroy(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(q->filter_list); + tcf_destroy_chain(&q->filter_list); q->perturb_period = 0; del_timer_sync(&q->perturb_timer); } -- cgit v1.2.3 From 
a4aebb83cf0da0363684f1c339f7e6149a3e74c1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 1 Jul 2008 19:53:09 -0700 Subject: net-sched: fix filter destruction in atm/hfsc qdisc destruction Filters need to be destroyed before beginning to destroy classes since the destination class needs to still be alive to unbind the filter. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_atm.c | 4 +++- net/sched/sch_hfsc.c | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 8e5f70ba3a1..db0e23ae85f 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -586,9 +586,11 @@ static void atm_tc_destroy(struct Qdisc *sch) struct atm_flow_data *flow; pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); + for (flow = p->flows; flow; flow = flow->next) + tcf_destroy_chain(&flow->filter_list); + /* races ? */ while ((flow = p->flows)) { - tcf_destroy_chain(&flow->filter_list); if (flow->ref > 1) printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, flow->ref); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index eca83a3be29..e817aa00441 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1540,6 +1540,10 @@ hfsc_destroy_qdisc(struct Qdisc *sch) struct hfsc_class *cl, *next; unsigned int i; + for (i = 0; i < HFSC_HSIZE; i++) { + list_for_each_entry(cl, &q->clhash[i], hlist) + tcf_destroy_chain(&cl->filter_list); + } for (i = 0; i < HFSC_HSIZE; i++) { list_for_each_entry_safe(cl, next, &q->clhash[i], hlist) hfsc_destroy_class(sch, cl); -- cgit v1.2.3 From 8487460720fd03a0f4ecd032f017b0a8468028da Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Tue, 1 Jul 2008 19:55:09 -0700 Subject: netlink: Unneeded local variable We already have a variable, which has the same capability. Signed-off-by: Wang Chen Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9b97f8006c9..349aba18955 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -886,7 +886,7 @@ retry: return netlink_unicast_kernel(sk, skb); if (sk_filter(sk, skb)) { - int err = skb->len; + err = skb->len; kfree_skb(skb); sock_put(sk); return err; -- cgit v1.2.3 From 8fde8a076940969d32805b853efdce8b988d7dda Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Tue, 1 Jul 2008 19:55:40 -0700 Subject: net: Tyop of sk_filter() comment Parameter "needlock" no longer exists. Signed-off-by: Wang Chen Signed-off-by: David S. Miller --- net/core/filter.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 4f8369729a4..df374435583 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -68,7 +68,6 @@ static inline void *load_pointer(struct sk_buff *skb, int k, * sk_filter - run a packet through a socket filter * @sk: sock associated with &sk_buff * @skb: buffer to filter - * @needlock: set to 1 if the sock is not locked by caller. * * Run the filter code and then cut skb->data to correct size returned by * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller -- cgit v1.2.3 From 93b3cff9915322d6fa36bac0064714a7076230e4 Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Tue, 1 Jul 2008 19:57:19 -0700 Subject: netdevice: Fix wrong string handle in kernel command line parsing v1->v2: Use strlcpy() to ensure s[i].name be null-termination. 1. In netdev_boot_setup_add(), a long name will leak. ex. : dev=21,0x1234,0x1234,0x2345,eth123456789verylongname......... 2. In netdev_boot_setup_check(), mismatch will happen if s[i].name is a substring of dev->name. ex. : dev=...eth1 dev=...eth11 [ With feedback from Ben Hutchings. ] Signed-off-by: Wang Chen Signed-off-by: David S. 
Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 56b46579ff4..fca23a3bf12 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -454,7 +454,7 @@ static int netdev_boot_setup_add(char *name, struct ifmap *map) for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { memset(s[i].name, 0, sizeof(s[i].name)); - strcpy(s[i].name, name); + strlcpy(s[i].name, name, IFNAMSIZ); memcpy(&s[i].map, map, sizeof(s[i].map)); break; } @@ -479,7 +479,7 @@ int netdev_boot_setup_check(struct net_device *dev) for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && - !strncmp(dev->name, s[i].name, strlen(s[i].name))) { + !strcmp(dev->name, s[i].name)) { dev->irq = s[i].map.irq; dev->base_addr = s[i].map.base_addr; dev->mem_start = s[i].map.mem_start; -- cgit v1.2.3 From 2fe195cfe3e53c144d247b2768e37732e8eae4d8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 1 Jul 2008 19:59:37 -0700 Subject: net: fib_rules: fix error code for unsupported families The errno code returned must be negative. Fixes "RTNETLINK answers: Unknown error 18446744073709551519". Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/core/fib_rules.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e3e9ab0f74e..277a2302eb3 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -226,7 +226,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { - err = EAFNOSUPPORT; + err = -EAFNOSUPPORT; goto errout; } @@ -365,7 +365,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { - err = EAFNOSUPPORT; + err = -EAFNOSUPPORT; goto errout; } -- cgit v1.2.3 From 81b23b4a7acd9b37a269c62d02479d4f645dd20a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 3 Jul 2008 03:22:02 -0700 Subject: tcp: net/ipv4/tcp.c needs linux/scatterlist.h alpha: net/ipv4/tcp.c: In function 'tcp_calc_md5_hash': net/ipv4/tcp.c:2479: error: implicit declaration of function 'sg_init_table' net/ipv4/tcp.c:2482: error: implicit declaration of function 'sg_set_buf' net/ipv4/tcp.c:2507: error: implicit declaration of function 'sg_mark_end' Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 850825dc86e..de53024664e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -255,6 +255,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 374e7b59498ce0785b3727794b351221528a5159 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Thu, 3 Jul 2008 03:31:21 -0700 Subject: tcp: fix a size_t < 0 comparison in tcp_read_sock should be of type int (not size_t) since recv_actor can return negative values and it is also used in a < 0 comparison. Signed-off-by: Octavian Purdila Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index de53024664e..1d723de1868 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1209,7 +1209,8 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, return -ENOTCONN; while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) { if (offset < skb->len) { - size_t used, len; + int used; + size_t len; len = skb->len - offset; /* Stop reading if we hit a patch of urgent data */ -- cgit v1.2.3 From ab1b20467cd2214ad89a95d007047cd2a6b5bf5d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 3 Jul 2008 03:53:42 -0700 Subject: bridge: fix use-after-free in br_cleanup_bridges() Unregistering a bridge device may cause virtual devices stacked on the bridge, like vlan or macvlan devices, to be unregistered as well. br_cleanup_bridges() uses for_each_netdev_safe() to iterate over all devices during cleanup. This is not enough however, if one of the additionally unregistered devices is next in the list to the bridge device, it will get freed as well and the iteration continues on the freed element. Restart iteration after each bridge device removal from the beginning to fix this, similar to what rtnl_link_unregister() does. Signed-off-by: Patrick McHardy Acked-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/bridge/br_if.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index c2397f503b0..f38cc5317b8 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -442,12 +442,16 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) void __exit br_cleanup_bridges(void) { - struct net_device *dev, *nxt; + struct net_device *dev; rtnl_lock(); - for_each_netdev_safe(&init_net, dev, nxt) - if (dev->priv_flags & IFF_EBRIDGE) +restart: + for_each_netdev(&init_net, dev) { + if (dev->priv_flags & IFF_EBRIDGE) { del_br(dev->priv); + goto restart; + } + } rtnl_unlock(); } -- cgit v1.2.3 From b620754bfeb8b0e0c6622b03d5ee2f1af1d3082f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 3 Jul 2008 15:26:35 -0400 Subject: svcrpc: fix handling of garbage args To return garbage_args, the accept_stat must be 0, and we must have a verifier. So we shouldn't be resetting the write pointer as we reject the call. Also, we must add the two placeholder words here regardless of success of the unwrap, to ensure the output buffer is left in a consistent state for svcauth_gss_release(). This fixes a BUG() in svcauth_gss.c:svcauth_gss_release(). Thanks to Aime Le Rouzic for bug report, debugging help, and testing. Signed-off-by: J. Bruce Fields Tested-by: Aime Le Rouzic Signed-off-by: Linus Torvalds --- net/sunrpc/auth_gss/svcauth_gss.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 5905d56737d..81ae3d62a0c 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1144,20 +1144,20 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_SVC_NONE: break; case RPC_GSS_SVC_INTEGRITY: + /* placeholders for length and seq. 
number: */ + svc_putnl(resv, 0); + svc_putnl(resv, 0); if (unwrap_integ_data(&rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; + break; + case RPC_GSS_SVC_PRIVACY: /* placeholders for length and seq. number: */ svc_putnl(resv, 0); svc_putnl(resv, 0); - break; - case RPC_GSS_SVC_PRIVACY: if (unwrap_priv_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; - /* placeholders for length and seq. number: */ - svc_putnl(resv, 0); - svc_putnl(resv, 0); break; default: goto auth_err; @@ -1170,8 +1170,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) goto out; } garbage_args: - /* Restore write pointer to its original value: */ - xdr_ressize_check(rqstp, reject_stat); ret = SVC_GARBAGE; goto out; auth_err: -- cgit v1.2.3 From 7f2d38eb7a42bea1c1df51bbdaa2ca0f0bdda07f Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sat, 5 Jul 2008 23:38:43 -0700 Subject: can: add sanity checks Even though the CAN netlayer only deals with CAN netdevices, the netlayer interface to the userspace and to the device layer should perform some sanity checks. This patch adds several sanity checks that mainly prevent userspace apps to send broken content into the system that may be misinterpreted by some other userspace application. Signed-off-by: Oliver Hartkopp Signed-off-by: Urs Thuermann Acked-by: Andre Naujoks Signed-off-by: David S. 
Miller --- net/can/af_can.c | 10 ++++++++++ net/can/bcm.c | 23 +++++++++++++++++++---- net/can/raw.c | 3 +++ 3 files changed, 32 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/can/af_can.c b/net/can/af_can.c index 7e8ca283645..484bbf6dd03 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -205,12 +205,19 @@ static int can_create(struct net *net, struct socket *sock, int protocol) * -ENOBUFS on full driver queue (see net_xmit_errno()) * -ENOMEM when local loopback failed at calling skb_clone() * -EPERM when trying to send on a non-CAN interface + * -EINVAL when the skb->data does not contain a valid CAN frame */ int can_send(struct sk_buff *skb, int loop) { struct sk_buff *newskb = NULL; + struct can_frame *cf = (struct can_frame *)skb->data; int err; + if (skb->len != sizeof(struct can_frame) || cf->can_dlc > 8) { + kfree_skb(skb); + return -EINVAL; + } + if (skb->dev->type != ARPHRD_CAN) { kfree_skb(skb); return -EPERM; @@ -605,6 +612,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct dev_rcv_lists *d; + struct can_frame *cf = (struct can_frame *)skb->data; int matches; if (dev->type != ARPHRD_CAN || dev_net(dev) != &init_net) { @@ -612,6 +620,8 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, return 0; } + BUG_ON(skb->len != sizeof(struct can_frame) || cf->can_dlc > 8); + /* update statistics */ can_stats.rx_frames++; can_stats.rx_frames_delta++; diff --git a/net/can/bcm.c b/net/can/bcm.c index d9a3a9d13be..72c2ce904f8 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -298,7 +298,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, if (head->nframes) { /* can_frames starting here */ - firstframe = (struct can_frame *) skb_tail_pointer(skb); + firstframe = (struct can_frame *)skb_tail_pointer(skb); memcpy(skb_put(skb, datalen), frames, datalen); @@ -826,6 +826,10 @@ static int bcm_tx_setup(struct bcm_msg_head 
*msg_head, struct msghdr *msg, for (i = 0; i < msg_head->nframes; i++) { err = memcpy_fromiovec((u8 *)&op->frames[i], msg->msg_iov, CFSIZ); + + if (op->frames[i].can_dlc > 8) + err = -EINVAL; + if (err < 0) return err; @@ -858,6 +862,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, for (i = 0; i < msg_head->nframes; i++) { err = memcpy_fromiovec((u8 *)&op->frames[i], msg->msg_iov, CFSIZ); + + if (op->frames[i].can_dlc > 8) + err = -EINVAL; + if (err < 0) { if (op->frames != &op->sframe) kfree(op->frames); @@ -1164,9 +1172,12 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) skb->dev = dev; skb->sk = sk; - can_send(skb, 1); /* send with loopback */ + err = can_send(skb, 1); /* send with loopback */ dev_put(dev); + if (err) + return err; + return CFSIZ + MHSIZ; } @@ -1185,6 +1196,10 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, if (!bo->bound) return -ENOTCONN; + /* check for valid message length from userspace */ + if (size < MHSIZ || (size - MHSIZ) % CFSIZ) + return -EINVAL; + /* check for alternative ifindex for this bcm_op */ if (!ifindex && msg->msg_name) { @@ -1259,8 +1274,8 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, break; case TX_SEND: - /* we need at least one can_frame */ - if (msg_head.nframes < 1) + /* we need exactly one can_frame behind the msg head */ + if ((msg_head.nframes != 1) || (size != CFSIZ + MHSIZ)) ret = -EINVAL; else ret = bcm_tx_send(msg, ifindex, sk); diff --git a/net/can/raw.c b/net/can/raw.c index 69877b8e7e9..3e46ee36a1a 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -632,6 +632,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, } else ifindex = ro->ifindex; + if (size != sizeof(struct can_frame)) + return -EINVAL; + dev = dev_get_by_index(&init_net, ifindex); if (!dev) return -ENXIO; -- cgit v1.2.3 From 8db9369ff92b1cd93566baadd8bd2992f025fdd0 Mon Sep 17 00:00:00 2001 From: Guy Cohen Date: Thu, 3 Jul 2008 19:56:13 
+0300 Subject: mac80211: move netif_carrier_on to after ieee80211_bss_info_change_notify Putting netif_carrier_on before configuring the driver/device with the new association state may cause a race (tx frames may be sent before configuration is done) Signed-off-by: Guy Cohen Signed-off-by: Tomas Winkler Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 4d2b582dd05..a7018540ae8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -547,15 +547,14 @@ static void ieee80211_set_associated(struct net_device *dev, sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf; } - netif_carrier_on(dev); ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN); ieee80211_sta_send_associnfo(dev, ifsta); } else { + netif_carrier_off(dev); ieee80211_sta_tear_down_BA_sessions(dev, ifsta->bssid); ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; - netif_carrier_off(dev); ieee80211_reset_erp_info(dev); sdata->bss_conf.assoc_ht = 0; @@ -569,6 +568,10 @@ static void ieee80211_set_associated(struct net_device *dev, sdata->bss_conf.assoc = assoc; ieee80211_bss_info_change_notify(sdata, changed); + + if (assoc) + netif_carrier_on(dev); + wrqu.ap_addr.sa_family = ARPHRD_ETHER; wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); } -- cgit v1.2.3 From ea0c925370b33baf168bb33782c613468c1aa119 Mon Sep 17 00:00:00 2001 From: Ivo van Doorn Date: Thu, 3 Jul 2008 19:02:44 +0200 Subject: mac80211: Only flush workqueue when last interface was removed Currently the ieee80211_hw->workqueue is flushed each time an interface is being removed. However most scheduled work is not interface specific but device specific, for example things like periodic work for link tuners. 
This patch will move the flush_workqueue() call to directly behind the call to ops->stop() to make sure the workqueue is only flushed when all interfaces are gone and there really shouldn't be any scheduled work in the drivers left. Signed-off-by: Ivo van Doorn Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 98c0b5e56ec..df0836ff1a2 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -530,8 +530,6 @@ static int ieee80211_stop(struct net_device *dev) local->sta_hw_scanning = 0; } - flush_workqueue(local->hw.workqueue); - sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; kfree(sdata->u.sta.extra_ie); sdata->u.sta.extra_ie = NULL; @@ -555,6 +553,8 @@ static int ieee80211_stop(struct net_device *dev) ieee80211_led_radio(local, 0); + flush_workqueue(local->hw.workqueue); + tasklet_disable(&local->tx_pending_tasklet); tasklet_disable(&local->tasklet); } -- cgit v1.2.3 From 6e43829bb69bf1d584a592075f1357590eb49b1a Mon Sep 17 00:00:00 2001 From: Vladimir Koutny Date: Mon, 7 Jul 2008 14:23:01 +0200 Subject: mac80211: don't report selected IBSS when not found Don't report a 'selected' IBSS in sta_find_ibss when none was found. Signed-off-by: Vladimir Koutny Signed-off-by: John W. 
Linville --- net/mac80211/mlme.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a7018540ae8..b404537c0bc 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3614,8 +3614,10 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, spin_unlock_bh(&local->sta_bss_lock); #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG " sta_find_ibss: selected %s current " - "%s\n", print_mac(mac, bssid), print_mac(mac2, ifsta->bssid)); + if (found) + printk(KERN_DEBUG " sta_find_ibss: selected %s current " + "%s\n", print_mac(mac, bssid), + print_mac(mac2, ifsta->bssid)); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0 && (bss = ieee80211_rx_bss_get(dev, bssid, -- cgit v1.2.3 From 3888e9efc9bf05e60504d2a420be7a527ff43678 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 8 Jul 2008 02:28:39 -0700 Subject: sctp: Mark the tsn as received after all allocations finish If we don't have the buffer space or memory allocations fail, the data chunk is dropped, but TSN is still reported as received. This introduced a data loss that can't be recovered. We should only mark TSNs as received after memory allocations finish. The one exception is the invalid stream identifier, but that's due to user error and is reported back to the user. This was noticed by Michael Tuexen. Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/sm_statefuns.c | 9 +++------ net/sctp/ulpevent.c | 5 +++++ 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 0c9d5a6950f..fcdb45d1071 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -5899,12 +5899,6 @@ static int sctp_eat_data(const struct sctp_association *asoc, return SCTP_IERROR_NO_DATA; } - /* If definately accepting the DATA chunk, record its TSN, otherwise - * wait for renege processing. - */ - if (SCTP_CMD_CHUNK_ULP == deliver) - sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); - chunk->data_accepted = 1; /* Note: Some chunks may get overcounted (if we drop) or overcounted @@ -5924,6 +5918,9 @@ static int sctp_eat_data(const struct sctp_association *asoc, * and discard the DATA chunk. */ if (ntohs(data_hdr->stream) >= asoc->c.sinit_max_instreams) { + /* Mark tsn as received even though we drop it */ + sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); + err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_INV_STRM, &data_hdr->stream, sizeof(data_hdr->stream)); diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index ce6cda6b699..a1f654aea26 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -710,6 +710,11 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, if (!skb) goto fail; + /* Now that all memory allocations for this chunk succeeded, we + * can mark it as received so the tsn_map is updated correctly. + */ + sctp_tsnmap_mark(&asoc->peer.tsn_map, ntohl(chunk->subh.data_hdr->tsn)); + /* First calculate the padding, so we don't inadvertently * pass up the wrong length to the user. * -- cgit v1.2.3 From 07035fc1bbf931a06e47583cddd2cea2907ac0db Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 8 Jul 2008 03:07:43 -0700 Subject: irda: Fix netlink error path return value Fix an incorrect return value check of genlmsg_put() in irda_nl_get_mode(). 
genlmsg_put() does not use ERR_PTR() to encode return values, it just returns NULL on error. Signed-off-by: Julius Volz Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- net/irda/irnetlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index 9e1fb82e322..2f05ec1037a 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -101,8 +101,8 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info) hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, &irda_nl_family, 0, IRDA_NL_CMD_GET_MODE); - if (IS_ERR(hdr)) { - ret = PTR_ERR(hdr); + if (hdr == NULL) { + ret = -EMSGSIZE; goto err_out; } -- cgit v1.2.3 From 0d3a34b48c87a374b37d7a21a60d257d076484f3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 7 Jul 2008 12:18:52 -0400 Subject: SUNRPC: Fix a double-free in rpcbind It is wrong to be freeing up the rpcbind arguments if the call to rpcb_call_async() fails, since they should already have been freed up by rpcb_map_release(). 
Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 0517967a68b..21c698d7b77 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -365,18 +365,16 @@ void rpcb_getport_async(struct rpc_task *task) rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { status = -EIO; + /* rpcb_map_release() has freed the arguments */ dprintk("RPC: %5u %s: rpc_run_task failed\n", task->tk_pid, __func__); - goto bailout; + goto bailout_nofree; } rpc_put_task(child); task->tk_xprt->stat.bind_count++; return; -bailout: - kfree(map); - xprt_put(xprt); bailout_nofree: rpcb_wake_rpcbind_waiters(xprt, status); bailout_nowake: -- cgit v1.2.3 From 803a9067e19714ea7b7da760fe92f0d53bfa6994 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 Jul 2008 15:20:55 -0400 Subject: SUNRPC: Fix an rpcbind breakage for the case of IPv6 lookups Now that rpcb_next_version has been split into an IPv4 version and an IPv6 version, we Oops when rpcb_call_async attempts to look up the IPv6-specific RPC procedure in rpcb_next_version. Fix the Oops simply by having rpcb_getport_async pass the correct RPC procedure as an argument. 
Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 21c698d7b77..e6fb21b19b8 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -243,10 +243,10 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot) } EXPORT_SYMBOL_GPL(rpcb_getport_sync); -static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, int version) +static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc) { struct rpc_message msg = { - .rpc_proc = rpcb_next_version[version].rpc_proc, + .rpc_proc = proc, .rpc_argp = map, .rpc_resp = &map->r_port, }; @@ -271,6 +271,7 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi void rpcb_getport_async(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + struct rpc_procinfo *proc; u32 bind_version; struct rpc_xprt *xprt = task->tk_xprt; struct rpc_clnt *rpcb_clnt; @@ -280,7 +281,6 @@ void rpcb_getport_async(struct rpc_task *task) struct sockaddr *sap = (struct sockaddr *)&addr; size_t salen; int status; - struct rpcb_info *info; dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", task->tk_pid, __func__, @@ -313,10 +313,12 @@ void rpcb_getport_async(struct rpc_task *task) /* Don't ever use rpcbind v2 for AF_INET6 requests */ switch (sap->sa_family) { case AF_INET: - info = rpcb_next_version; + proc = rpcb_next_version[xprt->bind_index].rpc_proc; + bind_version = rpcb_next_version[xprt->bind_index].rpc_vers; break; case AF_INET6: - info = rpcb_next_version6; + proc = rpcb_next_version6[xprt->bind_index].rpc_proc; + bind_version = rpcb_next_version6[xprt->bind_index].rpc_vers; break; default: status = -EAFNOSUPPORT; @@ -324,14 +326,13 @@ void rpcb_getport_async(struct rpc_task *task) task->tk_pid, __func__); goto bailout_nofree; } - if 
(info[xprt->bind_index].rpc_proc == NULL) { + if (proc == NULL) { xprt->bind_index = 0; status = -EPFNOSUPPORT; dprintk("RPC: %5u %s: no more getport versions available\n", task->tk_pid, __func__); goto bailout_nofree; } - bind_version = info[xprt->bind_index].rpc_vers; dprintk("RPC: %5u %s: trying rpcbind version %u\n", task->tk_pid, __func__, bind_version); @@ -361,7 +362,7 @@ void rpcb_getport_async(struct rpc_task *task) map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ - child = rpcb_call_async(rpcb_clnt, map, xprt->bind_index); + child = rpcb_call_async(rpcb_clnt, map, proc); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { status = -EIO; -- cgit v1.2.3 From b2238566401f01eb796e75750213c7b0fce396b2 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 8 Jul 2008 15:13:31 -0700 Subject: ipv6: fix race between ipv6_del_addr and DAD timer Consider the following scenario: ipv6_del_addr(ifp) ipv6_ifa_notify(RTM_DELADDR, ifp) ip6_del_rt(ifp->rt) after returning from the ipv6_ifa_notify and enabling BH-s back, but *before* calling the addrconf_del_timer the ifp->timer fires and: addrconf_dad_timer(ifp) addrconf_dad_completed(ifp) ipv6_ifa_notify(RTM_NEWADDR, ifp) ip6_ins_rt(ifp->rt) then return back to the ipv6_del_addr and: in6_ifa_put(ifp) inet6_ifa_finish_destroy(ifp) dst_release(&ifp->rt->u.dst) After this we have an ifp->rt inserted into fib6 lists, but queued for gc, which in turn can result in oopses in the fib6_run_gc. Maybe some other nasty things, but we caught only the oops in gc so far. The solution is to disarm the ifp->timer before flushing the rt from it. Signed-off-by: Andrey Vagin Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 147588f4c7c..ff61a5cdb0b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -749,12 +749,12 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) } write_unlock_bh(&idev->lock); + addrconf_del_timer(ifp); + ipv6_ifa_notify(RTM_DELADDR, ifp); atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp); - addrconf_del_timer(ifp); - /* * Purge or update corresponding prefix * -- cgit v1.2.3 From 8b39f2b41033754e7ba669503d27268beb1b524a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 14 May 2008 19:48:25 -0700 Subject: SUNRPC: Ensure we exit early in case of an encode error All errors from call_encode(), with the exception of EAGAIN, are fatal, so we should immediately return instead of proceeding to xprt_transmit(). Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8945307556e..9503b4c177d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -890,7 +890,6 @@ call_encode(struct rpc_task *task) task->tk_msg.rpc_argp); if (task->tk_status == -ENOMEM) { /* XXX: Is this sane? */ - rpc_delay(task, 3*HZ); task->tk_status = -EAGAIN; } } @@ -1048,8 +1047,14 @@ call_transmit(struct rpc_task *task) BUG_ON(task->tk_rqstp->rq_bytes_sent != 0); call_encode(task); /* Did the encode result in an error condition? */ - if (task->tk_status != 0) + if (task->tk_status != 0) { + /* Was the error nonfatal? 
*/ + if (task->tk_status == -EAGAIN) + rpc_delay(task, HZ >> 4); + else + rpc_exit(task, task->tk_status); return; + } } xprt_transmit(task); if (task->tk_status < 0) -- cgit v1.2.3 From b390c2b55c830eb3b64633fa8d8b8837e073e458 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Jun 2008 18:30:11 -0400 Subject: SUNRPC: An ENOMEM error from call_encode is always fatal The special 'ENOMEM' case that was previously flagged as non-fatal is bogus: auth_gss always returns EAGAIN for non-fatal errors, and may in fact return ENOMEM in the special case where xdr_buf_read_netobj runs out of preallocated buffer space (invariably a _fatal_ error, since there is no provision for preallocating larger buffers). Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9503b4c177d..1af4f161cda 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -888,10 +888,6 @@ call_encode(struct rpc_task *task) task->tk_status = rpcauth_wrap_req(task, encode, req, p, task->tk_msg.rpc_argp); - if (task->tk_status == -ENOMEM) { - /* XXX: Is this sane? */ - task->tk_status = -EAGAIN; - } } /* -- cgit v1.2.3 From 0f38b873aeaae698c3693748438547c8493165fb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Jun 2008 18:31:01 -0400 Subject: SUNRPC: Use GFP_NOFS when allocating credentials Since the credentials may be allocated during the call to rpc_new_task(), which again may be called by a memory allocator... 
Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 10 +++++----- net/sunrpc/auth_gss/gss_krb5_mech.c | 4 ++-- net/sunrpc/auth_gss/gss_spkm3_mech.c | 4 ++-- net/sunrpc/auth_gss/gss_spkm3_token.c | 2 +- net/sunrpc/auth_unix.c | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index cc12d5f5d5d..bf7585b8054 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -146,7 +146,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - dest->data = kmemdup(p, len, GFP_KERNEL); + dest->data = kmemdup(p, len, GFP_NOFS); if (unlikely(dest->data == NULL)) return ERR_PTR(-ENOMEM); dest->len = len; @@ -171,7 +171,7 @@ gss_alloc_context(void) { struct gss_cl_ctx *ctx; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), GFP_NOFS); if (ctx != NULL) { ctx->gc_proc = RPC_GSS_PROC_DATA; ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 
0 */ @@ -341,7 +341,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid) { struct gss_upcall_msg *gss_msg; - gss_msg = kzalloc(sizeof(*gss_msg), GFP_KERNEL); + gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS); if (gss_msg != NULL) { INIT_LIST_HEAD(&gss_msg->list); rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); @@ -503,7 +503,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (mlen > MSG_BUF_MAXSIZE) goto out; err = -ENOMEM; - buf = kmalloc(mlen, GFP_KERNEL); + buf = kmalloc(mlen, GFP_NOFS); if (!buf) goto out; @@ -806,7 +806,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) dprintk("RPC: gss_create_cred for uid %d, flavor %d\n", acred->uid, auth->au_flavor); - if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL))) + if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS))) goto out_err; rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 60c3dba545d..ef45eba2248 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -70,7 +70,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - res->data = kmemdup(p, len, GFP_KERNEL); + res->data = kmemdup(p, len, GFP_NOFS); if (unlikely(res->data == NULL)) return ERR_PTR(-ENOMEM); res->len = len; @@ -131,7 +131,7 @@ gss_import_sec_context_kerberos(const void *p, struct krb5_ctx *ctx; int tmp; - if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) + if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) goto out_err; p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 5deb4b6e451..035e1dd6af1 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -76,7 +76,7 
@@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - res->data = kmemdup(p, len, GFP_KERNEL); + res->data = kmemdup(p, len, GFP_NOFS); if (unlikely(res->data == NULL)) return ERR_PTR(-ENOMEM); return q; @@ -90,7 +90,7 @@ gss_import_sec_context_spkm3(const void *p, size_t len, struct spkm3_ctx *ctx; int version; - if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) + if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) goto out_err; p = simple_get_bytes(p, end, &version, sizeof(version)); diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c index 6cdd241ad26..3308157436d 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_token.c +++ b/net/sunrpc/auth_gss/gss_spkm3_token.c @@ -90,7 +90,7 @@ asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits) int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen) { - if (!(out->data = kzalloc(explen,GFP_KERNEL))) + if (!(out->data = kzalloc(explen,GFP_NOFS))) return 0; out->len = explen; memcpy(out->data, in, enclen); diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 44920b90bdc..46b2647c5bd 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -66,7 +66,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", acred->uid, acred->gid); - if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) + if (!(cred = kmalloc(sizeof(*cred), GFP_NOFS))) return ERR_PTR(-ENOMEM); rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); -- cgit v1.2.3 From 3748f1e447ac1dbf45f33ee7491a008a8bb5cdf0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 May 2008 17:09:12 -0400 Subject: SUNRPC: Add a function to display the name of an RPC procedure Improve debugging messages in call_start() and call_verify() by having them show the RPC procedure name 
instead of the procedure number. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 1af4f161cda..68ea6dddcf1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -690,6 +690,21 @@ rpc_restart_call(struct rpc_task *task) } EXPORT_SYMBOL_GPL(rpc_restart_call); +#ifdef RPC_DEBUG +static const char *rpc_proc_name(const struct rpc_task *task) +{ + const struct rpc_procinfo *proc = task->tk_msg.rpc_proc; + + if (proc) { + if (proc->p_name) + return proc->p_name; + else + return "NULL"; + } else + return "no proc"; +} +#endif + /* * 0. Initial state * @@ -701,9 +716,9 @@ call_start(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - dprintk("RPC: %5u call_start %s%d proc %d (%s)\n", task->tk_pid, + dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid, clnt->cl_protname, clnt->cl_vers, - task->tk_msg.rpc_proc->p_proc, + rpc_proc_name(task), (RPC_IS_ASYNC(task) ? "async" : "sync")); /* Increment call count */ @@ -1432,10 +1447,10 @@ call_verify(struct rpc_task *task) error = -EPROTONOSUPPORT; goto out_err; case RPC_PROC_UNAVAIL: - dprintk("RPC: %5u %s: proc %p unsupported by program %u, " + dprintk("RPC: %5u %s: proc %s unsupported by program %u, " "version %u on server %s\n", task->tk_pid, __func__, - task->tk_msg.rpc_proc, + rpc_proc_name(task), task->tk_client->cl_prog, task->tk_client->cl_vers, task->tk_client->cl_server); -- cgit v1.2.3 From b0e1c57ea00302c3ac541ffd37e7db07d13cd674 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 May 2008 17:09:19 -0400 Subject: SUNRPC: Rename "call_" functions that are no longer FSM states The RPC client uses a finite state machine to move RPC tasks through each step of an RPC request. Each state is contained in a function in net/sunrpc/clnt.c, and named call_foo. 
Some of the functions named call_foo have changed over the past few years and are no longer states in the FSM. These include: call_encode, call_header, and call_verify. As a clean up, rename the functions that have changed. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 68ea6dddcf1..ab8038db8ef 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -58,7 +58,6 @@ static void call_start(struct rpc_task *task); static void call_reserve(struct rpc_task *task); static void call_reserveresult(struct rpc_task *task); static void call_allocate(struct rpc_task *task); -static void call_encode(struct rpc_task *task); static void call_decode(struct rpc_task *task); static void call_bind(struct rpc_task *task); static void call_bind_status(struct rpc_task *task); @@ -70,9 +69,9 @@ static void call_refreshresult(struct rpc_task *task); static void call_timeout(struct rpc_task *task); static void call_connect(struct rpc_task *task); static void call_connect_status(struct rpc_task *task); -static __be32 * call_header(struct rpc_task *task); -static __be32 * call_verify(struct rpc_task *task); +static __be32 *rpc_encode_header(struct rpc_task *task); +static __be32 *rpc_verify_header(struct rpc_task *task); static int rpc_ping(struct rpc_clnt *clnt, int flags); static void rpc_register_client(struct rpc_clnt *clnt) @@ -876,7 +875,7 @@ rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) * 3. 
Encode arguments of an RPC call */ static void -call_encode(struct rpc_task *task) +rpc_xdr_encode(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; kxdrproc_t encode; @@ -891,13 +890,14 @@ call_encode(struct rpc_task *task) (char *)req->rq_buffer + req->rq_callsize, req->rq_rcvsize); - /* Encode header and provided arguments */ - encode = task->tk_msg.rpc_proc->p_encode; - if (!(p = call_header(task))) { - printk(KERN_INFO "RPC: call_header failed, exit EIO\n"); + p = rpc_encode_header(task); + if (p == NULL) { + printk(KERN_INFO "RPC: couldn't encode RPC header, exit EIO\n"); rpc_exit(task, -EIO); return; } + + encode = task->tk_msg.rpc_proc->p_encode; if (encode == NULL) return; @@ -1056,7 +1056,7 @@ call_transmit(struct rpc_task *task) /* Encode here so that rpcsec_gss can use correct sequence number. */ if (rpc_task_need_encode(task)) { BUG_ON(task->tk_rqstp->rq_bytes_sent != 0); - call_encode(task); + rpc_xdr_encode(task); /* Did the encode result in an error condition? */ if (task->tk_status != 0) { /* Was the error nonfatal? 
*/ @@ -1240,8 +1240,7 @@ call_decode(struct rpc_task *task) goto out_retry; } - /* Verify the RPC header */ - p = call_verify(task); + p = rpc_verify_header(task); if (IS_ERR(p)) { if (p == ERR_PTR(-EAGAIN)) goto out_retry; @@ -1259,7 +1258,7 @@ call_decode(struct rpc_task *task) return; out_retry: task->tk_status = 0; - /* Note: call_verify() may have freed the RPC slot */ + /* Note: rpc_verify_header() may have freed the RPC slot */ if (task->tk_rqstp == req) { req->rq_received = req->rq_rcv_buf.len = 0; if (task->tk_client->cl_discrtry) @@ -1306,11 +1305,8 @@ call_refreshresult(struct rpc_task *task) return; } -/* - * Call header serialization - */ static __be32 * -call_header(struct rpc_task *task) +rpc_encode_header(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; @@ -1330,11 +1326,8 @@ call_header(struct rpc_task *task) return p; } -/* - * Reply header verification - */ static __be32 * -call_verify(struct rpc_task *task) +rpc_verify_header(struct rpc_task *task) { struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0]; int len = task->tk_rqstp->rq_rcv_buf.len >> 2; @@ -1408,7 +1401,7 @@ call_verify(struct rpc_task *task) task->tk_action = call_bind; goto out_retry; case RPC_AUTH_TOOWEAK: - printk(KERN_NOTICE "call_verify: server %s requires stronger " + printk(KERN_NOTICE "RPC: server %s requires stronger " "authentication.\n", task->tk_client->cl_server); break; default: -- cgit v1.2.3 From 68a23ee94e3a06819f5a39d64f2e1f3131bab12d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 May 2008 17:09:26 -0400 Subject: SUNRPC: Don't display the rpc_show_tasks header if there are no tasks Clean up: don't display the rpc_show_tasks column header unless there is at least one task to display. As far as I can tell, it is safe to let the list_for_each_entry macro decide that each list is empty. 
scripts/checkpatch.pl also wants a KERN_FOO at the start of any newly added printk() calls, so this and subsequent patches will also add KERN_INFO. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index ab8038db8ef..7dda32851ad 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1526,24 +1526,30 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int EXPORT_SYMBOL_GPL(rpc_call_null); #ifdef RPC_DEBUG +static void rpc_show_header(void) +{ + printk(KERN_INFO "-pid- proc flgs status -client- -prog- --rqstp- " + "-timeout -rpcwait -action- ---ops--\n"); +} + void rpc_show_tasks(void) { struct rpc_clnt *clnt; struct rpc_task *t; + int header = 0; spin_lock(&rpc_client_lock); - if (list_empty(&all_clients)) - goto out; - printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " - "-rpcwait -action- ---ops--\n"); list_for_each_entry(clnt, &all_clients, cl_clients) { - if (list_empty(&clnt->cl_tasks)) - continue; spin_lock(&clnt->cl_lock); list_for_each_entry(t, &clnt->cl_tasks, tk_task) { const char *rpc_waitq = "none"; int proc; + if (!header) { + rpc_show_header(); + header++; + } + if (t->tk_msg.rpc_proc) proc = t->tk_msg.rpc_proc->p_proc; else @@ -1563,7 +1569,6 @@ void rpc_show_tasks(void) } spin_unlock(&clnt->cl_lock); } -out: spin_unlock(&rpc_client_lock); } #endif -- cgit v1.2.3 From 38e886e0c18975543938519254fc9bf0829c75a3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 May 2008 17:09:33 -0400 Subject: SUNRPC: Refactor rpc_show_tasks Clean up: move the logic that displays each task to its own function. This removes indentation and makes future changes easier. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7dda32851ad..7964a98c90e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1532,40 +1532,42 @@ static void rpc_show_header(void) "-timeout -rpcwait -action- ---ops--\n"); } +static void rpc_show_task(const struct rpc_clnt *clnt, + const struct rpc_task *task) +{ + const char *rpc_waitq = "none"; + int proc = -1; + + if (task->tk_msg.rpc_proc) + proc = task->tk_msg.rpc_proc->p_proc; + + if (RPC_IS_QUEUED(task)) + rpc_waitq = rpc_qname(task->tk_waitqueue); + + printk(KERN_INFO "%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", + task->tk_pid, proc, + task->tk_flags, task->tk_status, + clnt, clnt->cl_prog, + task->tk_rqstp, task->tk_timeout, + rpc_waitq, + task->tk_action, task->tk_ops); +} + void rpc_show_tasks(void) { struct rpc_clnt *clnt; - struct rpc_task *t; + struct rpc_task *task; int header = 0; spin_lock(&rpc_client_lock); list_for_each_entry(clnt, &all_clients, cl_clients) { spin_lock(&clnt->cl_lock); - list_for_each_entry(t, &clnt->cl_tasks, tk_task) { - const char *rpc_waitq = "none"; - int proc; - + list_for_each_entry(task, &clnt->cl_tasks, tk_task) { if (!header) { rpc_show_header(); header++; } - - if (t->tk_msg.rpc_proc) - proc = t->tk_msg.rpc_proc->p_proc; - else - proc = -1; - - if (RPC_IS_QUEUED(t)) - rpc_waitq = rpc_qname(t->tk_waitqueue); - - printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", - t->tk_pid, proc, - t->tk_flags, t->tk_status, - t->tk_client, - (t->tk_client ? 
t->tk_client->cl_prog : 0), - t->tk_rqstp, t->tk_timeout, - rpc_waitq, - t->tk_action, t->tk_ops); + rpc_show_task(clnt, task); } spin_unlock(&clnt->cl_lock); } -- cgit v1.2.3 From cb3997b5a0b21864368bd1bd1d0929f9304fb6d9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 May 2008 17:09:41 -0400 Subject: SUNRPC: Display some debugging information as text rather than numbers In rpc_show_tasks(), display the program name, version number, procedure name and tk_action as human-readable variable-length text fields rather than columnar numbers. Doing the symbol lookup here helps in cases where we have actual debugging output from a kernel log, but don't have access to the kernel image or RPC module that generated the output. Sample output: -pid- flgs status -client- --rqstp- -timeout ---ops-- 5608 0001 -11 eeb42690 f6d93710 0 f8fa1764 nfsv3 WRITE a:call_transmit_status q:none 5609 0001 -11 eeb42690 f6d937e0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5610 0001 -11 eeb42690 f6d93230 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5611 0001 -11 eeb42690 f6d93300 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5612 0001 -11 eeb42690 f6d93090 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5613 0001 -11 eeb42690 f6d933d0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5614 0001 -11 eeb42690 f6d93cc0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5615 0001 -11 eeb42690 f6d93a50 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5616 0001 -11 eeb42690 f6d93640 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5617 0001 -11 eeb42690 f6d93b20 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5618 0001 -11 eeb42690 f6d93160 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7964a98c90e..0530eea37b5 
100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -1528,29 +1529,31 @@ EXPORT_SYMBOL_GPL(rpc_call_null); #ifdef RPC_DEBUG static void rpc_show_header(void) { - printk(KERN_INFO "-pid- proc flgs status -client- -prog- --rqstp- " - "-timeout -rpcwait -action- ---ops--\n"); + printk(KERN_INFO "-pid- flgs status -client- --rqstp- " + "-timeout ---ops--\n"); } static void rpc_show_task(const struct rpc_clnt *clnt, const struct rpc_task *task) { const char *rpc_waitq = "none"; - int proc = -1; - - if (task->tk_msg.rpc_proc) - proc = task->tk_msg.rpc_proc->p_proc; + char *p, action[KSYM_SYMBOL_LEN]; if (RPC_IS_QUEUED(task)) rpc_waitq = rpc_qname(task->tk_waitqueue); - printk(KERN_INFO "%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", - task->tk_pid, proc, - task->tk_flags, task->tk_status, - clnt, clnt->cl_prog, - task->tk_rqstp, task->tk_timeout, - rpc_waitq, - task->tk_action, task->tk_ops); + /* map tk_action pointer to a function name; then trim off + * the "+0x0 [sunrpc]" */ + sprint_symbol(action, (unsigned long)task->tk_action); + p = strchr(action, '+'); + if (p) + *p = '\0'; + + printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%s q:%s\n", + task->tk_pid, task->tk_flags, task->tk_status, + clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops, + clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task), + action, rpc_waitq); } void rpc_show_tasks(void) -- cgit v1.2.3 From cd983ef81b9d79573848dabf81277c7314220257 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jun 2008 17:56:13 -0400 Subject: SUNRPC: Remove obsolete messages during transport connect Recent changes to the RPC client's transport connect logic make connect status values ECONNREFUSED and ECONNRESET impossible. Clean up xprt_connect_status() to account for these changes. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e1770f7ba0b..67996bd7fbf 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -690,7 +690,7 @@ static void xprt_connect_status(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - if (task->tk_status >= 0) { + if (task->tk_status == 0) { xprt->stat.connect_count++; xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start; dprintk("RPC: %5u xprt_connect_status: connection established\n", @@ -699,12 +699,6 @@ static void xprt_connect_status(struct rpc_task *task) } switch (task->tk_status) { - case -ECONNREFUSED: - case -ECONNRESET: - dprintk("RPC: %5u xprt_connect_status: server %s refused " - "connection\n", task->tk_pid, - task->tk_client->cl_server); - break; case -ENOTCONN: dprintk("RPC: %5u xprt_connect_status: connection broken\n", task->tk_pid); -- cgit v1.2.3 From b6b6152c46861dd914d0e6cea9c27df057d6e235 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 9 Jun 2008 16:51:31 -0400 Subject: rpc: bring back cl_chatty The cl_chatty flag allows us to control whether a given rpc client leaves "server X not responding, timed out" messages in the syslog. Such messages make sense for ordinary nfs clients (where an unresponsive server means applications on the mountpoint are probably hanging), but not for the callback client (which can fail more commonly, with the only result just of disabling some optimizations). Previously cl_chatty was removed, due to lack of users; reinstate it, and use it for the nfsd's callback client. Signed-off-by: J.
Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0530eea37b5..09631f6e30e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -324,6 +324,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) clnt->cl_autobind = 1; if (args->flags & RPC_CLNT_CREATE_DISCRTRY) clnt->cl_discrtry = 1; + if (!(args->flags & RPC_CLNT_CREATE_QUIET)) + clnt->cl_chatty = 1; return clnt; } @@ -1149,7 +1151,8 @@ call_status(struct rpc_task *task) rpc_exit(task, status); break; default: - printk("%s: RPC call returned error %d\n", + if (clnt->cl_chatty) + printk("%s: RPC call returned error %d\n", clnt->cl_protname, -status); rpc_exit(task, status); } @@ -1174,7 +1177,8 @@ call_timeout(struct rpc_task *task) task->tk_timeouts++; if (RPC_IS_SOFT(task)) { - printk(KERN_NOTICE "%s: server %s not responding, timed out\n", + if (clnt->cl_chatty) + printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_protname, clnt->cl_server); rpc_exit(task, -EIO); return; @@ -1182,7 +1186,8 @@ call_timeout(struct rpc_task *task) if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) { task->tk_flags |= RPC_CALL_MAJORSEEN; - printk(KERN_NOTICE "%s: server %s not responding, still trying\n", + if (clnt->cl_chatty) + printk(KERN_NOTICE "%s: server %s not responding, still trying\n", clnt->cl_protname, clnt->cl_server); } rpc_force_rebind(clnt); @@ -1213,8 +1218,9 @@ call_decode(struct rpc_task *task) task->tk_pid, task->tk_status); if (task->tk_flags & RPC_CALL_MAJORSEEN) { - printk(KERN_NOTICE "%s: server %s OK\n", - clnt->cl_protname, clnt->cl_server); + if (clnt->cl_chatty) + printk(KERN_NOTICE "%s: server %s OK\n", + clnt->cl_protname, clnt->cl_server); task->tk_flags &= ~RPC_CALL_MAJORSEEN; } -- cgit v1.2.3 From 720b8f2d6f7de9e16f1217448cc7396e1604e175 Mon Sep 17 00:00:00 2001 From: "\\\\\\\"J. 
Bruce Fields\\\\\\" Date: Mon, 9 Jun 2008 16:51:33 -0400 Subject: rpc: eliminate unused variable in auth_gss upcall code Also, a minor comment grammar fix in the same file. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index bf7585b8054..ebfd630541f 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -272,7 +272,7 @@ __gss_find_upcall(struct rpc_inode *rpci, uid_t uid) return NULL; } -/* Try to add a upcall to the pipefs queue. +/* Try to add an upcall to the pipefs queue. * If an upcall owned by our uid already exists, then we return a reference * to that upcall instead of adding the new upcall. */ @@ -493,7 +493,6 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { const void *p, *end; void *buf; - struct rpc_clnt *clnt; struct gss_upcall_msg *gss_msg; struct inode *inode = filp->f_path.dentry->d_inode; struct gss_cl_ctx *ctx; @@ -507,7 +506,6 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (!buf) goto out; - clnt = RPC_I(inode)->private; err = -EFAULT; if (copy_from_user(buf, src, mlen)) goto err; -- cgit v1.2.3 From d25a03cf966f2cf9990dc0bf2a921a554919ea34 Mon Sep 17 00:00:00 2001 From: "\\\\\\\"J. Bruce Fields\\\\\\" Date: Mon, 9 Jun 2008 16:51:34 -0400 Subject: rpc: remove some unused macros There used to be a print_hexl() function that used isprint(), now gone. I don't know why NFS_NGROUPS and CA_RUN_AS_MACHINE were here. I also don't know why another #define that's actually used was marked "unused". Signed-off-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index ebfd630541f..834a83199bd 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -63,22 +63,11 @@ static const struct rpc_credops gss_nullops; # define RPCDBG_FACILITY RPCDBG_AUTH #endif -#define NFS_NGROUPS 16 - -#define GSS_CRED_SLACK 1024 /* XXX: unused */ +#define GSS_CRED_SLACK 1024 /* length of a krb5 verifier (48), plus data added before arguments when * using integrity (two 4-byte integers): */ #define GSS_VERF_SLACK 100 -/* XXX this define must match the gssd define -* as it is passed to gssd to signal the use of -* machine creds should be part of the shared rpc interface */ - -#define CA_RUN_AS_MACHINE 0x00000200 - -/* dump the buffer in `emacs-hexl' style */ -#define isprint(c) ((c > 0x1f) && (c < 0x7f)) - struct gss_auth { struct kref kref; struct rpc_auth rpc_auth; -- cgit v1.2.3 From a486aeda9b2b0d944aecce7871b3186379b898de Mon Sep 17 00:00:00 2001 From: "\\\\\\\"J. Bruce Fields\\\\\\" Date: Mon, 9 Jun 2008 16:51:35 -0400 Subject: rpc: minor cleanup of scheduler callback code Try to make the comment here a little more clear and concise. Also, this macro definition seems unnecessary. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6eab9bf94ba..6288af05c20 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -626,19 +626,15 @@ static void __rpc_execute(struct rpc_task *task) /* * Execute any pending callback. 
*/ - if (RPC_DO_CALLBACK(task)) { - /* Define a callback save pointer */ + if (task->tk_callback) { void (*save_callback)(struct rpc_task *); /* - * If a callback exists, save it, reset it, - * call it. - * The save is needed to stop from resetting - * another callback set within the callback handler - * - Dave + * We set tk_callback to NULL before calling it, + * in case it sets the tk_callback field itself: */ - save_callback=task->tk_callback; - task->tk_callback=NULL; + save_callback = task->tk_callback; + task->tk_callback = NULL; save_callback(task); } -- cgit v1.2.3 From b22602a673b1743bba4b62bb404ffd3b269d2f09 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 6 Jun 2008 13:22:25 -0400 Subject: SUNRPC: Ensure all transports set rq_xtime consistently The RPC client uses the rq_xtime field in each RPC request to determine the round-trip time of the request. Currently, the rq_xtime field is initialized by each transport just before it starts enqueuing a request to be sent. However, transports do not handle initializing this value consistently; sometimes they don't initialize it at all. To make the measurement of request round-trip time consistent for all RPC client transport capabilities, pull rq_xtime initialization into the RPC client's generic transport logic. Now all transports will get a standardized RTT measure automatically, from: xprt_transmit() to xprt_complete_rqst() This makes round-trip time calculation more accurate for the TCP transport. The socket ->sendmsg() method can return "-EAGAIN" if the socket's output buffer is full, so the TCP transport's ->send_request() method may call the ->sendmsg() method repeatedly until it gets all of the request's bytes queued in the socket's buffer. Currently, the TCP transport sets the rq_xtime field every time through that loop so the final value is the timestamp just before the *last* call to the underlying socket's ->sendmsg() method.
After this patch, the rq_xtime field contains a timestamp that reflects the time just before the *first* call to ->sendmsg(). This is consequential under heavy workloads because large requests often take multiple ->sendmsg() calls to get all the bytes of a request queued. The TCP transport causes the request to sleep until the remote end of the socket has received enough bytes to clear space in the socket's local output buffer. This delay can be quite significant. The method introduced by this patch is a more accurate measure of RTT for stream transports, since the server can cause enough back pressure to delay (ie increase the latency of) requests from the client. Additionally, this patch corrects the behavior of the RDMA transport, which entirely neglected to initialize the rq_xtime field. RPC performance metrics for RDMA transports now display correct RPC request round trip times. Signed-off-by: Chuck Lever Acked-by: Tom Talpey Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 1 + net/sunrpc/xprtsock.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 67996bd7fbf..99a52aabe33 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -872,6 +872,7 @@ void xprt_transmit(struct rpc_task *task) return; req->rq_connect_cookie = xprt->connect_cookie; + req->rq_xtime = jiffies; status = xprt->ops->send_request(task); if (status == 0) { dprintk("RPC: %5u xmit complete\n", task->tk_pid); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ddbe981ab51..4486c59c3ac 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -579,7 +579,6 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_svec->iov_base, req->rq_svec->iov_len); - req->rq_xtime = jiffies; status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, xdr, @@ -671,7 +670,6 @@ static int xs_tcp_send_request(struct rpc_task *task) * to cope with writespace callbacks arriving _after_ we 
have * called sendmsg(). */ while (1) { - req->rq_xtime = jiffies; status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent); -- cgit v1.2.3 From 877fcf103982e52a59a1035378b4c0b8c63fe004 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 25 Jun 2008 17:24:23 -0400 Subject: SUNRPC: More useful debugging output for rpcb client Clean up dprintk's in rpcb client's XDR decoder functions. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index e6fb21b19b8..cf2b91613ac 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -438,7 +438,7 @@ static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p, unsigned short *portp) { *portp = (unsigned short) ntohl(*p++); - dprintk("RPC: rpcb_decode_getport result %u\n", + dprintk("RPC: rpcb_decode_getport result %u\n", *portp); return 0; } @@ -447,8 +447,8 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, unsigned int *boolp) { *boolp = (unsigned int) ntohl(*p++); - dprintk("RPC: rpcb_decode_set result %u\n", - *boolp); + dprintk("RPC: rpcb_decode_set: call %s\n", + (*boolp ? "succeeded" : "failed")); return 0; } -- cgit v1.2.3 From fc200e794d723bc88c39859e8f096b717532f9c9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 25 Jun 2008 17:24:31 -0400 Subject: SUNRPC: Document some naked integers in rpcbind client Clean up: Replace naked integers that represent rpcbind protocol versions. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 49 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index cf2b91613ac..b23a719aca3 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -32,6 +32,10 @@ #define RPCBIND_PROGRAM (100000u) #define RPCBIND_PORT (111u) +#define RPCBVERS_2 (2u) +#define RPCBVERS_3 (3u) +#define RPCBVERS_4 (4u) + enum { RPCBPROC_NULL, RPCBPROC_SET, @@ -82,7 +86,7 @@ static struct rpc_procinfo rpcb_procedures2[]; static struct rpc_procinfo rpcb_procedures3[]; struct rpcb_info { - int rpc_vers; + u32 rpc_vers; struct rpc_procinfo * rpc_proc; }; @@ -177,7 +181,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) prog, vers, prot, port); rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin, - sizeof(sin), XPRT_TRANSPORT_UDP, 2, 1); + sizeof(sin), XPRT_TRANSPORT_UDP, RPCBVERS_2, 1); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); @@ -227,7 +231,7 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot) __func__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin, - sizeof(*sin), prot, 2, 0); + sizeof(*sin), prot, RPCBVERS_2, 0); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); @@ -588,35 +592,54 @@ static struct rpc_procinfo rpcb_procedures4[] = { static struct rpcb_info rpcb_next_version[] = { #ifdef CONFIG_SUNRPC_BIND34 - { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, - { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, + { + .rpc_vers = RPCBVERS_4, + .rpc_proc = &rpcb_procedures4[RPCBPROC_GETVERSADDR], + }, + { + .rpc_vers = RPCBVERS_3, + .rpc_proc = &rpcb_procedures3[RPCBPROC_GETADDR], + }, #endif - { 2, &rpcb_procedures2[RPCBPROC_GETPORT] }, - { 0, NULL }, + { + .rpc_vers = RPCBVERS_2, + .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT], + }, + { + 
.rpc_proc = NULL, + }, }; static struct rpcb_info rpcb_next_version6[] = { #ifdef CONFIG_SUNRPC_BIND34 - { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, - { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, + { + .rpc_vers = RPCBVERS_4, + .rpc_proc = &rpcb_procedures4[RPCBPROC_GETVERSADDR], + }, + { + .rpc_vers = RPCBVERS_3, + .rpc_proc = &rpcb_procedures3[RPCBPROC_GETADDR], + }, #endif - { 0, NULL }, + { + .rpc_proc = NULL, + }, }; static struct rpc_version rpcb_version2 = { - .number = 2, + .number = RPCBVERS_2, .nrprocs = RPCB_HIGHPROC_2, .procs = rpcb_procedures2 }; static struct rpc_version rpcb_version3 = { - .number = 3, + .number = RPCBVERS_3, .nrprocs = RPCB_HIGHPROC_3, .procs = rpcb_procedures3 }; static struct rpc_version rpcb_version4 = { - .number = 4, + .number = RPCBVERS_4, .nrprocs = RPCB_HIGHPROC_4, .procs = rpcb_procedures4 }; -- cgit v1.2.3 From 6a774051573042cdeb57e81f77b36c25e5856739 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 25 Jun 2008 17:24:39 -0400 Subject: SUNRPC: Use rpcbind version 2 GETPORT Clean up: Change the version 2 procedure name to GETPORT. It's the same procedure number as GETADDR, but version 2 implementations usually refer to it as GETPORT. This also now matches the procedure name used in the version 2 procedure entry in the rpcb_next_version[] array, making it slightly less confusing. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index b23a719aca3..a70cc1e1179 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -575,7 +575,7 @@ out_err: static struct rpc_procinfo rpcb_procedures2[] = { PROC(SET, mapping, set), PROC(UNSET, mapping, set), - PROC(GETADDR, mapping, getport), + PROC(GETPORT, mapping, getport), }; static struct rpc_procinfo rpcb_procedures3[] = { -- cgit v1.2.3 From 8842413aa4c3220ce9313791f99808fc149ca16d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 25 Jun 2008 17:24:47 -0400 Subject: SUNRPC: Use GETADDR for rpcbind version 4 queries Some rpcbind servers that do support rpcbind version 4 do not support the GETVERSADDR procedure. Use GETADDR for querying rpcbind servers via rpcbind version 4 instead of GETVERSADDR. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index a70cc1e1179..625ba72e624 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -587,6 +587,7 @@ static struct rpc_procinfo rpcb_procedures3[] = { static struct rpc_procinfo rpcb_procedures4[] = { PROC(SET, mapping, set), PROC(UNSET, mapping, set), + PROC(GETADDR, getaddr, getaddr), PROC(GETVERSADDR, getaddr, getaddr), }; @@ -594,7 +595,7 @@ static struct rpcb_info rpcb_next_version[] = { #ifdef CONFIG_SUNRPC_BIND34 { .rpc_vers = RPCBVERS_4, - .rpc_proc = &rpcb_procedures4[RPCBPROC_GETVERSADDR], + .rpc_proc = &rpcb_procedures4[RPCBPROC_GETADDR], }, { .rpc_vers = RPCBVERS_3, @@ -614,7 +615,7 @@ static struct rpcb_info rpcb_next_version6[] = { #ifdef CONFIG_SUNRPC_BIND34 { .rpc_vers = RPCBVERS_4, - .rpc_proc = &rpcb_procedures4[RPCBPROC_GETVERSADDR], + .rpc_proc = 
&rpcb_procedures4[RPCBPROC_GETADDR], }, { .rpc_vers = RPCBVERS_3, -- cgit v1.2.3 From 40fef8a649e5344bfb6a67a7cc3def3e0dad6448 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 25 Jun 2008 17:24:54 -0400 Subject: SUNRPC: Use only rpcbind v2 for AF_INET requests Some server vendors support the higher versions of rpcbind only for AF_INET6. The kernel doesn't need to use v3 or v4 for AF_INET anyway, so change the kernel's rpcbind client to query AF_INET servers over rpcbind v2 only. This has a few interesting benefits: 1. If the rpcbind request is going over TCP, and the server doesn't support rpcbind versions 3 or 4, the client reduces by two the number of ephemeral ports left in TIME_WAIT for each rpcbind request. This will help during NFS mount storms. 2. The rpcbind interaction with servers that don't support rpcbind versions 3 or 4 will use less network traffic. Also helpful during mount storms. 3. We can eliminate the kernel build option that controls whether the kernel's rpcbind client uses rpcbind version 3 and 4 for AF_INET servers. Less complicated kernel configuration... 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 625ba72e624..c62e446723a 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -592,16 +592,6 @@ static struct rpc_procinfo rpcb_procedures4[] = { }; static struct rpcb_info rpcb_next_version[] = { -#ifdef CONFIG_SUNRPC_BIND34 - { - .rpc_vers = RPCBVERS_4, - .rpc_proc = &rpcb_procedures4[RPCBPROC_GETADDR], - }, - { - .rpc_vers = RPCBVERS_3, - .rpc_proc = &rpcb_procedures3[RPCBPROC_GETADDR], - }, -#endif { .rpc_vers = RPCBVERS_2, .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT], @@ -612,7 +602,6 @@ static struct rpcb_info rpcb_next_version[] = { }; static struct rpcb_info rpcb_next_version6[] = { -#ifdef CONFIG_SUNRPC_BIND34 { .rpc_vers = RPCBVERS_4, .rpc_proc = &rpcb_procedures4[RPCBPROC_GETADDR], @@ -621,7 +610,6 @@ static struct rpcb_info rpcb_next_version6[] = { .rpc_vers = RPCBVERS_3, .rpc_proc = &rpcb_procedures3[RPCBPROC_GETADDR], }, -#endif { .rpc_proc = NULL, }, -- cgit v1.2.3 From 381ba74af55e58bca4c01553835a360a9f6fbb07 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 7 Jul 2008 12:18:53 -0400 Subject: SUNRPC: Ensure our task is notified when an rpcbind call is done If another task is busy in rpcb_getport_async number, it is more efficient to have it wake us up when it has finished instead of arbitrarily sleeping for 5 seconds. Also ensure that rpcb_wake_rpcbind_waiters() is called regardless of whether or not rpcb_getport_done() gets called. 
Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 8 +++----- net/sunrpc/rpcb_clnt.c | 38 ++++++++++++++++++++------------------ 2 files changed, 23 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 09631f6e30e..76739e928d0 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -942,11 +942,9 @@ call_bind_status(struct rpc_task *task) } switch (task->tk_status) { - case -EAGAIN: - dprintk("RPC: %5u rpcbind waiting for another request " - "to finish\n", task->tk_pid); - /* avoid busy-waiting here -- could be a network outage. */ - rpc_delay(task, 5*HZ); + case -ENOMEM: + dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid); + rpc_delay(task, HZ >> 2); goto retry_timeout; case -EACCES: dprintk("RPC: %5u remote rpcbind: RPC program/version " diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index c62e446723a..24e93e0a0a2 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -68,6 +68,7 @@ enum { #define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) static void rpcb_getport_done(struct rpc_task *, void *); +static void rpcb_map_release(void *data); static struct rpc_program rpcb_program; struct rpcbind_args { @@ -80,6 +81,8 @@ struct rpcbind_args { const char * r_netid; const char * r_addr; const char * r_owner; + + int r_status; }; static struct rpc_procinfo rpcb_procedures2[]; @@ -93,14 +96,6 @@ struct rpcb_info { static struct rpcb_info rpcb_next_version[]; static struct rpcb_info rpcb_next_version6[]; -static void rpcb_map_release(void *data) -{ - struct rpcbind_args *map = data; - - xprt_put(map->r_xprt); - kfree(map); -} - static const struct rpc_call_ops rpcb_getport_ops = { .rpc_call_done = rpcb_getport_done, .rpc_release = rpcb_map_release, @@ -112,6 +107,15 @@ static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status) rpc_wake_up_status(&xprt->binding, status); } +static void rpcb_map_release(void *data) +{ + struct rpcbind_args *map = data; + + 
rpcb_wake_rpcbind_waiters(map->r_xprt, map->r_status); + xprt_put(map->r_xprt); + kfree(map); +} + static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, size_t salen, int proto, u32 version, int privileged) @@ -293,17 +297,16 @@ void rpcb_getport_async(struct rpc_task *task) /* Autobind on cloned rpc clients is discouraged */ BUG_ON(clnt->cl_parent != clnt); + /* Put self on the wait queue to ensure we get notified if + * some other task is already attempting to bind the port */ + rpc_sleep_on(&xprt->binding, task, NULL); + if (xprt_test_and_set_binding(xprt)) { - status = -EAGAIN; /* tell caller to check again */ dprintk("RPC: %5u %s: waiting for another binder\n", task->tk_pid, __func__); - goto bailout_nowake; + return; } - /* Put self on queue before sending rpcbind request, in case - * rpcb_getport_done completes before we return from rpc_run_task */ - rpc_sleep_on(&xprt->binding, task, NULL); - /* Someone else may have bound if we slept */ if (xprt_bound(xprt)) { status = 0; @@ -365,15 +368,15 @@ void rpcb_getport_async(struct rpc_task *task) map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ + map->r_status = -EIO; child = rpcb_call_async(rpcb_clnt, map, proc); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { - status = -EIO; /* rpcb_map_release() has freed the arguments */ dprintk("RPC: %5u %s: rpc_run_task failed\n", task->tk_pid, __func__); - goto bailout_nofree; + return; } rpc_put_task(child); @@ -382,7 +385,6 @@ void rpcb_getport_async(struct rpc_task *task) bailout_nofree: rpcb_wake_rpcbind_waiters(xprt, status); -bailout_nowake: task->tk_status = status; } EXPORT_SYMBOL_GPL(rpcb_getport_async); @@ -421,7 +423,7 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n", child->tk_pid, status, map->r_port); - 
rpcb_wake_rpcbind_waiters(xprt, status); + map->r_status = status; } static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, -- cgit v1.2.3 From adeed48090fc370afa0db8d007748ee72a40b578 Mon Sep 17 00:00:00 2001 From: Mattias Nissler Date: Mon, 7 Jul 2008 23:08:19 +0200 Subject: rc80211_pid: Fix fast_start parameter handling This removes the fast_start parameter from the rc_pid parameters information and instead uses the parameter macro when initializing the rc_pid state. Since the parameter is only used on initialization, there is no point in exporting it via debugfs. This also fixes uninitialized memory references to the fast_start and norm_offset parameters detected by the kmemcheck utility. Thanks to Vegard Nossum for reporting the bug. Signed-off-by: Mattias Nissler Signed-off-by: John W. Linville --- net/mac80211/rc80211_pid.h | 5 ----- net/mac80211/rc80211_pid_algo.c | 31 +++++++++++++------------------ 2 files changed, 13 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h index 04afc13ed82..4ea7b97d1af 100644 --- a/net/mac80211/rc80211_pid.h +++ b/net/mac80211/rc80211_pid.h @@ -141,7 +141,6 @@ struct rc_pid_events_file_info { * rate behaviour values (lower means we should trust more what we learnt * about behaviour of rates, higher means we should trust more the natural * ordering of rates) - * @fast_start: if Y, push high rates right after initialization */ struct rc_pid_debugfs_entries { struct dentry *dir; @@ -154,7 +153,6 @@ struct rc_pid_debugfs_entries { struct dentry *sharpen_factor; struct dentry *sharpen_duration; struct dentry *norm_offset; - struct dentry *fast_start; }; void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf, @@ -267,9 +265,6 @@ struct rc_pid_info { /* Normalization offset. */ unsigned int norm_offset; - /* Fast starst parameter. */ - unsigned int fast_start; - /* Rates information.
*/ struct rc_pid_rateinfo *rinfo; diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index a849b745bdb..bcd27c1d759 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -398,13 +398,25 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) return NULL; } + pinfo->target = RC_PID_TARGET_PF; + pinfo->sampling_period = RC_PID_INTERVAL; + pinfo->coeff_p = RC_PID_COEFF_P; + pinfo->coeff_i = RC_PID_COEFF_I; + pinfo->coeff_d = RC_PID_COEFF_D; + pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT; + pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR; + pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION; + pinfo->norm_offset = RC_PID_NORM_OFFSET; + pinfo->rinfo = rinfo; + pinfo->oldrate = 0; + /* Sort the rates. This is optimized for the most common case (i.e. * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed * mapping too. */ for (i = 0; i < sband->n_bitrates; i++) { rinfo[i].index = i; rinfo[i].rev_index = i; - if (pinfo->fast_start) + if (RC_PID_FAST_START) rinfo[i].diff = 0; else rinfo[i].diff = i * pinfo->norm_offset; @@ -425,19 +437,6 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) break; } - pinfo->target = RC_PID_TARGET_PF; - pinfo->sampling_period = RC_PID_INTERVAL; - pinfo->coeff_p = RC_PID_COEFF_P; - pinfo->coeff_i = RC_PID_COEFF_I; - pinfo->coeff_d = RC_PID_COEFF_D; - pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT; - pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR; - pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION; - pinfo->norm_offset = RC_PID_NORM_OFFSET; - pinfo->fast_start = RC_PID_FAST_START; - pinfo->rinfo = rinfo; - pinfo->oldrate = 0; - #ifdef CONFIG_MAC80211_DEBUGFS de = &pinfo->dentries; de->dir = debugfs_create_dir("rc80211_pid", @@ -465,9 +464,6 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) de->norm_offset = debugfs_create_u32("norm_offset", S_IRUSR | S_IWUSR, de->dir, &pinfo->norm_offset); - de->fast_start = 
debugfs_create_bool("fast_start", - S_IRUSR | S_IWUSR, de->dir, - &pinfo->fast_start); #endif return pinfo; @@ -479,7 +475,6 @@ static void rate_control_pid_free(void *priv) #ifdef CONFIG_MAC80211_DEBUGFS struct rc_pid_debugfs_entries *de = &pinfo->dentries; - debugfs_remove(de->fast_start); debugfs_remove(de->norm_offset); debugfs_remove(de->sharpen_duration); debugfs_remove(de->sharpen_factor); -- cgit v1.2.3 From 6b69fe0c73c0f5a8dacf8f889db3cc9adee53649 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 9 Jul 2008 15:06:12 -0700 Subject: netfilter: nf_conntrack_tcp: fix endless loop When a conntrack entry is destroyed in process context and destruction is interrupted by packet processing and the packet is an attempt to reopen a closed connection, TCP conntrack tries to kill the old entry itself and returns NF_REPEAT to pass the packet through the hook again. This may lead to an endless loop: TCP conntrack repeatedly finds the old entry, but can not kill it itself since destruction is already in progress, but destruction in process context can not complete since TCP conntrack is keeping the CPU busy. Drop the packet in TCP conntrack if we can't kill the connection ourselves to avoid this. Reported by: hemao77@gmail.com [ Kernel bugzilla #11058 ] Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_proto_tcp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 271cd01d57a..dd28fb239a6 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -844,9 +844,15 @@ static int tcp_packet(struct nf_conn *ct, /* Attempt to reopen a closed/aborted connection. * Delete this connection and look up again. */ write_unlock_bh(&tcp_lock); - if (del_timer(&ct->timeout)) + /* Only repeat if we can actually remove the timer. 
+ * Destruction may already be in progress in process + * context and we must give it a chance to terminate. + */ + if (del_timer(&ct->timeout)) { ct->timeout.function((unsigned long)ct); - return -NF_REPEAT; + return -NF_REPEAT; + } + return -NF_DROP; } /* Fall through */ case TCP_CONNTRACK_IGNORE: -- cgit v1.2.3 From 252815b0cfe711001eff0327872209986b36d490 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 9 Jul 2008 15:06:45 -0700 Subject: netfilter: nf_nat_snmp_basic: fix a range check in NAT for SNMP Fix a range check in netfilter IP NAT for SNMP to always use a big enough size variable that the compiler won't moan about comparing it to ULONG_MAX/8 on a 64-bit platform. Signed-off-by: David Howells Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_nat_snmp_basic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 7750c97fde7..ffeaffc3fff 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -439,8 +439,8 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, unsigned int *len) { unsigned long subid; - unsigned int size; unsigned long *optr; + size_t size; size = eoc - ctx->pointer + 1; -- cgit v1.2.3 From 3d8ea1fd7001f39b5cc0ad2ff51696292ea3cfbf Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Thu, 10 Jul 2008 16:51:32 -0700 Subject: tcp: correct kcalloc usage kcalloc is supposed to be called with the count as its first argument and the element size as the second. Signed-off-by: Milton Miller Signed-off-by: David S. 
Miller --- net/ipv4/tcp_probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 5ff0ce6e9d3..7ddc30f0744 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -224,7 +224,7 @@ static __init int tcpprobe_init(void) if (bufsize < 0) return -EINVAL; - tcp_probe.log = kcalloc(sizeof(struct tcp_log), bufsize, GFP_KERNEL); + tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL); if (!tcp_probe.log) goto err0; -- cgit v1.2.3 From 2e655571c618434c24ac2ca989374fdd84470d6d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 10 Jul 2008 16:52:52 -0700 Subject: ipv4: fib_trie: Fix lookup error return In commit a07f5f508a4d9728c8e57d7f66294bf5b254ff7f "[IPV4] fib_trie: style cleanup", the changes to check_leaf() and fn_trie_lookup() were wrong - where fn_trie_lookup() would previously return a negative error value from check_leaf(), it now returns 0. Now fn_trie_lookup() doesn't appear to care about plen, so we can revert check_leaf() to returning the error value. Signed-off-by: Ben Hutchings Tested-by: William Boughton Acked-by: Stephen Heminger Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 4b02d14e7ab..e1600ad8fb0 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1359,17 +1359,17 @@ static int check_leaf(struct trie *t, struct leaf *l, t->stats.semantic_match_miss++; #endif if (err <= 0) - return plen; + return err; } - return -1; + return 1; } static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) { struct trie *t = (struct trie *) tb->tb_data; - int plen, ret = 0; + int ret; struct node *n; struct tnode *pn; int pos, bits; @@ -1393,10 +1393,7 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, /* Just a leaf? 
*/ if (IS_LEAF(n)) { - plen = check_leaf(t, (struct leaf *)n, key, flp, res); - if (plen < 0) - goto failed; - ret = 0; + ret = check_leaf(t, (struct leaf *)n, key, flp, res); goto found; } @@ -1421,11 +1418,9 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, } if (IS_LEAF(n)) { - plen = check_leaf(t, (struct leaf *)n, key, flp, res); - if (plen < 0) + ret = check_leaf(t, (struct leaf *)n, key, flp, res); + if (ret > 0) goto backtrace; - - ret = 0; goto found; } -- cgit v1.2.3 From fe785bee05f08d37b34b7399d003b74199274ce4 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 10 Jul 2008 16:53:39 -0700 Subject: netlabel: netlink_unicast calls kfree_skb on error path by itself So, no need to kfree_skb here on the error path. In this case we can simply return. Signed-off-by: Denis V. Lunev Acked-by: Paul Moore Signed-off-by: David S. Miller --- net/netlabel/netlabel_cipso_v4.c | 7 +------ net/netlabel/netlabel_mgmt.c | 12 ++---------- net/netlabel/netlabel_unlabeled.c | 6 +----- 3 files changed, 4 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index fdc14a0d21a..9080c61b71a 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -584,12 +584,7 @@ list_start: rcu_read_unlock(); genlmsg_end(ans_skb, data); - - ret_val = genlmsg_reply(ans_skb, info); - if (ret_val != 0) - goto list_failure; - - return 0; + return genlmsg_reply(ans_skb, info); list_retry: /* XXX - this limit is a guesstimate */ diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 22c19126780..44be5d5261f 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -386,11 +386,7 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); genlmsg_end(ans_skb, data); - - ret_val = genlmsg_reply(ans_skb, info); - if (ret_val != 0) - goto listdef_failure; - return 0; + return 
genlmsg_reply(ans_skb, info); listdef_failure_lock: rcu_read_unlock(); @@ -501,11 +497,7 @@ static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info) goto version_failure; genlmsg_end(ans_skb, data); - - ret_val = genlmsg_reply(ans_skb, info); - if (ret_val != 0) - goto version_failure; - return 0; + return genlmsg_reply(ans_skb, info); version_failure: kfree_skb(ans_skb); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 52b2611a6eb..56f80872924 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1107,11 +1107,7 @@ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info) goto list_failure; genlmsg_end(ans_skb, data); - - ret_val = genlmsg_reply(ans_skb, info); - if (ret_val != 0) - goto list_failure; - return 0; + return genlmsg_reply(ans_skb, info); list_failure: kfree_skb(ans_skb); -- cgit v1.2.3 From 0ce28553cc018be5022f51e67c87997f7271534e Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 10 Jul 2008 16:54:50 -0700 Subject: ipv6: missed namespace context in ipv6_rthdr_rcv Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/ipv6/exthdrs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 3cd1c993d52..dcf94fdfb86 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -445,7 +445,7 @@ looped_back: kfree_skb(skb); return -1; } - if (!ipv6_chk_home_addr(&init_net, addr)) { + if (!ipv6_chk_home_addr(dev_net(skb->dst->dev), addr)) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); -- cgit v1.2.3 From ccf9b3b83d0e56fbf20c00a08b15031ce13204a7 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 10 Jul 2008 16:55:37 -0700 Subject: xfrm: Add a XFRM_STATE_AF_UNSPEC flag to xfrm_usersa_info Add a XFRM_STATE_AF_UNSPEC flag to handle the AF_UNSPEC behavior for the selector family. 
Userspace applications can set this flag to leave the selector family of the xfrm_state unspecified. This can be used to handle inter-family tunnels if the selector is not set from userspace. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b976d9ed10e..04c41504f84 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -277,9 +277,8 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); x->props.flags = p->flags; - if (!x->sel.family) + if (!x->sel.family && !(p->flags & XFRM_STATE_AF_UNSPEC)) x->sel.family = p->family; - } /* -- cgit v1.2.3 From c2b4afd2f99a187ec3bbd6e2def186fbfb755929 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 14 Jul 2008 09:59:29 +0200 Subject: [S390] Cleanup iucv printk messages. Cc: David S.
Miller Signed-off-by: Ursula Braun Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- net/iucv/af_iucv.c | 8 ++------ net/iucv/iucv.c | 9 ++------- 2 files changed, 4 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 7b0038f45b1..bda71015885 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1135,8 +1135,7 @@ static void iucv_callback_txdone(struct iucv_path *path, if (this) kfree_skb(this); } - if (!this) - printk(KERN_ERR "AF_IUCV msg tag %u not found\n", msg->tag); + BUG_ON(!this); if (sk->sk_state == IUCV_CLOSING) { if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) { @@ -1196,7 +1195,7 @@ static int __init afiucv_init(void) } cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err); if (unlikely(err)) { - printk(KERN_ERR "AF_IUCV needs the VM userid\n"); + WARN_ON(err); err = -EPROTONOSUPPORT; goto out; } @@ -1210,7 +1209,6 @@ static int __init afiucv_init(void) err = sock_register(&iucv_sock_family_ops); if (err) goto out_proto; - printk(KERN_INFO "AF_IUCV lowlevel driver initialized\n"); return 0; out_proto: @@ -1226,8 +1224,6 @@ static void __exit afiucv_exit(void) sock_unregister(PF_IUCV); proto_unregister(&iucv_proto); iucv_unregister(&af_iucv_handler, 0); - - printk(KERN_INFO "AF_IUCV lowlevel driver unloaded\n"); } module_init(afiucv_init); diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 91897076213..7f82b761621 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -1559,16 +1559,11 @@ static void iucv_external_interrupt(u16 code) p = iucv_irq_data[smp_processor_id()]; if (p->ippathid >= iucv_max_pathid) { - printk(KERN_WARNING "iucv_do_int: Got interrupt with " - "pathid %d > max_connections (%ld)\n", - p->ippathid, iucv_max_pathid - 1); + WARN_ON(p->ippathid >= iucv_max_pathid); iucv_sever_pathid(p->ippathid, iucv_error_no_listener); return; } - if (p->iptype < 0x01 || p->iptype > 0x09) { - printk(KERN_ERR "iucv_do_int: unknown iucv interrupt\n"); - return; - } 
+ BUG_ON(p->iptype < 0x01 || p->iptype > 0x09); work = kmalloc(sizeof(struct iucv_irq_list), GFP_ATOMIC); if (!work) { printk(KERN_WARNING "iucv_external_interrupt: out of memory\n"); -- cgit v1.2.3 From 166b88d755f925139af7f7b75aa0a1b896ca0670 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 14 Jul 2008 16:03:26 -0400 Subject: SUNRPC: Use correct XDR encoding procedure for rpcbind SET/UNSET The rpcbind versions 3 and 4 SET and UNSET procedures use the same arguments as the GETADDR procedure. While definitely a bug, this hasn't been a problem so far since the kernel hasn't used version 3 or 4 SET and UNSET. But this will change in just a moment. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 24e93e0a0a2..0021fad464e 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -426,6 +426,10 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) map->r_status = status; } +/* + * XDR functions for rpcbind + */ + static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, struct rpcbind_args *rpcb) { @@ -581,14 +585,14 @@ static struct rpc_procinfo rpcb_procedures2[] = { }; static struct rpc_procinfo rpcb_procedures3[] = { - PROC(SET, mapping, set), - PROC(UNSET, mapping, set), + PROC(SET, getaddr, set), + PROC(UNSET, getaddr, set), PROC(GETADDR, getaddr, getaddr), }; static struct rpc_procinfo rpcb_procedures4[] = { - PROC(SET, mapping, set), - PROC(UNSET, mapping, set), + PROC(SET, getaddr, set), + PROC(UNSET, getaddr, set), PROC(GETADDR, getaddr, getaddr), PROC(GETVERSADDR, getaddr, getaddr), }; -- cgit v1.2.3 From cc5598b78fd320dd6d1f90c14491e08029f3c4f6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 14 Jul 2008 16:03:27 -0400 Subject: SUNRPC: Introduce a specific rpcb_create for contacting localhost Add 
rpcb_create_local() for use by rpcb_register() and upcoming IPv6 registration functions. Ensure any errors encountered by rpcb_create_local() are properly reported. We can also use a statically allocated constant loopback socket address instead of one allocated on the stack and initialized every time the function is called. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 0021fad464e..35c1ded1fc4 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -116,6 +116,29 @@ static void rpcb_map_release(void *data) kfree(map); } +static const struct sockaddr_in rpcb_inaddr_loopback = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + .sin_port = htons(RPCBIND_PORT), +}; + +static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, + size_t addrlen, u32 version) +{ + struct rpc_create_args args = { + .protocol = XPRT_TRANSPORT_UDP, + .address = addr, + .addrsize = addrlen, + .servername = "localhost", + .program = &rpcb_program, + .version = version, + .authflavor = RPC_AUTH_UNIX, + .flags = RPC_CLNT_CREATE_NOPING, + }; + + return rpc_create(&args); +} + static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, size_t salen, int proto, u32 version, int privileged) @@ -161,10 +184,6 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, */ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) { - struct sockaddr_in sin = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_LOOPBACK), - }; struct rpcbind_args map = { .r_prog = prog, .r_vers = vers, @@ -184,14 +203,15 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) "rpcbind\n", (port ? 
"" : "un"), prog, vers, prot, port); - rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin, - sizeof(sin), XPRT_TRANSPORT_UDP, RPCBVERS_2, 1); - if (IS_ERR(rpcb_clnt)) - return PTR_ERR(rpcb_clnt); + rpcb_clnt = rpcb_create_local((struct sockaddr *)&rpcb_inaddr_loopback, + sizeof(rpcb_inaddr_loopback), + RPCBVERS_2); + if (!IS_ERR(rpcb_clnt)) { + error = rpc_call_sync(rpcb_clnt, &msg, 0); + rpc_shutdown_client(rpcb_clnt); + } else + error = PTR_ERR(rpcb_clnt); - error = rpc_call_sync(rpcb_clnt, &msg, 0); - - rpc_shutdown_client(rpcb_clnt); if (error < 0) printk(KERN_WARNING "RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); -- cgit v1.2.3 From 423d8b064771f5cd8b706a4839b18db9bb6c3c59 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 14 Jul 2008 16:03:28 -0400 Subject: SUNRPC: None of rpcb_create's callers wants a privileged source port Clean up: Callers that required a privileged source port now use rpcb_create_local(), so we can remove the @privileged argument from rpcb_create(). Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 35c1ded1fc4..691bd216f46 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -140,8 +140,7 @@ static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, } static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, - size_t salen, int proto, u32 version, - int privileged) + size_t salen, int proto, u32 version) { struct rpc_create_args args = { .protocol = proto, @@ -151,7 +150,8 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, .program = &rpcb_program, .version = version, .authflavor = RPC_AUTH_UNIX, - .flags = RPC_CLNT_CREATE_NOPING, + .flags = (RPC_CLNT_CREATE_NOPING | + RPC_CLNT_CREATE_NONPRIVPORT), }; switch (srvaddr->sa_family) { @@ -165,8 +165,6 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, return NULL; } - if (!privileged) - args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; return rpc_create(&args); } @@ -255,7 +253,7 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot) __func__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin, - sizeof(*sin), prot, RPCBVERS_2, 0); + sizeof(*sin), prot, RPCBVERS_2); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); @@ -365,7 +363,7 @@ void rpcb_getport_async(struct rpc_task *task) task->tk_pid, __func__, bind_version); rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot, - bind_version, 0); + bind_version); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", -- cgit v1.2.3 From babe80eb4994dfdc97d5be19a68b5af66d667585 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 14 Jul 2008 16:03:29 -0400 Subject: SUNRPC: Refactor rpcb_register to make rpcbindv4 
support easier rpcbind version 4 registration will reuse part of rpcb_register, so just split it out into a separate function now. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 48 ++++++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 691bd216f46..8b75c306e66 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -168,6 +168,30 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, return rpc_create(&args); } +static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, + u32 version, struct rpc_message *msg, + int *result) +{ + struct rpc_clnt *rpcb_clnt; + int error = 0; + + *result = 0; + + rpcb_clnt = rpcb_create_local(addr, addrlen, version); + if (!IS_ERR(rpcb_clnt)) { + error = rpc_call_sync(rpcb_clnt, msg, 0); + rpc_shutdown_client(rpcb_clnt); + } else + error = PTR_ERR(rpcb_clnt); + + if (error < 0) + printk(KERN_WARNING "RPC: failed to contact local rpcbind " + "server (errno %d).\n", -error); + dprintk("RPC: registration status %d/%d\n", error, *result); + + return error; +} + /** * rpcb_register - set or unset a port registration with the local rpcbind svc * @prog: RPC program number to bind @@ -189,33 +213,21 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) .r_port = port, }; struct rpc_message msg = { - .rpc_proc = &rpcb_procedures2[port ? - RPCBPROC_SET : RPCBPROC_UNSET], .rpc_argp = &map, .rpc_resp = okay, }; - struct rpc_clnt *rpcb_clnt; - int error = 0; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? 
"" : "un"), prog, vers, prot, port); - rpcb_clnt = rpcb_create_local((struct sockaddr *)&rpcb_inaddr_loopback, - sizeof(rpcb_inaddr_loopback), - RPCBVERS_2); - if (!IS_ERR(rpcb_clnt)) { - error = rpc_call_sync(rpcb_clnt, &msg, 0); - rpc_shutdown_client(rpcb_clnt); - } else - error = PTR_ERR(rpcb_clnt); + msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET]; + if (port) + msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; - if (error < 0) - printk(KERN_WARNING "RPC: failed to contact local rpcbind " - "server (errno %d).\n", -error); - dprintk("RPC: registration status %d/%d\n", error, *okay); - - return error; + return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, + sizeof(rpcb_inaddr_loopback), + RPCBVERS_2, &msg, okay); } /** -- cgit v1.2.3 From c2e1b09ff237c0a3687b9a804cc8bf489743cffc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 14 Jul 2008 16:03:30 -0400 Subject: SUNRPC: Support registering IPv6 interfaces with local rpcbind daemon Introduce a new API to register RPC services on IPv6 interfaces to allow the NFS server and lockd to advertise on IPv6 networks. Unlike rpcb_register(), the new rpcb_v4_register() function uses rpcbind protocol version 4 to contact the local rpcbind daemon. The version 4 SET/UNSET procedures allow services to register address families besides AF_INET, register at specific network interfaces, and register transport protocols besides UDP and TCP. All of this functionality is exposed via the new rpcb_v4_register() kernel API. A user-space rpcbind daemon implementation that supports version 4 of the rpcbind protocol is required in order to make use of this new API. Note that rpcbind version 3 is sufficient to support the new rpcbind facilities listed above, but most extant implementations use version 4. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 178 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 174 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 8b75c306e66..24db2b4d12d 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -87,6 +87,7 @@ struct rpcbind_args { static struct rpc_procinfo rpcb_procedures2[]; static struct rpc_procinfo rpcb_procedures3[]; +static struct rpc_procinfo rpcb_procedures4[]; struct rpcb_info { u32 rpc_vers; @@ -122,6 +123,12 @@ static const struct sockaddr_in rpcb_inaddr_loopback = { .sin_port = htons(RPCBIND_PORT), }; +static const struct sockaddr_in6 rpcb_in6addr_loopback = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_LOOPBACK_INIT, + .sin6_port = htons(RPCBIND_PORT), +}; + static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, size_t addrlen, u32 version) { @@ -196,13 +203,38 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, * rpcb_register - set or unset a port registration with the local rpcbind svc * @prog: RPC program number to bind * @vers: RPC version number to bind - * @prot: transport protocol to use to make this request + * @prot: transport protocol to register * @port: port value to register - * @okay: result code + * @okay: OUT: result code + * + * RPC services invoke this function to advertise their contact + * information via the system's rpcbind daemon. RPC services + * invoke this function once for each [program, version, transport] + * tuple they wish to advertise. + * + * Callers may also unregister RPC services that are no longer + * available by setting the passed-in port to zero. This removes + * all registered transports for [program, version] from the local + * rpcbind database. + * + * Returns zero if the registration request was dispatched + * successfully and a reply was received. The rpcbind daemon's + * boolean result code is stored in *okay. 
* - * port == 0 means unregister, port != 0 means register. + * Returns an errno value and sets *okay to zero if there was + * some problem that prevented the rpcbind request from being + * dispatched, or if the rpcbind daemon did not respond within + * the timeout. * - * This routine supports only rpcbind version 2. + * This function uses rpcbind protocol version 2 to contact the + * local rpcbind daemon. + * + * Registration works over both AF_INET and AF_INET6, and services + * registered via this function are advertised as available for any + * address. If the local rpcbind daemon is listening on AF_INET6, + * services registered via this function will be advertised on + * IN6ADDR_ANY (i.e. available for all AF_INET and AF_INET6 + * addresses). */ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) { @@ -230,6 +262,144 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) RPCBVERS_2, &msg, okay); } +/* + * Fill in AF_INET family-specific arguments to register + */ +static int rpcb_register_netid4(struct sockaddr_in *address_to_register, + struct rpc_message *msg) +{ + struct rpcbind_args *map = msg->rpc_argp; + unsigned short port = ntohs(address_to_register->sin_port); + char buf[32]; + + /* Construct AF_INET universal address */ + snprintf(buf, sizeof(buf), + NIPQUAD_FMT".%u.%u", + NIPQUAD(address_to_register->sin_addr.s_addr), + port >> 8, port & 0xff); + map->r_addr = buf; + + dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " + "local rpcbind\n", (port ? 
"" : "un"), + map->r_prog, map->r_vers, + map->r_addr, map->r_netid); + + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; + if (port) + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + + return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, + sizeof(rpcb_inaddr_loopback), + RPCBVERS_4, msg, msg->rpc_resp); +} + +/* + * Fill in AF_INET6 family-specific arguments to register + */ +static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, + struct rpc_message *msg) +{ + struct rpcbind_args *map = msg->rpc_argp; + unsigned short port = ntohs(address_to_register->sin6_port); + char buf[64]; + + /* Construct AF_INET6 universal address */ + snprintf(buf, sizeof(buf), + NIP6_FMT".%u.%u", + NIP6(address_to_register->sin6_addr), + port >> 8, port & 0xff); + map->r_addr = buf; + + dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " + "local rpcbind\n", (port ? "" : "un"), + map->r_prog, map->r_vers, + map->r_addr, map->r_netid); + + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; + if (port) + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + + return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, + sizeof(rpcb_in6addr_loopback), + RPCBVERS_4, msg, msg->rpc_resp); +} + +/** + * rpcb_v4_register - set or unset a port registration with the local rpcbind + * @program: RPC program number of service to (un)register + * @version: RPC version number of service to (un)register + * @address: address family, IP address, and port to (un)register + * @netid: netid of transport protocol to (un)register + * @result: result code from rpcbind RPC call + * + * RPC services invoke this function to advertise their contact + * information via the system's rpcbind daemon. RPC services + * invoke this function once for each [program, version, address, + * netid] tuple they wish to advertise. + * + * Callers may also unregister RPC services that are no longer + * available by setting the port number in the passed-in address + * to zero. 
Callers pass a netid of "" to unregister all + * transport netids associated with [program, version, address]. + * + * Returns zero if the registration request was dispatched + * successfully and a reply was received. The rpcbind daemon's + * result code is stored in *result. + * + * Returns an errno value and sets *result to zero if there was + * some problem that prevented the rpcbind request from being + * dispatched, or if the rpcbind daemon did not respond within + * the timeout. + * + * This function uses rpcbind protocol version 4 to contact the + * local rpcbind daemon. The local rpcbind daemon must support + * version 4 of the rpcbind protocol in order for these functions + * to register a service successfully. + * + * Supported netids include "udp" and "tcp" for UDP and TCP over + * IPv4, and "udp6" and "tcp6" for UDP and TCP over IPv6, + * respectively. + * + * The contents of @address determine the address family and the + * port to be registered. The usual practice is to pass INADDR_ANY + * as the raw address, but specifying a non-zero address is also + * supported by this API if the caller wishes to advertise an RPC + * service on a specific network interface. + * + * Note that passing in INADDR_ANY does not create the same service + * registration as IN6ADDR_ANY. The former advertises an RPC + * service on any IPv4 address, but not on IPv6. The latter + * advertises the service on all IPv4 and IPv6 addresses. 
+ */ +int rpcb_v4_register(const u32 program, const u32 version, + const struct sockaddr *address, const char *netid, + int *result) +{ + struct rpcbind_args map = { + .r_prog = program, + .r_vers = version, + .r_netid = netid, + .r_owner = RPCB_OWNER_STRING, + }; + struct rpc_message msg = { + .rpc_argp = &map, + .rpc_resp = result, + }; + + *result = 0; + + switch (address->sa_family) { + case AF_INET: + return rpcb_register_netid4((struct sockaddr_in *)address, + &msg); + case AF_INET6: + return rpcb_register_netid6((struct sockaddr_in6 *)address, + &msg); + } + + return -EAFNOSUPPORT; +} + /** * rpcb_getport_sync - obtain the port for an RPC service on a given host * @sin: address of remote peer -- cgit v1.2.3 From a86dc496b764ebb1431677b38eab45310e5a2ad4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 13:37:09 -0400 Subject: SUNRPC: Remove the BKL from the callback functions Push it into those callback functions that actually need it. Note that all the NFS operations use their own locking, so don't need the BKL. Ditto for the rpcbind client. 
Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6288af05c20..385f427beda 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -576,9 +576,7 @@ EXPORT_SYMBOL_GPL(rpc_delay); */ static void rpc_prepare_task(struct rpc_task *task) { - lock_kernel(); task->tk_ops->rpc_call_prepare(task, task->tk_calldata); - unlock_kernel(); } /* @@ -588,9 +586,7 @@ void rpc_exit_task(struct rpc_task *task) { task->tk_action = NULL; if (task->tk_ops->rpc_call_done != NULL) { - lock_kernel(); task->tk_ops->rpc_call_done(task, task->tk_calldata); - unlock_kernel(); if (task->tk_action != NULL) { WARN_ON(RPC_ASSASSINATED(task)); /* Always release the RPC slot and buffer memory */ @@ -602,11 +598,8 @@ EXPORT_SYMBOL_GPL(rpc_exit_task); void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) { - if (ops->rpc_release != NULL) { - lock_kernel(); + if (ops->rpc_release != NULL) ops->rpc_release(calldata); - unlock_kernel(); - } } /* -- cgit v1.2.3