From e797aa1b7da6bfcb2e19a10ae5ead9aa7aea732b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 15 Oct 2007 16:50:54 +0200
Subject: [PATCH] ieee80211: fix TKIP QoS bug

The commit 65b6a277 titled "ieee80211: Fix header->qos_ctl endian issue"
*introduced* an endianness bug. Partially revert it.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/ieee80211/ieee80211_crypt_tkip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
index 6cc54eeca3e..72e6ab66834 100644
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -586,7 +586,7 @@ static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr)
 	if (stype & IEEE80211_STYPE_QOS_DATA) {
 		const struct ieee80211_hdr_3addrqos *qoshdr =
 			(struct ieee80211_hdr_3addrqos *)skb->data;
-		hdr[12] = qoshdr->qos_ctl & cpu_to_le16(IEEE80211_QCTL_TID);
+		hdr[12] = le16_to_cpu(qoshdr->qos_ctl) & IEEE80211_QCTL_TID;
 	} else
 		hdr[12] = 0;		/* priority */
 
-- 
cgit v1.2.3


From 58a9ac17ed7a78958d03f3b4af107f0ef075cbed Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 12 Oct 2007 21:24:07 +0200
Subject: [PATCH] mac80211: fix set_channel regression

Adam Baker reported that the prism2 ioctl removal changed behaviour
in that now the selection order was the other way around as before.
New API is planned but not done yet, so for now just use the first
matching channel in any mode as was previous behaviour with an unset
next_mode.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Michael Wu <flamingice@sourmilk.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_ioctl.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c
index f0224c2311d..6b4b0d5880d 100644
--- a/net/mac80211/ieee80211_ioctl.c
+++ b/net/mac80211/ieee80211_ioctl.c
@@ -306,9 +306,12 @@ int ieee80211_set_channel(struct ieee80211_local *local, int channel, int freq)
 			    ((chan->chan == channel) || (chan->freq == freq))) {
 				local->oper_channel = chan;
 				local->oper_hw_mode = mode;
-				set++;
+				set = 1;
+				break;
 			}
 		}
+		if (set)
+			break;
 	}
 
 	if (set) {
-- 
cgit v1.2.3


From 1dd84aa213d0f98a91a1ec9be2f750f5f48e75a0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 10 Oct 2007 12:03:41 +0200
Subject: [PATCH] mac80211: reorder association debug output

There's no reason to warn about an invalid AID field when the
association was denied.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Michael Wu <flamingice@sourmilk.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_sta.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index 1641e8fe44b..8dae26bbfdf 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -1203,15 +1203,11 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
 	capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);
 	status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code);
 	aid = le16_to_cpu(mgmt->u.assoc_resp.aid);
-	if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14)))
-		printk(KERN_DEBUG "%s: invalid aid value %d; bits 15:14 not "
-		       "set\n", dev->name, aid);
-	aid &= ~(BIT(15) | BIT(14));
 
 	printk(KERN_DEBUG "%s: RX %sssocResp from %s (capab=0x%x "
 	       "status=%d aid=%d)\n",
 	       dev->name, reassoc ? "Rea" : "A", print_mac(mac, mgmt->sa),
-	       capab_info, status_code, aid);
+	       capab_info, status_code, aid & ~(BIT(15) | BIT(14)));
 
 	if (status_code != WLAN_STATUS_SUCCESS) {
 		printk(KERN_DEBUG "%s: AP denied association (code=%d)\n",
@@ -1223,6 +1219,11 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
 		return;
 	}
 
+	if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14)))
+		printk(KERN_DEBUG "%s: invalid aid value %d; bits 15:14 not "
+		       "set\n", dev->name, aid);
+	aid &= ~(BIT(15) | BIT(14));
+
 	pos = mgmt->u.assoc_resp.variable;
 	if (ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems)
 	    == ParseFailed) {
-- 
cgit v1.2.3


From 65c107ab3befc37b21d1c970a6159525bc0121b8 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Fri, 5 Oct 2007 14:23:27 -0400
Subject: [PATCH] mac80211: store channel info in sta_bss_list

Some AP equipment "in the wild" uses the same BSSID on multiple channels
(particularly "a" vs. "b/g").  This patch changes the key of sta_bss_list
to include both the BSSID and the channel so as to prevent a BSSID on
one channel from eclipsing the same BSSID on another channel.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_sta.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index 8dae26bbfdf..c1cc79e9f1f 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -61,7 +61,7 @@
 static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst,
 				     u8 *ssid, size_t ssid_len);
 static struct ieee80211_sta_bss *
-ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid);
+ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int channel);
 static void ieee80211_rx_bss_put(struct net_device *dev,
 				 struct ieee80211_sta_bss *bss);
 static int ieee80211_sta_find_ibss(struct net_device *dev,
@@ -427,7 +427,8 @@ static void ieee80211_set_associated(struct net_device *dev,
 		if (sdata->type != IEEE80211_IF_TYPE_STA)
 			return;
 
-		bss = ieee80211_rx_bss_get(dev, ifsta->bssid);
+		bss = ieee80211_rx_bss_get(dev, ifsta->bssid,
+					   local->hw.conf.channel);
 		if (bss) {
 			if (bss->has_erp_value)
 				ieee80211_handle_erp_ie(dev, bss->erp_value);
@@ -574,7 +575,7 @@ static void ieee80211_send_assoc(struct net_device *dev,
 		capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME |
 			WLAN_CAPABILITY_SHORT_PREAMBLE;
 	}
-	bss = ieee80211_rx_bss_get(dev, ifsta->bssid);
+	bss = ieee80211_rx_bss_get(dev, ifsta->bssid, local->hw.conf.channel);
 	if (bss) {
 		if (bss->capability & WLAN_CAPABILITY_PRIVACY)
 			capab |= WLAN_CAPABILITY_PRIVACY;
@@ -722,6 +723,7 @@ static void ieee80211_send_disassoc(struct net_device *dev,
 static int ieee80211_privacy_mismatch(struct net_device *dev,
 				      struct ieee80211_if_sta *ifsta)
 {
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_sta_bss *bss;
 	int res = 0;
 
@@ -729,7 +731,7 @@ static int ieee80211_privacy_mismatch(struct net_device *dev,
 	    ifsta->key_management_enabled)
 		return 0;
 
-	bss = ieee80211_rx_bss_get(dev, ifsta->bssid);
+	bss = ieee80211_rx_bss_get(dev, ifsta->bssid, local->hw.conf.channel);
 	if (!bss)
 		return 0;
 
@@ -1242,7 +1244,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
 	 * update our stored copy */
 	if (elems.erp_info && elems.erp_info_len >= 1) {
 		struct ieee80211_sta_bss *bss
-			= ieee80211_rx_bss_get(dev, ifsta->bssid);
+			= ieee80211_rx_bss_get(dev, ifsta->bssid,
+					       local->hw.conf.channel);
 		if (bss) {
 			bss->erp_value = elems.erp_info[0];
 			bss->has_erp_value = 1;
@@ -1272,7 +1275,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
 			       " AP\n", dev->name);
 			return;
 		}
-		bss = ieee80211_rx_bss_get(dev, ifsta->bssid);
+		bss = ieee80211_rx_bss_get(dev, ifsta->bssid,
+					   local->hw.conf.channel);
 		if (bss) {
 			sta->last_rssi = bss->rssi;
 			sta->last_signal = bss->signal;
@@ -1348,7 +1352,7 @@ static void __ieee80211_rx_bss_hash_del(struct net_device *dev,
 
 
 static struct ieee80211_sta_bss *
-ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid)
+ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid, int channel)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_sta_bss *bss;
@@ -1359,6 +1363,7 @@ ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid)
 	atomic_inc(&bss->users);
 	atomic_inc(&bss->users);
 	memcpy(bss->bssid, bssid, ETH_ALEN);
+	bss->channel = channel;
 
 	spin_lock_bh(&local->sta_bss_lock);
 	/* TODO: order by RSSI? */
@@ -1370,7 +1375,7 @@ ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid)
 
 
 static struct ieee80211_sta_bss *
-ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid)
+ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int channel)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_sta_bss *bss;
@@ -1378,7 +1383,8 @@ ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid)
 	spin_lock_bh(&local->sta_bss_lock);
 	bss = local->sta_bss_hash[STA_HASH(bssid)];
 	while (bss) {
-		if (memcmp(bss->bssid, bssid, ETH_ALEN) == 0) {
+		if (memcmp(bss->bssid, bssid, ETH_ALEN) == 0 &&
+		    bss->channel == channel) {
 			atomic_inc(&bss->users);
 			break;
 		}
@@ -1546,9 +1552,9 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 	else
 		channel = rx_status->channel;
 
-	bss = ieee80211_rx_bss_get(dev, mgmt->bssid);
+	bss = ieee80211_rx_bss_get(dev, mgmt->bssid, channel);
 	if (!bss) {
-		bss = ieee80211_rx_bss_add(dev, mgmt->bssid);
+		bss = ieee80211_rx_bss_add(dev, mgmt->bssid, channel);
 		if (!bss)
 			return;
 	} else {
@@ -1648,7 +1654,6 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 
 
 	bss->hw_mode = rx_status->phymode;
-	bss->channel = channel;
 	bss->freq = rx_status->freq;
 	if (channel != rx_status->channel &&
 	    (bss->hw_mode == MODE_IEEE80211G ||
@@ -2399,7 +2404,7 @@ static int ieee80211_sta_create_ibss(struct net_device *dev,
 	printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %s\n",
 	       dev->name, print_mac(mac, bssid));
 
-	bss = ieee80211_rx_bss_add(dev, bssid);
+	bss = ieee80211_rx_bss_add(dev, bssid, local->hw.conf.channel);
 	if (!bss)
 		return -ENOMEM;
 
@@ -2410,7 +2415,6 @@ static int ieee80211_sta_create_ibss(struct net_device *dev,
 		local->hw.conf.beacon_int = 100;
 	bss->beacon_int = local->hw.conf.beacon_int;
 	bss->hw_mode = local->hw.conf.phymode;
-	bss->channel = local->hw.conf.channel;
 	bss->freq = local->hw.conf.freq;
 	bss->last_update = jiffies;
 	bss->capability = WLAN_CAPABILITY_IBSS;
@@ -2470,7 +2474,7 @@ static int ieee80211_sta_find_ibss(struct net_device *dev,
 	       "%s\n", print_mac(mac, bssid), print_mac(mac2, ifsta->bssid));
 #endif /* CONFIG_MAC80211_IBSS_DEBUG */
 	if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0 &&
-	    (bss = ieee80211_rx_bss_get(dev, bssid))) {
+	    (bss = ieee80211_rx_bss_get(dev, bssid, local->hw.conf.channel))) {
 		printk(KERN_DEBUG "%s: Selected IBSS BSSID %s"
 		       " based on configured SSID\n",
 		       dev->name, print_mac(mac, bssid));
-- 
cgit v1.2.3


From cffdd30d20d163343b1c6de25bcb0cc978a1ebf9 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Fri, 5 Oct 2007 14:23:27 -0400
Subject: [PATCH] mac80211: store SSID in sta_bss_list

Some AP equipment "in the wild" services multiple SSIDs using the
same BSSID.  This patch changes the key of sta_bss_list to include
the SSID as well as the BSSID and the channel so as to prevent one
SSID from eclipsing another SSID with the same BSSID.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_sta.c | 54 +++++++++++++++++++++++++++-----------------
 1 file changed, 33 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index c1cc79e9f1f..db81aef6177 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -12,7 +12,6 @@
  */
 
 /* TODO:
- * BSS table: use <BSSID,SSID> as the key to support multi-SSID APs
  * order BSS list by RSSI(?) ("quality of AP")
  * scan result table filtering (by capability (privacy, IBSS/BSS, WPA/RSN IE,
  *    SSID)
@@ -61,7 +60,8 @@
 static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst,
 				     u8 *ssid, size_t ssid_len);
 static struct ieee80211_sta_bss *
-ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int channel);
+ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int channel,
+		     u8 *ssid, u8 ssid_len);
 static void ieee80211_rx_bss_put(struct net_device *dev,
 				 struct ieee80211_sta_bss *bss);
 static int ieee80211_sta_find_ibss(struct net_device *dev,
@@ -428,7 +428,8 @@ static void ieee80211_set_associated(struct net_device *dev,
 			return;
 
 		bss = ieee80211_rx_bss_get(dev, ifsta->bssid,
-					   local->hw.conf.channel);
+					   local->hw.conf.channel,
+					   ifsta->ssid, ifsta->ssid_len);
 		if (bss) {
 			if (bss->has_erp_value)
 				ieee80211_handle_erp_ie(dev, bss->erp_value);
@@ -575,7 +576,8 @@ static void ieee80211_send_assoc(struct net_device *dev,
 		capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME |
 			WLAN_CAPABILITY_SHORT_PREAMBLE;
 	}
-	bss = ieee80211_rx_bss_get(dev, ifsta->bssid, local->hw.conf.channel);
+	bss = ieee80211_rx_bss_get(dev, ifsta->bssid, local->hw.conf.channel,
+				   ifsta->ssid, ifsta->ssid_len);
 	if (bss) {
 		if (bss->capability & WLAN_CAPABILITY_PRIVACY)
 			capab |= WLAN_CAPABILITY_PRIVACY;
@@ -731,7 +733,8 @@ static int ieee80211_privacy_mismatch(struct net_device *dev,
 	    ifsta->key_management_enabled)
 		return 0;
 
-	bss = ieee80211_rx_bss_get(dev, ifsta->bssid, local->hw.conf.channel);
+	bss = ieee80211_rx_bss_get(dev, ifsta->bssid, local->hw.conf.channel,
+				   ifsta->ssid, ifsta->ssid_len);
 	if (!bss)
 		return 0;
 
@@ -1245,7 +1248,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
 	if (elems.erp_info && elems.erp_info_len >= 1) {
 		struct ieee80211_sta_bss *bss
 			= ieee80211_rx_bss_get(dev, ifsta->bssid,
-					       local->hw.conf.channel);
+					       local->hw.conf.channel,
+					       ifsta->ssid, ifsta->ssid_len);
 		if (bss) {
 			bss->erp_value = elems.erp_info[0];
 			bss->has_erp_value = 1;
@@ -1276,7 +1280,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
 			return;
 		}
 		bss = ieee80211_rx_bss_get(dev, ifsta->bssid,
-					   local->hw.conf.channel);
+					   local->hw.conf.channel,
+					   ifsta->ssid, ifsta->ssid_len);
 		if (bss) {
 			sta->last_rssi = bss->rssi;
 			sta->last_signal = bss->signal;
@@ -1352,7 +1357,8 @@ static void __ieee80211_rx_bss_hash_del(struct net_device *dev,
 
 
 static struct ieee80211_sta_bss *
-ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid, int channel)
+ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid, int channel,
+		     u8 *ssid, u8 ssid_len)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_sta_bss *bss;
@@ -1364,6 +1370,10 @@ ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid, int channel)
 	atomic_inc(&bss->users);
 	memcpy(bss->bssid, bssid, ETH_ALEN);
 	bss->channel = channel;
+	if (ssid && ssid_len <= IEEE80211_MAX_SSID_LEN) {
+		memcpy(bss->ssid, ssid, ssid_len);
+		bss->ssid_len = ssid_len;
+	}
 
 	spin_lock_bh(&local->sta_bss_lock);
 	/* TODO: order by RSSI? */
@@ -1375,7 +1385,8 @@ ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid, int channel)
 
 
 static struct ieee80211_sta_bss *
-ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int channel)
+ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int channel,
+		     u8 *ssid, u8 ssid_len)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_sta_bss *bss;
@@ -1383,8 +1394,10 @@ ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int channel)
 	spin_lock_bh(&local->sta_bss_lock);
 	bss = local->sta_bss_hash[STA_HASH(bssid)];
 	while (bss) {
-		if (memcmp(bss->bssid, bssid, ETH_ALEN) == 0 &&
-		    bss->channel == channel) {
+		if (!memcmp(bss->bssid, bssid, ETH_ALEN) &&
+		    bss->channel == channel &&
+		    bss->ssid_len == ssid_len &&
+		    (ssid_len == 0 || !memcmp(bss->ssid, ssid, ssid_len))) {
 			atomic_inc(&bss->users);
 			break;
 		}
@@ -1552,9 +1565,11 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 	else
 		channel = rx_status->channel;
 
-	bss = ieee80211_rx_bss_get(dev, mgmt->bssid, channel);
+	bss = ieee80211_rx_bss_get(dev, mgmt->bssid, channel,
+				   elems.ssid, elems.ssid_len);
 	if (!bss) {
-		bss = ieee80211_rx_bss_add(dev, mgmt->bssid, channel);
+		bss = ieee80211_rx_bss_add(dev, mgmt->bssid, channel,
+					   elems.ssid, elems.ssid_len);
 		if (!bss)
 			return;
 	} else {
@@ -1580,10 +1595,6 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 
 	bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int);
 	bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info);
-	if (elems.ssid && elems.ssid_len <= IEEE80211_MAX_SSID_LEN) {
-		memcpy(bss->ssid, elems.ssid, elems.ssid_len);
-		bss->ssid_len = elems.ssid_len;
-	}
 
 	bss->supp_rates_len = 0;
 	if (elems.supp_rates) {
@@ -2381,7 +2392,7 @@ static int ieee80211_sta_create_ibss(struct net_device *dev,
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_sta_bss *bss;
-	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_hw_mode *mode;
 	u8 bssid[ETH_ALEN], *pos;
 	int i;
@@ -2404,11 +2415,11 @@ static int ieee80211_sta_create_ibss(struct net_device *dev,
 	printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %s\n",
 	       dev->name, print_mac(mac, bssid));
 
-	bss = ieee80211_rx_bss_add(dev, bssid, local->hw.conf.channel);
+	bss = ieee80211_rx_bss_add(dev, bssid, local->hw.conf.channel,
+				   sdata->u.sta.ssid, sdata->u.sta.ssid_len);
 	if (!bss)
 		return -ENOMEM;
 
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	mode = local->oper_hw_mode;
 
 	if (local->hw.conf.beacon_int == 0)
@@ -2474,7 +2485,8 @@ static int ieee80211_sta_find_ibss(struct net_device *dev,
 	       "%s\n", print_mac(mac, bssid), print_mac(mac2, ifsta->bssid));
 #endif /* CONFIG_MAC80211_IBSS_DEBUG */
 	if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0 &&
-	    (bss = ieee80211_rx_bss_get(dev, bssid, local->hw.conf.channel))) {
+	    (bss = ieee80211_rx_bss_get(dev, bssid, local->hw.conf.channel,
+					ifsta->ssid, ifsta->ssid_len))) {
 		printk(KERN_DEBUG "%s: Selected IBSS BSSID %s"
 		       " based on configured SSID\n",
 		       dev->name, print_mac(mac, bssid));
-- 
cgit v1.2.3


From 107acb23ba763197d390ae9ffd347f3e2a524d39 Mon Sep 17 00:00:00 2001
From: Bill Moss <bmoss@clemson.edu>
Date: Wed, 10 Oct 2007 16:23:55 -0400
Subject: [PATCH] mac80211: honor IW_SCAN_THIS_ESSID in siwscan ioctl

This patch fixes the problem of associating with wpa_secured hidden
AP.  Please try out.

The original author of this patch is Bill Moss <bmoss@clemson.edu>

Signed-off-by: Abhijeet Kolekar <abhijeet.kolekar@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_ioctl.c | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c
index 6b4b0d5880d..a57fed77db2 100644
--- a/net/mac80211/ieee80211_ioctl.c
+++ b/net/mac80211/ieee80211_ioctl.c
@@ -511,32 +511,40 @@ static int ieee80211_ioctl_giwap(struct net_device *dev,
 
 static int ieee80211_ioctl_siwscan(struct net_device *dev,
 				   struct iw_request_info *info,
-				   struct iw_point *data, char *extra)
+				   union iwreq_data *wrqu, char *extra)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct iw_scan_req *req = NULL;
 	u8 *ssid = NULL;
 	size_t ssid_len = 0;
 
 	if (!netif_running(dev))
 		return -ENETDOWN;
 
-	switch (sdata->type) {
-	case IEEE80211_IF_TYPE_STA:
-	case IEEE80211_IF_TYPE_IBSS:
-		if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
-			ssid = sdata->u.sta.ssid;
-			ssid_len = sdata->u.sta.ssid_len;
-		}
-		break;
-	case IEEE80211_IF_TYPE_AP:
-		if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
-			ssid = sdata->u.ap.ssid;
-			ssid_len = sdata->u.ap.ssid_len;
+	if (wrqu->data.length == sizeof(struct iw_scan_req) &&
+	    wrqu->data.flags & IW_SCAN_THIS_ESSID) {
+		req = (struct iw_scan_req *)extra;
+		ssid = req->essid;
+		ssid_len = req->essid_len;
+	} else {
+		switch (sdata->type) {
+		case IEEE80211_IF_TYPE_STA:
+		case IEEE80211_IF_TYPE_IBSS:
+			if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
+				ssid = sdata->u.sta.ssid;
+				ssid_len = sdata->u.sta.ssid_len;
+			}
+			break;
+		case IEEE80211_IF_TYPE_AP:
+			if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
+				ssid = sdata->u.ap.ssid;
+				ssid_len = sdata->u.ap.ssid_len;
+			}
+			break;
+		default:
+			return -EOPNOTSUPP;
 		}
-		break;
-	default:
-		return -EOPNOTSUPP;
 	}
 
 	return ieee80211_sta_req_scan(dev, ssid, ssid_len);
-- 
cgit v1.2.3


From dc8a82ad285dcd2831feb2fd8f7b41ce1f82e243 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 17 Oct 2007 19:30:40 -0700
Subject: [IPV6]: Fix memory leak in cleanup_ipv6_mibs()

The icmpv6msg mib statistics is not freed.

This is almost not critical for current kernel, since ipv6
module is unloadable, but this can happen on load error and
will happen every time we stop the network namespace (when
we have one, of course).

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: David L Stevens <dlstevens@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index bc929381fa4..1b1caf3aa1c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -747,6 +747,7 @@ static void cleanup_ipv6_mibs(void)
 {
 	snmp_mib_free((void **)ipv6_statistics);
 	snmp_mib_free((void **)icmpv6_statistics);
+	snmp_mib_free((void **)icmpv6msg_statistics);
 	snmp_mib_free((void **)udp_stats_in6);
 	snmp_mib_free((void **)udplite_stats_in6);
 }
-- 
cgit v1.2.3


From bd5435e76a226b7151ae5aaee2cd366ab003dd2e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 17 Oct 2007 19:33:06 -0700
Subject: [DCCP]: fix link error with !CONFIG_SYSCTL

Do not define the sysctl_dccp_sync_ratelimit sysctl variable in the
CONFIG_SYSCTL dependent sysctl.c module - move it to input.c instead.

This fixes the following build bug:

 net/built-in.o: In function `dccp_check_seqno':
 input.c:(.text+0xbd859): undefined reference to `sysctl_dccp_sync_ratelimit'
 distcc[29953] ERROR: compile (null) on localhost failed
 make: *** [vmlinux] Error 1

Found via 'make randconfig' build testing.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/input.c  | 3 +++
 net/dccp/sysctl.c | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/dccp/input.c b/net/dccp/input.c
index 19d7e1dbd87..3560a2a875a 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -19,6 +19,9 @@
 #include "ccid.h"
 #include "dccp.h"
 
+/* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */
+int sysctl_dccp_sync_ratelimit	__read_mostly = HZ / 8;
+
 static void dccp_fin(struct sock *sk, struct sk_buff *skb)
 {
 	sk->sk_shutdown |= RCV_SHUTDOWN;
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 9364b2fb4db..c62c05039f6 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -18,9 +18,6 @@
 #error This file should not be compiled without CONFIG_SYSCTL defined
 #endif
 
-/* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */
-int sysctl_dccp_sync_ratelimit	__read_mostly = HZ / 8;
-
 static struct ctl_table dccp_default_table[] = {
 	{
 		.procname	= "seq_window",
-- 
cgit v1.2.3


From c310f099be953a20b5fabed341ad085f444cf2bd Mon Sep 17 00:00:00 2001
From: Ryan Reading <ryanr23@gmail.com>
Date: Wed, 17 Oct 2007 19:34:11 -0700
Subject: [IRDA]: IrCOMM discovery indication simplification

From: Ryan Reading <ryanr23@gmail.com>

Every IrCOMM socket is registered with the discovery subsystem, so we don't
need to loop over all of them for every discovery event. We just need to
do it for the registered IrCOMM socket.

Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/ircomm/ircomm_tty_attach.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c
index 824309dabfe..b5a13882c92 100644
--- a/net/irda/ircomm/ircomm_tty_attach.c
+++ b/net/irda/ircomm/ircomm_tty_attach.c
@@ -381,18 +381,9 @@ static void ircomm_tty_discovery_indication(discinfo_t *discovery,
 	info.daddr = discovery->daddr;
 	info.saddr = discovery->saddr;
 
-	/* FIXME. We have a locking problem on the hashbin here.
-	 * We probably need to use hashbin_find_next(), but we first
-	 * need to ensure that "line" is unique. - Jean II */
-	self = (struct ircomm_tty_cb *) hashbin_get_first(ircomm_tty);
-	while (self != NULL) {
-		IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-		ircomm_tty_do_event(self, IRCOMM_TTY_DISCOVERY_INDICATION,
-				    NULL, &info);
-
-		self = (struct ircomm_tty_cb *) hashbin_get_next(ircomm_tty);
-	}
+	self = (struct ircomm_tty_cb *) priv;
+	ircomm_tty_do_event(self, IRCOMM_TTY_DISCOVERY_INDICATION,
+			    NULL, &info);
 }
 
 /*
-- 
cgit v1.2.3


From be07664599fa94d0b85c3e0f525aee2432d15fbf Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Wed, 17 Oct 2007 19:39:22 -0700
Subject: [BR2684]: get rid of broken header code.

Recent header_ops change would break the following dead
code in br2684. Maintaining conditonal code in mainline is wrong.

"Do, or do not. There is no 'try.'"

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/br2684.c | 121 -------------------------------------------------------
 1 file changed, 121 deletions(-)

(limited to 'net')

diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index c742d37bfb9..ba6428f204f 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -24,16 +24,6 @@ Author: Marcell GAL, 2000, XDSL Ltd, Hungary
 
 #include "common.h"
 
-/*
- * Define this to use a version of the code which interacts with the higher
- * layers in a more intellegent way, by always reserving enough space for
- * our header at the begining of the packet.  However, there may still be
- * some problems with programs like tcpdump.  In 2.5 we'll sort out what
- * we need to do to get this perfect.  For now we just will copy the packet
- * if we need space for the header
- */
-/* #define FASTER_VERSION */
-
 #ifdef SKB_DEBUG
 static void skb_debug(const struct sk_buff *skb)
 {
@@ -69,9 +59,7 @@ struct br2684_vcc {
 #ifdef CONFIG_ATM_BR2684_IPFILTER
 	struct br2684_filter filter;
 #endif /* CONFIG_ATM_BR2684_IPFILTER */
-#ifndef FASTER_VERSION
 	unsigned copies_needed, copies_failed;
-#endif /* FASTER_VERSION */
 };
 
 struct br2684_dev {
@@ -147,13 +135,6 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
 	struct br2684_vcc *brvcc)
 {
 	struct atm_vcc *atmvcc;
-#ifdef FASTER_VERSION
-	if (brvcc->encaps == e_llc)
-		memcpy(skb_push(skb, 8), llc_oui_pid_pad, 8);
-	/* last 2 bytes of llc_oui_pid_pad are managed by header routines;
-	   yes, you got it: 8 + 2 = sizeof(llc_oui_pid_pad)
-	 */
-#else
 	int minheadroom = (brvcc->encaps == e_llc) ? 10 : 2;
 	if (skb_headroom(skb) < minheadroom) {
 		struct sk_buff *skb2 = skb_realloc_headroom(skb, minheadroom);
@@ -170,7 +151,6 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
 		skb_copy_to_linear_data(skb, llc_oui_pid_pad, 10);
 	else
 		memset(skb->data, 0, 2);
-#endif /* FASTER_VERSION */
 	skb_debug(skb);
 
 	ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
@@ -237,87 +217,6 @@ static struct net_device_stats *br2684_get_stats(struct net_device *dev)
 	return &BRPRIV(dev)->stats;
 }
 
-#ifdef FASTER_VERSION
-/*
- * These mirror eth_header and eth_header_cache.  They are not usually
- * exported for use in modules, so we grab them from net_device
- * after ether_setup() is done with it.  Bit of a hack.
- */
-static int (*my_eth_header)(struct sk_buff *, struct net_device *,
-	unsigned short, void *, void *, unsigned);
-static int (*my_eth_header_cache)(struct neighbour *, struct hh_cache *);
-
-static int
-br2684_header(struct sk_buff *skb, struct net_device *dev,
-	      unsigned short type, void *daddr, void *saddr, unsigned len)
-{
-	u16 *pad_before_eth;
-	int t = my_eth_header(skb, dev, type, daddr, saddr, len);
-	if (t > 0) {
-		pad_before_eth = (u16 *) skb_push(skb, 2);
-		*pad_before_eth = 0;
-		return dev->hard_header_len;	/* or return 16; ? */
-	} else
-		return t;
-}
-
-static int
-br2684_header_cache(struct neighbour *neigh, struct hh_cache *hh)
-{
-/* hh_data is 16 bytes long. if encaps is ether-llc we need 24, so
-xmit will add the additional header part in that case */
-	u16 *pad_before_eth = (u16 *)(hh->hh_data);
-	int t = my_eth_header_cache(neigh, hh);
-	DPRINTK("br2684_header_cache, neigh=%p, hh_cache=%p\n", neigh, hh);
-	if (t < 0)
-		return t;
-	else {
-		*pad_before_eth = 0;
-		hh->hh_len = PADLEN + ETH_HLEN;
-	}
-	return 0;
-}
-
-/*
- * This is similar to eth_type_trans, which cannot be used because of
- * our dev->hard_header_len
- */
-static inline __be16 br_type_trans(struct sk_buff *skb, struct net_device *dev)
-{
-	struct ethhdr *eth;
-	unsigned char *rawp;
-	eth = eth_hdr(skb);
-
-	if (is_multicast_ether_addr(eth->h_dest)) {
-		if (!compare_ether_addr(eth->h_dest, dev->broadcast))
-			skb->pkt_type = PACKET_BROADCAST;
-		else
-			skb->pkt_type = PACKET_MULTICAST;
-	}
-
-	else if (compare_ether_addr(eth->h_dest, dev->dev_addr))
-		skb->pkt_type = PACKET_OTHERHOST;
-
-	if (ntohs(eth->h_proto) >= 1536)
-		return eth->h_proto;
-
-	rawp = skb->data;
-
-	/*
-	 * This is a magic hack to spot IPX packets. Older Novell breaks
-	 * the protocol design and runs IPX over 802.3 without an 802.2 LLC
-	 * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
-	 * won't work for fault tolerant netware but does for the rest.
-	 */
-	if (*(unsigned short *) rawp == 0xFFFF)
-		return htons(ETH_P_802_3);
-
-	/*
-	 * Real 802.2 LLC
-	 */
-	return htons(ETH_P_802_2);
-}
-#endif /* FASTER_VERSION */
 
 /*
  * We remember when the MAC gets set, so we don't override it later with
@@ -448,17 +347,8 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 		return;
 	}
 
-#ifdef FASTER_VERSION
-	/* FIXME: tcpdump shows that pointer to mac header is 2 bytes earlier,
-	   than should be. What else should I set? */
-	skb_pull(skb, plen);
-	skb_set_mac_header(skb, -ETH_HLEN);
-	skb->pkt_type = PACKET_HOST;
-	skb->protocol = br_type_trans(skb, net_dev);
-#else
 	skb_pull(skb, plen - ETH_HLEN);
 	skb->protocol = eth_type_trans(skb, net_dev);
-#endif /* FASTER_VERSION */
 #ifdef CONFIG_ATM_BR2684_IPFILTER
 	if (unlikely(packet_fails_filter(skb->protocol, brvcc, skb))) {
 		brdev->stats.rx_dropped++;
@@ -584,13 +474,6 @@ static void br2684_setup(struct net_device *netdev)
 	ether_setup(netdev);
 	brdev->net_dev = netdev;
 
-#ifdef FASTER_VERSION
-	my_eth_header = netdev->hard_header;
-	netdev->hard_header = br2684_header;
-	my_eth_header_cache = netdev->hard_header_cache;
-	netdev->hard_header_cache = br2684_header_cache;
-	netdev->hard_header_len = sizeof(llc_oui_pid_pad) + ETH_HLEN;	/* 10 + 14 */
-#endif
 	my_eth_mac_addr = netdev->set_mac_address;
 	netdev->set_mac_address = br2684_mac_addr;
 	netdev->hard_start_xmit = br2684_start_xmit;
@@ -719,16 +602,12 @@ static int br2684_seq_show(struct seq_file *seq, void *v)
 
 	list_for_each_entry(brvcc, &brdev->brvccs, brvccs) {
 		seq_printf(seq, "  vcc %d.%d.%d: encaps=%s"
-#ifndef FASTER_VERSION
 				    ", failed copies %u/%u"
-#endif /* FASTER_VERSION */
 				    "\n", brvcc->atmvcc->dev->number,
 				    brvcc->atmvcc->vpi, brvcc->atmvcc->vci,
 				    (brvcc->encaps == e_llc) ? "LLC" : "VC"
-#ifndef FASTER_VERSION
 				    , brvcc->copies_failed
 				    , brvcc->copies_needed
-#endif /* FASTER_VERSION */
 				    );
 #ifdef CONFIG_ATM_BR2684_IPFILTER
 #define b1(var, byte)	((u8 *) &brvcc->filter.var)[byte]
-- 
cgit v1.2.3


From fd9e63544cac30a34c951f0ec958038f0529e244 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 17 Oct 2007 19:43:37 -0700
Subject: [INET]: Omit double hash calculations in xxx_frag_intern

Since the hash value is already calculated in xxx_find, we can
simply use it later. This is already done in netfilter code,
so make the same in ipv4 and ipv6.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c | 11 ++++-------
 net/ipv6/reassembly.c  | 11 +++++------
 2 files changed, 9 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 443b3f89192..d12a18b8f56 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -212,17 +212,14 @@ out:
 
 /* Creation primitives. */
 
-static struct ipq *ip_frag_intern(struct ipq *qp_in)
+static struct ipq *ip_frag_intern(struct ipq *qp_in, unsigned int hash)
 {
 	struct ipq *qp;
 #ifdef CONFIG_SMP
 	struct hlist_node *n;
 #endif
-	unsigned int hash;
 
 	write_lock(&ip4_frags.lock);
-	hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr,
-			 qp_in->protocol);
 #ifdef CONFIG_SMP
 	/* With SMP race we have to recheck hash table, because
 	 * such entry could be created on other cpu, while we
@@ -257,7 +254,7 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
 }
 
 /* Add an entry to the 'ipq' queue for a newly received IP datagram. */
-static struct ipq *ip_frag_create(struct iphdr *iph, u32 user)
+static struct ipq *ip_frag_create(struct iphdr *iph, u32 user, unsigned int h)
 {
 	struct ipq *qp;
 
@@ -278,7 +275,7 @@ static struct ipq *ip_frag_create(struct iphdr *iph, u32 user)
 	spin_lock_init(&qp->q.lock);
 	atomic_set(&qp->q.refcnt, 1);
 
-	return ip_frag_intern(qp);
+	return ip_frag_intern(qp, h);
 
 out_nomem:
 	LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n");
@@ -313,7 +310,7 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 	}
 	read_unlock(&ip4_frags.lock);
 
-	return ip_frag_create(iph, user);
+	return ip_frag_create(iph, user, hash);
 }
 
 /* Is the fragment too far ahead to be part of ipq? */
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 6ad19cfc202..0a1bf43bd48 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -233,16 +233,15 @@ out:
 /* Creation primitives. */
 
 
-static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in)
+static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in,
+		unsigned int hash)
 {
 	struct frag_queue *fq;
-	unsigned int hash;
 #ifdef CONFIG_SMP
 	struct hlist_node *n;
 #endif
 
 	write_lock(&ip6_frags.lock);
-	hash = ip6qhashfn(fq_in->id, &fq_in->saddr, &fq_in->daddr);
 #ifdef CONFIG_SMP
 	hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) {
 		if (fq->id == fq_in->id &&
@@ -273,7 +272,7 @@ static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in)
 
 static struct frag_queue *
 ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst,
-		struct inet6_dev *idev)
+		struct inet6_dev *idev, unsigned int hash)
 {
 	struct frag_queue *fq;
 
@@ -290,7 +289,7 @@ ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst,
 	spin_lock_init(&fq->q.lock);
 	atomic_set(&fq->q.refcnt, 1);
 
-	return ip6_frag_intern(fq);
+	return ip6_frag_intern(fq, hash);
 
 oom:
 	IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS);
@@ -318,7 +317,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst,
 	}
 	read_unlock(&ip6_frags.lock);
 
-	return ip6_frag_create(id, src, dst, idev);
+	return ip6_frag_create(id, src, dst, idev, hash);
 }
 
 
-- 
cgit v1.2.3


From 2588fe1d782f1686847493ad643157d5d10bf602 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 17 Oct 2007 19:44:34 -0700
Subject: [INET]: Consolidate xxx_frag_intern

This routine checks for the existence of a given entry
in the hash table and inserts the new one if needed.

The ->equal callback is used to compare two frag_queue-s
together, but this one is temporary and will be removed
later. The netfilter code and the ipv6 one use the same
routine to compare frags.

The inet_frag_intern() always returns non-NULL pointer,
so convert the inet_frag_queue into protocol specific
one (with the container_of) without any checks.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c                | 37 ++++++++++++++++++++++
 net/ipv4/ip_fragment.c                  | 54 +++++++++++----------------------
 net/ipv6/netfilter/nf_conntrack_reasm.c | 34 +++------------------
 net/ipv6/reassembly.c                   | 46 ++++++++++------------------
 4 files changed, 75 insertions(+), 96 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 484cf512858..15054eb3d4b 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -172,3 +172,40 @@ int inet_frag_evictor(struct inet_frags *f)
 	return evicted;
 }
 EXPORT_SYMBOL(inet_frag_evictor);
+
+struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
+		struct inet_frags *f, unsigned int hash)
+{
+	struct inet_frag_queue *qp;
+#ifdef CONFIG_SMP
+	struct hlist_node *n;
+#endif
+
+	write_lock(&f->lock);
+#ifdef CONFIG_SMP
+	/* With SMP race we have to recheck hash table, because
+	 * such entry could be created on other cpu, while we
+	 * promoted read lock to write lock.
+	 */
+	hlist_for_each_entry(qp, n, &f->hash[hash], list) {
+		if (f->equal(qp, qp_in)) {
+			atomic_inc(&qp->refcnt);
+			write_unlock(&f->lock);
+			qp_in->last_in |= COMPLETE;
+			inet_frag_put(qp_in, f);
+			return qp;
+		}
+	}
+#endif
+	qp = qp_in;
+	if (!mod_timer(&qp->timer, jiffies + f->ctl->timeout))
+		atomic_inc(&qp->refcnt);
+
+	atomic_inc(&qp->refcnt);
+	hlist_add_head(&qp->list, &f->hash[hash]);
+	list_add_tail(&qp->lru_list, &f->lru_list);
+	f->nqueues++;
+	write_unlock(&f->lock);
+	return qp;
+}
+EXPORT_SYMBOL(inet_frag_intern);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index d12a18b8f56..4b1bbbee22c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -123,6 +123,20 @@ static unsigned int ip4_hashfn(struct inet_frag_queue *q)
 	return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
 }
 
+static int ip4_frag_equal(struct inet_frag_queue *q1,
+		struct inet_frag_queue *q2)
+{
+	struct ipq *qp1, *qp2;
+
+	qp1 = container_of(q1, struct ipq, q);
+	qp2 = container_of(q2, struct ipq, q);
+	return (qp1->id == qp2->id &&
+			qp1->saddr == qp2->saddr &&
+			qp1->daddr == qp2->daddr &&
+			qp1->protocol == qp2->protocol &&
+			qp1->user == qp2->user);
+}
+
 /* Memory Tracking Functions. */
 static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
 {
@@ -214,43 +228,10 @@ out:
 
 static struct ipq *ip_frag_intern(struct ipq *qp_in, unsigned int hash)
 {
-	struct ipq *qp;
-#ifdef CONFIG_SMP
-	struct hlist_node *n;
-#endif
+	struct inet_frag_queue *q;
 
-	write_lock(&ip4_frags.lock);
-#ifdef CONFIG_SMP
-	/* With SMP race we have to recheck hash table, because
-	 * such entry could be created on other cpu, while we
-	 * promoted read lock to write lock.
-	 */
-	hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
-		if (qp->id == qp_in->id		&&
-		    qp->saddr == qp_in->saddr	&&
-		    qp->daddr == qp_in->daddr	&&
-		    qp->protocol == qp_in->protocol &&
-		    qp->user == qp_in->user) {
-			atomic_inc(&qp->q.refcnt);
-			write_unlock(&ip4_frags.lock);
-			qp_in->q.last_in |= COMPLETE;
-			ipq_put(qp_in);
-			return qp;
-		}
-	}
-#endif
-	qp = qp_in;
-
-	if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout))
-		atomic_inc(&qp->q.refcnt);
-
-	atomic_inc(&qp->q.refcnt);
-	hlist_add_head(&qp->q.list, &ip4_frags.hash[hash]);
-	INIT_LIST_HEAD(&qp->q.lru_list);
-	list_add_tail(&qp->q.lru_list, &ip4_frags.lru_list);
-	ip4_frags.nqueues++;
-	write_unlock(&ip4_frags.lock);
-	return qp;
+	q = inet_frag_intern(&qp_in->q, &ip4_frags, hash);
+	return container_of(q, struct ipq, q);
 }
 
 /* Add an entry to the 'ipq' queue for a newly received IP datagram. */
@@ -671,6 +652,7 @@ void __init ipfrag_init(void)
 	ip4_frags.destructor = ip4_frag_free;
 	ip4_frags.skb_free = NULL;
 	ip4_frags.qsize = sizeof(struct ipq);
+	ip4_frags.equal = ip4_frag_equal;
 	inet_frags_init(&ip4_frags);
 }
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 726fafd4196..d7dc444ec48 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -187,37 +187,10 @@ out:
 static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash,
 					  struct nf_ct_frag6_queue *fq_in)
 {
-	struct nf_ct_frag6_queue *fq;
-#ifdef CONFIG_SMP
-	struct hlist_node *n;
-#endif
-
-	write_lock(&nf_frags.lock);
-#ifdef CONFIG_SMP
-	hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) {
-		if (fq->id == fq_in->id &&
-		    ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
-		    ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
-			atomic_inc(&fq->q.refcnt);
-			write_unlock(&nf_frags.lock);
-			fq_in->q.last_in |= COMPLETE;
-			fq_put(fq_in);
-			return fq;
-		}
-	}
-#endif
-	fq = fq_in;
+	struct inet_frag_queue *q;
 
-	if (!mod_timer(&fq->q.timer, jiffies + nf_frags_ctl.timeout))
-		atomic_inc(&fq->q.refcnt);
-
-	atomic_inc(&fq->q.refcnt);
-	hlist_add_head(&fq->q.list, &nf_frags.hash[hash]);
-	INIT_LIST_HEAD(&fq->q.lru_list);
-	list_add_tail(&fq->q.lru_list, &nf_frags.lru_list);
-	nf_frags.nqueues++;
-	write_unlock(&nf_frags.lock);
-	return fq;
+	q = inet_frag_intern(&fq_in->q, &nf_frags, hash);
+	return container_of(q, struct nf_ct_frag6_queue, q);
 }
 
 
@@ -752,6 +725,7 @@ int nf_ct_frag6_init(void)
 	nf_frags.destructor = nf_frag_free;
 	nf_frags.skb_free = nf_skb_free;
 	nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
+	nf_frags.equal = ip6_frag_equal;
 	inet_frags_init(&nf_frags);
 
 	return 0;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 0a1bf43bd48..73ea204eaa6 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -143,6 +143,18 @@ static unsigned int ip6_hashfn(struct inet_frag_queue *q)
 	return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr);
 }
 
+int ip6_frag_equal(struct inet_frag_queue *q1, struct inet_frag_queue *q2)
+{
+	struct frag_queue *fq1, *fq2;
+
+	fq1 = container_of(q1, struct frag_queue, q);
+	fq2 = container_of(q2, struct frag_queue, q);
+	return (fq1->id == fq2->id &&
+			ipv6_addr_equal(&fq2->saddr, &fq1->saddr) &&
+			ipv6_addr_equal(&fq2->daddr, &fq1->daddr));
+}
+EXPORT_SYMBOL(ip6_frag_equal);
+
 /* Memory Tracking Functions. */
 static inline void frag_kfree_skb(struct sk_buff *skb, int *work)
 {
@@ -236,37 +248,10 @@ out:
 static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in,
 		unsigned int hash)
 {
-	struct frag_queue *fq;
-#ifdef CONFIG_SMP
-	struct hlist_node *n;
-#endif
-
-	write_lock(&ip6_frags.lock);
-#ifdef CONFIG_SMP
-	hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) {
-		if (fq->id == fq_in->id &&
-		    ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
-		    ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
-			atomic_inc(&fq->q.refcnt);
-			write_unlock(&ip6_frags.lock);
-			fq_in->q.last_in |= COMPLETE;
-			fq_put(fq_in);
-			return fq;
-		}
-	}
-#endif
-	fq = fq_in;
-
-	if (!mod_timer(&fq->q.timer, jiffies + ip6_frags_ctl.timeout))
-		atomic_inc(&fq->q.refcnt);
+	struct inet_frag_queue *q;
 
-	atomic_inc(&fq->q.refcnt);
-	hlist_add_head(&fq->q.list, &ip6_frags.hash[hash]);
-	INIT_LIST_HEAD(&fq->q.lru_list);
-	list_add_tail(&fq->q.lru_list, &ip6_frags.lru_list);
-	ip6_frags.nqueues++;
-	write_unlock(&ip6_frags.lock);
-	return fq;
+	q = inet_frag_intern(&fq_in->q, &ip6_frags, hash);
+	return container_of(q, struct frag_queue, q);
 }
 
 
@@ -699,5 +684,6 @@ void __init ipv6_frag_init(void)
 	ip6_frags.destructor = ip6_frag_free;
 	ip6_frags.skb_free = NULL;
 	ip6_frags.qsize = sizeof(struct frag_queue);
+	ip6_frags.equal = ip6_frag_equal;
 	inet_frags_init(&ip6_frags);
 }
-- 
cgit v1.2.3


From e521db9d790aaa60ae8920e21cb7faedc280fc36 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 17 Oct 2007 19:45:23 -0700
Subject: [INET]: Consolidate xxx_frag_alloc()

Just perform the kzalloc() allocation and setup common
fields in the inet_frag_queue(). Then return the result
to the caller to initialize the rest.

The inet_frag_alloc() may return NULL, so check the
return value before doing the container_of(). This looks
ugly, but the xxx_frag_alloc() will be removed soon.

The xxx_expire() timer callbacks are patches,
because the argument is now the inet_frag_queue, not
the protocol specific queue.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c                | 17 +++++++++++++++++
 net/ipv4/ip_fragment.c                  | 20 +++++++-------------
 net/ipv6/netfilter/nf_conntrack_reasm.c | 19 ++++++++-----------
 net/ipv6/reassembly.c                   | 19 +++++++------------
 4 files changed, 39 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 15054eb3d4b..57e15fa307d 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -209,3 +209,20 @@ struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
 	return qp;
 }
 EXPORT_SYMBOL(inet_frag_intern);
+
+struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f)
+{
+	struct inet_frag_queue *q;
+
+	q = kzalloc(f->qsize, GFP_ATOMIC);
+	if (q == NULL)
+		return NULL;
+
+	atomic_add(f->qsize, &f->mem);
+	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
+	spin_lock_init(&q->lock);
+	atomic_set(&q->refcnt, 1);
+
+	return q;
+}
+EXPORT_SYMBOL(inet_frag_alloc);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 4b1bbbee22c..fc0d530df52 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -158,12 +158,10 @@ static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
 
 static __inline__ struct ipq *frag_alloc_queue(void)
 {
-	struct ipq *qp = kzalloc(sizeof(struct ipq), GFP_ATOMIC);
+	struct inet_frag_queue *q;
 
-	if (!qp)
-		return NULL;
-	atomic_add(sizeof(struct ipq), &ip4_frags.mem);
-	return qp;
+	q = inet_frag_alloc(&ip4_frags);
+	return q ? container_of(q, struct ipq, q) : NULL;
 }
 
 
@@ -199,7 +197,9 @@ static void ip_evictor(void)
  */
 static void ip_expire(unsigned long arg)
 {
-	struct ipq *qp = (struct ipq *) arg;
+	struct ipq *qp;
+
+	qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
 
 	spin_lock(&qp->q.lock);
 
@@ -249,13 +249,6 @@ static struct ipq *ip_frag_create(struct iphdr *iph, u32 user, unsigned int h)
 	qp->user = user;
 	qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
 
-	/* Initialize a timer for this entry. */
-	init_timer(&qp->q.timer);
-	qp->q.timer.data = (unsigned long) qp;	/* pointer to queue	*/
-	qp->q.timer.function = ip_expire;		/* expire function	*/
-	spin_lock_init(&qp->q.lock);
-	atomic_set(&qp->q.refcnt, 1);
-
 	return ip_frag_intern(qp, h);
 
 out_nomem:
@@ -653,6 +646,7 @@ void __init ipfrag_init(void)
 	ip4_frags.skb_free = NULL;
 	ip4_frags.qsize = sizeof(struct ipq);
 	ip4_frags.equal = ip4_frag_equal;
+	ip4_frags.frag_expire = ip_expire;
 	inet_frags_init(&ip4_frags);
 }
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index d7dc444ec48..3f8c16b3301 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -137,13 +137,10 @@ static void nf_frag_free(struct inet_frag_queue *q)
 
 static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
 {
-	struct nf_ct_frag6_queue *fq;
+	struct inet_frag_queue *q;
 
-	fq = kzalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC);
-	if (fq == NULL)
-		return NULL;
-	atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_frags.mem);
-	return fq;
+	q = inet_frag_alloc(&nf_frags);
+	return q ? container_of(q, struct nf_ct_frag6_queue, q) : NULL;
 }
 
 /* Destruction primitives. */
@@ -168,7 +165,10 @@ static void nf_ct_frag6_evictor(void)
 
 static void nf_ct_frag6_expire(unsigned long data)
 {
-	struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data;
+	struct nf_ct_frag6_queue *fq;
+
+	fq = container_of((struct inet_frag_queue *)data,
+			struct nf_ct_frag6_queue, q);
 
 	spin_lock(&fq->q.lock);
 
@@ -208,10 +208,6 @@ nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src,				   str
 	ipv6_addr_copy(&fq->saddr, src);
 	ipv6_addr_copy(&fq->daddr, dst);
 
-	setup_timer(&fq->q.timer, nf_ct_frag6_expire, (unsigned long)fq);
-	spin_lock_init(&fq->q.lock);
-	atomic_set(&fq->q.refcnt, 1);
-
 	return nf_ct_frag6_intern(hash, fq);
 
 oom:
@@ -726,6 +722,7 @@ int nf_ct_frag6_init(void)
 	nf_frags.skb_free = nf_skb_free;
 	nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
 	nf_frags.equal = ip6_frag_equal;
+	nf_frags.frag_expire = nf_ct_frag6_expire;
 	inet_frags_init(&nf_frags);
 
 	return 0;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 73ea204eaa6..21913c78f05 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -171,12 +171,10 @@ static void ip6_frag_free(struct inet_frag_queue *fq)
 
 static inline struct frag_queue *frag_alloc_queue(void)
 {
-	struct frag_queue *fq = kzalloc(sizeof(struct frag_queue), GFP_ATOMIC);
+	struct inet_frag_queue *q;
 
-	if(!fq)
-		return NULL;
-	atomic_add(sizeof(struct frag_queue), &ip6_frags.mem);
-	return fq;
+	q = inet_frag_alloc(&ip6_frags);
+	return q ? container_of(q, struct frag_queue, q) : NULL;
 }
 
 /* Destruction primitives. */
@@ -205,9 +203,11 @@ static void ip6_evictor(struct inet6_dev *idev)
 
 static void ip6_frag_expire(unsigned long data)
 {
-	struct frag_queue *fq = (struct frag_queue *) data;
+	struct frag_queue *fq;
 	struct net_device *dev = NULL;
 
+	fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+
 	spin_lock(&fq->q.lock);
 
 	if (fq->q.last_in & COMPLETE)
@@ -268,12 +268,6 @@ ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst,
 	ipv6_addr_copy(&fq->saddr, src);
 	ipv6_addr_copy(&fq->daddr, dst);
 
-	init_timer(&fq->q.timer);
-	fq->q.timer.function = ip6_frag_expire;
-	fq->q.timer.data = (long) fq;
-	spin_lock_init(&fq->q.lock);
-	atomic_set(&fq->q.refcnt, 1);
-
 	return ip6_frag_intern(fq, hash);
 
 oom:
@@ -685,5 +679,6 @@ void __init ipv6_frag_init(void)
 	ip6_frags.skb_free = NULL;
 	ip6_frags.qsize = sizeof(struct frag_queue);
 	ip6_frags.equal = ip6_frag_equal;
+	ip6_frags.frag_expire = ip6_frag_expire;
 	inet_frags_init(&ip6_frags);
 }
-- 
cgit v1.2.3


From c6fda282294da882f8d8cc4c513940277dd380f5 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 17 Oct 2007 19:46:47 -0700
Subject: [INET]: Consolidate xxx_frag_create()

This one uses the xxx_frag_intern() and xxx_frag_alloc()
routines, which are already consolidated, so remove them
from protocol code (as promised).

The ->constructor callback is used to init the rest of
the frag queue and it is the same for netfilter and ipv6.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c                | 20 +++++++++---
 net/ipv4/ip_fragment.c                  | 54 +++++++++++++++++----------------
 net/ipv6/netfilter/nf_conntrack_reasm.c | 39 ++++++++----------------
 net/ipv6/reassembly.c                   | 45 ++++++++++++---------------
 4 files changed, 76 insertions(+), 82 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 57e15fa307d..b531f803cda 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -173,7 +173,7 @@ int inet_frag_evictor(struct inet_frags *f)
 }
 EXPORT_SYMBOL(inet_frag_evictor);
 
-struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
+static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
 		struct inet_frags *f, unsigned int hash)
 {
 	struct inet_frag_queue *qp;
@@ -208,9 +208,8 @@ struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
 	write_unlock(&f->lock);
 	return qp;
 }
-EXPORT_SYMBOL(inet_frag_intern);
 
-struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f)
+static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg)
 {
 	struct inet_frag_queue *q;
 
@@ -218,6 +217,7 @@ struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f)
 	if (q == NULL)
 		return NULL;
 
+	f->constructor(q, arg);
 	atomic_add(f->qsize, &f->mem);
 	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
 	spin_lock_init(&q->lock);
@@ -225,4 +225,16 @@ struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f)
 
 	return q;
 }
-EXPORT_SYMBOL(inet_frag_alloc);
+
+struct inet_frag_queue *inet_frag_create(struct inet_frags *f, void *arg,
+		unsigned int hash)
+{
+	struct inet_frag_queue *q;
+
+	q = inet_frag_alloc(f, arg);
+	if (q == NULL)
+		return NULL;
+
+	return inet_frag_intern(q, f, hash);
+}
+EXPORT_SYMBOL(inet_frag_create);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fc0d530df52..0d6cff1de5a 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -108,6 +108,11 @@ int ip_frag_mem(void)
 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 			 struct net_device *dev);
 
+struct ip4_create_arg {
+	struct iphdr *iph;
+	u32 user;
+};
+
 static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
 {
 	return jhash_3words((__force u32)id << 16 | prot,
@@ -146,6 +151,20 @@ static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
 	kfree_skb(skb);
 }
 
+static void ip4_frag_init(struct inet_frag_queue *q, void *a)
+{
+	struct ipq *qp = container_of(q, struct ipq, q);
+	struct ip4_create_arg *arg = a;
+
+	qp->protocol = arg->iph->protocol;
+	qp->id = arg->iph->id;
+	qp->saddr = arg->iph->saddr;
+	qp->daddr = arg->iph->daddr;
+	qp->user = arg->user;
+	qp->peer = sysctl_ipfrag_max_dist ?
+		inet_getpeer(arg->iph->saddr, 1) : NULL;
+}
+
 static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
 {
 	struct ipq *qp;
@@ -156,14 +175,6 @@ static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
 	kfree(qp);
 }
 
-static __inline__ struct ipq *frag_alloc_queue(void)
-{
-	struct inet_frag_queue *q;
-
-	q = inet_frag_alloc(&ip4_frags);
-	return q ? container_of(q, struct ipq, q) : NULL;
-}
-
 
 /* Destruction primitives. */
 
@@ -226,30 +237,20 @@ out:
 
 /* Creation primitives. */
 
-static struct ipq *ip_frag_intern(struct ipq *qp_in, unsigned int hash)
-{
-	struct inet_frag_queue *q;
-
-	q = inet_frag_intern(&qp_in->q, &ip4_frags, hash);
-	return container_of(q, struct ipq, q);
-}
-
 /* Add an entry to the 'ipq' queue for a newly received IP datagram. */
 static struct ipq *ip_frag_create(struct iphdr *iph, u32 user, unsigned int h)
 {
-	struct ipq *qp;
+	struct inet_frag_queue *q;
+	struct ip4_create_arg arg;
 
-	if ((qp = frag_alloc_queue()) == NULL)
-		goto out_nomem;
+	arg.iph = iph;
+	arg.user = user;
 
-	qp->protocol = iph->protocol;
-	qp->id = iph->id;
-	qp->saddr = iph->saddr;
-	qp->daddr = iph->daddr;
-	qp->user = user;
-	qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
+	q = inet_frag_create(&ip4_frags, &arg, h);
+	if (q == NULL)
+		goto out_nomem;
 
-	return ip_frag_intern(qp, h);
+	return container_of(q, struct ipq, q);
 
 out_nomem:
 	LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n");
@@ -642,6 +643,7 @@ void __init ipfrag_init(void)
 {
 	ip4_frags.ctl = &ip4_frags_ctl;
 	ip4_frags.hashfn = ip4_hashfn;
+	ip4_frags.constructor = ip4_frag_init;
 	ip4_frags.destructor = ip4_frag_free;
 	ip4_frags.skb_free = NULL;
 	ip4_frags.qsize = sizeof(struct ipq);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 3f8c16b3301..127d1d84278 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -135,14 +135,6 @@ static void nf_frag_free(struct inet_frag_queue *q)
 	kfree(container_of(q, struct nf_ct_frag6_queue, q));
 }
 
-static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
-{
-	struct inet_frag_queue *q;
-
-	q = inet_frag_alloc(&nf_frags);
-	return q ? container_of(q, struct nf_ct_frag6_queue, q) : NULL;
-}
-
 /* Destruction primitives. */
 
 static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
@@ -184,33 +176,25 @@ out:
 
 /* Creation primitives. */
 
-static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash,
-					  struct nf_ct_frag6_queue *fq_in)
+static struct nf_ct_frag6_queue *
+nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src,
+		struct in6_addr *dst)
 {
 	struct inet_frag_queue *q;
+	struct ip6_create_arg arg;
 
-	q = inet_frag_intern(&fq_in->q, &nf_frags, hash);
-	return container_of(q, struct nf_ct_frag6_queue, q);
-}
-
-
-static struct nf_ct_frag6_queue *
-nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src,				   struct in6_addr *dst)
-{
-	struct nf_ct_frag6_queue *fq;
+	arg.id = id;
+	arg.src = src;
+	arg.dst = dst;
 
-	if ((fq = frag_alloc_queue()) == NULL) {
-		pr_debug("Can't alloc new queue\n");
+	q = inet_frag_create(&nf_frags, &arg, hash);
+	if (q == NULL)
 		goto oom;
-	}
 
-	fq->id = id;
-	ipv6_addr_copy(&fq->saddr, src);
-	ipv6_addr_copy(&fq->daddr, dst);
-
-	return nf_ct_frag6_intern(hash, fq);
+	return container_of(q, struct nf_ct_frag6_queue, q);
 
 oom:
+	pr_debug("Can't alloc new queue\n");
 	return NULL;
 }
 
@@ -718,6 +702,7 @@ int nf_ct_frag6_init(void)
 {
 	nf_frags.ctl = &nf_frags_ctl;
 	nf_frags.hashfn = nf_hashfn;
+	nf_frags.constructor = ip6_frag_init;
 	nf_frags.destructor = nf_frag_free;
 	nf_frags.skb_free = nf_skb_free;
 	nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 21913c78f05..ce8734028d9 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -164,17 +164,20 @@ static inline void frag_kfree_skb(struct sk_buff *skb, int *work)
 	kfree_skb(skb);
 }
 
-static void ip6_frag_free(struct inet_frag_queue *fq)
+void ip6_frag_init(struct inet_frag_queue *q, void *a)
 {
-	kfree(container_of(fq, struct frag_queue, q));
+	struct frag_queue *fq = container_of(q, struct frag_queue, q);
+	struct ip6_create_arg *arg = a;
+
+	fq->id = arg->id;
+	ipv6_addr_copy(&fq->saddr, arg->src);
+	ipv6_addr_copy(&fq->daddr, arg->dst);
 }
+EXPORT_SYMBOL(ip6_frag_init);
 
-static inline struct frag_queue *frag_alloc_queue(void)
+static void ip6_frag_free(struct inet_frag_queue *fq)
 {
-	struct inet_frag_queue *q;
-
-	q = inet_frag_alloc(&ip6_frags);
-	return q ? container_of(q, struct frag_queue, q) : NULL;
+	kfree(container_of(fq, struct frag_queue, q));
 }
 
 /* Destruction primitives. */
@@ -244,31 +247,22 @@ out:
 
 /* Creation primitives. */
 
-
-static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in,
-		unsigned int hash)
-{
-	struct inet_frag_queue *q;
-
-	q = inet_frag_intern(&fq_in->q, &ip6_frags, hash);
-	return container_of(q, struct frag_queue, q);
-}
-
-
 static struct frag_queue *
 ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst,
 		struct inet6_dev *idev, unsigned int hash)
 {
-	struct frag_queue *fq;
+	struct inet_frag_queue *q;
+	struct ip6_create_arg arg;
 
-	if ((fq = frag_alloc_queue()) == NULL)
-		goto oom;
+	arg.id = id;
+	arg.src = src;
+	arg.dst = dst;
 
-	fq->id = id;
-	ipv6_addr_copy(&fq->saddr, src);
-	ipv6_addr_copy(&fq->daddr, dst);
+	q = inet_frag_create(&ip6_frags, &arg, hash);
+	if (q == NULL)
+		goto oom;
 
-	return ip6_frag_intern(fq, hash);
+	return container_of(q, struct frag_queue, q);
 
 oom:
 	IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS);
@@ -675,6 +669,7 @@ void __init ipv6_frag_init(void)
 
 	ip6_frags.ctl = &ip6_frags_ctl;
 	ip6_frags.hashfn = ip6_hashfn;
+	ip6_frags.constructor = ip6_frag_init;
 	ip6_frags.destructor = ip6_frag_free;
 	ip6_frags.skb_free = NULL;
 	ip6_frags.qsize = sizeof(struct frag_queue);
-- 
cgit v1.2.3


From abd6523d15f40bfee14652619a31a7f65f77f581 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 17 Oct 2007 19:47:21 -0700
Subject: [INET]: Consolidate xxx_find() in fragment management

Here we need another callback ->match to check whether the
entry found in hash matches the key passed. The key used
is the same as the creation argument for inet_frag_create.

Yet again, this ->match is the same for netfilter and ipv6.
Running a frew steps forward - this callback will later
replace the ->equal one.

Since the inet_frag_find() uses the already consolidated
inet_frag_create() remove the xxx_frag_create from protocol
codes.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c                | 25 +++++++++++++--
 net/ipv4/ip_fragment.c                  | 57 ++++++++++++---------------------
 net/ipv6/netfilter/nf_conntrack_reasm.c | 32 ++++--------------
 net/ipv6/reassembly.c                   | 50 +++++++++++------------------
 4 files changed, 68 insertions(+), 96 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index b531f803cda..6ba98ebbed9 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -226,8 +226,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg)
 	return q;
 }
 
-struct inet_frag_queue *inet_frag_create(struct inet_frags *f, void *arg,
-		unsigned int hash)
+static struct inet_frag_queue *inet_frag_create(struct inet_frags *f,
+		void *arg, unsigned int hash)
 {
 	struct inet_frag_queue *q;
 
@@ -237,4 +237,23 @@ struct inet_frag_queue *inet_frag_create(struct inet_frags *f, void *arg,
 
 	return inet_frag_intern(q, f, hash);
 }
-EXPORT_SYMBOL(inet_frag_create);
+
+struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key,
+		unsigned int hash)
+{
+	struct inet_frag_queue *q;
+	struct hlist_node *n;
+
+	read_lock(&f->lock);
+	hlist_for_each_entry(q, n, &f->hash[hash], list) {
+		if (f->match(q, key)) {
+			atomic_inc(&q->refcnt);
+			read_unlock(&f->lock);
+			return q;
+		}
+	}
+	read_unlock(&f->lock);
+
+	return inet_frag_create(f, key, hash);
+}
+EXPORT_SYMBOL(inet_frag_find);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 0d6cff1de5a..928259dbc0f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -142,6 +142,19 @@ static int ip4_frag_equal(struct inet_frag_queue *q1,
 			qp1->user == qp2->user);
 }
 
+static int ip4_frag_match(struct inet_frag_queue *q, void *a)
+{
+	struct ipq *qp;
+	struct ip4_create_arg *arg = a;
+
+	qp = container_of(q, struct ipq, q);
+	return (qp->id == arg->iph->id &&
+			qp->saddr == arg->iph->saddr &&
+			qp->daddr == arg->iph->daddr &&
+			qp->protocol == arg->iph->protocol &&
+			qp->user == arg->user);
+}
+
 /* Memory Tracking Functions. */
 static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
 {
@@ -235,18 +248,20 @@ out:
 	ipq_put(qp);
 }
 
-/* Creation primitives. */
-
-/* Add an entry to the 'ipq' queue for a newly received IP datagram. */
-static struct ipq *ip_frag_create(struct iphdr *iph, u32 user, unsigned int h)
+/* Find the correct entry in the "incomplete datagrams" queue for
+ * this IP datagram, and create new one, if nothing is found.
+ */
+static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 {
 	struct inet_frag_queue *q;
 	struct ip4_create_arg arg;
+	unsigned int hash;
 
 	arg.iph = iph;
 	arg.user = user;
+	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
 
-	q = inet_frag_create(&ip4_frags, &arg, h);
+	q = inet_frag_find(&ip4_frags, &arg, hash);
 	if (q == NULL)
 		goto out_nomem;
 
@@ -257,37 +272,6 @@ out_nomem:
 	return NULL;
 }
 
-/* Find the correct entry in the "incomplete datagrams" queue for
- * this IP datagram, and create new one, if nothing is found.
- */
-static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
-{
-	__be16 id = iph->id;
-	__be32 saddr = iph->saddr;
-	__be32 daddr = iph->daddr;
-	__u8 protocol = iph->protocol;
-	unsigned int hash;
-	struct ipq *qp;
-	struct hlist_node *n;
-
-	read_lock(&ip4_frags.lock);
-	hash = ipqhashfn(id, saddr, daddr, protocol);
-	hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
-		if (qp->id == id		&&
-		    qp->saddr == saddr	&&
-		    qp->daddr == daddr	&&
-		    qp->protocol == protocol &&
-		    qp->user == user) {
-			atomic_inc(&qp->q.refcnt);
-			read_unlock(&ip4_frags.lock);
-			return qp;
-		}
-	}
-	read_unlock(&ip4_frags.lock);
-
-	return ip_frag_create(iph, user, hash);
-}
-
 /* Is the fragment too far ahead to be part of ipq? */
 static inline int ip_frag_too_far(struct ipq *qp)
 {
@@ -648,6 +632,7 @@ void __init ipfrag_init(void)
 	ip4_frags.skb_free = NULL;
 	ip4_frags.qsize = sizeof(struct ipq);
 	ip4_frags.equal = ip4_frag_equal;
+	ip4_frags.match = ip4_frag_match;
 	ip4_frags.frag_expire = ip_expire;
 	inet_frags_init(&ip4_frags);
 }
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 127d1d84278..bff63d79c64 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -176,18 +176,19 @@ out:
 
 /* Creation primitives. */
 
-static struct nf_ct_frag6_queue *
-nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src,
-		struct in6_addr *dst)
+static __inline__ struct nf_ct_frag6_queue *
+fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst)
 {
 	struct inet_frag_queue *q;
 	struct ip6_create_arg arg;
+	unsigned int hash;
 
 	arg.id = id;
 	arg.src = src;
 	arg.dst = dst;
+	hash = ip6qhashfn(id, src, dst);
 
-	q = inet_frag_create(&nf_frags, &arg, hash);
+	q = inet_frag_find(&nf_frags, &arg, hash);
 	if (q == NULL)
 		goto oom;
 
@@ -198,28 +199,6 @@ oom:
 	return NULL;
 }
 
-static __inline__ struct nf_ct_frag6_queue *
-fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst)
-{
-	struct nf_ct_frag6_queue *fq;
-	struct hlist_node *n;
-	unsigned int hash = ip6qhashfn(id, src, dst);
-
-	read_lock(&nf_frags.lock);
-	hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) {
-		if (fq->id == id &&
-		    ipv6_addr_equal(src, &fq->saddr) &&
-		    ipv6_addr_equal(dst, &fq->daddr)) {
-			atomic_inc(&fq->q.refcnt);
-			read_unlock(&nf_frags.lock);
-			return fq;
-		}
-	}
-	read_unlock(&nf_frags.lock);
-
-	return nf_ct_frag6_create(hash, id, src, dst);
-}
-
 
 static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 			     struct frag_hdr *fhdr, int nhoff)
@@ -706,6 +685,7 @@ int nf_ct_frag6_init(void)
 	nf_frags.destructor = nf_frag_free;
 	nf_frags.skb_free = nf_skb_free;
 	nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
+	nf_frags.match = ip6_frag_match;
 	nf_frags.equal = ip6_frag_equal;
 	nf_frags.frag_expire = nf_ct_frag6_expire;
 	inet_frags_init(&nf_frags);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index ce8734028d9..11fffe791fc 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -155,6 +155,18 @@ int ip6_frag_equal(struct inet_frag_queue *q1, struct inet_frag_queue *q2)
 }
 EXPORT_SYMBOL(ip6_frag_equal);
 
+int ip6_frag_match(struct inet_frag_queue *q, void *a)
+{
+	struct frag_queue *fq;
+	struct ip6_create_arg *arg = a;
+
+	fq = container_of(q, struct frag_queue, q);
+	return (fq->id == arg->id &&
+			ipv6_addr_equal(&fq->saddr, arg->src) &&
+			ipv6_addr_equal(&fq->daddr, arg->dst));
+}
+EXPORT_SYMBOL(ip6_frag_match);
+
 /* Memory Tracking Functions. */
 static inline void frag_kfree_skb(struct sk_buff *skb, int *work)
 {
@@ -245,20 +257,20 @@ out:
 	fq_put(fq);
 }
 
-/* Creation primitives. */
-
-static struct frag_queue *
-ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst,
-		struct inet6_dev *idev, unsigned int hash)
+static __inline__ struct frag_queue *
+fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst,
+	struct inet6_dev *idev)
 {
 	struct inet_frag_queue *q;
 	struct ip6_create_arg arg;
+	unsigned int hash;
 
 	arg.id = id;
 	arg.src = src;
 	arg.dst = dst;
+	hash = ip6qhashfn(id, src, dst);
 
-	q = inet_frag_create(&ip6_frags, &arg, hash);
+	q = inet_frag_find(&ip6_frags, &arg, hash);
 	if (q == NULL)
 		goto oom;
 
@@ -269,31 +281,6 @@ oom:
 	return NULL;
 }
 
-static __inline__ struct frag_queue *
-fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst,
-	struct inet6_dev *idev)
-{
-	struct frag_queue *fq;
-	struct hlist_node *n;
-	unsigned int hash;
-
-	read_lock(&ip6_frags.lock);
-	hash = ip6qhashfn(id, src, dst);
-	hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) {
-		if (fq->id == id &&
-		    ipv6_addr_equal(src, &fq->saddr) &&
-		    ipv6_addr_equal(dst, &fq->daddr)) {
-			atomic_inc(&fq->q.refcnt);
-			read_unlock(&ip6_frags.lock);
-			return fq;
-		}
-	}
-	read_unlock(&ip6_frags.lock);
-
-	return ip6_frag_create(id, src, dst, idev, hash);
-}
-
-
 static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 			   struct frag_hdr *fhdr, int nhoff)
 {
@@ -673,6 +660,7 @@ void __init ipv6_frag_init(void)
 	ip6_frags.destructor = ip6_frag_free;
 	ip6_frags.skb_free = NULL;
 	ip6_frags.qsize = sizeof(struct frag_queue);
+	ip6_frags.match = ip6_frag_match;
 	ip6_frags.equal = ip6_frag_equal;
 	ip6_frags.frag_expire = ip6_frag_expire;
 	inet_frags_init(&ip6_frags);
-- 
cgit v1.2.3


From 48d60056387c37a17a46feda48613587a90535e5 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 17 Oct 2007 19:47:56 -0700
Subject: [INET]: Remove no longer needed ->equal callback

Since this callback is used to check for conflicts in
hashtable when inserting a newly created frag queue, we can
do the same by checking for matching the queue with the
argument, used to create one.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c                |  6 +++---
 net/ipv4/ip_fragment.c                  | 15 ---------------
 net/ipv6/netfilter/nf_conntrack_reasm.c |  1 -
 net/ipv6/reassembly.c                   | 13 -------------
 4 files changed, 3 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 6ba98ebbed9..3ed09dd9344 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -174,7 +174,7 @@ int inet_frag_evictor(struct inet_frags *f)
 EXPORT_SYMBOL(inet_frag_evictor);
 
 static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
-		struct inet_frags *f, unsigned int hash)
+		struct inet_frags *f, unsigned int hash, void *arg)
 {
 	struct inet_frag_queue *qp;
 #ifdef CONFIG_SMP
@@ -188,7 +188,7 @@ static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
 	 * promoted read lock to write lock.
 	 */
 	hlist_for_each_entry(qp, n, &f->hash[hash], list) {
-		if (f->equal(qp, qp_in)) {
+		if (f->match(qp, arg)) {
 			atomic_inc(&qp->refcnt);
 			write_unlock(&f->lock);
 			qp_in->last_in |= COMPLETE;
@@ -235,7 +235,7 @@ static struct inet_frag_queue *inet_frag_create(struct inet_frags *f,
 	if (q == NULL)
 		return NULL;
 
-	return inet_frag_intern(q, f, hash);
+	return inet_frag_intern(q, f, hash, arg);
 }
 
 struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key,
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 928259dbc0f..314593b2050 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -128,20 +128,6 @@ static unsigned int ip4_hashfn(struct inet_frag_queue *q)
 	return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
 }
 
-static int ip4_frag_equal(struct inet_frag_queue *q1,
-		struct inet_frag_queue *q2)
-{
-	struct ipq *qp1, *qp2;
-
-	qp1 = container_of(q1, struct ipq, q);
-	qp2 = container_of(q2, struct ipq, q);
-	return (qp1->id == qp2->id &&
-			qp1->saddr == qp2->saddr &&
-			qp1->daddr == qp2->daddr &&
-			qp1->protocol == qp2->protocol &&
-			qp1->user == qp2->user);
-}
-
 static int ip4_frag_match(struct inet_frag_queue *q, void *a)
 {
 	struct ipq *qp;
@@ -631,7 +617,6 @@ void __init ipfrag_init(void)
 	ip4_frags.destructor = ip4_frag_free;
 	ip4_frags.skb_free = NULL;
 	ip4_frags.qsize = sizeof(struct ipq);
-	ip4_frags.equal = ip4_frag_equal;
 	ip4_frags.match = ip4_frag_match;
 	ip4_frags.frag_expire = ip_expire;
 	inet_frags_init(&ip4_frags);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index bff63d79c64..25746d31504 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -686,7 +686,6 @@ int nf_ct_frag6_init(void)
 	nf_frags.skb_free = nf_skb_free;
 	nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
 	nf_frags.match = ip6_frag_match;
-	nf_frags.equal = ip6_frag_equal;
 	nf_frags.frag_expire = nf_ct_frag6_expire;
 	inet_frags_init(&nf_frags);
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 11fffe791fc..01766bc75b6 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -143,18 +143,6 @@ static unsigned int ip6_hashfn(struct inet_frag_queue *q)
 	return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr);
 }
 
-int ip6_frag_equal(struct inet_frag_queue *q1, struct inet_frag_queue *q2)
-{
-	struct frag_queue *fq1, *fq2;
-
-	fq1 = container_of(q1, struct frag_queue, q);
-	fq2 = container_of(q2, struct frag_queue, q);
-	return (fq1->id == fq2->id &&
-			ipv6_addr_equal(&fq2->saddr, &fq1->saddr) &&
-			ipv6_addr_equal(&fq2->daddr, &fq1->daddr));
-}
-EXPORT_SYMBOL(ip6_frag_equal);
-
 int ip6_frag_match(struct inet_frag_queue *q, void *a)
 {
 	struct frag_queue *fq;
@@ -661,7 +649,6 @@ void __init ipv6_frag_init(void)
 	ip6_frags.skb_free = NULL;
 	ip6_frags.qsize = sizeof(struct frag_queue);
 	ip6_frags.match = ip6_frag_match;
-	ip6_frags.equal = ip6_frag_equal;
 	ip6_frags.frag_expire = ip6_frag_expire;
 	inet_frags_init(&ip6_frags);
 }
-- 
cgit v1.2.3


From c95477090a2ace6d241c184adc3fbfcab9c61ceb Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 17 Oct 2007 19:48:26 -0700
Subject: [INET]: Consolidate frag queues freeing

Since we now allocate the queues in inet_fragment.c, we
can safely free it in the same place. The ->destructor
callback thus becomes optional for inet_frags.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c                | 4 +++-
 net/ipv4/ip_fragment.c                  | 1 -
 net/ipv6/netfilter/nf_conntrack_reasm.c | 7 +------
 net/ipv6/reassembly.c                   | 7 +------
 4 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 3ed09dd9344..e15e04fc666 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -136,7 +136,9 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
 		*work -= f->qsize;
 	atomic_sub(f->qsize, &f->mem);
 
-	f->destructor(q);
+	if (f->destructor)
+		f->destructor(q);
+	kfree(q);
 
 }
 EXPORT_SYMBOL(inet_frag_destroy);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 314593b2050..453ae041edd 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -171,7 +171,6 @@ static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
 	qp = container_of(q, struct ipq, q);
 	if (qp->peer)
 		inet_putpeer(qp->peer);
-	kfree(qp);
 }
 
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 25746d31504..e170c67c47a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -130,11 +130,6 @@ static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work)
 	kfree_skb(skb);
 }
 
-static void nf_frag_free(struct inet_frag_queue *q)
-{
-	kfree(container_of(q, struct nf_ct_frag6_queue, q));
-}
-
 /* Destruction primitives. */
 
 static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
@@ -682,7 +677,7 @@ int nf_ct_frag6_init(void)
 	nf_frags.ctl = &nf_frags_ctl;
 	nf_frags.hashfn = nf_hashfn;
 	nf_frags.constructor = ip6_frag_init;
-	nf_frags.destructor = nf_frag_free;
+	nf_frags.destructor = NULL;
 	nf_frags.skb_free = nf_skb_free;
 	nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
 	nf_frags.match = ip6_frag_match;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 01766bc75b6..76c88a93b9b 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -175,11 +175,6 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a)
 }
 EXPORT_SYMBOL(ip6_frag_init);
 
-static void ip6_frag_free(struct inet_frag_queue *fq)
-{
-	kfree(container_of(fq, struct frag_queue, q));
-}
-
 /* Destruction primitives. */
 
 static __inline__ void fq_put(struct frag_queue *fq)
@@ -645,7 +640,7 @@ void __init ipv6_frag_init(void)
 	ip6_frags.ctl = &ip6_frags_ctl;
 	ip6_frags.hashfn = ip6_hashfn;
 	ip6_frags.constructor = ip6_frag_init;
-	ip6_frags.destructor = ip6_frag_free;
+	ip6_frags.destructor = NULL;
 	ip6_frags.skb_free = NULL;
 	ip6_frags.qsize = sizeof(struct frag_queue);
 	ip6_frags.match = ip6_frag_match;
-- 
cgit v1.2.3


From d114f399b4da6fa7f9da3bbf1fb841370c11e788 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Wed, 17 Oct 2007 21:16:16 -0700
Subject: [MAC80211]: only honor IW_SCAN_THIS_ESSID in STA, IBSS, and AP modes

The previous IW_SCAN_THIS_ESSID patch left a hole allowing scan
requests on interfaces in inappropriate modes.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211_ioctl.c | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c
index a57fed77db2..6caa3ec2cff 100644
--- a/net/mac80211/ieee80211_ioctl.c
+++ b/net/mac80211/ieee80211_ioctl.c
@@ -522,29 +522,30 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev,
 	if (!netif_running(dev))
 		return -ENETDOWN;
 
+	switch (sdata->type) {
+	case IEEE80211_IF_TYPE_STA:
+	case IEEE80211_IF_TYPE_IBSS:
+		if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
+			ssid = sdata->u.sta.ssid;
+			ssid_len = sdata->u.sta.ssid_len;
+		}
+		break;
+	case IEEE80211_IF_TYPE_AP:
+		if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
+			ssid = sdata->u.ap.ssid;
+			ssid_len = sdata->u.ap.ssid_len;
+		}
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	/* if SSID was specified explicitly then use that */
 	if (wrqu->data.length == sizeof(struct iw_scan_req) &&
 	    wrqu->data.flags & IW_SCAN_THIS_ESSID) {
 		req = (struct iw_scan_req *)extra;
 		ssid = req->essid;
 		ssid_len = req->essid_len;
-	} else {
-		switch (sdata->type) {
-		case IEEE80211_IF_TYPE_STA:
-		case IEEE80211_IF_TYPE_IBSS:
-			if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
-				ssid = sdata->u.sta.ssid;
-				ssid_len = sdata->u.sta.ssid_len;
-			}
-			break;
-		case IEEE80211_IF_TYPE_AP:
-			if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
-				ssid = sdata->u.ap.ssid;
-				ssid_len = sdata->u.ap.ssid_len;
-			}
-			break;
-		default:
-			return -EOPNOTSUPP;
-		}
 	}
 
 	return ieee80211_sta_req_scan(dev, ssid, ssid_len);
-- 
cgit v1.2.3


From 55b333253d5bcafbe187b50474e40789301c53c6 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 17 Oct 2007 21:21:26 -0700
Subject: [NET]: Introduce the sk_detach_filter() call

Filter is attached in a separate function, so do the
same for filter detaching.

This also removes one variable sock_setsockopt().

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 16 ++++++++++++++++
 net/core/sock.c   | 12 +-----------
 2 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index bd903aaf7aa..fd607581ab5 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -433,5 +433,21 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	return err;
 }
 
+int sk_detach_filter(struct sock *sk)
+{
+	int ret = -ENOENT;
+	struct sk_filter *filter;
+
+	rcu_read_lock_bh();
+	filter = rcu_dereference(sk->sk_filter);
+	if (filter) {
+		rcu_assign_pointer(sk->sk_filter, NULL);
+		sk_filter_release(sk, filter);
+		ret = 0;
+	}
+	rcu_read_unlock_bh();
+	return ret;
+}
+
 EXPORT_SYMBOL(sk_chk_filter);
 EXPORT_SYMBOL(sk_run_filter);
diff --git a/net/core/sock.c b/net/core/sock.c
index d45ecdccc6a..07101381b8b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -428,7 +428,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 		    char __user *optval, int optlen)
 {
 	struct sock *sk=sock->sk;
-	struct sk_filter *filter;
 	int val;
 	int valbool;
 	struct linger ling;
@@ -652,16 +651,7 @@ set_rcvbuf:
 		break;
 
 	case SO_DETACH_FILTER:
-		rcu_read_lock_bh();
-		filter = rcu_dereference(sk->sk_filter);
-		if (filter) {
-			rcu_assign_pointer(sk->sk_filter, NULL);
-			sk_filter_release(sk, filter);
-			rcu_read_unlock_bh();
-			break;
-		}
-		rcu_read_unlock_bh();
-		ret = -ENONET;
+		ret = sk_detach_filter(sk);
 		break;
 
 	case SO_PASSSEC:
-- 
cgit v1.2.3


From 309dd5fc872448e35634d510049642312ebc170d Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 17 Oct 2007 21:21:51 -0700
Subject: [NET]: Move the filter releasing into a separate call

This is done merely as a preparation for the fix.

The sk_filter_uncharge() unaccounts the filter memory and calls
the sk_filter_release(), which in turn decrements the refcount
anf frees the filter.

The latter function will be required separately.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 4 ++--
 net/core/sock.c   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index fd607581ab5..2be1830d3c3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -429,7 +429,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	}
 
 	if (fp)
-		sk_filter_release(sk, fp);
+		sk_filter_uncharge(sk, fp);
 	return err;
 }
 
@@ -442,7 +442,7 @@ int sk_detach_filter(struct sock *sk)
 	filter = rcu_dereference(sk->sk_filter);
 	if (filter) {
 		rcu_assign_pointer(sk->sk_filter, NULL);
-		sk_filter_release(sk, filter);
+		sk_filter_uncharge(sk, filter);
 		ret = 0;
 	}
 	rcu_read_unlock_bh();
diff --git a/net/core/sock.c b/net/core/sock.c
index 07101381b8b..d292b4113d6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -915,7 +915,7 @@ void sk_free(struct sock *sk)
 
 	filter = rcu_dereference(sk->sk_filter);
 	if (filter) {
-		sk_filter_release(sk, filter);
+		sk_filter_uncharge(sk, filter);
 		rcu_assign_pointer(sk->sk_filter, NULL);
 	}
 
-- 
cgit v1.2.3


From d3904b739928edd83d117f1eb5bfa69f18d6f046 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 17 Oct 2007 21:22:17 -0700
Subject: [NET]: Cleanup the error path in sk_attach_filter

The sk_filter_uncharge is called for error handling and
for releasing the former filter, but this will have to
be done in a bit different manner, so cleanup the error
path a bit.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 2be1830d3c3..54dddc92452 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -398,7 +398,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
  */
 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 {
-	struct sk_filter *fp;
+	struct sk_filter *fp, *old_fp;
 	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
 	int err;
 
@@ -418,19 +418,18 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	fp->len = fprog->len;
 
 	err = sk_chk_filter(fp->insns, fp->len);
-	if (!err) {
-		struct sk_filter *old_fp;
-
-		rcu_read_lock_bh();
-		old_fp = rcu_dereference(sk->sk_filter);
-		rcu_assign_pointer(sk->sk_filter, fp);
-		rcu_read_unlock_bh();
-		fp = old_fp;
+	if (err) {
+		sk_filter_uncharge(sk, fp);
+		return err;
 	}
 
-	if (fp)
-		sk_filter_uncharge(sk, fp);
-	return err;
+	rcu_read_lock_bh();
+	old_fp = rcu_dereference(sk->sk_filter);
+	rcu_assign_pointer(sk->sk_filter, fp);
+	rcu_read_unlock_bh();
+
+	sk_filter_uncharge(sk, old_fp);
+	return 0;
 }
 
 int sk_detach_filter(struct sock *sk)
-- 
cgit v1.2.3


From 47e958eac280c263397582d5581e868c3227a1bd Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 17 Oct 2007 21:22:42 -0700
Subject: [NET]: Fix the race between sk_filter_(de|at)tach and sk_clone()

The proposed fix is to delay the reference counter decrement
until the quiescent state pass. This will give sk_clone() a
chance to get the reference on the cloned filter.

Regular sk_filter_uncharge can happen from the sk_free() only
and there's no need in delaying the put - the socket is dead
anyway and is to be release itself.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 54dddc92452..1f0068eae50 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -386,6 +386,25 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 	return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
 }
 
+/**
+ * 	sk_filter_rcu_release: Release a socket filter by rcu_head
+ *	@rcu: rcu_head that contains the sk_filter to free
+ */
+static void sk_filter_rcu_release(struct rcu_head *rcu)
+{
+	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
+
+	sk_filter_release(fp);
+}
+
+static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
+{
+	unsigned int size = sk_filter_len(fp);
+
+	atomic_sub(size, &sk->sk_omem_alloc);
+	call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
+}
+
 /**
  *	sk_attach_filter - attach a socket filter
  *	@fprog: the filter program
@@ -428,7 +447,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	rcu_assign_pointer(sk->sk_filter, fp);
 	rcu_read_unlock_bh();
 
-	sk_filter_uncharge(sk, old_fp);
+	sk_filter_delayed_uncharge(sk, old_fp);
 	return 0;
 }
 
@@ -441,7 +460,7 @@ int sk_detach_filter(struct sock *sk)
 	filter = rcu_dereference(sk->sk_filter);
 	if (filter) {
 		rcu_assign_pointer(sk->sk_filter, NULL);
-		sk_filter_uncharge(sk, filter);
+		sk_filter_delayed_uncharge(sk, filter);
 		ret = 0;
 	}
 	rcu_read_unlock_bh();
-- 
cgit v1.2.3


From 16910b9829797cda4032fbc84e5292ac7b4474f7 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 17 Oct 2007 21:23:43 -0700
Subject: [IPV6]: Fix return type for snmp6_free_dev()

This call is essentially void.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 52d10d21321..edf06ca3474 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -283,12 +283,11 @@ err_ip:
 	return err;
 }
 
-static int snmp6_free_dev(struct inet6_dev *idev)
+static void snmp6_free_dev(struct inet6_dev *idev)
 {
 	snmp_mib_free((void **)idev->stats.icmpv6msg);
 	snmp_mib_free((void **)idev->stats.icmpv6);
 	snmp_mib_free((void **)idev->stats.ipv6);
-	return 0;
 }
 
 /* Nobody refers to this device, we may destroy it. */
-- 
cgit v1.2.3


From aaf70ec7fde2321281b2a49c7c9f881c90d0d208 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 17 Oct 2007 21:25:32 -0700
Subject: [IPV6]: Cleanup snmp6_alloc_dev()

This functions is never called with NULL or not setup argument,
so the checks inside are redundant.

Also, the return value is always -ENOMEM, so no need in
additional variable for this.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index edf06ca3474..348bd8d0611 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -255,11 +255,6 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
 
 static int snmp6_alloc_dev(struct inet6_dev *idev)
 {
-	int err = -ENOMEM;
-
-	if (!idev || !idev->dev)
-		return -EINVAL;
-
 	if (snmp_mib_init((void **)idev->stats.ipv6,
 			  sizeof(struct ipstats_mib),
 			  __alignof__(struct ipstats_mib)) < 0)
@@ -280,7 +275,7 @@ err_icmpmsg:
 err_icmp:
 	snmp_mib_free((void **)idev->stats.ipv6);
 err_ip:
-	return err;
+	return -ENOMEM;
 }
 
 static void snmp6_free_dev(struct inet6_dev *idev)
-- 
cgit v1.2.3


From 04663d0b8b3c8ce3804106279420cfe5bdfcce3c Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 17 Oct 2007 21:28:06 -0700
Subject: [IPSEC]: Fix pure tunnel modes involving IPv6

I noticed that my recent patch broke 6-on-4 pure IPsec tunnels (the ones
that are only used for incompressible IPsec packets).  Subsequent reviews
show that I broke 6-on-6 pure tunnels more than three years ago and nobody
ever noticed. I suppose every must be testing 6-on-6 IPComp with large
pings which are very compressible :)

This patch fixes both cases.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_tunnel.c | 2 +-
 net/ipv6/xfrm6_tunnel.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 1312417608e..83e9580feac 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -18,7 +18,7 @@ static int ipip_output(struct xfrm_state *x, struct sk_buff *skb)
 
 static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb)
 {
-	return IPPROTO_IP;
+	return ip_hdr(skb)->protocol;
 }
 
 static int ipip_init_state(struct xfrm_state *x)
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 3f8a3abde67..6c67ac197ee 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -248,7 +248,7 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 
 static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	return 0;
+	return skb_network_header(skb)[IP6CB(skb)->nhoff];
 }
 
 static int xfrm6_tunnel_rcv(struct sk_buff *skb)
-- 
cgit v1.2.3


From c4541b41c0e4b75b11125fed16db642fc03cb31c Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 17 Oct 2007 21:28:53 -0700
Subject: [IPSEC]: Move tunnel parsing for IPv4 out of xfrm4_input

This patch moves the tunnel parsing for IPv4 out of xfrm4_input and into
xfrm4_tunnel.  This change is in line with what IPv6 does and will allow
us to merge the two input functions.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_input.c  | 36 +++++++++++-------------------------
 net/ipv4/xfrm4_tunnel.c |  9 +++++++--
 2 files changed, 18 insertions(+), 27 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index e9bbfde19ac..5cb0b5995bc 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -16,19 +16,6 @@
 #include <net/ip.h>
 #include <net/xfrm.h>
 
-static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
-{
-	switch (nexthdr) {
-	case IPPROTO_IPIP:
-	case IPPROTO_IPV6:
-		*spi = ip_hdr(skb)->saddr;
-		*seq = 0;
-		return 0;
-	}
-
-	return xfrm_parse_spi(skb, nexthdr, spi, seq);
-}
-
 #ifdef CONFIG_NETFILTER
 static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
 {
@@ -46,28 +33,29 @@ drop:
 }
 #endif
 
-static int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
+int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
+		    int encap_type)
 {
-	__be32 spi, seq;
+	int err;
+	__be32 seq;
 	struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
 	struct xfrm_state *x;
 	int xfrm_nr = 0;
 	int decaps = 0;
-	int err = xfrm4_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq);
 	unsigned int nhoff = offsetof(struct iphdr, protocol);
 
-	if (err != 0)
+	seq = 0;
+	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
 		goto drop;
 
 	do {
 		const struct iphdr *iph = ip_hdr(skb);
-		int nexthdr;
 
 		if (xfrm_nr == XFRM_MAX_DEPTH)
 			goto drop;
 
 		x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi,
-				iph->protocol != IPPROTO_IPV6 ? iph->protocol : IPPROTO_IPIP, AF_INET);
+				      nexthdr, AF_INET);
 		if (x == NULL)
 			goto drop;
 
@@ -111,7 +99,7 @@ static int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 			break;
 		}
 
-		err = xfrm_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq);
+		err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
 		if (err < 0)
 			goto drop;
 	} while (!err);
@@ -165,6 +153,7 @@ drop:
 	kfree_skb(skb);
 	return 0;
 }
+EXPORT_SYMBOL(xfrm4_rcv_encap);
 
 /* If it's a keepalive packet, then just eat it.
  * If it's an encapsulated packet, then pass it to the
@@ -252,11 +241,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	__skb_pull(skb, len);
 	skb_reset_transport_header(skb);
 
-	/* modify the protocol (it's ESP!) */
-	iph->protocol = IPPROTO_ESP;
-
 	/* process ESP */
-	ret = xfrm4_rcv_encap(skb, encap_type);
+	ret = xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
 	return ret;
 
 drop:
@@ -266,7 +252,7 @@ drop:
 
 int xfrm4_rcv(struct sk_buff *skb)
 {
-	return xfrm4_rcv_encap(skb, 0);
+	return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
 }
 
 EXPORT_SYMBOL(xfrm4_rcv);
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 83e9580feac..32684519562 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -48,20 +48,25 @@ static struct xfrm_type ipip_type = {
 	.output		= ipip_output
 };
 
+static int xfrm_tunnel_rcv(struct sk_buff *skb)
+{
+	return xfrm4_rcv_spi(skb, IPPROTO_IP, ip_hdr(skb)->saddr);
+}
+
 static int xfrm_tunnel_err(struct sk_buff *skb, u32 info)
 {
 	return -ENOENT;
 }
 
 static struct xfrm_tunnel xfrm_tunnel_handler = {
-	.handler	=	xfrm4_rcv,
+	.handler	=	xfrm_tunnel_rcv,
 	.err_handler	=	xfrm_tunnel_err,
 	.priority	=	2,
 };
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 static struct xfrm_tunnel xfrm64_tunnel_handler = {
-	.handler	=	xfrm4_rcv,
+	.handler	=	xfrm_tunnel_rcv,
 	.err_handler	=	xfrm_tunnel_err,
 	.priority	=	2,
 };
-- 
cgit v1.2.3


From 33b5ecb8f64706d1ed472dcb44162ab3a7345724 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 17 Oct 2007 21:29:25 -0700
Subject: [IPSEC]: Get nexthdr from caller in xfrm6_rcv_spi

Currently xfrm6_rcv_spi gets the nexthdr value itself from the packet.
This means that we need to fix up the value in case we have a 4-on-6
tunnel.  Moving this logic into the caller simplifies things and allows
us to merge the code with IPv4.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/xfrm6_input.c  | 9 ++++-----
 net/ipv6/xfrm6_tunnel.c | 2 +-
 2 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 02f69e544f6..596a730294e 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -16,7 +16,7 @@
 #include <net/ipv6.h>
 #include <net/xfrm.h>
 
-int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
+int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
 {
 	int err;
 	__be32 seq;
@@ -24,11 +24,9 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
 	struct xfrm_state *x;
 	int xfrm_nr = 0;
 	int decaps = 0;
-	int nexthdr;
 	unsigned int nhoff;
 
 	nhoff = IP6CB(skb)->nhoff;
-	nexthdr = skb_network_header(skb)[nhoff];
 
 	seq = 0;
 	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
@@ -41,7 +39,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
 			goto drop;
 
 		x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi,
-				nexthdr != IPPROTO_IPIP ? nexthdr : IPPROTO_IPV6, AF_INET6);
+				      nexthdr, AF_INET6);
 		if (x == NULL)
 			goto drop;
 		spin_lock(&x->lock);
@@ -135,7 +133,8 @@ EXPORT_SYMBOL(xfrm6_rcv_spi);
 
 int xfrm6_rcv(struct sk_buff *skb)
 {
-	return xfrm6_rcv_spi(skb, 0);
+	return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
+			     0);
 }
 
 EXPORT_SYMBOL(xfrm6_rcv);
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 6c67ac197ee..fae90ff3108 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -257,7 +257,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb)
 	__be32 spi;
 
 	spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr);
-	return xfrm6_rcv_spi(skb, spi) > 0 ? : 0;
+	return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi) > 0 ? : 0;
 }
 
 static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-- 
cgit v1.2.3


From 7aa68cb90638ccc36559a936814e4c089892b3d9 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 17 Oct 2007 21:30:07 -0700
Subject: [IPSEC]: Move ip_summed zapping out of xfrm6_rcv_spi

Not every transform needs to zap ip_summed.  For example, a pure tunnel
mode encapsulation does not affect the hardware checksum at all.  In fact,
every algorithm (that needs this) other than AH6 already does its own
ip_summed zapping.

This patch moves the zapping into AH6 which is in line with what IPv4 does.

Possible future optimisation: Checksum the data as we copy them in IPComp.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ah6.c         | 2 ++
 net/ipv6/xfrm6_input.c | 1 -
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index f9f68916269..a8221d1da0f 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -344,6 +344,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
 		goto out;
 
+	skb->ip_summed = CHECKSUM_NONE;
+
 	hdr_len = skb->data - skb_network_header(skb);
 	ah = (struct ip_auth_hdr *)skb->data;
 	ahp = x->data;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 596a730294e..b1201c33eb1 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -97,7 +97,6 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
 	memcpy(skb->sp->xvec + skb->sp->len, xfrm_vec,
 	       xfrm_nr * sizeof(xfrm_vec[0]));
 	skb->sp->len += xfrm_nr;
-	skb->ip_summed = CHECKSUM_NONE;
 
 	nf_reset(skb);
 
-- 
cgit v1.2.3


From 440725000cba0b1a68ca2df20124be3a5b7f7702 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 17 Oct 2007 21:30:34 -0700
Subject: [IPSEC]: Fix length check in xfrm_parse_spi

Currently xfrm_parse_spi requires there to be 16 bytes for AH and ESP.
In contrived cases there may not actually be 16 bytes there since the
respective header sizes are less than that (8 and 12 currently).

This patch changes the test to use the actual header length instead of 16.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_input.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 113f4442998..cb97fda1b6d 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -49,13 +49,16 @@ EXPORT_SYMBOL(secpath_dup);
 int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
 {
 	int offset, offset_seq;
+	int hlen;
 
 	switch (nexthdr) {
 	case IPPROTO_AH:
+		hlen = sizeof(struct ip_auth_hdr);
 		offset = offsetof(struct ip_auth_hdr, spi);
 		offset_seq = offsetof(struct ip_auth_hdr, seq_no);
 		break;
 	case IPPROTO_ESP:
+		hlen = sizeof(struct ip_esp_hdr);
 		offset = offsetof(struct ip_esp_hdr, spi);
 		offset_seq = offsetof(struct ip_esp_hdr, seq_no);
 		break;
@@ -69,7 +72,7 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
 		return 1;
 	}
 
-	if (!pskb_may_pull(skb, 16))
+	if (!pskb_may_pull(skb, hlen))
 		return -EINVAL;
 
 	*spi = *(__be32*)(skb_transport_header(skb) + offset);
-- 
cgit v1.2.3


From aa5d62cc8777f733f8b59b5586c0a1989813189e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 17 Oct 2007 21:31:12 -0700
Subject: [IPSEC]: Move type and mode map into xfrm_state.c

The type and mode maps are only used by SAs, not policies.  So it makes
sense to move them from xfrm_policy.c into xfrm_state.c.  This also allows
us to mark xfrm_get_type/xfrm_put_type/xfrm_get_mode/xfrm_put_mode as
static.

The only other change I've made in the move is to get rid of the casts
on the request_module call for types.  They're unnecessary because C
will promote them to ints anyway.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 173 -------------------------------------------------
 net/xfrm/xfrm_state.c  | 170 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 170 insertions(+), 173 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index af27c193697..ca24c90d379 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -49,8 +49,6 @@ static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
 
 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
-static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family);
-static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo);
 
 static inline int
 __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
@@ -86,72 +84,6 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
 	return 0;
 }
 
-int xfrm_register_type(struct xfrm_type *type, unsigned short family)
-{
-	struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family);
-	struct xfrm_type **typemap;
-	int err = 0;
-
-	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
-	typemap = afinfo->type_map;
-
-	if (likely(typemap[type->proto] == NULL))
-		typemap[type->proto] = type;
-	else
-		err = -EEXIST;
-	xfrm_policy_unlock_afinfo(afinfo);
-	return err;
-}
-EXPORT_SYMBOL(xfrm_register_type);
-
-int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
-{
-	struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family);
-	struct xfrm_type **typemap;
-	int err = 0;
-
-	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
-	typemap = afinfo->type_map;
-
-	if (unlikely(typemap[type->proto] != type))
-		err = -ENOENT;
-	else
-		typemap[type->proto] = NULL;
-	xfrm_policy_unlock_afinfo(afinfo);
-	return err;
-}
-EXPORT_SYMBOL(xfrm_unregister_type);
-
-struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
-{
-	struct xfrm_policy_afinfo *afinfo;
-	struct xfrm_type **typemap;
-	struct xfrm_type *type;
-	int modload_attempted = 0;
-
-retry:
-	afinfo = xfrm_policy_get_afinfo(family);
-	if (unlikely(afinfo == NULL))
-		return NULL;
-	typemap = afinfo->type_map;
-
-	type = typemap[proto];
-	if (unlikely(type && !try_module_get(type->owner)))
-		type = NULL;
-	if (!type && !modload_attempted) {
-		xfrm_policy_put_afinfo(afinfo);
-		request_module("xfrm-type-%d-%d",
-			       (int) family, (int) proto);
-		modload_attempted = 1;
-		goto retry;
-	}
-
-	xfrm_policy_put_afinfo(afinfo);
-	return type;
-}
-
 int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl,
 		    unsigned short family)
 {
@@ -170,94 +102,6 @@ int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl,
 }
 EXPORT_SYMBOL(xfrm_dst_lookup);
 
-void xfrm_put_type(struct xfrm_type *type)
-{
-	module_put(type->owner);
-}
-
-int xfrm_register_mode(struct xfrm_mode *mode, int family)
-{
-	struct xfrm_policy_afinfo *afinfo;
-	struct xfrm_mode **modemap;
-	int err;
-
-	if (unlikely(mode->encap >= XFRM_MODE_MAX))
-		return -EINVAL;
-
-	afinfo = xfrm_policy_lock_afinfo(family);
-	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
-
-	err = -EEXIST;
-	modemap = afinfo->mode_map;
-	if (likely(modemap[mode->encap] == NULL)) {
-		modemap[mode->encap] = mode;
-		err = 0;
-	}
-
-	xfrm_policy_unlock_afinfo(afinfo);
-	return err;
-}
-EXPORT_SYMBOL(xfrm_register_mode);
-
-int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
-{
-	struct xfrm_policy_afinfo *afinfo;
-	struct xfrm_mode **modemap;
-	int err;
-
-	if (unlikely(mode->encap >= XFRM_MODE_MAX))
-		return -EINVAL;
-
-	afinfo = xfrm_policy_lock_afinfo(family);
-	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
-
-	err = -ENOENT;
-	modemap = afinfo->mode_map;
-	if (likely(modemap[mode->encap] == mode)) {
-		modemap[mode->encap] = NULL;
-		err = 0;
-	}
-
-	xfrm_policy_unlock_afinfo(afinfo);
-	return err;
-}
-EXPORT_SYMBOL(xfrm_unregister_mode);
-
-struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
-{
-	struct xfrm_policy_afinfo *afinfo;
-	struct xfrm_mode *mode;
-	int modload_attempted = 0;
-
-	if (unlikely(encap >= XFRM_MODE_MAX))
-		return NULL;
-
-retry:
-	afinfo = xfrm_policy_get_afinfo(family);
-	if (unlikely(afinfo == NULL))
-		return NULL;
-
-	mode = afinfo->mode_map[encap];
-	if (unlikely(mode && !try_module_get(mode->owner)))
-		mode = NULL;
-	if (!mode && !modload_attempted) {
-		xfrm_policy_put_afinfo(afinfo);
-		request_module("xfrm-mode-%d-%d", family, encap);
-		modload_attempted = 1;
-		goto retry;
-	}
-
-	xfrm_policy_put_afinfo(afinfo);
-	return mode;
-}
-
-void xfrm_put_mode(struct xfrm_mode *mode)
-{
-	module_put(mode->owner);
-}
-
 static inline unsigned long make_jiffies(long secs)
 {
 	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
@@ -2213,23 +2057,6 @@ static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
 	read_unlock(&xfrm_policy_afinfo_lock);
 }
 
-static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family)
-{
-	struct xfrm_policy_afinfo *afinfo;
-	if (unlikely(family >= NPROTO))
-		return NULL;
-	write_lock_bh(&xfrm_policy_afinfo_lock);
-	afinfo = xfrm_policy_afinfo[family];
-	if (unlikely(!afinfo))
-		write_unlock_bh(&xfrm_policy_afinfo_lock);
-	return afinfo;
-}
-
-static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo)
-{
-	write_unlock_bh(&xfrm_policy_afinfo_lock);
-}
-
 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
 	struct net_device *dev = ptr;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 344f0a6abec..dc438f2b944 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -187,6 +187,176 @@ int __xfrm_state_delete(struct xfrm_state *x);
 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
 
+static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family)
+{
+	struct xfrm_state_afinfo *afinfo;
+	if (unlikely(family >= NPROTO))
+		return NULL;
+	write_lock_bh(&xfrm_state_afinfo_lock);
+	afinfo = xfrm_state_afinfo[family];
+	if (unlikely(!afinfo))
+		write_unlock_bh(&xfrm_state_afinfo_lock);
+	return afinfo;
+}
+
+static void xfrm_state_unlock_afinfo(struct xfrm_state_afinfo *afinfo)
+{
+	write_unlock_bh(&xfrm_state_afinfo_lock);
+}
+
+int xfrm_register_type(struct xfrm_type *type, unsigned short family)
+{
+	struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family);
+	struct xfrm_type **typemap;
+	int err = 0;
+
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+	typemap = afinfo->type_map;
+
+	if (likely(typemap[type->proto] == NULL))
+		typemap[type->proto] = type;
+	else
+		err = -EEXIST;
+	xfrm_state_unlock_afinfo(afinfo);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_register_type);
+
+int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
+{
+	struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family);
+	struct xfrm_type **typemap;
+	int err = 0;
+
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+	typemap = afinfo->type_map;
+
+	if (unlikely(typemap[type->proto] != type))
+		err = -ENOENT;
+	else
+		typemap[type->proto] = NULL;
+	xfrm_state_unlock_afinfo(afinfo);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_unregister_type);
+
+static struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
+{
+	struct xfrm_state_afinfo *afinfo;
+	struct xfrm_type **typemap;
+	struct xfrm_type *type;
+	int modload_attempted = 0;
+
+retry:
+	afinfo = xfrm_state_get_afinfo(family);
+	if (unlikely(afinfo == NULL))
+		return NULL;
+	typemap = afinfo->type_map;
+
+	type = typemap[proto];
+	if (unlikely(type && !try_module_get(type->owner)))
+		type = NULL;
+	if (!type && !modload_attempted) {
+		xfrm_state_put_afinfo(afinfo);
+		request_module("xfrm-type-%d-%d", family, proto);
+		modload_attempted = 1;
+		goto retry;
+	}
+
+	xfrm_state_put_afinfo(afinfo);
+	return type;
+}
+
+static void xfrm_put_type(struct xfrm_type *type)
+{
+	module_put(type->owner);
+}
+
+int xfrm_register_mode(struct xfrm_mode *mode, int family)
+{
+	struct xfrm_state_afinfo *afinfo;
+	struct xfrm_mode **modemap;
+	int err;
+
+	if (unlikely(mode->encap >= XFRM_MODE_MAX))
+		return -EINVAL;
+
+	afinfo = xfrm_state_lock_afinfo(family);
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+
+	err = -EEXIST;
+	modemap = afinfo->mode_map;
+	if (likely(modemap[mode->encap] == NULL)) {
+		modemap[mode->encap] = mode;
+		err = 0;
+	}
+
+	xfrm_state_unlock_afinfo(afinfo);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_register_mode);
+
+int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
+{
+	struct xfrm_state_afinfo *afinfo;
+	struct xfrm_mode **modemap;
+	int err;
+
+	if (unlikely(mode->encap >= XFRM_MODE_MAX))
+		return -EINVAL;
+
+	afinfo = xfrm_state_lock_afinfo(family);
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+
+	err = -ENOENT;
+	modemap = afinfo->mode_map;
+	if (likely(modemap[mode->encap] == mode)) {
+		modemap[mode->encap] = NULL;
+		err = 0;
+	}
+
+	xfrm_state_unlock_afinfo(afinfo);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_unregister_mode);
+
+static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
+{
+	struct xfrm_state_afinfo *afinfo;
+	struct xfrm_mode *mode;
+	int modload_attempted = 0;
+
+	if (unlikely(encap >= XFRM_MODE_MAX))
+		return NULL;
+
+retry:
+	afinfo = xfrm_state_get_afinfo(family);
+	if (unlikely(afinfo == NULL))
+		return NULL;
+
+	mode = afinfo->mode_map[encap];
+	if (unlikely(mode && !try_module_get(mode->owner)))
+		mode = NULL;
+	if (!mode && !modload_attempted) {
+		xfrm_state_put_afinfo(afinfo);
+		request_module("xfrm-mode-%d-%d", family, encap);
+		modload_attempted = 1;
+		goto retry;
+	}
+
+	xfrm_state_put_afinfo(afinfo);
+	return mode;
+}
+
+static void xfrm_put_mode(struct xfrm_mode *mode)
+{
+	module_put(mode->owner);
+}
+
 static void xfrm_state_gc_destroy(struct xfrm_state *x)
 {
 	del_timer_sync(&x->timer);
-- 
cgit v1.2.3


From 1bfcb10f670f5ff5e1d9f53e59680573524cb142 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 17 Oct 2007 21:31:50 -0700
Subject: [IPSEC]: Add missing BEET checks

Currently BEET mode does not reinject the packet back into the stack
like tunnel mode does.  Since BEET should behave just like tunnel mode
this is incorrect.

This patch fixes this by introducing a flags field to xfrm_mode that
tells the IPsec code whether it should terminate and reinject the packet
back into the stack.

It then sets the flag for BEET and tunnel mode.

I've also added a number of missing BEET checks elsewhere where we check
whether a given mode is a tunnel or not.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_input.c       | 2 +-
 net/ipv4/xfrm4_mode_beet.c   | 1 +
 net/ipv4/xfrm4_mode_tunnel.c | 1 +
 net/ipv4/xfrm4_output.c      | 2 +-
 net/ipv4/xfrm4_policy.c      | 2 +-
 net/ipv6/xfrm6_input.c       | 2 +-
 net/ipv6/xfrm6_mode_beet.c   | 1 +
 net/ipv6/xfrm6_mode_tunnel.c | 1 +
 net/ipv6/xfrm6_output.c      | 2 +-
 net/ipv6/xfrm6_policy.c      | 3 +--
 net/ipv6/xfrm6_state.c       | 6 ++++--
 net/xfrm/xfrm_output.c       | 2 +-
 net/xfrm/xfrm_policy.c       | 6 ++++--
 13 files changed, 19 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 5cb0b5995bc..bc5dc0747cd 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -94,7 +94,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
 		if (x->mode->input(x, skb))
 			goto drop;
 
-		if (x->props.mode == XFRM_MODE_TUNNEL) {
+		if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) {
 			decaps = 1;
 			break;
 		}
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 73d2338bec5..e42e122414b 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -114,6 +114,7 @@ static struct xfrm_mode xfrm4_beet_mode = {
 	.output = xfrm4_beet_output,
 	.owner = THIS_MODULE,
 	.encap = XFRM_MODE_BEET,
+	.flags = XFRM_MODE_FLAG_TUNNEL,
 };
 
 static int __init xfrm4_beet_init(void)
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 1ae9d32276f..e4deecba6dd 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -139,6 +139,7 @@ static struct xfrm_mode xfrm4_tunnel_mode = {
 	.output = xfrm4_tunnel_output,
 	.owner = THIS_MODULE,
 	.encap = XFRM_MODE_TUNNEL,
+	.flags = XFRM_MODE_FLAG_TUNNEL,
 };
 
 static int __init xfrm4_tunnel_init(void)
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index a4edd666318..dcbc2743069 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -47,7 +47,7 @@ static inline int xfrm4_output_one(struct sk_buff *skb)
 	struct iphdr *iph;
 	int err;
 
-	if (x->props.mode == XFRM_MODE_TUNNEL) {
+	if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) {
 		err = xfrm4_tunnel_check_size(skb);
 		if (err)
 			goto error_nolock;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 329825ca68f..2373d673df6 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -117,7 +117,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		header_len += xfrm[i]->props.header_len;
 		trailer_len += xfrm[i]->props.trailer_len;
 
-		if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) {
+		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
 			unsigned short encap_family = xfrm[i]->props.family;
 			switch (encap_family) {
 			case AF_INET:
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index b1201c33eb1..c6ee1a3ba19 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -71,7 +71,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
 		if (x->mode->input(x, skb))
 			goto drop;
 
-		if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */
+		if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) {
 			decaps = 1;
 			break;
 		}
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 13bb1e85676..2bfb4f05c14 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -79,6 +79,7 @@ static struct xfrm_mode xfrm6_beet_mode = {
 	.output = xfrm6_beet_output,
 	.owner = THIS_MODULE,
 	.encap = XFRM_MODE_BEET,
+	.flags = XFRM_MODE_FLAG_TUNNEL,
 };
 
 static int __init xfrm6_beet_init(void)
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index ea228387911..fd84e221727 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -118,6 +118,7 @@ static struct xfrm_mode xfrm6_tunnel_mode = {
 	.output = xfrm6_tunnel_output,
 	.owner = THIS_MODULE,
 	.encap = XFRM_MODE_TUNNEL,
+	.flags = XFRM_MODE_FLAG_TUNNEL,
 };
 
 static int __init xfrm6_tunnel_init(void)
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index a5a32c17249..c9f42d1c2df 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -50,7 +50,7 @@ static inline int xfrm6_output_one(struct sk_buff *skb)
 	struct ipv6hdr *iph;
 	int err;
 
-	if (x->props.mode == XFRM_MODE_TUNNEL) {
+	if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) {
 		err = xfrm6_tunnel_check_size(skb);
 		if (err)
 			goto error_nolock;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 15aa4c58c31..dc4bdcb55cb 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -178,8 +178,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		__xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]);
 		trailer_len += xfrm[i]->props.trailer_len;
 
-		if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL ||
-		    xfrm[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION) {
+		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
 			unsigned short encap_family = xfrm[i]->props.family;
 			switch(encap_family) {
 			case AF_INET:
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index cdadb484746..e644c80515f 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -93,7 +93,8 @@ __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n)
 	/* Rule 4: select IPsec tunnel */
 	for (i = 0; i < n; i++) {
 		if (src[i] &&
-		    src[i]->props.mode == XFRM_MODE_TUNNEL) {
+		    (src[i]->props.mode == XFRM_MODE_TUNNEL ||
+		     src[i]->props.mode == XFRM_MODE_BEET)) {
 			dst[j++] = src[i];
 			src[i] = NULL;
 		}
@@ -146,7 +147,8 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
 	/* Rule 3: select IPsec tunnel */
 	for (i = 0; i < n; i++) {
 		if (src[i] &&
-		    src[i]->mode == XFRM_MODE_TUNNEL) {
+		    (src[i]->mode == XFRM_MODE_TUNNEL ||
+		     src[i]->mode == XFRM_MODE_BEET)) {
 			dst[j++] = src[i];
 			src[i] = NULL;
 		}
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 0eb3377602e..8bf71ba2345 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -82,7 +82,7 @@ int xfrm_output(struct sk_buff *skb)
 		}
 		dst = skb->dst;
 		x = dst->xfrm;
-	} while (x && (x->props.mode != XFRM_MODE_TUNNEL));
+	} while (x && !(x->mode->flags & XFRM_MODE_FLAG_TUNNEL));
 
 	err = 0;
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ca24c90d379..1d66fb42c9c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1940,7 +1940,8 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
 		if (xdst->genid != dst->xfrm->genid)
 			return 0;
 
-		if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL &&
+		if (strict && fl &&
+		    !(dst->xfrm->mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
 		    !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
 			return 0;
 
@@ -2291,7 +2292,8 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
 			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
 				continue;
 			n++;
-			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL)
+			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
+			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
 				continue;
 			/* update endpoints */
 			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
-- 
cgit v1.2.3


From 17c2a42a24e1e8dd6aa7cea4f84e034ab1bfff31 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 17 Oct 2007 21:33:12 -0700
Subject: [IPSEC]: Store afinfo pointer in xfrm_mode

It is convenient to have a pointer from xfrm_state to address-specific
functions such as the output function for a family.  Currently the
address-specific policy code calls out to the xfrm state code to get
those pointers when we could get it in an easier way via the state
itself.

This patch adds an xfrm_state_afinfo to xfrm_mode (since they're
address-specific) and changes the policy code to use it.  I've also
added an owner field to do reference counting on the module providing
the afinfo even though it isn't strictly necessary today since IPv6
can't be unloaded yet.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_policy.c | 13 +------------
 net/ipv4/xfrm4_state.c  |  1 +
 net/ipv6/xfrm6_policy.c | 14 +-------------
 net/ipv6/xfrm6_state.c  |  1 +
 net/xfrm/xfrm_state.c   | 26 +++++++++++++++++---------
 5 files changed, 21 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 2373d673df6..c65b8e03c04 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -151,7 +151,6 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 	i = 0;
 	for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
 		struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
-		struct xfrm_state_afinfo *afinfo;
 		x->u.rt.fl = *fl;
 
 		dst_prev->xfrm = xfrm[i++];
@@ -169,17 +168,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		/* Copy neighbout for reachability confirmation */
 		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
 		dst_prev->input		= rt->u.dst.input;
-		/* XXX: When IPv6 module can be unloaded, we should manage reference
-		 * to xfrm6_output in afinfo->output. Miyazawa
-		 * */
-		afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family);
-		if (!afinfo) {
-			dst = *dst_p;
-			err = -EAFNOSUPPORT;
-			goto error;
-		}
-		dst_prev->output = afinfo->output;
-		xfrm_state_put_afinfo(afinfo);
+		dst_prev->output = dst_prev->xfrm->mode->afinfo->output;
 		if (dst_prev->xfrm->props.family == AF_INET && rt->peer)
 			atomic_inc(&rt->peer->refcnt);
 		x->u.rt.peer = rt->peer;
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 93e2c061cdd..13d54a1c333 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -49,6 +49,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 
 static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.family			= AF_INET,
+	.owner			= THIS_MODULE,
 	.init_flags		= xfrm4_init_flags,
 	.init_tempsel		= __xfrm4_init_tempsel,
 	.output			= xfrm4_output,
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index dc4bdcb55cb..324268329f6 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -214,7 +214,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 	i = 0;
 	for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
 		struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
-		struct xfrm_state_afinfo *afinfo;
 
 		dst_prev->xfrm = xfrm[i++];
 		dst_prev->dev = rt->u.dst.dev;
@@ -231,18 +230,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		/* Copy neighbour for reachability confirmation */
 		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
 		dst_prev->input		= rt->u.dst.input;
-		/* XXX: When IPv4 is implemented as module and can be unloaded,
-		 * we should manage reference to xfrm4_output in afinfo->output.
-		 * Miyazawa
-		 */
-		afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family);
-		if (!afinfo) {
-			dst = *dst_p;
-			goto error;
-		}
-
-		dst_prev->output = afinfo->output;
-		xfrm_state_put_afinfo(afinfo);
+		dst_prev->output = dst_prev->xfrm->mode->afinfo->output;
 		/* Sheit... I remember I did this right. Apparently,
 		 * it was magically lost, so this code needs audit */
 		x->u.rt6.rt6i_flags    = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index e644c80515f..b392bee396f 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -170,6 +170,7 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
 
 static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.family			= AF_INET6,
+	.owner			= THIS_MODULE,
 	.init_tempsel		= __xfrm6_init_tempsel,
 	.tmpl_sort		= __xfrm6_tmpl_sort,
 	.state_sort		= __xfrm6_state_sort,
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index dc438f2b944..48b4a06b3d1 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -57,6 +57,9 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
 static unsigned int xfrm_state_num;
 static unsigned int xfrm_state_genid;
 
+static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
+static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
+
 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
 					 xfrm_address_t *saddr,
 					 u32 reqid,
@@ -289,11 +292,18 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family)
 
 	err = -EEXIST;
 	modemap = afinfo->mode_map;
-	if (likely(modemap[mode->encap] == NULL)) {
-		modemap[mode->encap] = mode;
-		err = 0;
-	}
+	if (modemap[mode->encap])
+		goto out;
 
+	err = -ENOENT;
+	if (!try_module_get(afinfo->owner))
+		goto out;
+
+	mode->afinfo = afinfo;
+	modemap[mode->encap] = mode;
+	err = 0;
+
+out:
 	xfrm_state_unlock_afinfo(afinfo);
 	return err;
 }
@@ -316,6 +326,7 @@ int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
 	modemap = afinfo->mode_map;
 	if (likely(modemap[mode->encap] == mode)) {
 		modemap[mode->encap] = NULL;
+		module_put(mode->afinfo->owner);
 		err = 0;
 	}
 
@@ -1869,7 +1880,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
 }
 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
 
-struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
+static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
 {
 	struct xfrm_state_afinfo *afinfo;
 	if (unlikely(family >= NPROTO))
@@ -1881,14 +1892,11 @@ struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
 	return afinfo;
 }
 
-void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
+static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
 {
 	read_unlock(&xfrm_state_afinfo_lock);
 }
 
-EXPORT_SYMBOL(xfrm_state_get_afinfo);
-EXPORT_SYMBOL(xfrm_state_put_afinfo);
-
 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
 void xfrm_state_delete_tunnel(struct xfrm_state *x)
 {
-- 
cgit v1.2.3


From ed3e37ddb0b422120d3d2d5da718c44c40af30ba Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 17 Oct 2007 21:34:46 -0700
Subject: [IPSEC]: Use the top IPv4 route's peer instead of the bottom

For IPv4 we were using the bottom route's peer instead of the top one.
This is wrong because the peer is only used by TCP to keep track of
information about the TCP destination address which certainly does not
live in the bottom route.

This patch fixes that which allows us to get rid of the family check
since the bottom route could be IPv6 while the top one must always
be IPv4.

I've also changed the other fields which are IPv4-specific to get the
info from the top route instead of potentially bogus data from the
bottom route.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_policy.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index c65b8e03c04..1f0ea0e0371 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -169,16 +169,16 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
 		dst_prev->input		= rt->u.dst.input;
 		dst_prev->output = dst_prev->xfrm->mode->afinfo->output;
-		if (dst_prev->xfrm->props.family == AF_INET && rt->peer)
-			atomic_inc(&rt->peer->refcnt);
-		x->u.rt.peer = rt->peer;
+		if (rt0->peer)
+			atomic_inc(&rt0->peer->refcnt);
+		x->u.rt.peer = rt0->peer;
 		/* Sheit... I remember I did this right. Apparently,
 		 * it was magically lost, so this code needs audit */
 		x->u.rt.rt_flags = rt0->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
-		x->u.rt.rt_type = rt->rt_type;
+		x->u.rt.rt_type = rt0->rt_type;
 		x->u.rt.rt_src = rt0->rt_src;
 		x->u.rt.rt_dst = rt0->rt_dst;
-		x->u.rt.rt_gateway = rt->rt_gateway;
+		x->u.rt.rt_gateway = rt0->rt_gateway;
 		x->u.rt.rt_spec_dst = rt0->rt_spec_dst;
 		x->u.rt.idev = rt0->idev;
 		in_dev_hold(rt0->idev);
@@ -280,7 +280,7 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
 
 	if (likely(xdst->u.rt.idev))
 		in_dev_put(xdst->u.rt.idev);
-	if (dst->xfrm && dst->xfrm->props.family == AF_INET && likely(xdst->u.rt.peer))
+	if (likely(xdst->u.rt.peer))
 		inet_putpeer(xdst->u.rt.peer);
 	xfrm_dst_destroy(xdst);
 }
-- 
cgit v1.2.3


From ca68145f16359f71cd62b2671aa3e8c58f45ef19 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 17 Oct 2007 21:35:15 -0700
Subject: [IPSEC]: Disallow combinations of RO and AH/ESP/IPCOMP

Combining RO and AH/ESP/IPCOMP does not make sense.  So this patch adds a
check in the state initialisation function to prevent this.

This allows us to safely remove the mode input function of RO since it
can never be called anymore.  Indeed, if somehow it does get called we'll
know about it through an OOPS instead of it slipping past silently.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ah6.c           | 9 ++++++++-
 net/ipv6/esp6.c          | 9 ++++++++-
 net/ipv6/ipcomp6.c       | 9 ++++++++-
 net/ipv6/xfrm6_mode_ro.c | 9 ---------
 4 files changed, 24 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index a8221d1da0f..67cd06613a2 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -477,8 +477,15 @@ static int ah6_init_state(struct xfrm_state *x)
 
 	x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) +
 					  ahp->icv_trunc_len);
-	if (x->props.mode == XFRM_MODE_TUNNEL)
+	switch (x->props.mode) {
+	case XFRM_MODE_BEET:
+	case XFRM_MODE_TRANSPORT:
+		break;
+	case XFRM_MODE_TUNNEL:
 		x->props.header_len += sizeof(struct ipv6hdr);
+	default:
+		goto error;
+	}
 	x->data = ahp;
 
 	return 0;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 9eb92859835..b0715432e45 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -354,8 +354,15 @@ static int esp6_init_state(struct xfrm_state *x)
 				    (x->ealg->alg_key_len + 7) / 8))
 		goto error;
 	x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
-	if (x->props.mode == XFRM_MODE_TUNNEL)
+	switch (x->props.mode) {
+	case XFRM_MODE_BEET:
+	case XFRM_MODE_TRANSPORT:
+		break;
+	case XFRM_MODE_TUNNEL:
 		x->props.header_len += sizeof(struct ipv6hdr);
+	default:
+		goto error;
+	}
 	x->data = esp;
 	return 0;
 
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 28fc8edfdc3..80ef2a1d39f 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -411,8 +411,15 @@ static int ipcomp6_init_state(struct xfrm_state *x)
 		goto out;
 
 	x->props.header_len = 0;
-	if (x->props.mode == XFRM_MODE_TUNNEL)
+	switch (x->props.mode) {
+	case XFRM_MODE_BEET:
+	case XFRM_MODE_TRANSPORT:
+		break;
+	case XFRM_MODE_TUNNEL:
 		x->props.header_len += sizeof(struct ipv6hdr);
+	default:
+		goto error;
+	}
 
 	mutex_lock(&ipcomp6_resource_mutex);
 	if (!ipcomp6_alloc_scratches())
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index 957ae36b669..a7bc8c62317 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -58,16 +58,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 	return 0;
 }
 
-/*
- * Do nothing about routing optimization header unlike IPsec.
- */
-static int xfrm6_ro_input(struct xfrm_state *x, struct sk_buff *skb)
-{
-	return 0;
-}
-
 static struct xfrm_mode xfrm6_ro_mode = {
-	.input = xfrm6_ro_input,
 	.output = xfrm6_ro_output,
 	.owner = THIS_MODULE,
 	.encap = XFRM_MODE_ROUTEOPTIMIZATION,
-- 
cgit v1.2.3


From 13996378e6585fb25e582afe7489bf52dde78deb Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 17 Oct 2007 21:35:51 -0700
Subject: [IPSEC]: Rename mode to outer_mode and add inner_mode

This patch adds a new field to xfrm states called inner_mode.  The existing
mode object is renamed to outer_mode.

This is the first part of an attempt to fix inter-family transforms.  As it
is we always use the outer family when determining which mode to use.  As a
result we may end up shoving IPv4 packets into netfilter6 and vice versa.

What we really want is to use the inner family for the first part of outbound
processing and the outer family for the second part.  For inbound processing
we'd use the opposite pairing.

I've also added a check to prevent silly combinations such as transport mode
with inter-family transforms.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c       |  2 +-
 net/ipv4/xfrm4_input.c  |  4 ++--
 net/ipv4/xfrm4_output.c |  2 +-
 net/ipv4/xfrm4_policy.c |  2 +-
 net/ipv6/xfrm6_input.c  |  4 ++--
 net/ipv6/xfrm6_output.c |  2 +-
 net/ipv6/xfrm6_policy.c |  2 +-
 net/xfrm/xfrm_output.c  |  4 ++--
 net/xfrm/xfrm_policy.c  |  2 +-
 net/xfrm/xfrm_state.c   | 18 ++++++++++++++----
 10 files changed, 26 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 2100c734b10..8cae60c5338 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2454,7 +2454,7 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
 	spin_lock(&x->lock);
 	iph = ip_hdr(skb);
 
-	err = x->mode->output(x, skb);
+	err = x->outer_mode->output(x, skb);
 	if (err)
 		goto error;
 	err = x->type->output(x, skb);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index bc5dc0747cd..5e95c8a07ef 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -91,10 +91,10 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
 
 		xfrm_vec[xfrm_nr++] = x;
 
-		if (x->mode->input(x, skb))
+		if (x->outer_mode->input(x, skb))
 			goto drop;
 
-		if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) {
+		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
 			decaps = 1;
 			break;
 		}
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index dcbc2743069..c4a7156962b 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -47,7 +47,7 @@ static inline int xfrm4_output_one(struct sk_buff *skb)
 	struct iphdr *iph;
 	int err;
 
-	if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) {
+	if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
 		err = xfrm4_tunnel_check_size(skb);
 		if (err)
 			goto error_nolock;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 1f0ea0e0371..cc86fb110dd 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -168,7 +168,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		/* Copy neighbout for reachability confirmation */
 		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
 		dst_prev->input		= rt->u.dst.input;
-		dst_prev->output = dst_prev->xfrm->mode->afinfo->output;
+		dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output;
 		if (rt0->peer)
 			atomic_inc(&rt0->peer->refcnt);
 		x->u.rt.peer = rt0->peer;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index c6ee1a3ba19..515783707e8 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -68,10 +68,10 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
 
 		xfrm_vec[xfrm_nr++] = x;
 
-		if (x->mode->input(x, skb))
+		if (x->outer_mode->input(x, skb))
 			goto drop;
 
-		if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) {
+		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
 			decaps = 1;
 			break;
 		}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index c9f42d1c2df..656976760ad 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -50,7 +50,7 @@ static inline int xfrm6_output_one(struct sk_buff *skb)
 	struct ipv6hdr *iph;
 	int err;
 
-	if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) {
+	if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
 		err = xfrm6_tunnel_check_size(skb);
 		if (err)
 			goto error_nolock;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 324268329f6..82e27b80d07 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -230,7 +230,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		/* Copy neighbour for reachability confirmation */
 		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
 		dst_prev->input		= rt->u.dst.input;
-		dst_prev->output = dst_prev->xfrm->mode->afinfo->output;
+		dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output;
 		/* Sheit... I remember I did this right. Apparently,
 		 * it was magically lost, so this code needs audit */
 		x->u.rt6.rt6i_flags    = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 8bf71ba2345..f4bfd6c4565 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -63,7 +63,7 @@ int xfrm_output(struct sk_buff *skb)
 				xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
 		}
 
-		err = x->mode->output(x, skb);
+		err = x->outer_mode->output(x, skb);
 		if (err)
 			goto error;
 
@@ -82,7 +82,7 @@ int xfrm_output(struct sk_buff *skb)
 		}
 		dst = skb->dst;
 		x = dst->xfrm;
-	} while (x && !(x->mode->flags & XFRM_MODE_FLAG_TUNNEL));
+	} while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL));
 
 	err = 0;
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 1d66fb42c9c..b702bd8a389 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1941,7 +1941,7 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
 			return 0;
 
 		if (strict && fl &&
-		    !(dst->xfrm->mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
+		    !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
 		    !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
 			return 0;
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 48b4a06b3d1..224b44e31a0 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -377,8 +377,10 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
 	kfree(x->calg);
 	kfree(x->encap);
 	kfree(x->coaddr);
-	if (x->mode)
-		xfrm_put_mode(x->mode);
+	if (x->inner_mode)
+		xfrm_put_mode(x->inner_mode);
+	if (x->outer_mode)
+		xfrm_put_mode(x->outer_mode);
 	if (x->type) {
 		x->type->destructor(x);
 		xfrm_put_type(x->type);
@@ -1947,6 +1949,14 @@ int xfrm_init_state(struct xfrm_state *x)
 		goto error;
 
 	err = -EPROTONOSUPPORT;
+	x->inner_mode = xfrm_get_mode(x->props.mode, x->sel.family);
+	if (x->inner_mode == NULL)
+		goto error;
+
+	if (!(x->inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
+	    family != x->sel.family)
+		goto error;
+
 	x->type = xfrm_get_type(x->id.proto, family);
 	if (x->type == NULL)
 		goto error;
@@ -1955,8 +1965,8 @@ int xfrm_init_state(struct xfrm_state *x)
 	if (err)
 		goto error;
 
-	x->mode = xfrm_get_mode(x->props.mode, family);
-	if (x->mode == NULL)
+	x->outer_mode = xfrm_get_mode(x->props.mode, family);
+	if (x->outer_mode == NULL)
 		goto error;
 
 	x->km.state = XFRM_STATE_VALID;
-- 
cgit v1.2.3


From 45542479fb261342d5244869cf3ca4636b7ffd43 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 17 Oct 2007 21:37:22 -0700
Subject: [NET]: Fix uninitialised variable in ip_frag_reasm()

Fix uninitialised variable in ip_frag_reasm().  err should be set to
-ENOMEM if the initial call of skb_clone() fails.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 453ae041edd..2143bf30597 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -486,7 +486,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	if (prev) {
 		head = prev->next;
 		fp = skb_clone(head, GFP_ATOMIC);
-
 		if (!fp)
 			goto out_nomem;
 
@@ -512,7 +511,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		goto out_oversize;
 
 	/* Head of list must not be cloned. */
-	err = -ENOMEM;
 	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
 		goto out_nomem;
 
@@ -568,6 +566,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 out_nomem:
 	LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing "
 			      "queue %p\n", qp);
+	err = -ENOMEM;
 	goto out_fail;
 out_oversize:
 	if (net_ratelimit())
-- 
cgit v1.2.3


From bfaae0f04c68bafc12ec50c6922d71a90deea3e2 Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jgarzik@redhat.com>
Date: Wed, 17 Oct 2007 23:26:43 -0700
Subject: [NET]: fix carrier-on bug?

While looking at a net driver with the following construct,

	if (!netif_carrier_ok(dev))
		netif_carrier_on(dev);

it stuck me that the netif_carrier_ok() check was redundant, since
netif_carrier_on() checks bit __LINK_STATE_NOCARRIER anyway.  This is
the same reason why netif_queue_stopped() need not be called prior to
netif_wake_queue().

This is true, but there is however an unwanted side effect from assuming
that netif_carrier_on() can be called multiple times:  it touches the
watchdog, regardless of pre-existing carrier state.

The fix:  move watchdog-up inside the bit-cleared code path.

Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 95ae11956f3..e01d57692c9 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -249,10 +249,11 @@ static void dev_watchdog_down(struct net_device *dev)
  */
 void netif_carrier_on(struct net_device *dev)
 {
-	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state))
+	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
 		linkwatch_fire_event(dev);
-	if (netif_running(dev))
-		__netdev_watchdog_up(dev);
+		if (netif_running(dev))
+			__netdev_watchdog_up(dev);
+	}
 }
 
 /**
-- 
cgit v1.2.3


From df2e014bfbbf26d160e1bf47b05fc05c96205e2b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Thu, 18 Oct 2007 05:07:57 -0700
Subject: [TCP]: Remove lost_retrans zero seqno special cases
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both high-sack detection and new lowest seq variables have
unnecessary zero special case which are now removed by setting
safe initial seqnos.

This also fixes problem which caused zero received_upto being
passed to tcp_mark_lost_retrans which confused after relations
within the marker loop causing incorrect TCPCB_SACKED_RETRANS
clearing. The problem was noticed because of a performance
report from TAKANO Ryousei <takano@axe-inc.co.jp>.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Acked-by: Ryousei Takano <takano-ryousei@aist.go.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0f00966b178..9288220b73a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1121,7 +1121,7 @@ static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto)
 	struct sk_buff *skb;
 	int flag = 0;
 	int cnt = 0;
-	u32 new_low_seq = 0;
+	u32 new_low_seq = tp->snd_nxt;
 
 	tcp_for_write_queue(skb, sk) {
 		u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
@@ -1153,7 +1153,7 @@ static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto)
 				NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
 			}
 		} else {
-			if (!new_low_seq || before(ack_seq, new_low_seq))
+			if (before(ack_seq, new_low_seq))
 				new_low_seq = ack_seq;
 			cnt += tcp_skb_pcount(skb);
 		}
@@ -1242,7 +1242,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
 	int reord = tp->packets_out;
 	int prior_fackets;
-	u32 highest_sack_end_seq = 0;
+	u32 highest_sack_end_seq = tp->lost_retrans_low;
 	int flag = 0;
 	int found_dup_sack = 0;
 	int cached_fack_count;
-- 
cgit v1.2.3


From 009e8c965fd72a78636b9a96c7015109c5c70176 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 18 Oct 2007 05:12:21 -0700
Subject: [NETFILTER]: xt_sctp: fix mistake to pass a pointer where array is
 required

Macros like SCTP_CHUNKMAP_XXX(chukmap) require chukmap to be an array,
but match_packet() passes a pointer to these macros. Also remove the
ELEMCOUNT macro and fix a bug in SCTP_CHUNKMAP_COPY.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_sctp.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index f907770fd4e..3358273a47b 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -42,21 +42,21 @@ match_flags(const struct xt_sctp_flag_info *flag_info,
 static inline bool
 match_packet(const struct sk_buff *skb,
 	     unsigned int offset,
-	     const u_int32_t *chunkmap,
-	     int chunk_match_type,
-	     const struct xt_sctp_flag_info *flag_info,
-	     const int flag_count,
+	     const struct xt_sctp_info *info,
 	     bool *hotdrop)
 {
 	u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)];
 	sctp_chunkhdr_t _sch, *sch;
+	int chunk_match_type = info->chunk_match_type;
+	const struct xt_sctp_flag_info *flag_info = info->flag_info;
+	int flag_count = info->flag_count;
 
 #ifdef DEBUG_SCTP
 	int i = 0;
 #endif
 
 	if (chunk_match_type == SCTP_CHUNK_MATCH_ALL)
-		SCTP_CHUNKMAP_COPY(chunkmapcopy, chunkmap);
+		SCTP_CHUNKMAP_COPY(chunkmapcopy, info->chunkmap);
 
 	do {
 		sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch);
@@ -73,7 +73,7 @@ match_packet(const struct sk_buff *skb,
 
 		duprintf("skb->len: %d\toffset: %d\n", skb->len, offset);
 
-		if (SCTP_CHUNKMAP_IS_SET(chunkmap, sch->type)) {
+		if (SCTP_CHUNKMAP_IS_SET(info->chunkmap, sch->type)) {
 			switch (chunk_match_type) {
 			case SCTP_CHUNK_MATCH_ANY:
 				if (match_flags(flag_info, flag_count,
@@ -104,7 +104,7 @@ match_packet(const struct sk_buff *skb,
 
 	switch (chunk_match_type) {
 	case SCTP_CHUNK_MATCH_ALL:
-		return SCTP_CHUNKMAP_IS_CLEAR(chunkmap);
+		return SCTP_CHUNKMAP_IS_CLEAR(info->chunkmap);
 	case SCTP_CHUNK_MATCH_ANY:
 		return false;
 	case SCTP_CHUNK_MATCH_ONLY:
@@ -148,9 +148,7 @@ match(const struct sk_buff *skb,
 			&& ntohs(sh->dest) <= info->dpts[1],
 			XT_SCTP_DEST_PORTS, info->flags, info->invflags)
 		&& SCCHECK(match_packet(skb, protoff + sizeof (sctp_sctphdr_t),
-					info->chunkmap, info->chunk_match_type,
-					info->flag_info, info->flag_count,
-					hotdrop),
+					info, hotdrop),
 			   XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
 }
 
-- 
cgit v1.2.3


From 04028045a12ba941c579d0f3238489333ac18ea4 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 18 Oct 2007 05:14:58 -0700
Subject: [IPV6]: Lost locking when inserting a flowlabel in ipv6_fl_list

The new flowlabels should be inserted into the sock list
under the ip6_sk_fl_lock. This was lost in one place.

This list is naturally protected with the socket lock, but
the fl6_sock_lookup() is called without it, so another
protection is required.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_flowlabel.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 217d60f9fc8..8550df20f98 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -409,6 +409,16 @@ static int ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
 	return 0;
 }
 
+static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
+		struct ip6_flowlabel *fl)
+{
+	write_lock_bh(&ip6_sk_fl_lock);
+	sfl->fl = fl;
+	sfl->next = np->ipv6_fl_list;
+	np->ipv6_fl_list = sfl;
+	write_unlock_bh(&ip6_sk_fl_lock);
+}
+
 int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 {
 	int err;
@@ -513,11 +523,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 					fl1->linger = fl->linger;
 				if ((long)(fl->expires - fl1->expires) > 0)
 					fl1->expires = fl->expires;
-				write_lock_bh(&ip6_sk_fl_lock);
-				sfl1->fl = fl1;
-				sfl1->next = np->ipv6_fl_list;
-				np->ipv6_fl_list = sfl1;
-				write_unlock_bh(&ip6_sk_fl_lock);
+				fl_link(np, sfl1, fl1);
 				fl_free(fl);
 				return 0;
 
@@ -545,9 +551,7 @@ release:
 			}
 		}
 
-		sfl1->fl = fl;
-		sfl1->next = np->ipv6_fl_list;
-		np->ipv6_fl_list = sfl1;
+		fl_link(np, sfl1, fl);
 		return 0;
 
 	default:
-- 
cgit v1.2.3


From bd0bf57700cb0eaa92f3d2ee040a69743cdd99d0 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 18 Oct 2007 05:15:57 -0700
Subject: [IPV6]: Lost locking in fl6_sock_lookup

This routine scans the ipv6_fl_list whose update is
protected with the socket lock and the ip6_sk_fl_lock.

Since the socket lock is not taken in the lookup, use
the other one.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_flowlabel.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 8550df20f98..f40a08669db 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -190,14 +190,17 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label)
 
 	label &= IPV6_FLOWLABEL_MASK;
 
+	read_lock_bh(&ip6_sk_fl_lock);
 	for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) {
 		struct ip6_flowlabel *fl = sfl->fl;
 		if (fl->label == label) {
+			read_unlock_bh(&ip6_sk_fl_lock);
 			fl->lastuse = jiffies;
 			atomic_inc(&fl->users);
 			return fl;
 		}
 	}
+	read_unlock_bh(&ip6_sk_fl_lock);
 	return NULL;
 }
 
-- 
cgit v1.2.3


From 78c2e50253569e62caa4a61fc1cc5a0158edec43 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 18 Oct 2007 05:18:56 -0700
Subject: [IPV6]: Fix race in ipv6_flowlabel_opt() when inserting two labels

In the IPV6_FL_A_GET case the hash is checked for flowlabels
with the given label. If it is not found, the lock, protecting
the hash, is dropped to be re-get for writing. After this a
newly allocated entry is inserted, but no checks are performed
to catch a classical SMP race, when the conflicting label may
be inserted on another cpu.

Use the (currently unused) return value from fl_intern() to
return the conflicting entry (if found) and re-check, whether
we can reuse it (IPV6_FL_F_EXCL) or return -EEXISTS.

Also add the comment, about why not re-lookup the current
sock for conflicting flowlabel entry.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_flowlabel.c | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index f40a08669db..e55ae1a1f56 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -154,8 +154,10 @@ static void ip6_fl_gc(unsigned long dummy)
 	write_unlock(&ip6_fl_lock);
 }
 
-static int fl_intern(struct ip6_flowlabel *fl, __be32 label)
+static struct ip6_flowlabel *fl_intern(struct ip6_flowlabel *fl, __be32 label)
 {
+	struct ip6_flowlabel *lfl;
+
 	fl->label = label & IPV6_FLOWLABEL_MASK;
 
 	write_lock_bh(&ip6_fl_lock);
@@ -163,12 +165,26 @@ static int fl_intern(struct ip6_flowlabel *fl, __be32 label)
 		for (;;) {
 			fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK;
 			if (fl->label) {
-				struct ip6_flowlabel *lfl;
 				lfl = __fl_lookup(fl->label);
 				if (lfl == NULL)
 					break;
 			}
 		}
+	} else {
+		/*
+		 * we dropper the ip6_fl_lock, so this entry could reappear
+		 * and we need to recheck with it.
+		 *
+		 * OTOH no need to search the active socket first, like it is
+		 * done in ipv6_flowlabel_opt - sock is locked, so new entry
+		 * with the same label can only appear on another sock
+		 */
+		lfl = __fl_lookup(fl->label);
+		if (lfl != NULL) {
+			atomic_inc(&lfl->users);
+			write_unlock_bh(&ip6_fl_lock);
+			return lfl;
+		}
 	}
 
 	fl->lastuse = jiffies;
@@ -176,7 +192,7 @@ static int fl_intern(struct ip6_flowlabel *fl, __be32 label)
 	fl_ht[FL_HASH(fl->label)] = fl;
 	atomic_inc(&fl_size);
 	write_unlock_bh(&ip6_fl_lock);
-	return 0;
+	return NULL;
 }
 
 
@@ -429,7 +445,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 	struct in6_flowlabel_req freq;
 	struct ipv6_fl_socklist *sfl1=NULL;
 	struct ipv6_fl_socklist *sfl, **sflp;
-	struct ip6_flowlabel *fl;
+	struct ip6_flowlabel *fl, *fl1 = NULL;
+
 
 	if (optlen < sizeof(freq))
 		return -EINVAL;
@@ -485,8 +502,6 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 		sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
 
 		if (freq.flr_label) {
-			struct ip6_flowlabel *fl1 = NULL;
-
 			err = -EEXIST;
 			read_lock_bh(&ip6_sk_fl_lock);
 			for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
@@ -505,6 +520,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 			if (fl1 == NULL)
 				fl1 = fl_lookup(freq.flr_label);
 			if (fl1) {
+recheck:
 				err = -EEXIST;
 				if (freq.flr_flags&IPV6_FL_F_EXCL)
 					goto release;
@@ -543,9 +559,9 @@ release:
 		if (sfl1 == NULL || (err = mem_check(sk)) != 0)
 			goto done;
 
-		err = fl_intern(fl, freq.flr_label);
-		if (err)
-			goto done;
+		fl1 = fl_intern(fl, freq.flr_label);
+		if (fl1 != NULL)
+			goto recheck;
 
 		if (!freq.flr_label) {
 			if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
-- 
cgit v1.2.3


From bc34b841556aad437baf4199744e55500bfa2088 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Thu, 18 Oct 2007 05:20:12 -0700
Subject: [NETFILTER]: nf_conntrack_tcp: fix connection reopening fix

If one side aborts an established connection, the entry still lingers
for 10s in conntrack for the late packets. Allow to open up the
connection again for the party which sent the RST packet.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Tested-by: Krzysztof Piotr Oledzki <ole@ans.pl>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_tcp.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index c7075345971..4dc23ab3a39 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -834,10 +834,12 @@ static int tcp_packet(struct nf_conn *conntrack,
 	case TCP_CONNTRACK_SYN_SENT:
 		if (old_state < TCP_CONNTRACK_TIME_WAIT)
 			break;
-		if (conntrack->proto.tcp.seen[!dir].flags &
-			IP_CT_TCP_FLAG_CLOSE_INIT) {
-			/* Attempt to reopen a closed connection.
-			* Delete this connection and look up again. */
+		if ((conntrack->proto.tcp.seen[!dir].flags &
+			IP_CT_TCP_FLAG_CLOSE_INIT)
+		    || (conntrack->proto.tcp.last_dir == dir
+		        && conntrack->proto.tcp.last_index == TCP_RST_SET)) {
+			/* Attempt to reopen a closed/aborted connection.
+			 * Delete this connection and look up again. */
 			write_unlock_bh(&tcp_lock);
 			if (del_timer(&conntrack->timeout))
 				conntrack->timeout.function((unsigned long)
@@ -925,6 +927,7 @@ static int tcp_packet(struct nf_conn *conntrack,
      in_window:
 	/* From now on we have got in-window packets */
 	conntrack->proto.tcp.last_index = index;
+	conntrack->proto.tcp.last_dir = dir;
 
 	pr_debug("tcp_conntracks: ");
 	NF_CT_DUMP_TUPLE(tuple);
-- 
cgit v1.2.3


From 52f095ee88d8851866bc7694ab991ca5abf21d5e Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 18 Oct 2007 05:38:48 -0700
Subject: [IPV6]: Fix again the fl6_sock_lookup() fixed locking

YOSHIFUJI fairly pointed out, that the users increment should
be done under the ip6_sk_fl_lock not to give IPV6_FL_A_PUT a
chance to put this count to zero and release the flowlabel.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_flowlabel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index e55ae1a1f56..b12cc22e774 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -210,9 +210,9 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label)
 	for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) {
 		struct ip6_flowlabel *fl = sfl->fl;
 		if (fl->label == label) {
-			read_unlock_bh(&ip6_sk_fl_lock);
 			fl->lastuse = jiffies;
 			atomic_inc(&fl->users);
+			read_unlock_bh(&ip6_sk_fl_lock);
 			return fl;
 		}
 	}
-- 
cgit v1.2.3