From 7c6a329e444725f24c02192ac493d8a7cd9fa638 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 12 Sep 2008 03:11:54 +0200
Subject: [Bluetooth] Fix regression from using default link policy

To speed up the Simple Pairing connection setup, the support for the
default link policy has been enabled. This is in contrast to settings
the link policy on every connection setup. Using the default link policy
is the preferred way since there is no need to dynamically change it for
every connection.

For backward compatibility reason and to support old userspace the
HCISETLINKPOL ioctl has been switched over to using hci_request() to
issue the HCI command for setting the default link policy instead of
just storing it in the HCI device structure.

However the hci_request() can only be issued when the device is
brought up. If used on a device that is registered, but still down
it will timeout and fail. This is problematic since the command is
put on the TX queue and the Bluetooth core tries to submit it to
hardware that is not ready yet. The timeout for these requests is
10 seconds and this causes a significant regression when setting up
a new device.

The userspace can perfectly handle a failure of the HCISETLINKPOL
ioctl and will re-submit it later, but the 10 seconds delay causes
a problem. So in case hci_request() is called on a device that is
still down, just fail it with ENETDOWN to indicate what happens.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_core.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index f5b21cb9369..278a3ace14f 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -164,6 +164,9 @@ static inline int hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *
 {
 	int ret;
 
+	if (!test_bit(HCI_UP, &hdev->flags))
+		return -ENETDOWN;
+
 	/* Serialize all requests */
 	hci_req_lock(hdev);
 	ret = __hci_request(hdev, req, opt, timeout);
-- 
cgit v1.2.3


From 93821778def10ec1e69aa3ac10adee975dad4ff3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 15 Sep 2008 11:48:46 -0700
Subject: udp: Fix rcv socket locking

The previous patch in response to the recursive locking on IPsec
reception is broken as it tries to drop the BH socket lock while in
user context.

This patch fixes it by shrinking the section protected by the
socket lock to sock_queue_rcv_skb only.  The only reason we added
the lock is for the accounting which happens in that function.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 62 +++++++++++++++++++++++++++++++---------------------------
 1 file changed, 33 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8e42fbbd576..57e26fa6618 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -951,6 +951,27 @@ int udp_disconnect(struct sock *sk, int flags)
 	return 0;
 }
 
+static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	int is_udplite = IS_UDPLITE(sk);
+	int rc;
+
+	if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
+		/* Note that an ENOMEM error is charged twice */
+		if (rc == -ENOMEM)
+			UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
+					 is_udplite);
+		goto drop;
+	}
+
+	return 0;
+
+drop:
+	UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+	kfree_skb(skb);
+	return -1;
+}
+
 /* returns:
  *  -1: error
  *   0: success
@@ -989,9 +1010,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 		    up->encap_rcv != NULL) {
 			int ret;
 
-			bh_unlock_sock(sk);
 			ret = (*up->encap_rcv)(sk, skb);
-			bh_lock_sock(sk);
 			if (ret <= 0) {
 				UDP_INC_STATS_BH(sock_net(sk),
 						 UDP_MIB_INDATAGRAMS,
@@ -1044,17 +1063,16 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 			goto drop;
 	}
 
-	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
-		/* Note that an ENOMEM error is charged twice */
-		if (rc == -ENOMEM) {
-			UDP_INC_STATS_BH(sock_net(sk),
-					UDP_MIB_RCVBUFERRORS, is_udplite);
-			atomic_inc(&sk->sk_drops);
-		}
-		goto drop;
-	}
+	rc = 0;
 
-	return 0;
+	bh_lock_sock(sk);
+	if (!sock_owned_by_user(sk))
+		rc = __udp_queue_rcv_skb(sk, skb);
+	else
+		sk_add_backlog(sk, skb);
+	bh_unlock_sock(sk);
+
+	return rc;
 
 drop:
 	UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
@@ -1092,15 +1110,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 				skb1 = skb_clone(skb, GFP_ATOMIC);
 
 			if (skb1) {
-				int ret = 0;
-
-				bh_lock_sock(sk);
-				if (!sock_owned_by_user(sk))
-					ret = udp_queue_rcv_skb(sk, skb1);
-				else
-					sk_add_backlog(sk, skb1);
-				bh_unlock_sock(sk);
-
+				int ret = udp_queue_rcv_skb(sk, skb1);
 				if (ret > 0)
 					/* we should probably re-process instead
 					 * of dropping packets here. */
@@ -1195,13 +1205,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 			uh->dest, inet_iif(skb), udptable);
 
 	if (sk != NULL) {
-		int ret = 0;
-		bh_lock_sock(sk);
-		if (!sock_owned_by_user(sk))
-			ret = udp_queue_rcv_skb(sk, skb);
-		else
-			sk_add_backlog(sk, skb);
-		bh_unlock_sock(sk);
+		int ret = udp_queue_rcv_skb(sk, skb);
 		sock_put(sk);
 
 		/* a return value > 0 means to resubmit the input, but
@@ -1494,7 +1498,7 @@ struct proto udp_prot = {
 	.sendmsg	   = udp_sendmsg,
 	.recvmsg	   = udp_recvmsg,
 	.sendpage	   = udp_sendpage,
-	.backlog_rcv	   = udp_queue_rcv_skb,
+	.backlog_rcv	   = __udp_queue_rcv_skb,
 	.hash		   = udp_lib_hash,
 	.unhash		   = udp_lib_unhash,
 	.get_port	   = udp_v4_get_port,
-- 
cgit v1.2.3


From 45e9c0de2e86485f8b6633fd64ab19cfbff167f6 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 15 Sep 2008 16:43:18 -0700
Subject: warn: Turn the netdev timeout WARN_ON() into a WARN()

this patch turns the netdev timeout WARN_ON_ONCE() into a WARN_ONCE(),
so that the device and driver names are inside the warning message.
This helps automated tools like kerneloops.org to collect the data
and do statistics, as well as making it more likely that humans
cut-n-paste the important message as part of a bugreport.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/sched/sch_generic.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 9634091ee2f..ec0a0839ce5 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -215,10 +215,9 @@ static void dev_watchdog(unsigned long arg)
 			    time_after(jiffies, (dev->trans_start +
 						 dev->watchdog_timeo))) {
 				char drivername[64];
-				printk(KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
+				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
 				       dev->name, netdev_drivername(dev, drivername, 64));
 				dev->tx_timeout(dev);
-				WARN_ON_ONCE(1);
 			}
 			if (!mod_timer(&dev->watchdog_timer,
 				       round_jiffies(jiffies +
-- 
cgit v1.2.3


From a3028b8ed1e1e9930bfa70ce4555fb7f9fad3dcc Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 18 Sep 2008 02:48:25 -0700
Subject: sctp: set the skb->ip_summed correctly when sending over loopback.

Loopback used to clobber the ip_summed filed which sctp then used
to figure out if it needed to do checksumming or not.  Now that
loopback doesn't do that any more, sctp needs to set the ip_summed
field correctly.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/output.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/output.c b/net/sctp/output.c
index 0dc4a7dfb23..225c7123c41 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -533,7 +533,8 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	if (!(dst->dev->features & NETIF_F_NO_CSUM)) {
 		crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len);
 		crc32 = sctp_end_cksum(crc32);
-	}
+	} else
+		nskb->ip_summed = CHECKSUM_UNNECESSARY;
 
 	/* 3) Put the resultant value into the checksum field in the
 	 *    common header, and leave the rest of the bits unchanged.
-- 
cgit v1.2.3


From 0ef46e285c062cbe35d60c0adbff96f530d31c86 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 18 Sep 2008 16:27:38 -0700
Subject: sctp: do not enable peer features if we can't do them.

Do not enable peer features like addip and auth, if they
are administratively disabled localy.  If the peer resports
that he supports something that we don't, neither end can
use it so enabling it is pointless.  This solves a problem
when talking to a peer that has auth and addip enabled while
we do not.  Found by Andrei Pelinescu-Onciul <andrei@iptel.org>.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_make_chunk.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index e8ca4e54981..fe94f42fa06 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1886,11 +1886,13 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
 			    /* if the peer reports AUTH, assume that he
 			     * supports AUTH.
 			     */
-			    asoc->peer.auth_capable = 1;
+			    if (sctp_auth_enable)
+				    asoc->peer.auth_capable = 1;
 			    break;
 		    case SCTP_CID_ASCONF:
 		    case SCTP_CID_ASCONF_ACK:
-			    asoc->peer.asconf_capable = 1;
+			    if (sctp_addip_enable)
+				    asoc->peer.asconf_capable = 1;
 			    break;
 		    default:
 			    break;
@@ -2460,6 +2462,9 @@ do_addr_param:
 		break;
 
 	case SCTP_PARAM_SET_PRIMARY:
+		if (!sctp_addip_enable)
+			goto fall_through;
+
 		addr_param = param.v + sizeof(sctp_addip_param_t);
 
 		af = sctp_get_af_specific(param_type2af(param.p->type));
-- 
cgit v1.2.3


From add52379dde2e5300e2d574b172e62c6cf43b3d3 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 18 Sep 2008 16:28:27 -0700
Subject: sctp: Fix oops when INIT-ACK indicates that peer doesn't support AUTH

If INIT-ACK is received with SupportedExtensions parameter which
indicates that the peer does not support AUTH, the packet will be
silently ignore, and sctp_process_init() do cleanup all of the
transports in the association.
When T1-Init timer is expires, OOPS happen while we try to choose
a different init transport.

The solution is to only clean up the non-active transports, i.e
the ones that the peer added.  However, that introduces a problem
with sctp_connectx(), because we don't mark the proper state for
the transports provided by the user.  So, we'll simply mark
user-provided transports as ACTIVE.  That will allow INIT
retransmissions to work properly in the sctp_connectx() context
and prevent the crash.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/associola.c     | 9 +++++----
 net/sctp/sm_make_chunk.c | 6 ++----
 2 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 8472b8b349c..abd51cef241 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -599,11 +599,12 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 	/* Check to see if this is a duplicate. */
 	peer = sctp_assoc_lookup_paddr(asoc, addr);
 	if (peer) {
+		/* An UNKNOWN state is only set on transports added by
+		 * user in sctp_connectx() call.  Such transports should be
+		 * considered CONFIRMED per RFC 4960, Section 5.4.
+		 */
 		if (peer->state == SCTP_UNKNOWN) {
-			if (peer_state == SCTP_ACTIVE)
-				peer->state = SCTP_ACTIVE;
-			if (peer_state == SCTP_UNCONFIRMED)
-				peer->state = SCTP_UNCONFIRMED;
+			peer->state = SCTP_ACTIVE;
 		}
 		return peer;
 	}
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index fe94f42fa06..b599cbba4fb 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2321,12 +2321,10 @@ clean_up:
 	/* Release the transport structures. */
 	list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
 		transport = list_entry(pos, struct sctp_transport, transports);
-		list_del_init(pos);
-		sctp_transport_free(transport);
+		if (transport->state != SCTP_ACTIVE)
+			sctp_assoc_rm_peer(asoc, transport);
 	}
 
-	asoc->peer.transport_count = 0;
-
 nomem:
 	return 0;
 }
-- 
cgit v1.2.3


From ad55dcaff0e34269f86975ce2ea0da22e9eb74a1 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Sat, 20 Sep 2008 22:05:50 -0700
Subject: netdev: simple_tx_hash shouldn't hash inside fragments

Currently simple_tx_hash is hashing inside of udp fragments.  As a result
packets are getting getting sent to all queues when they shouldn't be.
This causes a serious performance regression which can be seen by sending
UDP frames larger than mtu on multiqueue devices.  This change will make
it so that fragments are hashed only as IP datagrams w/o any protocol
information.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index e719ed29310..e8eb2b47834 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -122,6 +122,7 @@
 #include <linux/if_arp.h>
 #include <linux/if_vlan.h>
 #include <linux/ip.h>
+#include <net/ip.h>
 #include <linux/ipv6.h>
 #include <linux/in.h>
 #include <linux/jhash.h>
@@ -1667,7 +1668,7 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
 {
 	u32 addr1, addr2, ports;
 	u32 hash, ihl;
-	u8 ip_proto;
+	u8 ip_proto = 0;
 
 	if (unlikely(!simple_tx_hashrnd_initialized)) {
 		get_random_bytes(&simple_tx_hashrnd, 4);
@@ -1676,7 +1677,8 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
 
 	switch (skb->protocol) {
 	case __constant_htons(ETH_P_IP):
-		ip_proto = ip_hdr(skb)->protocol;
+		if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
+			ip_proto = ip_hdr(skb)->protocol;
 		addr1 = ip_hdr(skb)->saddr;
 		addr2 = ip_hdr(skb)->daddr;
 		ihl = ip_hdr(skb)->ihl;
-- 
cgit v1.2.3


From 2d4c8266774188cda7f7e612e6dfb8ad12c579d5 Mon Sep 17 00:00:00 2001
From: Michael Kerrisk <mtk.manpages@googlemail.com>
Date: Mon, 22 Sep 2008 13:57:49 -0700
Subject: sys_paccept: disable paccept() until API design is resolved

The reasons for disabling paccept() are as follows:

* The API is more complex than needed.  There is AFAICS no demonstrated
  use case that the sigset argument of this syscall serves that couldn't
  equally be served by the use of pselect/ppoll/epoll_pwait + traditional
  accept().  Roland seems to concur with this opinion
  (http://thread.gmane.org/gmane.linux.kernel/723953/focus=732255).  I
  have (more than once) asked Ulrich to explain otherwise
  (http://thread.gmane.org/gmane.linux.kernel/723952/focus=731018), but he
  does not respond, so one is left to assume that he doesn't know of such
  a case.

* The use of a sigset argument is not consistent with other I/O APIs
  that can block on a single file descriptor (e.g., read(), recv(),
  connect()).

* The behavior of paccept() when interrupted by a signal is IMO strange:
  the kernel restarts the system call if SA_RESTART was set for the
  handler.  I think that it should not do this -- that it should behave
  consistently with paccept()/ppoll()/epoll_pwait(), which never restart,
  regardless of SA_RESTART.  The reasoning here is that the very purpose
  of paccept() is to wait for a connection or a signal, and that
  restarting in the latter case is probably never useful.  (Note: Roland
  disagrees on this point, believing that rather paccept() should be
  consistent with accept() in its behavior wrt EINTR
  (http://thread.gmane.org/gmane.linux.kernel/723953/focus=732255).)

I believe that instead, a simpler API, consistent with Ulrich's other
recent additions, is preferable:

accept4(int fd, struct sockaddr *sa, socklen_t *salen, ind flags);

(This simpler API was originally proposed by Ulrich:
http://thread.gmane.org/gmane.linux.network/92072)

If this simpler API is added, then if we later decide that the sigset
argument really is required, then a suitable bit in 'flags' could be added
to indicate the presence of the sigset argument.

At this point, I am hoping we either will get a counter-argument from
Ulrich about why we really do need paccept()'s sigset argument, or that he
will resubmit the original accept4() patch.

Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Alan Cox <alan@redhat.com>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Jakub Jelinek <jakub@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/socket.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index 8ef8ba81b9e..3e8d4e35c08 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1511,6 +1511,7 @@ out_fd:
 	goto out_put;
 }
 
+#if 0
 #ifdef HAVE_SET_RESTORE_SIGMASK
 asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
 			    int __user *upeer_addrlen,
@@ -1564,6 +1565,7 @@ asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
 	return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
 }
 #endif
+#endif
 
 asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 			   int __user *upeer_addrlen)
-- 
cgit v1.2.3


From 72029fe85d8d060b3f966f2dbc36b3c75b5a6532 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2008 16:22:23 -0500
Subject: 9p: implement proper trans module refcounting and unregistration

9p trans modules aren't refcounted nor were they unregistered
properly.  Fix it.

* Add 9p_trans_module->owner and reference the module on each trans
  instance creation and put it on destruction.

* Protect v9fs_trans_list with a spinlock.  This isn't strictly
  necessary as the list is manipulated only during module loading /
  unloading but it's a good idea to make the API safe.

* Unregister trans modules when the corresponding module is being
  unloaded.

* While at it, kill unnecessary EXPORT_SYMBOL on p9_trans_fd_init().

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c       | 10 ++++--
 net/9p/mod.c          | 92 +++++++++++++++++++++++++++++++++++++--------------
 net/9p/trans_fd.c     | 11 +++++-
 net/9p/trans_virtio.c |  2 ++
 4 files changed, 87 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/9p/client.c b/net/9p/client.c
index 2ffe40cf2f0..10e320307ec 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -75,7 +75,6 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 	int option;
 	int ret = 0;
 
-	clnt->trans_mod = v9fs_default_trans();
 	clnt->dotu = 1;
 	clnt->msize = 8192;
 
@@ -108,7 +107,7 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 			clnt->msize = option;
 			break;
 		case Opt_trans:
-			clnt->trans_mod = v9fs_match_trans(&args[0]);
+			clnt->trans_mod = v9fs_get_trans_by_name(&args[0]);
 			break;
 		case Opt_legacy:
 			clnt->dotu = 0;
@@ -117,6 +116,10 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 			continue;
 		}
 	}
+
+	if (!clnt->trans_mod)
+		clnt->trans_mod = v9fs_get_default_trans();
+
 	kfree(options);
 	return ret;
 }
@@ -150,6 +153,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 	if (!clnt)
 		return ERR_PTR(-ENOMEM);
 
+	clnt->trans_mod = NULL;
 	clnt->trans = NULL;
 	spin_lock_init(&clnt->lock);
 	INIT_LIST_HEAD(&clnt->fidlist);
@@ -235,6 +239,8 @@ void p9_client_destroy(struct p9_client *clnt)
 		clnt->trans = NULL;
 	}
 
+	v9fs_put_trans(clnt->trans_mod);
+
 	list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist)
 		p9_fid_destroy(fid);
 
diff --git a/net/9p/mod.c b/net/9p/mod.c
index bdee1fb7cc6..1084feb24cb 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -31,6 +31,7 @@
 #include <linux/parser.h>
 #include <net/9p/transport.h>
 #include <linux/list.h>
+#include <linux/spinlock.h>
 
 #ifdef CONFIG_NET_9P_DEBUG
 unsigned int p9_debug_level = 0;	/* feature-rific global debug level  */
@@ -44,8 +45,8 @@ MODULE_PARM_DESC(debug, "9P debugging level");
  *
  */
 
+static DEFINE_SPINLOCK(v9fs_trans_lock);
 static LIST_HEAD(v9fs_trans_list);
-static struct p9_trans_module *v9fs_default_transport;
 
 /**
  * v9fs_register_trans - register a new transport with 9p
@@ -54,48 +55,87 @@ static struct p9_trans_module *v9fs_default_transport;
  */
 void v9fs_register_trans(struct p9_trans_module *m)
 {
+	spin_lock(&v9fs_trans_lock);
 	list_add_tail(&m->list, &v9fs_trans_list);
-	if (m->def)
-		v9fs_default_transport = m;
+	spin_unlock(&v9fs_trans_lock);
 }
 EXPORT_SYMBOL(v9fs_register_trans);
 
 /**
- * v9fs_match_trans - match transport versus registered transports
+ * v9fs_unregister_trans - unregister a 9p transport
+ * @m: the transport to remove
+ *
+ */
+void v9fs_unregister_trans(struct p9_trans_module *m)
+{
+	spin_lock(&v9fs_trans_lock);
+	list_del_init(&m->list);
+	spin_unlock(&v9fs_trans_lock);
+}
+EXPORT_SYMBOL(v9fs_unregister_trans);
+
+/**
+ * v9fs_get_trans_by_name - get transport with the matching name
  * @name: string identifying transport
  *
  */
-struct p9_trans_module *v9fs_match_trans(const substring_t *name)
+struct p9_trans_module *v9fs_get_trans_by_name(const substring_t *name)
 {
-	struct list_head *p;
-	struct p9_trans_module *t = NULL;
-
-	list_for_each(p, &v9fs_trans_list) {
-		t = list_entry(p, struct p9_trans_module, list);
-		if (strncmp(t->name, name->from, name->to-name->from) == 0)
-			return t;
-	}
-	return NULL;
+	struct p9_trans_module *t, *found = NULL;
+
+	spin_lock(&v9fs_trans_lock);
+
+	list_for_each_entry(t, &v9fs_trans_list, list)
+		if (strncmp(t->name, name->from, name->to-name->from) == 0 &&
+		    try_module_get(t->owner)) {
+			found = t;
+			break;
+		}
+
+	spin_unlock(&v9fs_trans_lock);
+	return found;
 }
-EXPORT_SYMBOL(v9fs_match_trans);
+EXPORT_SYMBOL(v9fs_get_trans_by_name);
 
 /**
- * v9fs_default_trans - returns pointer to default transport
+ * v9fs_get_default_trans - get the default transport
  *
  */
 
-struct p9_trans_module *v9fs_default_trans(void)
+struct p9_trans_module *v9fs_get_default_trans(void)
 {
-	if (v9fs_default_transport)
-		return v9fs_default_transport;
-	else if (!list_empty(&v9fs_trans_list))
-		return list_first_entry(&v9fs_trans_list,
-					struct p9_trans_module, list);
-	else
-		return NULL;
+	struct p9_trans_module *t, *found = NULL;
+
+	spin_lock(&v9fs_trans_lock);
+
+	list_for_each_entry(t, &v9fs_trans_list, list)
+		if (t->def && try_module_get(t->owner)) {
+			found = t;
+			break;
+		}
+
+	if (!found)
+		list_for_each_entry(t, &v9fs_trans_list, list)
+			if (try_module_get(t->owner)) {
+				found = t;
+				break;
+			}
+
+	spin_unlock(&v9fs_trans_lock);
+	return found;
 }
-EXPORT_SYMBOL(v9fs_default_trans);
+EXPORT_SYMBOL(v9fs_get_default_trans);
 
+/**
+ * v9fs_put_trans - put trans
+ * @m: transport to put
+ *
+ */
+void v9fs_put_trans(struct p9_trans_module *m)
+{
+	if (m)
+		module_put(m->owner);
+}
 
 /**
  * v9fs_init - Initialize module
@@ -120,6 +160,8 @@ static int __init init_p9(void)
 static void __exit exit_p9(void)
 {
 	printk(KERN_INFO "Unloading 9P2000 support\n");
+
+	p9_trans_fd_exit();
 }
 
 module_init(init_p9)
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index cdf137af7ad..6a32ffdb942 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1629,6 +1629,7 @@ static struct p9_trans_module p9_tcp_trans = {
 	.maxsize = MAX_SOCK_BUF,
 	.def = 1,
 	.create = p9_trans_create_tcp,
+	.owner = THIS_MODULE,
 };
 
 static struct p9_trans_module p9_unix_trans = {
@@ -1636,6 +1637,7 @@ static struct p9_trans_module p9_unix_trans = {
 	.maxsize = MAX_SOCK_BUF,
 	.def = 0,
 	.create = p9_trans_create_unix,
+	.owner = THIS_MODULE,
 };
 
 static struct p9_trans_module p9_fd_trans = {
@@ -1643,6 +1645,7 @@ static struct p9_trans_module p9_fd_trans = {
 	.maxsize = MAX_SOCK_BUF,
 	.def = 0,
 	.create = p9_trans_create_fd,
+	.owner = THIS_MODULE,
 };
 
 int p9_trans_fd_init(void)
@@ -1659,4 +1662,10 @@ int p9_trans_fd_init(void)
 
 	return 0;
 }
-EXPORT_SYMBOL(p9_trans_fd_init);
+
+void p9_trans_fd_exit(void)
+{
+	v9fs_unregister_trans(&p9_tcp_trans);
+	v9fs_unregister_trans(&p9_unix_trans);
+	v9fs_unregister_trans(&p9_fd_trans);
+}
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 42adc052b14..94912e077a5 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -528,6 +528,7 @@ static struct p9_trans_module p9_virtio_trans = {
 	.create = p9_virtio_create,
 	.maxsize = PAGE_SIZE*16,
 	.def = 0,
+	.owner = THIS_MODULE,
 };
 
 /* The standard init function */
@@ -545,6 +546,7 @@ static int __init p9_virtio_init(void)
 static void __exit p9_virtio_cleanup(void)
 {
 	unregister_virtio_driver(&p9_virtio_drv);
+	v9fs_unregister_trans(&p9_virtio_trans);
 }
 
 module_init(p9_virtio_init);
-- 
cgit v1.2.3


From 7dc5d24be06a5ed874af035d52a083a7b61ef1bd Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2008 16:22:23 -0500
Subject: 9p-trans_fd: fix trans_fd::p9_conn_destroy()

p9_conn_destroy() first kills all current requests by calling
p9_conn_cancel(), then waits for the request list to be cleared by
waiting on p9_conn->equeue.  After that, polling is stopped and the
trans is destroyed.  This sequence has a few problems.

* Read and write works were never cancelled and the p9_conn can be
  destroyed while the works are running as r/w works remove requests
  from the list and dereference the p9_conn from them.

* The list emptiness wait using p9_conn->equeue wouldn't trigger
  because p9_conn_cancel() always clears all the lists and the only
  way the wait can be triggered is to have another task to issue a
  request between the slim window between p9_conn_cancel() and the
  wait, which isn't safe under the current implementation with or
  without the wait.

This patch fixes the problem by first stopping poll, which can
schedule r/w works, first and cancle r/w works which guarantees that
r/w works are not and will not run from that point and then calling
p9_conn_cancel() and do the rest of destruction.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_fd.c | 24 +++++-------------------
 1 file changed, 5 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 6a32ffdb942..ee0d151da31 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -151,7 +151,6 @@ struct p9_mux_poll_task {
  * @trans: reference to transport instance for this connection
  * @tagpool: id accounting for transactions
  * @err: error state
- * @equeue: event wait_q (?)
  * @req_list: accounting for requests which have been sent
  * @unsent_req_list: accounting for requests that haven't been sent
  * @rcall: current response &p9_fcall structure
@@ -178,7 +177,6 @@ struct p9_conn {
 	struct p9_trans *trans;
 	struct p9_idpool *tagpool;
 	int err;
-	wait_queue_head_t equeue;
 	struct list_head req_list;
 	struct list_head unsent_req_list;
 	struct p9_fcall *rcall;
@@ -430,7 +428,6 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans)
 	}
 
 	m->err = 0;
-	init_waitqueue_head(&m->equeue);
 	INIT_LIST_HEAD(&m->req_list);
 	INIT_LIST_HEAD(&m->unsent_req_list);
 	m->rcall = NULL;
@@ -483,18 +480,13 @@ static void p9_conn_destroy(struct p9_conn *m)
 {
 	P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m,
 		m->mux_list.prev, m->mux_list.next);
-	p9_conn_cancel(m, -ECONNRESET);
-
-	if (!list_empty(&m->req_list)) {
-		/* wait until all processes waiting on this session exit */
-		P9_DPRINTK(P9_DEBUG_MUX,
-			"mux %p waiting for empty request queue\n", m);
-		wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000);
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p request queue empty: %d\n", m,
-			list_empty(&m->req_list));
-	}
 
 	p9_mux_poll_stop(m);
+	cancel_work_sync(&m->rq);
+	cancel_work_sync(&m->wq);
+
+	p9_conn_cancel(m, -ECONNRESET);
+
 	m->trans = NULL;
 	p9_idpool_destroy(m->tagpool);
 	kfree(m);
@@ -840,8 +832,6 @@ static void p9_read_work(struct work_struct *work)
 					(*req->cb) (req, req->cba);
 				else
 					kfree(req->rcall);
-
-				wake_up(&m->equeue);
 			}
 		} else {
 			if (err >= 0 && rcall->id != P9_RFLUSH)
@@ -984,8 +974,6 @@ static void p9_mux_flush_cb(struct p9_req *freq, void *a)
 			(*req->cb) (req, req->cba);
 		else
 			kfree(req->rcall);
-
-		wake_up(&m->equeue);
 	}
 
 	kfree(freq->tcall);
@@ -1191,8 +1179,6 @@ void p9_conn_cancel(struct p9_conn *m, int err)
 		else
 			kfree(req->rcall);
 	}
-
-	wake_up(&m->equeue);
 }
 
 /**
-- 
cgit v1.2.3


From 571ffeafffbfdd0b8f2f9d3b991028797ec87e42 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2008 16:22:23 -0500
Subject: 9p-trans_fd: clean up p9_conn_create()

* Use kzalloc() to allocate p9_conn and remove 0/NULL initializations.

* Clean up error return paths.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_fd.c | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index ee0d151da31..6c88e898375 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -407,11 +407,11 @@ static void p9_mux_poll_stop(struct p9_conn *m)
 static struct p9_conn *p9_conn_create(struct p9_trans *trans)
 {
 	int i, n;
-	struct p9_conn *m, *mtmp;
+	struct p9_conn *m;
 
 	P9_DPRINTK(P9_DEBUG_MUX, "transport %p msize %d\n", trans,
 								trans->msize);
-	m = kmalloc(sizeof(struct p9_conn), GFP_KERNEL);
+	m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL);
 	if (!m)
 		return ERR_PTR(-ENOMEM);
 
@@ -422,24 +422,14 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans)
 	m->trans = trans;
 	m->tagpool = p9_idpool_create();
 	if (IS_ERR(m->tagpool)) {
-		mtmp = ERR_PTR(-ENOMEM);
 		kfree(m);
-		return mtmp;
+		return ERR_PTR(-ENOMEM);
 	}
 
-	m->err = 0;
 	INIT_LIST_HEAD(&m->req_list);
 	INIT_LIST_HEAD(&m->unsent_req_list);
-	m->rcall = NULL;
-	m->rpos = 0;
-	m->rbuf = NULL;
-	m->wpos = m->wsize = 0;
-	m->wbuf = NULL;
 	INIT_WORK(&m->rq, p9_read_work);
 	INIT_WORK(&m->wq, p9_write_work);
-	m->wsched = 0;
-	memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
-	m->poll_task = NULL;
 	n = p9_mux_poll_start(m);
 	if (n) {
 		kfree(m);
@@ -460,10 +450,8 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans)
 	for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
 		if (IS_ERR(m->poll_waddr[i])) {
 			p9_mux_poll_stop(m);
-			mtmp = (void *)m->poll_waddr;	/* the error code */
 			kfree(m);
-			m = mtmp;
-			break;
+			return (void *)m->poll_waddr;	/* the error code */
 		}
 	}
 
-- 
cgit v1.2.3


From ec3c68f232f6d98b4596c05c1c7551b44c617c5f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2008 16:22:23 -0500
Subject: 9p-trans_fd: don't do fs segment mangling in p9_fd_poll()

p9_fd_poll() is never called with user pointers and f_op->poll()
doesn't expect its arguments to be from userland.  There's no need to
set kernel ds before calling f_op->poll() from p9_fd_poll().  Remove
it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_fd.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 6c88e898375..f6d4af16cb1 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1344,7 +1344,6 @@ p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt)
 {
 	int ret, n;
 	struct p9_trans_fd *ts = NULL;
-	mm_segment_t oldfs;
 
 	if (trans && trans->status == Connected)
 		ts = trans->priv;
@@ -1358,24 +1357,17 @@ p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt)
 	if (!ts->wr->f_op || !ts->wr->f_op->poll)
 		return -EIO;
 
-	oldfs = get_fs();
-	set_fs(get_ds());
-
 	ret = ts->rd->f_op->poll(ts->rd, pt);
 	if (ret < 0)
-		goto end;
+		return ret;
 
 	if (ts->rd != ts->wr) {
 		n = ts->wr->f_op->poll(ts->wr, pt);
-		if (n < 0) {
-			ret = n;
-			goto end;
-		}
+		if (n < 0)
+			return n;
 		ret = (ret & ~POLLOUT) | (n & ~POLLIN);
 	}
 
-end:
-	set_fs(oldfs);
 	return ret;
 }
 
-- 
cgit v1.2.3


From 206ca50de77033c6cc17d0e14fbb12d119a67b01 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2008 16:22:23 -0500
Subject: 9p-trans_fd: fix and clean up module init/exit paths

trans_fd leaked p9_mux_wq on module unload.  Fix it.  While at it,
collapse p9_mux_global_init() into p9_trans_fd_init().  It's easier to
follow this way and the global poll_tasks array is about to removed
anyway.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_fd.c | 31 +++++++++++--------------------
 1 file changed, 11 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index f6d4af16cb1..0b4eb5f7835 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -238,22 +238,6 @@ static int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc,
 
 static void p9_conn_cancel(struct p9_conn *m, int err);
 
-static int p9_mux_global_init(void)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++)
-		p9_mux_poll_tasks[i].task = NULL;
-
-	p9_mux_wq = create_workqueue("v9fs");
-	if (!p9_mux_wq) {
-		printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
 static u16 p9_mux_get_tag(struct p9_conn *m)
 {
 	int tag;
@@ -1616,10 +1600,15 @@ static struct p9_trans_module p9_fd_trans = {
 
 int p9_trans_fd_init(void)
 {
-	int ret = p9_mux_global_init();
-	if (ret) {
-		printk(KERN_WARNING "9p: starting mux failed\n");
-		return ret;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++)
+		p9_mux_poll_tasks[i].task = NULL;
+
+	p9_mux_wq = create_workqueue("v9fs");
+	if (!p9_mux_wq) {
+		printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
+		return -ENOMEM;
 	}
 
 	v9fs_register_trans(&p9_tcp_trans);
@@ -1634,4 +1623,6 @@ void p9_trans_fd_exit(void)
 	v9fs_unregister_trans(&p9_tcp_trans);
 	v9fs_unregister_trans(&p9_unix_trans);
 	v9fs_unregister_trans(&p9_fd_trans);
+
+	destroy_workqueue(p9_mux_wq);
 }
-- 
cgit v1.2.3


From 620678244bc7b83287e2e283ed4fe6b959e94b7d Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Wed, 24 Sep 2008 16:22:22 -0500
Subject: 9p: introduce missing kfree

Error handling code following a kmalloc should free the allocated data.

The semantic match that finds the problem is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@r exists@
local idexpression x;
statement S;
expression E;
identifier f,l;
position p1,p2;
expression *ptr != NULL;
@@

(
if ((x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...)) == NULL) S
|
x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...);
...
if (x == NULL) S
)
<... when != x
     when != if (...) { <+...x...+> }
x->f = E
...>
(
 return \(0\|<+...x...+>\|ptr\);
|
 return@p2 ...;
)

@script:python@
p1 << r.p1;
p2 << r.p2;
@@

print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 net/9p/trans_fd.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 0b4eb5f7835..d652baf5ff9 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -870,8 +870,10 @@ static struct p9_req *p9_send_request(struct p9_conn *m,
 	else
 		n = p9_mux_get_tag(m);
 
-	if (n < 0)
+	if (n < 0) {
+		kfree(req);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	p9_set_tag(tc, n);
 
-- 
cgit v1.2.3


From 16ec4700127d479143c77fd9128dfa17ab572963 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@ericvh-desktop.austin.ibm.com>
Date: Wed, 24 Sep 2008 16:22:22 -0500
Subject: 9p: fix put_data error handling

Abhishek Kulkarni pointed out an inconsistency in the way
errors are returned from p9_put_data.  On deeper exploration it
seems the error handling for this path was completely wrong.
This patch adds checks for allocation problems and propagates
errors correctly.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/conv.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/9p/conv.c b/net/9p/conv.c
index 44547201f5b..5ad3a3bd73b 100644
--- a/net/9p/conv.c
+++ b/net/9p/conv.c
@@ -451,8 +451,10 @@ p9_put_data(struct cbuf *bufp, const char *data, int count,
 		   unsigned char **pdata)
 {
 	*pdata = buf_alloc(bufp, count);
+	if (*pdata == NULL)
+		return -ENOMEM;
 	memmove(*pdata, data, count);
-	return count;
+	return 0;
 }
 
 static int
@@ -460,6 +462,8 @@ p9_put_user_data(struct cbuf *bufp, const char __user *data, int count,
 		   unsigned char **pdata)
 {
 	*pdata = buf_alloc(bufp, count);
+	if (*pdata == NULL)
+		return -ENOMEM;
 	return copy_from_user(*pdata, data, count);
 }
 
-- 
cgit v1.2.3


From 8ca31ce52a5cfd03b960fd81a49197ae85d25347 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Wed, 24 Sep 2008 15:53:39 -0700
Subject: netfilter: ip6t_{hbh,dst}: Rejects not-strict mode on rule insertion

The current code ignores rules for internal options in HBH/DST options
header in packet processing if 'Not strict' mode is specified (which is not
implemented). Clearly it is not expected by user.

Kernel should reject HBH/DST rule insertion with 'Not strict' mode
in the first place.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6t_hbh.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 62e39ace058..26654b26d7f 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -97,8 +97,6 @@ hbh_mt6(const struct sk_buff *skb, const struct net_device *in,
 	hdrlen -= 2;
 	if (!(optinfo->flags & IP6T_OPTS_OPTS)) {
 		return ret;
-	} else if (optinfo->flags & IP6T_OPTS_NSTRICT) {
-		pr_debug("Not strict - not implemented");
 	} else {
 		pr_debug("Strict ");
 		pr_debug("#%d ", optinfo->optsnr);
@@ -177,6 +175,12 @@ hbh_mt6_check(const char *tablename, const void *entry,
 		pr_debug("ip6t_opts: unknown flags %X\n", optsinfo->invflags);
 		return false;
 	}
+
+	if (optsinfo->flags & IP6T_OPTS_NSTRICT) {
+		pr_debug("ip6t_opts: Not strict - not implemented");
+		return false;
+	}
+
 	return true;
 }
 
-- 
cgit v1.2.3


From d01dbeb6af7a0848063033f73c3d146fec7451f3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 30 Sep 2008 02:03:19 -0700
Subject: ipsec: Fix pskb_expand_head corruption in xfrm_state_check_space

We're never supposed to shrink the headroom or tailroom.  In fact,
shrinking the headroom is a fatal action.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_output.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index ac25b4c0e98..dc50f1e71f7 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -27,10 +27,14 @@ static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
 		- skb_headroom(skb);
 	int ntail = dst->dev->needed_tailroom - skb_tailroom(skb);
 
-	if (nhead > 0 || ntail > 0)
-		return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC);
-
-	return 0;
+	if (nhead <= 0) {
+		if (ntail <= 0)
+			return 0;
+		nhead = 0;
+	} else if (ntail < 0)
+		ntail = 0;
+
+	return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC);
 }
 
 static int xfrm_output_one(struct sk_buff *skb, int err)
-- 
cgit v1.2.3


From 8b122efd13a227d35d5ca242561770db1b5e3658 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 30 Sep 2008 03:03:35 -0700
Subject: iucv: Fix mismerge again.

fb65a7c091529bfffb1262515252c0d0f6241c5c ("iucv: Fix bad merging.") fixed
a merge error, but in a wrong way. We now end up with the bug below.
This patch corrects the mismerge like it was intended.

BUG: scheduling while atomic: swapper/1/0x00000000
Modules linked in:
CPU: 1 Not tainted 2.6.27-rc7-00094-gc0f4d6d #9
Process swapper (pid: 1, task: 000000003fe7d988, ksp: 000000003fe838c0)
0000000000000000 000000003fe839b8 0000000000000002 0000000000000000
       000000003fe83a58 000000003fe839d0 000000003fe839d0 0000000000390de6
       000000000058acd8 00000000000000d0 000000003fe7dcd8 0000000000000000
       000000000000000c 000000000000000d 0000000000000000 000000003fe83a28
       000000000039c5b8 0000000000015e5e 000000003fe839b8 000000003fe83a00
Call Trace:
([<0000000000015d6a>] show_trace+0xe6/0x134)
 [<0000000000039656>] __schedule_bug+0xa2/0xa8
 [<0000000000391744>] schedule+0x49c/0x910
 [<0000000000391f64>] schedule_timeout+0xc4/0x114
 [<00000000003910d4>] wait_for_common+0xe8/0x1b4
 [<00000000000549ae>] call_usermodehelper_exec+0xa6/0xec
 [<00000000001af7b8>] kobject_uevent_env+0x418/0x438
 [<00000000001d08fc>] bus_add_driver+0x1e4/0x298
 [<00000000001d1ee4>] driver_register+0x90/0x18c
 [<0000000000566848>] netiucv_init+0x168/0x2c8
 [<00000000000120be>] do_one_initcall+0x3e/0x17c
 [<000000000054a31a>] kernel_init+0x1ce/0x248
 [<000000000001a97a>] kernel_thread_starter+0x6/0xc
 [<000000000001a974>] kernel_thread_starter+0x0/0xc
 iucv: NETIUCV driver initialized
initcall netiucv_init+0x0/0x2c8 returned with preemption imbalance

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/iucv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 705959b31e2..d7b54b5bfa6 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -524,7 +524,6 @@ static int iucv_enable(void)
 	get_online_cpus();
 	for_each_online_cpu(cpu)
 		smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1);
-	preempt_enable();
 	if (cpus_empty(iucv_buffer_cpumask))
 		/* No cpu could declare an iucv buffer. */
 		goto out_path;
@@ -547,7 +546,9 @@ out:
  */
 static void iucv_disable(void)
 {
+	get_online_cpus();
 	on_each_cpu(iucv_retrieve_cpu, NULL, 1);
+	put_online_cpus();
 	kfree(iucv_path_table);
 }
 
-- 
cgit v1.2.3


From ba0166708ef4da7eeb61dd92bbba4d5a749d6561 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Tue, 30 Sep 2008 05:32:24 -0700
Subject: sctp: Fix kernel panic while process protocol violation parameter

Since call to function sctp_sf_abort_violation() need paramter 'arg' with
'struct sctp_chunk' type, it will read the chunk type and chunk length from
the chunk_hdr member of chunk. But call to sctp_sf_violation_paramlen()
always with 'struct sctp_paramhdr' type's parameter, it will be passed to
sctp_sf_abort_violation(). This may cause kernel panic.

   sctp_sf_violation_paramlen()
     |-- sctp_sf_abort_violation()
        |-- sctp_make_abort_violation()

This patch fixed this problem. This patch also fix two place which called
sctp_sf_violation_paramlen() with wrong paramter type.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_make_chunk.c | 37 ++++++++++++++++++++++++-------------
 net/sctp/sm_statefuns.c  | 48 +++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 61 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index b599cbba4fb..d68869f966c 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1012,6 +1012,29 @@ end:
 	return retval;
 }
 
+struct sctp_chunk *sctp_make_violation_paramlen(
+	const struct sctp_association *asoc,
+	const struct sctp_chunk *chunk,
+	struct sctp_paramhdr *param)
+{
+	struct sctp_chunk *retval;
+	static const char error[] = "The following parameter had invalid length:";
+	size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) +
+				sizeof(sctp_paramhdr_t);
+
+	retval = sctp_make_abort(asoc, chunk, payload_len);
+	if (!retval)
+		goto nodata;
+
+	sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION,
+			sizeof(error) + sizeof(sctp_paramhdr_t));
+	sctp_addto_chunk(retval, sizeof(error), error);
+	sctp_addto_param(retval, sizeof(sctp_paramhdr_t), param);
+
+nodata:
+	return retval;
+}
+
 /* Make a HEARTBEAT chunk.  */
 struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
 				  const struct sctp_transport *transport,
@@ -1782,11 +1805,6 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc,
 					const struct sctp_chunk *chunk,
 					struct sctp_chunk **errp)
 {
-	static const char error[] = "The following parameter had invalid length:";
-	size_t		payload_len = WORD_ROUND(sizeof(error)) +
-						sizeof(sctp_paramhdr_t);
-
-
 	/* This is a fatal error.  Any accumulated non-fatal errors are
 	 * not reported.
 	 */
@@ -1794,14 +1812,7 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc,
 		sctp_chunk_free(*errp);
 
 	/* Create an error chunk and fill it in with our payload. */
-	*errp = sctp_make_op_error_space(asoc, chunk, payload_len);
-
-	if (*errp) {
-		sctp_init_cause(*errp, SCTP_ERROR_PROTO_VIOLATION,
-				sizeof(error) + sizeof(sctp_paramhdr_t));
-		sctp_addto_chunk(*errp, sizeof(error), error);
-		sctp_addto_param(*errp, sizeof(sctp_paramhdr_t), param);
-	}
+	*errp = sctp_make_violation_paramlen(asoc, chunk, param);
 
 	return 0;
 }
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 8848d329aa2..7c622af2ce5 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -119,7 +119,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
-				     void *arg,
+				     void *arg, void *ext,
 				     sctp_cmd_seq_t *commands);
 
 static sctp_disposition_t sctp_sf_violation_ctsn(
@@ -3425,7 +3425,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
 	addr_param = (union sctp_addr_param *)hdr->params;
 	length = ntohs(addr_param->p.length);
 	if (length < sizeof(sctp_paramhdr_t))
-		return sctp_sf_violation_paramlen(ep, asoc, type,
+		return sctp_sf_violation_paramlen(ep, asoc, type, arg,
 			   (void *)addr_param, commands);
 
 	/* Verify the ASCONF chunk before processing it. */
@@ -3433,8 +3433,8 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
 			    (sctp_paramhdr_t *)((void *)addr_param + length),
 			    (void *)chunk->chunk_end,
 			    &err_param))
-		return sctp_sf_violation_paramlen(ep, asoc, type,
-						  (void *)&err_param, commands);
+		return sctp_sf_violation_paramlen(ep, asoc, type, arg,
+						  (void *)err_param, commands);
 
 	/* ADDIP 5.2 E1) Compare the value of the serial number to the value
 	 * the endpoint stored in a new association variable
@@ -3542,8 +3542,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
 	    (sctp_paramhdr_t *)addip_hdr->params,
 	    (void *)asconf_ack->chunk_end,
 	    &err_param))
-		return sctp_sf_violation_paramlen(ep, asoc, type,
-			   (void *)&err_param, commands);
+		return sctp_sf_violation_paramlen(ep, asoc, type, arg,
+			   (void *)err_param, commands);
 
 	if (last_asconf) {
 		addip_hdr = (sctp_addiphdr_t *)last_asconf->subh.addip_hdr;
@@ -4240,12 +4240,38 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
-				     void *arg,
-				     sctp_cmd_seq_t *commands) {
-	static const char err_str[] = "The following parameter had invalid length:";
+				     void *arg, void *ext,
+				     sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk =  arg;
+	struct sctp_paramhdr *param = ext;
+	struct sctp_chunk *abort = NULL;
 
-	return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str,
-					sizeof(err_str));
+	if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc))
+		goto discard;
+
+	/* Make the abort chunk. */
+	abort = sctp_make_violation_paramlen(asoc, chunk, param);
+	if (!abort)
+		goto nomem;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+	SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
+			SCTP_ERROR(ECONNABORTED));
+	sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
+			SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION));
+	SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+
+discard:
+	sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
+
+	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+
+	return SCTP_DISPOSITION_ABORT;
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
 }
 
 /* Handle a protocol violation when the peer trying to advance the
-- 
cgit v1.2.3


From 4dd7972d1204c3851a4092cecd2207e05eb29b09 Mon Sep 17 00:00:00 2001
From: Vitaliy Gusev <vgusev@openvz.org>
Date: Wed, 1 Oct 2008 01:51:39 -0700
Subject: tcp: Fix NULL dereference in tcp_4_send_ack()

Fix NULL dereference in tcp_4_send_ack().

As skb->dev is reset to NULL in tcp_v4_rcv() thus OOPS occurs:

BUG: unable to handle kernel NULL pointer dereference at 00000000000004d0
IP: [<ffffffff80498503>] tcp_v4_send_ack+0x203/0x250

Stack:  ffff810005dbb000 ffff810015c8acc0 e77b2c6e5f861600 a01610802e90cb6d
 0a08010100000000 88afffff88afffff 0000000080762be8 0000000115c872e8
 0004122000000000 0000000000000001 ffffffff80762b88 0000000000000020
Call Trace:
 <IRQ>  [<ffffffff80499c33>] tcp_v4_reqsk_send_ack+0x20/0x22
 [<ffffffff8049bce5>] tcp_check_req+0x108/0x14c
 [<ffffffff8047aaf7>] ? rt_intern_hash+0x322/0x33c
 [<ffffffff80499846>] tcp_v4_do_rcv+0x399/0x4ec
 [<ffffffff8045ce4b>] ? skb_checksum+0x4f/0x272
 [<ffffffff80485b74>] ? __inet_lookup_listener+0x14a/0x15c
 [<ffffffff8049babc>] tcp_v4_rcv+0x6a1/0x701
 [<ffffffff8047e739>] ip_local_deliver_finish+0x157/0x24a
 [<ffffffff8047ec9a>] ip_local_deliver+0x72/0x7c
 [<ffffffff8047e5bd>] ip_rcv_finish+0x38d/0x3b2
 [<ffffffff803d3548>] ? scsi_io_completion+0x19d/0x39e
 [<ffffffff8047ebe5>] ip_rcv+0x2a2/0x2e5
 [<ffffffff80462faa>] netif_receive_skb+0x293/0x303
 [<ffffffff80465a9b>] process_backlog+0x80/0xd0
 [<ffffffff802630b4>] ? __rcu_process_callbacks+0x125/0x1b4
 [<ffffffff8046560e>] net_rx_action+0xb9/0x17f
 [<ffffffff80234cc5>] __do_softirq+0xa3/0x164
 [<ffffffff8020c52c>] call_softirq+0x1c/0x28
 <EOI>  [<ffffffff8020de1c>] do_softirq+0x34/0x72
 [<ffffffff80234b8e>] local_bh_enable_ip+0x3f/0x50
 [<ffffffff804d43ca>] _spin_unlock_bh+0x12/0x14
 [<ffffffff804599cd>] release_sock+0xb8/0xc1
 [<ffffffff804a6f9a>] inet_stream_connect+0x146/0x25c
 [<ffffffff80243078>] ? autoremove_wake_function+0x0/0x38
 [<ffffffff8045751f>] sys_connect+0x68/0x8e
 [<ffffffff80291818>] ? fd_install+0x5f/0x68
 [<ffffffff80457784>] ? sock_map_fd+0x55/0x62
 [<ffffffff8020b39b>] system_call_after_swapgs+0x7b/0x80

Code: 41 10 11 d0 83 d0 00 4d 85 ed 89 45 c0 c7 45 c4 08 00 00 00 74 07 41 8b 45 04 89 45 c8 48 8b 43 20 8b 4d b8 48 8d 55 b0 48 89 de <48> 8b 80 d0 04 00 00 48 8b b8 60 01 00 00 e8 20 ae fe ff 65 48
RIP  [<ffffffff80498503>] tcp_v4_send_ack+0x203/0x250
 RSP <ffffffff80762b78>
CR2: 00000000000004d0

Signed-off-by: Vitaliy Gusev <vgusev@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1b4fee20fc9..011478e46c4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -618,7 +618,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 			];
 	} rep;
 	struct ip_reply_arg arg;
-	struct net *net = dev_net(skb->dev);
+	struct net *net = dev_net(skb->dst->dev);
 
 	memset(&rep.th, 0, sizeof(struct tcphdr));
 	memset(&arg, 0, sizeof(arg));
-- 
cgit v1.2.3


From 2a5b82751f73a0bf6f604ce56d34adba6da1b246 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 1 Oct 2008 02:13:16 -0700
Subject: ipv6: NULL pointer dereferrence in tcp_v6_send_ack

The following actions are possible:
tcp_v6_rcv
  skb->dev = NULL;
  tcp_v6_do_rcv
    tcp_v6_hnd_req
      tcp_check_req
        req->rsk_ops->send_ack == tcp_v6_send_ack

So, skb->dev can be NULL in tcp_v6_send_ack. We must obtain namespace
from dst entry.

Thanks to Vitaliy Gusev <vgusev@openvz.org> for initial problem finding
in IPv4 code.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b585c850a89..10e22fd4822 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1050,7 +1050,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
 	struct tcphdr *th = tcp_hdr(skb), *t1;
 	struct sk_buff *buff;
 	struct flowi fl;
-	struct net *net = dev_net(skb->dev);
+	struct net *net = dev_net(skb->dst->dev);
 	struct sock *ctl_sk = net->ipv6.tcp_sk;
 	unsigned int tot_len = sizeof(struct tcphdr);
 	__be32 *topt;
-- 
cgit v1.2.3


From 5dc121e9a7a8a3721cefeb07f3559f50fbedc67e Mon Sep 17 00:00:00 2001
From: Arnaud Ebalard <arno@natisbad.org>
Date: Wed, 1 Oct 2008 02:37:56 -0700
Subject: XFRM,IPv6: initialize ip6_dst_blackhole_ops.kmem_cachep

ip6_dst_blackhole_ops.kmem_cachep is not expected to be NULL (i.e. to
be initialized) when dst_alloc() is called from ip6_dst_blackhole().
Otherwise, it results in the following (xfrm_larval_drop is now set to
1 by default):

[   78.697642] Unable to handle kernel paging request for data at address 0x0000004c
[   78.703449] Faulting instruction address: 0xc0097f54
[   78.786896] Oops: Kernel access of bad area, sig: 11 [#1]
[   78.792791] PowerMac
[   78.798383] Modules linked in: btusb usbhid bluetooth b43 mac80211 cfg80211 ehci_hcd ohci_hcd sungem sungem_phy usbcore ssb
[   78.804263] NIP: c0097f54 LR: c0334a28 CTR: c002d430
[   78.809997] REGS: eef19ad0 TRAP: 0300   Not tainted  (2.6.27-rc5)
[   78.815743] MSR: 00001032 <ME,IR,DR>  CR: 22242482  XER: 20000000
[   78.821550] DAR: 0000004c, DSISR: 40000000
[   78.827278] TASK = eef0df40[3035] 'mip6d' THREAD: eef18000
[   78.827408] GPR00: 00001032 eef19b80 eef0df40 00000000 00008020 eef19c30 00000001 00000000
[   78.833249] GPR08: eee5101c c05a5c10 ef9ad500 00000000 24242422 1005787c 00000000 1004f960
[   78.839151] GPR16: 00000000 10024e90 10050040 48030018 0fe44150 00000000 00000000 eef19c30
[   78.845046] GPR24: eef19e44 00000000 eef19bf8 efb37c14 eef19bf8 00008020 00009032 c0596064
[   78.856671] NIP [c0097f54] kmem_cache_alloc+0x20/0x94
[   78.862581] LR [c0334a28] dst_alloc+0x40/0xc4
[   78.868451] Call Trace:
[   78.874252] [eef19b80] [c03c1810] ip6_dst_lookup_tail+0x1c8/0x1dc (unreliable)
[   78.880222] [eef19ba0] [c0334a28] dst_alloc+0x40/0xc4
[   78.886164] [eef19bb0] [c03cd698] ip6_dst_blackhole+0x28/0x1cc
[   78.892090] [eef19be0] [c03d9be8] rawv6_sendmsg+0x75c/0xc88
[   78.897999] [eef19cb0] [c038bca4] inet_sendmsg+0x4c/0x78
[   78.903907] [eef19cd0] [c03207c8] sock_sendmsg+0xac/0xe4
[   78.909734] [eef19db0] [c03209e4] sys_sendmsg+0x1e4/0x2a0
[   78.915540] [eef19f00] [c03220a8] sys_socketcall+0xfc/0x210
[   78.921406] [eef19f40] [c0014b3c] ret_from_syscall+0x0/0x38
[   78.927295] --- Exception: c01 at 0xfe2d730
[   78.927297]     LR = 0xfe2d71c
[   78.939019] Instruction dump:
[   78.944835] 91640018 9144001c 900a0000 4bffff44 9421ffe0 7c0802a6 bf810010 7c9d2378
[   78.950694] 90010024 7fc000a6 57c0045e 7c000124 <83e3004c> 8383005c 2f9f0000 419e0050
[   78.956464] ---[ end trace 05fa1ed7972487a1 ]---

As commented by Benjamin Thery, the bug was introduced by
f2fc6a54585a1be6669613a31fbaba2ecbadcd36, while adding network
namespaces support to ipv6 routes.

Signed-off-by: Arnaud Ebalard <arno@natisbad.org>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9af6115f0f5..63442a1e741 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2688,6 +2688,8 @@ int __init ip6_route_init(void)
 	if (ret)
 		goto out_kmem_cache;
 
+	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
+
 	/* Registering of the loopback is done before this portion of code,
 	 * the loopback reference in rt6_info will not be taken, do it
 	 * manually for init_net */
-- 
cgit v1.2.3


From 0523820482dcb42784572ffd2296c2f08c275a2b Mon Sep 17 00:00:00 2001
From: Timo Teras <timo.teras@iki.fi>
Date: Wed, 1 Oct 2008 05:17:54 -0700
Subject: af_key: Free dumping state on socket close

Fix a xfrm_{state,policy}_walk leak if pfkey socket is closed while
dumping is on-going.

Signed-off-by: Timo Teras <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/key/af_key.c | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/key/af_key.c b/net/key/af_key.c
index d628df97e02..b7f5a1c353e 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -73,22 +73,18 @@ static int pfkey_can_dump(struct sock *sk)
 	return 0;
 }
 
-static int pfkey_do_dump(struct pfkey_sock *pfk)
+static void pfkey_terminate_dump(struct pfkey_sock *pfk)
 {
-	int rc;
-
-	rc = pfk->dump.dump(pfk);
-	if (rc == -ENOBUFS)
-		return 0;
-
-	pfk->dump.done(pfk);
-	pfk->dump.dump = NULL;
-	pfk->dump.done = NULL;
-	return rc;
+	if (pfk->dump.dump) {
+		pfk->dump.done(pfk);
+		pfk->dump.dump = NULL;
+		pfk->dump.done = NULL;
+	}
 }
 
 static void pfkey_sock_destruct(struct sock *sk)
 {
+	pfkey_terminate_dump(pfkey_sk(sk));
 	skb_queue_purge(&sk->sk_receive_queue);
 
 	if (!sock_flag(sk, SOCK_DEAD)) {
@@ -310,6 +306,18 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
 	return err;
 }
 
+static int pfkey_do_dump(struct pfkey_sock *pfk)
+{
+	int rc;
+
+	rc = pfk->dump.dump(pfk);
+	if (rc == -ENOBUFS)
+		return 0;
+
+	pfkey_terminate_dump(pfk);
+	return rc;
+}
+
 static inline void pfkey_hdr_dup(struct sadb_msg *new, struct sadb_msg *orig)
 {
 	*new = *orig;
-- 
cgit v1.2.3