From 679a87382433cf12a28f07a7d5c240f30f0daa08 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 3 May 2005 14:24:36 -0700
Subject: [IPV6]: Fix raw socket checksums with IPsec

I made a mistake in my last patch to the raw socket checksum code.
I used the value of inet->cork.length as the length of the payload.
While this works with normal packets, it breaks down when IPsec is
present since the cork length includes the extension header length.

So here is a patch to fix the length calculations.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/raw.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 1352c1d9bf4..617645bc5ed 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -455,11 +455,11 @@ csum_copy_err:
 static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
 				     struct raw6_sock *rp)
 {
-	struct inet_sock *inet = inet_sk(sk);
 	struct sk_buff *skb;
 	int err = 0;
 	int offset;
 	int len;
+	int total_len;
 	u32 tmp_csum;
 	u16 csum;
 
@@ -470,7 +470,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
 		goto out;
 
 	offset = rp->offset;
-	if (offset >= inet->cork.length - 1) {
+	total_len = inet_sk(sk)->cork.length - (skb->nh.raw - skb->data);
+	if (offset >= total_len - 1) {
 		err = -EINVAL;
 		ip6_flush_pending_frames(sk);
 		goto out;
@@ -514,7 +515,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
 
 	tmp_csum = csum_ipv6_magic(&fl->fl6_src,
 				   &fl->fl6_dst,
-				   inet->cork.length, fl->proto, tmp_csum);
+				   total_len, fl->proto, tmp_csum);
 
 	if (tmp_csum == 0)
 		tmp_csum = -1;
-- 
cgit v1.2.3


From db46edc6d3b66bf708a8f23a9aa89f63a49ebe33 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 3 May 2005 14:29:39 -0700
Subject: [RTNETLINK] Cleanup rtnetlink_link tables

Converts remaining rtnetlink_link tables to use c99 designated
initializers to make greping a little bit easier.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7196ac2f2d1..7744a259269 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3076,7 +3076,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
 	netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_PREFIX, GFP_ATOMIC);
 }
 
-static struct rtnetlink_link inet6_rtnetlink_table[RTM_MAX - RTM_BASE + 1] = {
+static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
 	[RTM_GETLINK - RTM_BASE] = { .dumpit	= inet6_dump_ifinfo, },
 	[RTM_NEWADDR - RTM_BASE] = { .doit	= inet6_rtm_newaddr, },
 	[RTM_DELADDR - RTM_BASE] = { .doit	= inet6_rtm_deladdr, },
-- 
cgit v1.2.3


From c3924c70dd3bddc28b99ccd1688bd281bad1a9be Mon Sep 17 00:00:00 2001
From: Folkert van Heusden <folkert@vanheusden.com>
Date: Tue, 3 May 2005 14:36:45 -0700
Subject: [TCP]: Optimize check in port-allocation code, v6 version.

Signed-off-by: Folkert van Heusden <folkert@vanheusden.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4760c85e19d..0f69e800a0a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -139,9 +139,12 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
 		int rover;
 
 		spin_lock(&tcp_portalloc_lock);
-		rover = tcp_port_rover;
+		if (tcp_port_rover < low)
+			rover = low;
+		else
+			rover = tcp_port_rover;
 		do {	rover++;
-			if ((rover < low) || (rover > high))
+			if (rover > high)
 				rover = low;
 			head = &tcp_bhash[tcp_bhashfn(rover)];
 			spin_lock(&head->lock);
-- 
cgit v1.2.3


From 2a0a6ebee1d68552152ae8d4aeda91d806995dec Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 3 May 2005 14:55:09 -0700
Subject: [NETLINK]: Synchronous message processing.

Let's recap the problem.  The current asynchronous netlink kernel
message processing is vulnerable to these attacks:

1) Hit and run: Attacker sends one or more messages and then exits
before they're processed.  This may confuse/disable the next netlink
user that gets the netlink address of the attacker since it may
receive the responses to the attacker's messages.

Proposed solutions:

a) Synchronous processing.
b) Stream mode socket.
c) Restrict/prohibit binding.

2) Starvation: Because various netlink rcv functions were written
to not return until all messages have been processed on a socket,
it is possible for these functions to execute for an arbitrarily
long period of time.  If this is successfully exploited it could
also be used to hold rtnl forever.

Proposed solutions:

a) Synchronous processing.
b) Stream mode socket.

Firstly let's cross off solution c).  It only solves the first
problem and it has user-visible impacts.  In particular, it'll
break user space applications that expect to bind or communicate
with specific netlink addresses (pid's).

So we're left with a choice of synchronous processing versus
SOCK_STREAM for netlink.

For the moment I'm sticking with the synchronous approach as
suggested by Alexey since it's simpler and I'd rather spend
my time working on other things.

However, it does have a number of deficiencies compared to the
stream mode solution:

1) User-space to user-space netlink communication is still vulnerable.

2) Inefficient use of resources.  This is especially true for rtnetlink
since the lock is shared with other users such as networking drivers.
The latter could hold the rtnl while communicating with hardware which
causes the rtnetlink user to wait when it could be doing other things.

3) It is still possible to DoS all netlink users by flooding the kernel
netlink receive queue.  The attacker simply fills the receive socket
with a single netlink message that fills up the entire queue.  The
attacker then continues to call sendmsg with the same message in a loop.

Point 3) can be countered by retransmissions in user-space code, however
it is pretty messy.

In light of these problems (in particular, point 3), we should implement
stream mode netlink at some point.  In the mean time, here is a patch
that implements synchronous processing.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6_queue.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index c54830b8959..750943e2d34 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -549,20 +549,18 @@ ipq_rcv_skb(struct sk_buff *skb)
 static void
 ipq_rcv_sk(struct sock *sk, int len)
 {
-	do {
-		struct sk_buff *skb;
+	struct sk_buff *skb;
+	unsigned int qlen;
 
-		if (down_trylock(&ipqnl_sem))
-			return;
+	down(&ipqnl_sem);
 			
-		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
-			ipq_rcv_skb(skb);
-			kfree_skb(skb);
-		}
+	for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		ipq_rcv_skb(skb);
+		kfree_skb(skb);
+	}
 		
-		up(&ipqnl_sem);
-
-	} while (ipqnl && ipqnl->sk_receive_queue.qlen);
+	up(&ipqnl_sem);
 }
 
 static int
-- 
cgit v1.2.3


From aabc9761b69f1bfa30a78f7005be95cc9cc06175 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 3 May 2005 16:27:10 -0700
Subject: [IPSEC]: Store idev entries

I found a bug that stopped IPsec/IPv6 from working.  About
a month ago IPv6 started using rt6i_idev->dev on the cached socket dst
entries.  If the cached socket dst entry is IPsec, then rt6i_idev will
be NULL.

Since we want to look at the rt6i_idev of the original route in this
case, the easiest fix is to store rt6i_idev in the IPsec dst entry just
as we do for a number of other IPv6 route attributes.  Unfortunately
this means that we need some new code to handle the references to
rt6i_idev.  That's why this patch is bigger than it would otherwise be.

I've also done the same thing for IPv4 since it is conceivable that
once these idev attributes start getting used for accounting, we
probably need to dereference them for IPv4 IPsec entries too.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/xfrm6_policy.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

(limited to 'net/ipv6')

diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 8a4f37de4d2..4429b1a1fe5 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -11,7 +11,11 @@
  * 
  */
 
+#include <asm/bug.h>
+#include <linux/compiler.h>
 #include <linux/config.h>
+#include <linux/netdevice.h>
+#include <net/addrconf.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
@@ -166,6 +170,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		memcpy(&x->u.rt6.rt6i_gateway, &rt0->rt6i_gateway, sizeof(x->u.rt6.rt6i_gateway)); 
 		x->u.rt6.rt6i_dst      = rt0->rt6i_dst;
 		x->u.rt6.rt6i_src      = rt0->rt6i_src;	
+		x->u.rt6.rt6i_idev     = rt0->rt6i_idev;
+		in6_dev_hold(rt0->rt6i_idev);
 		header_len -= x->u.dst.xfrm->props.header_len;
 		trailer_len -= x->u.dst.xfrm->props.trailer_len;
 	}
@@ -251,11 +257,48 @@ static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
 	path->ops->update_pmtu(path, mtu);
 }
 
+static void xfrm6_dst_destroy(struct dst_entry *dst)
+{
+	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+
+	if (likely(xdst->u.rt6.rt6i_idev))
+		in6_dev_put(xdst->u.rt6.rt6i_idev);
+	xfrm_dst_destroy(xdst);
+}
+
+static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+			     int unregister)
+{
+	struct xfrm_dst *xdst;
+
+	if (!unregister)
+		return;
+
+	xdst = (struct xfrm_dst *)dst;
+	if (xdst->u.rt6.rt6i_idev->dev == dev) {
+		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
+		BUG_ON(!loopback_idev);
+
+		do {
+			in6_dev_put(xdst->u.rt6.rt6i_idev);
+			xdst->u.rt6.rt6i_idev = loopback_idev;
+			in6_dev_hold(loopback_idev);
+			xdst = (struct xfrm_dst *)xdst->u.dst.child;
+		} while (xdst->u.dst.xfrm);
+
+		__in6_dev_put(loopback_idev);
+	}
+
+	xfrm_dst_ifdown(dst, dev);
+}
+
 static struct dst_ops xfrm6_dst_ops = {
 	.family =		AF_INET6,
 	.protocol =		__constant_htons(ETH_P_IPV6),
 	.gc =			xfrm6_garbage_collect,
 	.update_pmtu =		xfrm6_update_pmtu,
+	.destroy =		xfrm6_dst_destroy,
+	.ifdown =		xfrm6_dst_ifdown,
 	.gc_thresh =		1024,
 	.entry_size =		sizeof(struct xfrm_dst),
 };
-- 
cgit v1.2.3