From 679a87382433cf12a28f07a7d5c240f30f0daa08 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 3 May 2005 14:24:36 -0700 Subject: [IPV6]: Fix raw socket checksums with IPsec I made a mistake in my last patch to the raw socket checksum code. I used the value of inet->cork.length as the length of the payload. While this works with normal packets, it breaks down when IPsec is present since the cork length includes the extension header length. So here is a patch to fix the length calculations. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/raw.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 1352c1d9bf4..617645bc5ed 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -455,11 +455,11 @@ csum_copy_err: static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct raw6_sock *rp) { - struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; int err = 0; int offset; int len; + int total_len; u32 tmp_csum; u16 csum; @@ -470,7 +470,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, goto out; offset = rp->offset; - if (offset >= inet->cork.length - 1) { + total_len = inet_sk(sk)->cork.length - (skb->nh.raw - skb->data); + if (offset >= total_len - 1) { err = -EINVAL; ip6_flush_pending_frames(sk); goto out; @@ -514,7 +515,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, tmp_csum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, - inet->cork.length, fl->proto, tmp_csum); + total_len, fl->proto, tmp_csum); if (tmp_csum == 0) tmp_csum = -1; -- cgit v1.2.3 From db46edc6d3b66bf708a8f23a9aa89f63a49ebe33 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 May 2005 14:29:39 -0700 Subject: [RTNETLINK] Cleanup rtnetlink_link tables Converts remaining rtnetlink_link tables to use c99 designated initializers to make greping a little bit easier. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7196ac2f2d1..7744a259269 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3076,7 +3076,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_PREFIX, GFP_ATOMIC); } -static struct rtnetlink_link inet6_rtnetlink_table[RTM_MAX - RTM_BASE + 1] = { +static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_GETLINK - RTM_BASE] = { .dumpit = inet6_dump_ifinfo, }, [RTM_NEWADDR - RTM_BASE] = { .doit = inet6_rtm_newaddr, }, [RTM_DELADDR - RTM_BASE] = { .doit = inet6_rtm_deladdr, }, -- cgit v1.2.3 From c3924c70dd3bddc28b99ccd1688bd281bad1a9be Mon Sep 17 00:00:00 2001 From: Folkert van Heusden Date: Tue, 3 May 2005 14:36:45 -0700 Subject: [TCP]: Optimize check in port-allocation code, v6 version. Signed-off-by: Folkert van Heusden Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4760c85e19d..0f69e800a0a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -139,9 +139,12 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) int rover; spin_lock(&tcp_portalloc_lock); - rover = tcp_port_rover; + if (tcp_port_rover < low) + rover = low; + else + rover = tcp_port_rover; do { rover++; - if ((rover < low) || (rover > high)) + if (rover > high) rover = low; head = &tcp_bhash[tcp_bhashfn(rover)]; spin_lock(&head->lock); -- cgit v1.2.3 From 2a0a6ebee1d68552152ae8d4aeda91d806995dec Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 3 May 2005 14:55:09 -0700 Subject: [NETLINK]: Synchronous message processing. Let's recap the problem. The current asynchronous netlink kernel message processing is vulnerable to these attacks: 1) Hit and run: Attacker sends one or more messages and then exits before they're processed. This may confuse/disable the next netlink user that gets the netlink address of the attacker since it may receive the responses to the attacker's messages. Proposed solutions: a) Synchronous processing. b) Stream mode socket. c) Restrict/prohibit binding. 2) Starvation: Because various netlink rcv functions were written to not return until all messages have been processed on a socket, it is possible for these functions to execute for an arbitrarily long period of time. If this is successfully exploited it could also be used to hold rtnl forever. Proposed solutions: a) Synchronous processing. b) Stream mode socket. Firstly let's cross off solution c). It only solves the first problem and it has user-visible impacts. In particular, it'll break user space applications that expect to bind or communicate with specific netlink addresses (pid's). So we're left with a choice of synchronous processing versus SOCK_STREAM for netlink. For the moment I'm sticking with the synchronous approach as suggested by Alexey since it's simpler and I'd rather spend my time working on other things. However, it does have a number of deficiencies compared to the stream mode solution: 1) User-space to user-space netlink communication is still vulnerable. 2) Inefficient use of resources. This is especially true for rtnetlink since the lock is shared with other users such as networking drivers. The latter could hold the rtnl while communicating with hardware which causes the rtnetlink user to wait when it could be doing other things. 3) It is still possible to DoS all netlink users by flooding the kernel netlink receive queue. The attacker simply fills the receive socket with a single netlink message that fills up the entire queue. The attacker then continues to call sendmsg with the same message in a loop. Point 3) can be countered by retransmissions in user-space code, however it is pretty messy. In light of these problems (in particular, point 3), we should implement stream mode netlink at some point. In the mean time, here is a patch that implements synchronous processing. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6_queue.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index c54830b8959..750943e2d34 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -549,20 +549,18 @@ ipq_rcv_skb(struct sk_buff *skb) static void ipq_rcv_sk(struct sock *sk, int len) { - do { - struct sk_buff *skb; + struct sk_buff *skb; + unsigned int qlen; - if (down_trylock(&ipqnl_sem)) - return; + down(&ipqnl_sem); - while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { - ipq_rcv_skb(skb); - kfree_skb(skb); - } + for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { + skb = skb_dequeue(&sk->sk_receive_queue); + ipq_rcv_skb(skb); + kfree_skb(skb); + } - up(&ipqnl_sem); - - } while (ipqnl && ipqnl->sk_receive_queue.qlen); + up(&ipqnl_sem); } static int -- cgit v1.2.3 From aabc9761b69f1bfa30a78f7005be95cc9cc06175 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 3 May 2005 16:27:10 -0700 Subject: [IPSEC]: Store idev entries I found a bug that stopped IPsec/IPv6 from working. About a month ago IPv6 started using rt6i_idev->dev on the cached socket dst entries. If the cached socket dst entry is IPsec, then rt6i_idev will be NULL. Since we want to look at the rt6i_idev of the original route in this case, the easiest fix is to store rt6i_idev in the IPsec dst entry just as we do for a number of other IPv6 route attributes. Unfortunately this means that we need some new code to handle the references to rt6i_idev. That's why this patch is bigger than it would otherwise be. I've also done the same thing for IPv4 since it is conceivable that once these idev attributes start getting used for accounting, we probably need to dereference them for IPv4 IPsec entries too. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 8a4f37de4d2..4429b1a1fe5 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -11,7 +11,11 @@ * */ +#include +#include #include +#include +#include #include #include #include @@ -166,6 +170,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int memcpy(&x->u.rt6.rt6i_gateway, &rt0->rt6i_gateway, sizeof(x->u.rt6.rt6i_gateway)); x->u.rt6.rt6i_dst = rt0->rt6i_dst; x->u.rt6.rt6i_src = rt0->rt6i_src; + x->u.rt6.rt6i_idev = rt0->rt6i_idev; + in6_dev_hold(rt0->rt6i_idev); header_len -= x->u.dst.xfrm->props.header_len; trailer_len -= x->u.dst.xfrm->props.trailer_len; } @@ -251,11 +257,48 @@ static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) path->ops->update_pmtu(path, mtu); } +static void xfrm6_dst_destroy(struct dst_entry *dst) +{ + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + + if (likely(xdst->u.rt6.rt6i_idev)) + in6_dev_put(xdst->u.rt6.rt6i_idev); + xfrm_dst_destroy(xdst); +} + +static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, + int unregister) +{ + struct xfrm_dst *xdst; + + if (!unregister) + return; + + xdst = (struct xfrm_dst *)dst; + if (xdst->u.rt6.rt6i_idev->dev == dev) { + struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); + BUG_ON(!loopback_idev); + + do { + in6_dev_put(xdst->u.rt6.rt6i_idev); + xdst->u.rt6.rt6i_idev = loopback_idev; + in6_dev_hold(loopback_idev); + xdst = (struct xfrm_dst *)xdst->u.dst.child; + } while (xdst->u.dst.xfrm); + + __in6_dev_put(loopback_idev); + } + + xfrm_dst_ifdown(dst, dev); +} + static struct dst_ops xfrm6_dst_ops = { .family = AF_INET6, .protocol = __constant_htons(ETH_P_IPV6), .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, + .destroy = xfrm6_dst_destroy, + .ifdown = xfrm6_dst_ifdown, .gc_thresh = 1024, .entry_size = sizeof(struct xfrm_dst), }; -- cgit v1.2.3