From cc6cdac0cf11955dc81d6c50b6738d05e1dca12b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 18 Feb 2006 16:02:18 -0500
Subject: [PATCH] missing ntohs() in ip6_tunnel

->payload_len is net-endian

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/ipv6/ip6_tunnel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 92ead3cf956..faea8a120ee 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -458,7 +458,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			mtu = IPV6_MIN_MTU;
 		t->dev->mtu = mtu;
 
-		if ((len = sizeof (*ipv6h) + ipv6h->payload_len) > mtu) {
+		if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) {
 			rel_type = ICMPV6_PKT_TOOBIG;
 			rel_code = 0;
 			rel_info = mtu;
-- 
cgit v1.2.3


From 995110143880fd9cb255fa5df05f8950c56fb43a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 19 Feb 2006 22:11:50 -0800
Subject: [XFRM]: Fix policy double put

The policy is put once immediately and once at the error label, which results
in the following Oops:

kernel BUG at net/xfrm/xfrm_policy.c:250!
invalid opcode: 0000 [#2]
PREEMPT
[...]
CPU:    0
EIP:    0060:[<c028caf7>]    Not tainted VLI
EFLAGS: 00210246   (2.6.16-rc3 #39)
EIP is at __xfrm_policy_destroy+0xf/0x46
eax: d49f2000   ebx: d49f2000   ecx: f74bd880   edx: f74bd280
esi: d49f2000   edi: 00000001   ebp: cd506dcc   esp: cd506dc8
ds: 007b   es: 007b   ss: 0068
Process ssh (pid: 31970, threadinfo=cd506000 task=cfb04a70)
Stack: <0>cd506000 cd506e34 c028e92b ebde7280 cd506e58 cd506ec0 f74bd280 00000000
       00000214 0000000a 0000000a 00000000 00000002 f7ae6000 00000000 cd506e58
       cd506e14 c0299e36 f74bd280 e873fe00 c02943fd cd506ec0 ebde7280 f271f440
Call Trace:
 [<c0103a44>] show_stack_log_lvl+0xaa/0xb5
 [<c0103b75>] show_registers+0x126/0x18c
 [<c0103e68>] die+0x14e/0x1db
 [<c02b6809>] do_trap+0x7c/0x96
 [<c0104237>] do_invalid_op+0x89/0x93
 [<c01035af>] error_code+0x4f/0x54
 [<c028e92b>] xfrm_lookup+0x349/0x3c2
 [<c02b0b0d>] ip6_datagram_connect+0x317/0x452
 [<c0281749>] inet_dgram_connect+0x49/0x54
 [<c02404d2>] sys_connect+0x51/0x68
 [<c0240928>] sys_socketcall+0x6f/0x166
 [<c0102aa1>] syscall_call+0x7/0xb

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 98ec53bd3ac..5e6b05ac126 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -885,8 +885,6 @@ restart:
 			 * We can't enlist stable bundles either.
 			 */
 			write_unlock_bh(&policy->lock);
-
-			xfrm_pol_put(policy);
 			if (dst)
 				dst_free(dst);
 
-- 
cgit v1.2.3


From bc6e14b6f0b06fe93d809d22e257ddd275feeda9 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 19 Feb 2006 22:26:40 -0800
Subject: [NETFILTER]: Fix NAT PMTUD problems

ICMP errors are only SNATed when their source matches the source of the
connection they are related to, otherwise the source address is not
changed. This creates problems with ICMP frag. required messages
originating from a router behind the NAT, if private IPs are used the
packet has a good change of getting dropped on the path to its destination.

Always NAT ICMP errors similar to the original connection.

Based on report by Al Viro.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_nat_core.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index c1a61462507..1741d555ad0 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -434,6 +434,7 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
 	} *inside;
 	struct ip_conntrack_tuple inner, target;
 	int hdrlen = (*pskb)->nh.iph->ihl * 4;
+	unsigned long statusbit;
 
 	if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
 		return 0;
@@ -495,17 +496,16 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
 
 	/* Change outer to look the reply to an incoming packet
 	 * (proto 0 means don't invert per-proto part). */
+	if (manip == IP_NAT_MANIP_SRC)
+		statusbit = IPS_SRC_NAT;
+	else
+		statusbit = IPS_DST_NAT;
 
-	/* Obviously, we need to NAT destination IP, but source IP
-	   should be NAT'ed only if it is from a NAT'd host.
+	/* Invert if this is reply dir. */
+	if (dir == IP_CT_DIR_REPLY)
+		statusbit ^= IPS_NAT_MASK;
 
-	   Explanation: some people use NAT for anonymizing.  Also,
-	   CERT recommends dropping all packets from private IP
-	   addresses (although ICMP errors from internal links with
-	   such addresses are not too uncommon, as Alan Cox points
-	   out) */
-	if (manip != IP_NAT_MANIP_SRC
-	    || ((*pskb)->nh.iph->saddr == ct->tuplehash[dir].tuple.src.ip)) {
+	if (ct->status & statusbit) {
 		invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
 		if (!manip_pkt(0, pskb, 0, &target, manip))
 			return 0;
-- 
cgit v1.2.3


From 8e249f088131cde5f77fd073bf0b0e8b3e9ea4ac Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 19 Feb 2006 22:29:47 -0800
Subject: [NETFILTER]: Fix outgoing redirects to loopback

When redirecting an outgoing packet to loopback, it keeps the original
conntrack reference and information from the outgoing path, which
falsely triggers the check for DNAT on input and the dst_entry is
released to trigger rerouting. ip_route_input refuses to route the
packet because it has a local source address and it is dropped.

Look at the packet itself to dermine if it was NATed. Also fix a
missing inversion that causes unneccesary xfrm lookups.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_nat_standalone.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 7c3f7d38024..ab1f88fa21e 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -200,20 +200,14 @@ ip_nat_in(unsigned int hooknum,
           const struct net_device *out,
           int (*okfn)(struct sk_buff *))
 {
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
 	unsigned int ret;
+	u_int32_t daddr = (*pskb)->nh.iph->daddr;
 
 	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN
-	    && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
-		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
-		if (ct->tuplehash[dir].tuple.dst.ip !=
-		    ct->tuplehash[!dir].tuple.src.ip) {
-			dst_release((*pskb)->dst);
-			(*pskb)->dst = NULL;
-		}
+	    && daddr != (*pskb)->nh.iph->daddr) {
+		dst_release((*pskb)->dst);
+		(*pskb)->dst = NULL;
 	}
 	return ret;
 }
@@ -276,7 +270,7 @@ ip_nat_local_fn(unsigned int hooknum,
 		    ct->tuplehash[!dir].tuple.src.ip
 #ifdef CONFIG_XFRM
 		    || ct->tuplehash[dir].tuple.dst.u.all !=
-		       ct->tuplehash[dir].tuple.src.u.all
+		       ct->tuplehash[!dir].tuple.src.u.all
 #endif
 		    )
 			return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
-- 
cgit v1.2.3


From a8372f035aa2f6717123eb30679a08b619321dd4 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sun, 19 Feb 2006 22:32:06 -0800
Subject: [NET]: NETFILTER: remove duplicated lines and fix order in
 skb_clone().

Some of netfilter-related members are initalized / copied twice in
skb_clone(). Remove one.

Pointed out by Olivier MATZ <olivier.matz@6wind.com>.

And this patch also fixes order of copying / clearing members.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6766f118f07..2144952d1c6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -411,6 +411,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 	C(pkt_type);
 	C(ip_summed);
 	C(priority);
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+	C(ipvs_property);
+#endif
 	C(protocol);
 	n->destructor = NULL;
 #ifdef CONFIG_NETFILTER
@@ -422,13 +425,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 	C(nfct_reasm);
 	nf_conntrack_get_reasm(skb->nfct_reasm);
 #endif
-#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
-	C(ipvs_property);
-#endif
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-	C(nfct_reasm);
-	nf_conntrack_get_reasm(skb->nfct_reasm);
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	C(nf_bridge);
 	nf_bridge_get(skb->nf_bridge);
-- 
cgit v1.2.3


From 42cf93cd464e0df3c85d298c647411bae6d99e6e Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 21 Feb 2006 13:37:35 -0800
Subject: [NETFILTER]: Fix bridge netfilter related in xfrm_lookup

The bridge-netfilter code attaches a fake dst_entry with dst->ops == NULL
to purely bridged packets. When these packets are SNATed and a policy
lookup is done, xfrm_lookup crashes because it tries to dereference
dst->ops.

Change xfrm_lookup not to dereference dst->ops before checking for the
DST_NOXFRM flag and set this flag in the fake dst_entry.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 1 +
 net/xfrm/xfrm_policy.c    | 7 ++++---
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 6bb0c7eb1ef..e060aad8624 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -90,6 +90,7 @@ static struct rtable __fake_rtable = {
 			.dev			= &__fake_net_device,
 			.path			= &__fake_rtable.u.dst,
 			.metrics		= {[RTAX_MTU - 1] = 1500},
+			.flags			= DST_NOXFRM,
 		}
 	},
 	.rt_flags	= 0,
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 5e6b05ac126..8206025d8e4 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -782,7 +782,7 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 	int nx = 0;
 	int err;
 	u32 genid;
-	u16 family = dst_orig->ops->family;
+	u16 family;
 	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
 	u32 sk_sid = security_sk_sid(sk, fl, dir);
 restart:
@@ -796,13 +796,14 @@ restart:
 		if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
 			return 0;
 
-		policy = flow_cache_lookup(fl, sk_sid, family, dir,
-					   xfrm_policy_lookup);
+		policy = flow_cache_lookup(fl, sk_sid, dst_orig->ops->family,
+					   dir, xfrm_policy_lookup);
 	}
 
 	if (!policy)
 		return 0;
 
+	family = dst_orig->ops->family;
 	policy->curlft.use_time = (unsigned long)xtime.tv_sec;
 
 	switch (policy->action) {
-- 
cgit v1.2.3


From 85259878499d6c428cba191bb4e415a250dcd75a Mon Sep 17 00:00:00 2001
From: Suresh Bhogavilli <sbhogavilli@verisign.com>
Date: Tue, 21 Feb 2006 13:42:22 -0800
Subject: [IPV4]: Fix garbage collection of multipath route entries

When garbage collecting route cache entries of multipath routes
in rt_garbage_collect(), entries were deleted from the hash bucket
'i' while holding a spin lock on bucket 'k' resulting in a system
hang.  Delete entries, if any, from bucket 'k' instead.

Signed-off-by: Suresh Bhogavilli <sbhogavilli@verisign.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d82c242ea70..fca5fe0cf94 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -835,7 +835,7 @@ static int rt_garbage_collect(void)
 					int r;
 
 					rthp = rt_remove_balanced_route(
-						&rt_hash_table[i].chain,
+						&rt_hash_table[k].chain,
 						rth,
 						&r);
 					goal -= r;
-- 
cgit v1.2.3


From 21380b81ef8699179b535e197a95b891a7badac7 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 22 Feb 2006 14:47:13 -0800
Subject: [XFRM]: Eliminate refcounting confusion by creating
 __xfrm_state_put().

We often just do an atomic_dec(&x->refcnt) on an xfrm_state object
because we know there is more than 1 reference remaining and thus
we can elide the heavier xfrm_state_put() call.

Do this behind an inline function called __xfrm_state_put() so that is
more obvious and also to allow us to more cleanly add refcount
debugging later.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/key/af_key.c      | 2 +-
 net/xfrm/xfrm_state.c | 8 ++++----
 net/xfrm/xfrm_user.c  | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/key/af_key.c b/net/key/af_key.c
index ae86d237a45..b2d4d1dd211 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1423,7 +1423,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr,
 
 	if (err < 0) {
 		x->km.state = XFRM_STATE_DEAD;
-		xfrm_state_put(x);
+		__xfrm_state_put(x);
 		goto out;
 	}
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index e12d0be5f97..c656cbaf35e 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -220,14 +220,14 @@ static int __xfrm_state_delete(struct xfrm_state *x)
 		x->km.state = XFRM_STATE_DEAD;
 		spin_lock(&xfrm_state_lock);
 		list_del(&x->bydst);
-		atomic_dec(&x->refcnt);
+		__xfrm_state_put(x);
 		if (x->id.spi) {
 			list_del(&x->byspi);
-			atomic_dec(&x->refcnt);
+			__xfrm_state_put(x);
 		}
 		spin_unlock(&xfrm_state_lock);
 		if (del_timer(&x->timer))
-			atomic_dec(&x->refcnt);
+			__xfrm_state_put(x);
 
 		/* The number two in this test is the reference
 		 * mentioned in the comment below plus the reference
@@ -243,7 +243,7 @@ static int __xfrm_state_delete(struct xfrm_state *x)
 		 * The xfrm_state_alloc call gives a reference, and that
 		 * is what we are dropping here.
 		 */
-		atomic_dec(&x->refcnt);
+		__xfrm_state_put(x);
 		err = 0;
 	}
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index ac87a09ba83..7de17559249 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -345,7 +345,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
 
 	if (err < 0) {
 		x->km.state = XFRM_STATE_DEAD;
-		xfrm_state_put(x);
+		__xfrm_state_put(x);
 		goto out;
 	}
 
-- 
cgit v1.2.3


From f8d0e3f11593928ac3f968c378a44e80b04488c9 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Thu, 23 Feb 2006 16:18:01 -0800
Subject: [NET] ethernet: Fix first packet goes out with MAC 00:00:00:00:00:00

When you turn off ARP on a netdevice then the first packet always goes
out with a dstMAC of all zeroes. This is because the first packet is
used to resolve ARP entries. Even though the ARP entry may be resolved
(I tried by setting a static ARP entry for a host i was pinging from),
it gets overwritten by virtue of having the netdevice disabling ARP.

Subsequent packets go out fine with correct dstMAC address (which may
be why people have ignored reporting this issue).

To cut the story short:

the culprit code is in net/ethernet/eth.c::eth_header()

----
        /*
         *      Anyway, the loopback-device should never use this
function...
         */

        if (dev->flags & (IFF_LOOPBACK|IFF_NOARP))
        {
                memset(eth->h_dest, 0, dev->addr_len);
                return ETH_HLEN;
        }

	if(daddr)
        {
                memcpy(eth->h_dest,daddr,dev->addr_len);
                return ETH_HLEN;
        }

----

Note how the h_dest is being reset when device has IFF_NOARP.

As a note:
All devices including loopback pass a daddr. loopback in fact passes
a 0 all the time ;->
This means i can delete the check totaly or i can remove the IFF_NOARP

Alexey says:
--------------------
I think, it was me who did this crap. It was so long ago I do not remember
why it was made.

I remember some troubles with dummy device. It tried to resolve
addresses, apparently, without success and generated errors instead of
blackholing. I think the problem was eventually solved at neighbour
level.

After some thinking I suspect the deletion of this chunk could change
behaviour of some parts which do not use neighbour cache f.e. packet
socket.

I think safer approach would be to move this chunk after if (daddr).
And the possibility to remove this completely could be analyzed later.
--------------------

Patch updated with Alexey's safer suggestions.

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Acked-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethernet/eth.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 9890fd97e53..c971f14712e 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -95,6 +95,12 @@ int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
 		saddr = dev->dev_addr;
 	memcpy(eth->h_source,saddr,dev->addr_len);
 
+	if(daddr)
+	{
+		memcpy(eth->h_dest,daddr,dev->addr_len);
+		return ETH_HLEN;
+	}
+	
 	/*
 	 *	Anyway, the loopback-device should never use this function... 
 	 */
@@ -105,12 +111,6 @@ int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
 		return ETH_HLEN;
 	}
 	
-	if(daddr)
-	{
-		memcpy(eth->h_dest,daddr,dev->addr_len);
-		return ETH_HLEN;
-	}
-	
 	return -ETH_HLEN;
 }
 
-- 
cgit v1.2.3


From 4da3089f2b582b21e1374ccc6df722d4361eb915 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 23 Feb 2006 16:19:26 -0800
Subject: [IPSEC]: Use TOS when doing tunnel lookups

We should use the TOS because it's one of the routing keys.  It also
means that we update the correct routing cache entry when PMTU occurs.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_policy.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 45f7ae58f2c..f285bbf296e 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -35,6 +35,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
 		if (xdst->u.rt.fl.oif == fl->oif &&	/*XXX*/
 		    xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
 	    	    xdst->u.rt.fl.fl4_src == fl->fl4_src &&
+	    	    xdst->u.rt.fl.fl4_tos == fl->fl4_tos &&
 		    xfrm_bundle_ok(xdst, fl, AF_INET)) {
 			dst_clone(dst);
 			break;
@@ -61,7 +62,8 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		.nl_u = {
 			.ip4_u = {
 				.saddr = local,
-				.daddr = remote
+				.daddr = remote,
+				.tos = fl->fl4_tos
 			}
 		}
 	};
@@ -230,6 +232,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
 	fl->proto = iph->protocol;
 	fl->fl4_dst = iph->daddr;
 	fl->fl4_src = iph->saddr;
+	fl->fl4_tos = iph->tos;
 }
 
 static inline int xfrm4_garbage_collect(void)
-- 
cgit v1.2.3


From 0c0888908dec145aaaa40d8a49d34913573f5a27 Mon Sep 17 00:00:00 2001
From: Hugo Santos <hsantos@av.it.pt>
Date: Fri, 24 Feb 2006 13:16:25 -0800
Subject: [IPV6] ip6_tunnel: release cached dst on change of tunnel params

The included patch fixes ip6_tunnel to release the cached dst entry
when the tunnel parameters (such as tunnel endpoints) are changed so
they are used immediatly for the next encapsulated packets.

Signed-off-by: Hugo Santos <hsantos@av.it.pt>
Acked-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index faea8a120ee..48597538db3 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -884,6 +884,7 @@ ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
 	t->parms.encap_limit = p->encap_limit;
 	t->parms.flowinfo = p->flowinfo;
 	t->parms.link = p->link;
+	ip6_tnl_dst_reset(t);
 	ip6ip6_tnl_link_config(t);
 	return 0;
 }
-- 
cgit v1.2.3


From d91675f9c7f5752e8657df1e1d926bd6a624434f Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yosufuji@linux-ipv6.org>
Date: Fri, 24 Feb 2006 13:18:33 -0800
Subject: [IPV6]: Do not ignore IPV6_MTU socket option.

Based on patch by Hoerdt Mickael <hoerdt@clarinet.u-strasbg.fr>.

Signed-off-by: YOSHIFUJI Hideaki <yosufuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index efa3e72cfcf..f999edd846a 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -494,6 +494,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	struct net_device *dev;
 	struct sk_buff *frag;
 	struct rt6_info *rt = (struct rt6_info*)skb->dst;
+	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 	struct ipv6hdr *tmp_hdr;
 	struct frag_hdr *fh;
 	unsigned int mtu, hlen, left, len;
@@ -505,7 +506,12 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	hlen = ip6_find_1stfragopt(skb, &prevhdr);
 	nexthdr = *prevhdr;
 
-	mtu = dst_mtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
+	mtu = dst_mtu(&rt->u.dst);
+	if (np && np->frag_size < mtu) {
+		if (np->frag_size)
+			mtu = np->frag_size;
+	}
+	mtu -= hlen + sizeof(struct frag_hdr);
 
 	if (skb_shinfo(skb)->frag_list) {
 		int first_len = skb_pagelen(skb);
@@ -882,7 +888,12 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		inet->cork.fl = *fl;
 		np->cork.hop_limit = hlimit;
 		np->cork.tclass = tclass;
-		inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
+		mtu = dst_mtu(rt->u.dst.path);
+		if (np && np->frag_size < mtu) {
+			if (np->frag_size)
+				mtu = np->frag_size;
+		}
+		inet->cork.fragsize = mtu;
 		if (dst_allfrag(rt->u.dst.path))
 			inet->cork.flags |= IPCORK_ALLFRAG;
 		inet->cork.length = 0;
-- 
cgit v1.2.3