From e4f8b5d4edc1edb0709531bd1a342655d5e8b98e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 11 Feb 2008 17:50:30 -0800
Subject: [IPV4]: Remove IP_TOS setting privilege checks.

Various RFCs have all sorts of things to say about the CS field of the
DSCP value.  In particular they try to make the distinction between
values that should be used by "user applications" and things like
routing daemons.

This seems to have influenced the CAP_NET_ADMIN check which exists for
IP_TOS socket option settings, but in fact it has an off-by-one error
so it wasn't allowing CS5 which is meant for "user applications" as
well.

Further adding to the inconsistency and brokenness here, IPV6 does not
validate the DSCP values specified for the IPV6_TCLASS socket option.

The real actual uses of these TOS values are system specific in the
final analysis, and these RFC recommendations are just that, "a
recommendation".  In fact the standards very purposefully use
"SHOULD" and "SHOULD NOT" when describing how these values can be
used.

In the final analysis the only clean way to provide consistency here
is to remove the CAP_NET_ADMIN check.  The alternatives just don't
work out:

1) If we add the CAP_NET_ADMIN check to ipv6, this can break existing
   setups.

2) If we just fix the off-by-one error in the class comparison in
   IPV4, certain DSCP values can be used in IPV6 but not IPV4 by
   default.  So people will just ask for a sysctl asking to
   override that.

I checked several other freely available kernel trees and they
do not make any privilege checks in this area like we do.  For
the BSD stacks, this goes back all the way to Stevens Volume 2
and beyond.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 754b0a5bbfe..de0572c8885 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -514,11 +514,6 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			val &= ~3;
 			val |= inet->tos & 3;
 		}
-		if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP &&
-		    !capable(CAP_NET_ADMIN)) {
-			err = -EPERM;
-			break;
-		}
 		if (inet->tos != val) {
 			inet->tos = val;
 			sk->sk_priority = rt_tos2priority(val);
-- 
cgit v1.2.3


From ec28cf738d899e9d0652108e1986101771aacb2e Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 11 Feb 2008 21:12:49 -0800
Subject: fib_trie: handle empty tree

This fixes possible problems when trie_firstleaf() returns NULL
to trie_leafindex().

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index f5fba3f71c0..2d895274b7f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1762,11 +1762,9 @@ static struct leaf *trie_leafindex(struct trie *t, int index)
 {
 	struct leaf *l = trie_firstleaf(t);
 
-	while (index-- > 0) {
+	while (l && index-- > 0)
 		l = trie_nextleaf(l);
-		if (!l)
-			break;
-	}
+
 	return l;
 }
 
-- 
cgit v1.2.3


From 8315f5d80a90247bf92232f92ca49933ac49327b Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 11 Feb 2008 21:14:39 -0800
Subject: fib_trie: /proc/net/route performance improvement

Use key/offset caching to change /proc/net/route (used by iputils route)
from O(n^2) to O(n). This improves performance from 30sec with 160,000
routes to 1sec.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 82 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 2d895274b7f..1ff446d0fa8 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2459,6 +2459,84 @@ static const struct file_operations fib_trie_fops = {
 	.release = seq_release_net,
 };
 
+struct fib_route_iter {
+	struct seq_net_private p;
+	struct trie *main_trie;
+	loff_t	pos;
+	t_key	key;
+};
+
+static struct leaf *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos)
+{
+	struct leaf *l = NULL;
+	struct trie *t = iter->main_trie;
+
+	/* resume from cached leaf (iter->key, iter->pos) if we can */
+	if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key)))
+		pos -= iter->pos;
+	else {
+		iter->pos = 0;
+		l = trie_firstleaf(t);
+	}
+
+	while (l && pos-- > 0) {
+		iter->pos++;
+		l = trie_nextleaf(l);
+	}
+
+	if (l)
+		iter->key = l->key;	/* cache the leaf's key, not the counter */
+	else
+		iter->pos = 0;		/* forget it */
+
+	return l;
+}
+
+static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
+{
+	struct fib_route_iter *iter = seq->private;
+	struct fib_table *tb;
+
+	rcu_read_lock();
+	tb = fib_get_table(iter->p.net, RT_TABLE_MAIN);
+	if (!tb)
+		return NULL;
+
+	iter->main_trie = (struct trie *) tb->tb_data;
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+	else
+		return fib_route_get_idx(iter, *pos - 1);
+}
+
+static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct fib_route_iter *iter = seq->private;
+	struct leaf *l = v;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN) {
+		iter->pos = 0;
+		l = trie_firstleaf(iter->main_trie);
+	} else {
+		iter->pos++;
+		l = trie_nextleaf(l);
+	}
+
+	if (l)
+		iter->key = l->key;
+	else
+		iter->pos = 0;
+	return l;
+}
+
+static void fib_route_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU)
+{
+	rcu_read_unlock();
+}
+
 static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
 {
 	static unsigned type2flags[RTN_MAX + 1] = {
@@ -2482,7 +2560,6 @@ static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
  */
 static int fib_route_seq_show(struct seq_file *seq, void *v)
 {
-	const struct fib_trie_iter *iter = seq->private;
 	struct leaf *l = v;
 	struct leaf_info *li;
 	struct hlist_node *node;
@@ -2494,12 +2571,6 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 		return 0;
 	}
 
-	if (iter->trie == iter->trie_local)
-		return 0;
-
-	if (IS_TNODE(l))
-		return 0;
-
 	hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
 		struct fib_alias *fa;
 		__be32 mask, prefix;
@@ -2542,16 +2613,16 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 }
 
 static const struct seq_operations fib_route_seq_ops = {
-	.start  = fib_trie_seq_start,
-	.next   = fib_trie_seq_next,
-	.stop   = fib_trie_seq_stop,
+	.start  = fib_route_seq_start,
+	.next   = fib_route_seq_next,
+	.stop   = fib_route_seq_stop,
 	.show   = fib_route_seq_show,
 };
 
 static int fib_route_seq_open(struct inode *inode, struct file *file)
 {
 	return seq_open_net(inode, file, &fib_route_seq_ops,
-			    sizeof(struct fib_trie_iter));
+			    sizeof(struct fib_route_iter));
 }
 
 static const struct file_operations fib_route_fops = {
-- 
cgit v1.2.3


From 1105b5d1d44e6f00e31422dfcb0139bc8ae966a9 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 11 Feb 2008 21:24:56 -0800
Subject: [AX25] af_ax25: remove sock lock in ax25_info_show()

This lockdep warning:

> =======================================================
> [ INFO: possible circular locking dependency detected ]
> 2.6.24 #3
> -------------------------------------------------------
> swapper/0 is trying to acquire lock:
>  (ax25_list_lock){-+..}, at: [<f91dd3b1>] ax25_destroy_socket+0x171/0x1f0 [ax25]
>
> but task is already holding lock:
>  (slock-AF_AX25){-+..}, at: [<f91dbabc>] ax25_std_heartbeat_expiry+0x1c/0xe0 [ax25]
>
> which lock already depends on the new lock.
...

shows that ax25_list_lock and slock-AF_AX25 are taken in different
orders: ax25_info_show() takes slock (bh_lock_sock(ax25->sk)) while
ax25_list_lock is held, which is the reverse of the order used by other
functions. To fix this, the sock lock would have to be moved to
ax25_info_start(), and there would still be a problem with breaking
ax25_list_lock (it seems this "proper" order isn't optimal yet). But
since the lock is only used for reading proc info, it seems unnecessary
(e.g. ax25_send_to_raw() does similar reading without this lock too).

So, this patch removes the sock lock to avoid the possibility of a
deadlock; it also uses the sock_i_ino() function, which reads sk_socket
under the proper read lock. Additionally, the printf format of this
i_ino is changed to %lu.

Reported-by: Bernard Pidoux F6BVP <f6bvp@free.fr>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/af_ax25.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 8fc64e3150a..5a4337a2909 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1928,12 +1928,10 @@ static int ax25_info_show(struct seq_file *seq, void *v)
 		   ax25->paclen);
 
 	if (ax25->sk != NULL) {
-		bh_lock_sock(ax25->sk);
-		seq_printf(seq," %d %d %ld\n",
+		seq_printf(seq, " %d %d %lu\n",
 			   atomic_read(&ax25->sk->sk_wmem_alloc),
 			   atomic_read(&ax25->sk->sk_rmem_alloc),
-			   ax25->sk->sk_socket != NULL ? SOCK_INODE(ax25->sk->sk_socket)->i_ino : 0L);
-		bh_unlock_sock(ax25->sk);
+			   sock_i_ino(ax25->sk));
 	} else {
 		seq_puts(seq, " * * *\n");
 	}
-- 
cgit v1.2.3


From 4de211f1a279275c6c67d6e9b6b25513e46b0bb9 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 11 Feb 2008 21:26:43 -0800
Subject: [AX25] ax25_route: make ax25_route_lock BH safe

> =================================
> [ INFO: inconsistent lock state ]
> 2.6.24-dg8ngn-p02 #1
> ---------------------------------
> inconsistent {softirq-on-W} -> {in-softirq-R} usage.
> linuxnet/3046 [HC0[0]:SC1[2]:HE1:SE0] takes:
>  (ax25_route_lock){--.+}, at: [<f8a0cfb7>] ax25_get_route+0x18/0xb7 [ax25]
> {softirq-on-W} state was registered at:
...

This lockdep report shows that ax25_route_lock is taken for reading in
softirq context, and for writing in process context with BHs enabled.
So, to make this safe, all write_locks in ax25_route.c are changed to
_bh versions.

Reported-by: Jann Traschewski <jann@gmx.de>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/ax25_route.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 38c7f3087ec..8672cd84fdf 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -45,7 +45,7 @@ void ax25_rt_device_down(struct net_device *dev)
 {
 	ax25_route *s, *t, *ax25_rt;
 
-	write_lock(&ax25_route_lock);
+	write_lock_bh(&ax25_route_lock);
 	ax25_rt = ax25_route_list;
 	while (ax25_rt != NULL) {
 		s       = ax25_rt;
@@ -68,7 +68,7 @@ void ax25_rt_device_down(struct net_device *dev)
 			}
 		}
 	}
-	write_unlock(&ax25_route_lock);
+	write_unlock_bh(&ax25_route_lock);
 }
 
 static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
@@ -82,7 +82,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
 	if (route->digi_count > AX25_MAX_DIGIS)
 		return -EINVAL;
 
-	write_lock(&ax25_route_lock);
+	write_lock_bh(&ax25_route_lock);
 
 	ax25_rt = ax25_route_list;
 	while (ax25_rt != NULL) {
@@ -92,7 +92,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
 			ax25_rt->digipeat = NULL;
 			if (route->digi_count != 0) {
 				if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
-					write_unlock(&ax25_route_lock);
+					write_unlock_bh(&ax25_route_lock);
 					return -ENOMEM;
 				}
 				ax25_rt->digipeat->lastrepeat = -1;
@@ -102,14 +102,14 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
 					ax25_rt->digipeat->calls[i]    = route->digi_addr[i];
 				}
 			}
-			write_unlock(&ax25_route_lock);
+			write_unlock_bh(&ax25_route_lock);
 			return 0;
 		}
 		ax25_rt = ax25_rt->next;
 	}
 
 	if ((ax25_rt = kmalloc(sizeof(ax25_route), GFP_ATOMIC)) == NULL) {
-		write_unlock(&ax25_route_lock);
+		write_unlock_bh(&ax25_route_lock);
 		return -ENOMEM;
 	}
 
@@ -120,7 +120,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
 	ax25_rt->ip_mode      = ' ';
 	if (route->digi_count != 0) {
 		if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
-			write_unlock(&ax25_route_lock);
+			write_unlock_bh(&ax25_route_lock);
 			kfree(ax25_rt);
 			return -ENOMEM;
 		}
@@ -133,7 +133,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
 	}
 	ax25_rt->next   = ax25_route_list;
 	ax25_route_list = ax25_rt;
-	write_unlock(&ax25_route_lock);
+	write_unlock_bh(&ax25_route_lock);
 
 	return 0;
 }
@@ -152,7 +152,7 @@ static int ax25_rt_del(struct ax25_routes_struct *route)
 	if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL)
 		return -EINVAL;
 
-	write_lock(&ax25_route_lock);
+	write_lock_bh(&ax25_route_lock);
 
 	ax25_rt = ax25_route_list;
 	while (ax25_rt != NULL) {
@@ -174,7 +174,7 @@ static int ax25_rt_del(struct ax25_routes_struct *route)
 			}
 		}
 	}
-	write_unlock(&ax25_route_lock);
+	write_unlock_bh(&ax25_route_lock);
 
 	return 0;
 }
@@ -188,7 +188,7 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option)
 	if ((ax25_dev = ax25_addr_ax25dev(&rt_option->port_addr)) == NULL)
 		return -EINVAL;
 
-	write_lock(&ax25_route_lock);
+	write_lock_bh(&ax25_route_lock);
 
 	ax25_rt = ax25_route_list;
 	while (ax25_rt != NULL) {
@@ -216,7 +216,7 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option)
 	}
 
 out:
-	write_unlock(&ax25_route_lock);
+	write_unlock_bh(&ax25_route_lock);
 	return err;
 }
 
@@ -492,7 +492,7 @@ void __exit ax25_rt_free(void)
 {
 	ax25_route *s, *ax25_rt = ax25_route_list;
 
-	write_lock(&ax25_route_lock);
+	write_lock_bh(&ax25_route_lock);
 	while (ax25_rt != NULL) {
 		s       = ax25_rt;
 		ax25_rt = ax25_rt->next;
@@ -500,5 +500,5 @@ void __exit ax25_rt_free(void)
 		kfree(s->digipeat);
 		kfree(s);
 	}
-	write_unlock(&ax25_route_lock);
+	write_unlock_bh(&ax25_route_lock);
 }
-- 
cgit v1.2.3


From 21fab4a86a411c18c6b4d663ae710ca1f6206b3c Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 11 Feb 2008 21:36:39 -0800
Subject: [AX25] ax25_timer: use mod_timer instead of add_timer

According to one of Jann's OOPS reports it looks like
BUG_ON(timer_pending(timer)) triggers during add_timer()
in ax25_start_t1timer(). This patch changes current use
of: init_timer(), add_timer() and del_timer() to
setup_timer() with mod_timer(), which should be safer
anyway.

Reported-by: Jann Traschewski <jann@gmx.de>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/af_ax25.c    |  6 +-----
 net/ax25/ax25_timer.c | 60 ++++++++++++++++++---------------------------------
 2 files changed, 22 insertions(+), 44 deletions(-)

(limited to 'net')

diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 5a4337a2909..48bfcc741f2 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -510,11 +510,7 @@ ax25_cb *ax25_create_cb(void)
 	skb_queue_head_init(&ax25->ack_queue);
 	skb_queue_head_init(&ax25->reseq_queue);
 
-	init_timer(&ax25->timer);
-	init_timer(&ax25->t1timer);
-	init_timer(&ax25->t2timer);
-	init_timer(&ax25->t3timer);
-	init_timer(&ax25->idletimer);
+	ax25_setup_timers(ax25);
 
 	ax25_fillin_cb(ax25, NULL);
 
diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c
index 72594867fab..db29ea71e80 100644
--- a/net/ax25/ax25_timer.c
+++ b/net/ax25/ax25_timer.c
@@ -40,63 +40,45 @@ static void ax25_t2timer_expiry(unsigned long);
 static void ax25_t3timer_expiry(unsigned long);
 static void ax25_idletimer_expiry(unsigned long);
 
-void ax25_start_heartbeat(ax25_cb *ax25)
+void ax25_setup_timers(ax25_cb *ax25)
 {
-	del_timer(&ax25->timer);
-
-	ax25->timer.data     = (unsigned long)ax25;
-	ax25->timer.function = &ax25_heartbeat_expiry;
-	ax25->timer.expires  = jiffies + 5 * HZ;
+	setup_timer(&ax25->timer, ax25_heartbeat_expiry, (unsigned long)ax25);
+	setup_timer(&ax25->t1timer, ax25_t1timer_expiry, (unsigned long)ax25);
+	setup_timer(&ax25->t2timer, ax25_t2timer_expiry, (unsigned long)ax25);
+	setup_timer(&ax25->t3timer, ax25_t3timer_expiry, (unsigned long)ax25);
+	setup_timer(&ax25->idletimer, ax25_idletimer_expiry,
+		    (unsigned long)ax25);
+}
 
-	add_timer(&ax25->timer);
+void ax25_start_heartbeat(ax25_cb *ax25)
+{
+	mod_timer(&ax25->timer, jiffies + 5 * HZ);
 }
 
 void ax25_start_t1timer(ax25_cb *ax25)
 {
-	del_timer(&ax25->t1timer);
-
-	ax25->t1timer.data     = (unsigned long)ax25;
-	ax25->t1timer.function = &ax25_t1timer_expiry;
-	ax25->t1timer.expires  = jiffies + ax25->t1;
-
-	add_timer(&ax25->t1timer);
+	mod_timer(&ax25->t1timer, jiffies + ax25->t1);
 }
 
 void ax25_start_t2timer(ax25_cb *ax25)
 {
-	del_timer(&ax25->t2timer);
-
-	ax25->t2timer.data     = (unsigned long)ax25;
-	ax25->t2timer.function = &ax25_t2timer_expiry;
-	ax25->t2timer.expires  = jiffies + ax25->t2;
-
-	add_timer(&ax25->t2timer);
+	mod_timer(&ax25->t2timer, jiffies + ax25->t2);
 }
 
 void ax25_start_t3timer(ax25_cb *ax25)
 {
-	del_timer(&ax25->t3timer);
-
-	if (ax25->t3 > 0) {
-		ax25->t3timer.data     = (unsigned long)ax25;
-		ax25->t3timer.function = &ax25_t3timer_expiry;
-		ax25->t3timer.expires  = jiffies + ax25->t3;
-
-		add_timer(&ax25->t3timer);
-	}
+	if (ax25->t3 > 0)
+		mod_timer(&ax25->t3timer, jiffies + ax25->t3);
+	else
+		del_timer(&ax25->t3timer);
 }
 
 void ax25_start_idletimer(ax25_cb *ax25)
 {
-	del_timer(&ax25->idletimer);
-
-	if (ax25->idle > 0) {
-		ax25->idletimer.data     = (unsigned long)ax25;
-		ax25->idletimer.function = &ax25_idletimer_expiry;
-		ax25->idletimer.expires  = jiffies + ax25->idle;
-
-		add_timer(&ax25->idletimer);
-	}
+	if (ax25->idle > 0)
+		mod_timer(&ax25->idletimer, jiffies + ax25->idle);
+	else
+		del_timer(&ax25->idletimer);
 }
 
 void ax25_stop_heartbeat(ax25_cb *ax25)
-- 
cgit v1.2.3


From e848b583e03306f5f9b3a66a793c37e3649e04ca Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 11 Feb 2008 21:38:32 -0800
Subject: [AX25] ax25_ds_timer: use mod_timer instead of add_timer

This patch changes current use of: init_timer(), add_timer()
and del_timer() to setup_timer() with mod_timer(), which
should be safer anyway.

Reported-by: Jann Traschewski <jann@gmx.de>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/ax25_dev.c      |  2 +-
 net/ax25/ax25_ds_timer.c | 12 ++++--------
 2 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 528c874d982..a7a0e0c9698 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -82,7 +82,7 @@ void ax25_dev_device_up(struct net_device *dev)
 	ax25_dev->values[AX25_VALUES_DS_TIMEOUT]= AX25_DEF_DS_TIMEOUT;
 
 #if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER)
-	init_timer(&ax25_dev->dama.slave_timer);
+	ax25_ds_setup_timer(ax25_dev);
 #endif
 
 	spin_lock_bh(&ax25_dev_lock);
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index c4e3b025d21..2ce79df0068 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -40,13 +40,10 @@ static void ax25_ds_timeout(unsigned long);
  *	1/10th of a second.
  */
 
-static void ax25_ds_add_timer(ax25_dev *ax25_dev)
+void ax25_ds_setup_timer(ax25_dev *ax25_dev)
 {
-	struct timer_list *t = &ax25_dev->dama.slave_timer;
-	t->data		= (unsigned long) ax25_dev;
-	t->function	= &ax25_ds_timeout;
-	t->expires	= jiffies + HZ;
-	add_timer(t);
+	setup_timer(&ax25_dev->dama.slave_timer, ax25_ds_timeout,
+		    (unsigned long)ax25_dev);
 }
 
 void ax25_ds_del_timer(ax25_dev *ax25_dev)
@@ -60,10 +57,9 @@ void ax25_ds_set_timer(ax25_dev *ax25_dev)
 	if (ax25_dev == NULL)		/* paranoia */
 		return;
 
-	del_timer(&ax25_dev->dama.slave_timer);
 	ax25_dev->dama.slave_timeout =
 		msecs_to_jiffies(ax25_dev->values[AX25_VALUES_DS_TIMEOUT]) / 10;
-	ax25_ds_add_timer(ax25_dev);
+	mod_timer(&ax25_dev->dama.slave_timer, jiffies + HZ);
 }
 
 /*
-- 
cgit v1.2.3


From 69cc64d8d92bf852f933e90c888dfff083bd4fc9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 11 Feb 2008 21:45:44 -0800
Subject: [NDISC]: Fix race in generic address resolution

Frank Blaschka provided the bug report and the initial suggested fix
for this bug.  He also validated this version of this fix.

The problem is that the access to neigh->arp_queue is inconsistent, we
grab references when dropping the lock to call
neigh->ops->solicit() but this does not prevent other threads of
control from trying to send out that packet at the same time causing
corruptions because both code paths believe they have exclusive access
to the skb.

The best option seems to be to hold the write lock on neigh->lock
during the ->solicit() call.  I looked at all of the ndisc_ops
implementations and this seems workable.  The only case that needs
special care is the IPV4 ARP implementation of arp_solicit().  It
wants to take neigh->lock as a reader to protect the header entry in
neigh->ha during the emission of the solicitation.  We can simply
remove the read lock calls to take care of that since holding the lock
as a writer at the caller provides a superset of the protection
afforded by the existing read locking.

The rest of the ->solicit() implementations don't care whether the
neigh is locked or not.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 12 +++---------
 net/ipv4/arp.c       |  3 ---
 2 files changed, 3 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a16cf1ec5e5..7bb6a9a1256 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -834,18 +834,12 @@ static void neigh_timer_handler(unsigned long arg)
 	}
 	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
 		struct sk_buff *skb = skb_peek(&neigh->arp_queue);
-		/* keep skb alive even if arp_queue overflows */
-		if (skb)
-			skb_get(skb);
-		write_unlock(&neigh->lock);
+
 		neigh->ops->solicit(neigh, skb);
 		atomic_inc(&neigh->probes);
-		if (skb)
-			kfree_skb(skb);
-	} else {
-out:
-		write_unlock(&neigh->lock);
 	}
+out:
+	write_unlock(&neigh->lock);
 
 	if (notify)
 		neigh_update_notify(neigh);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 8e17f65f400..c663fa5339e 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -368,7 +368,6 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 		if (!(neigh->nud_state&NUD_VALID))
 			printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n");
 		dst_ha = neigh->ha;
-		read_lock_bh(&neigh->lock);
 	} else if ((probes -= neigh->parms->app_probes) < 0) {
 #ifdef CONFIG_ARPD
 		neigh_app_ns(neigh);
@@ -378,8 +377,6 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 
 	arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
 		 dst_ha, dev->dev_addr, NULL);
-	if (dst_ha)
-		read_unlock_bh(&neigh->lock);
 }
 
 static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
-- 
cgit v1.2.3


From 28a89453b1e8de8d777ad96fa1eef27b5d1ce074 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 12 Feb 2008 18:07:27 -0800
Subject: [IPV6]: Fix IPsec datagram fragmentation

This is a long-standing bug in the IPsec IPv6 code that breaks
when we emit an IPsec tunnel-mode datagram packet.  The problem
is that the code that emits the packet assumes the IPv6 stack
will fragment it later, but the IPv6 stack assumes that whoever
is emitting the packet is going to pre-fragment the packet.

In the long term we need to fix both sides, e.g., to get the
datagram code to pre-fragment as well as to get the IPv6 stack
to fragment locally generated tunnel-mode packets.

For now this patch does the second part which should make it
work for the IPsec host case.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c   | 6 +++++-
 net/ipv6/xfrm6_output.c | 2 +-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 9ac6ca2521c..4e9a2fe2f12 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -621,7 +621,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	 * or if the skb it not generated by a local socket.  (This last
 	 * check should be redundant, but it's free.)
 	 */
-	if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
+	if (skb->local_df) {
 		skb->dev = skb->dst->dev;
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
 		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
@@ -1420,6 +1420,10 @@ int ip6_push_pending_frames(struct sock *sk)
 		tmp_skb->sk = NULL;
 	}
 
+	/* Allow local fragmentation. */
+	if (np->pmtudisc >= IPV6_PMTUDISC_DO)
+		skb->local_df = 1;
+
 	ipv6_addr_copy(final_dst, &fl->fl6_dst);
 	__skb_pull(skb, skb_network_header_len(skb));
 	if (opt && opt->opt_flen)
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index b34c58c6565..79ccfb08073 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -36,7 +36,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
 
-	if (skb->len > mtu) {
+	if (!skb->local_df && skb->len > mtu) {
 		skb->dev = dst->dev;
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
 		ret = -EMSGSIZE;
-- 
cgit v1.2.3


From fee54fa517bef1de2c10a1a3e918228cc59dce90 Mon Sep 17 00:00:00 2001
From: Urs Thuermann <urs@isnogud.escape.de>
Date: Tue, 12 Feb 2008 22:03:25 -0800
Subject: [NET]: Fix comment for skb_pull_rcsum

Fix comment for skb_pull_rcsum

Signed-off-by: Urs Thuermann <urs@isnogud.escape.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4e354221ec2..40dddcc6dc3 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2106,11 +2106,10 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
 /**
  *	skb_pull_rcsum - pull skb and update receive checksum
  *	@skb: buffer to update
- *	@start: start of data before pull
  *	@len: length of data pulled
  *
  *	This function performs an skb_pull on the packet and updates
- *	update the CHECKSUM_COMPLETE checksum.  It should be used on
+ *	the CHECKSUM_COMPLETE checksum.  It should be used on
  *	receive path processing instead of skb_pull unless you know
  *	that the checksum difference is zero (e.g., a valid IP header)
  *	or you are setting ip_summed to CHECKSUM_NONE.
-- 
cgit v1.2.3


From 4c3a0a254e5d706d3fe01bf42261534858d05586 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 12 Feb 2008 22:15:14 -0800
Subject: [NETLABEL]: Fix lookup logic of netlbl_domhsh_search_def.

Currently, if the call to netlbl_domhsh_search succeeds the
return result will still be NULL.

Fix that, by returning the found entry (if any).

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/netlabel_domainhash.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 9a8ea0195c4..fd462313471 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -150,11 +150,11 @@ static struct netlbl_dom_map *netlbl_domhsh_search_def(const char *domain)
 	entry = netlbl_domhsh_search(domain);
 	if (entry == NULL) {
 		entry = rcu_dereference(netlbl_domhsh_def);
-		if (entry != NULL && entry->valid)
-			return entry;
+		if (entry != NULL && !entry->valid)
+			entry = NULL;
 	}
 
-	return NULL;
+	return entry;
 }
 
 /*
-- 
cgit v1.2.3


From 910d6c320cac65c81d66e8fd30dca167092722eb Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 12 Feb 2008 22:16:33 -0800
Subject: [GENETLINK]: Relax dances with genl_lock.

The genl_unregister_family() calls the genl_unregister_mc_groups(),
which takes and releases the genl_lock and then locks and releases
this lock itself.

Relax this behavior, all the more so because genl_unregister_mc_groups()
is called from genl_unregister_family() only.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/genetlink.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 150579a2146..d16929c9b4b 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -230,10 +230,8 @@ static void genl_unregister_mc_groups(struct genl_family *family)
 {
 	struct genl_multicast_group *grp, *tmp;
 
-	genl_lock();
 	list_for_each_entry_safe(grp, tmp, &family->mcast_groups, list)
 		__genl_unregister_mc_group(family, grp);
-	genl_unlock();
 }
 
 /**
@@ -396,10 +394,10 @@ int genl_unregister_family(struct genl_family *family)
 {
 	struct genl_family *rc;
 
-	genl_unregister_mc_groups(family);
-
 	genl_lock();
 
+	genl_unregister_mc_groups(family);
+
 	list_for_each_entry(rc, genl_family_chain(family->id), family_list) {
 		if (family->id != rc->id || strcmp(rc->name, family->name))
 			continue;
-- 
cgit v1.2.3


From 94de7feb2dee6d0039ecbe98ae8b63bbb63808b6 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 12 Feb 2008 22:35:37 -0800
Subject: [NETLABEL]: Compilation for CONFIG_AUDIT=n case.

The audit_log_start() will expand into an empty do { } while (0)
construction and the audit_ctx becomes unused.

The solution: push current->audit_context into audit_log_start()
directly, since it is not required in any other place in the
calling function.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/netlabel_user.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index 85a96a3fdda..023fc8fe840 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -96,7 +96,6 @@ int netlbl_netlink_init(void)
 struct audit_buffer *netlbl_audit_start_common(int type,
 					       struct netlbl_audit *audit_info)
 {
-	struct audit_context *audit_ctx = current->audit_context;
 	struct audit_buffer *audit_buf;
 	char *secctx;
 	u32 secctx_len;
@@ -104,7 +103,7 @@ struct audit_buffer *netlbl_audit_start_common(int type,
 	if (audit_enabled == 0)
 		return NULL;
 
-	audit_buf = audit_log_start(audit_ctx, GFP_ATOMIC, type);
+	audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC, type);
 	if (audit_buf == NULL)
 		return NULL;
 
-- 
cgit v1.2.3


From 56628b1d8964eb7ac924154d60b5d874bfb2b1e8 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 12 Feb 2008 22:37:19 -0800
Subject: [NETLABEL]: Don't produce unused variables when IPv6 is off.

Some code declares variables on the stack, but uses them
under #ifdef CONFIG_IPV6, so they become unused when ipv6
is off. Fortunately, they are used in a switch's case
branches, so the fix is rather simple.

Is it OK from coding style POV to add braces inside "cases",
or should I better avoid such style and rework the patch?

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/netlabel_unlabeled.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 42e81fd8cc4..3587874d64e 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -617,8 +617,6 @@ static int netlbl_unlhsh_add(struct net *net,
 	int ifindex;
 	struct net_device *dev;
 	struct netlbl_unlhsh_iface *iface;
-	struct in_addr *addr4, *mask4;
-	struct in6_addr *addr6, *mask6;
 	struct audit_buffer *audit_buf = NULL;
 	char *secctx = NULL;
 	u32 secctx_len;
@@ -651,7 +649,9 @@ static int netlbl_unlhsh_add(struct net *net,
 	audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCADD,
 					      audit_info);
 	switch (addr_len) {
-	case sizeof(struct in_addr):
+	case sizeof(struct in_addr): {
+		struct in_addr *addr4, *mask4;
+
 		addr4 = (struct in_addr *)addr;
 		mask4 = (struct in_addr *)mask;
 		ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid);
@@ -661,8 +661,11 @@ static int netlbl_unlhsh_add(struct net *net,
 						   addr4->s_addr,
 						   mask4->s_addr);
 		break;
+	}
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	case sizeof(struct in6_addr):
+	case sizeof(struct in6_addr): {
+		struct in6_addr *addr6, *mask6;
+
 		addr6 = (struct in6_addr *)addr;
 		mask6 = (struct in6_addr *)mask;
 		ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid);
@@ -671,6 +674,7 @@ static int netlbl_unlhsh_add(struct net *net,
 						   dev_name,
 						   addr6, mask6);
 		break;
+	}
 #endif /* IPv6 */
 	default:
 		ret_val = -EINVAL;
@@ -1741,10 +1745,6 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb,
 			   u16 family,
 			   struct netlbl_lsm_secattr *secattr)
 {
-	struct iphdr *hdr4;
-	struct ipv6hdr *hdr6;
-	struct netlbl_unlhsh_addr4 *addr4;
-	struct netlbl_unlhsh_addr6 *addr6;
 	struct netlbl_unlhsh_iface *iface;
 
 	rcu_read_lock();
@@ -1752,21 +1752,29 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb,
 	if (iface == NULL)
 		goto unlabel_getattr_nolabel;
 	switch (family) {
-	case PF_INET:
+	case PF_INET: {
+		struct iphdr *hdr4;
+		struct netlbl_unlhsh_addr4 *addr4;
+
 		hdr4 = ip_hdr(skb);
 		addr4 = netlbl_unlhsh_search_addr4(hdr4->saddr, iface);
 		if (addr4 == NULL)
 			goto unlabel_getattr_nolabel;
 		secattr->attr.secid = addr4->secid;
 		break;
+	}
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	case PF_INET6:
+	case PF_INET6: {
+		struct ipv6hdr *hdr6;
+		struct netlbl_unlhsh_addr6 *addr6;
+
 		hdr6 = ipv6_hdr(skb);
 		addr6 = netlbl_unlhsh_search_addr6(&hdr6->saddr, iface);
 		if (addr6 == NULL)
 			goto unlabel_getattr_nolabel;
 		secattr->attr.secid = addr6->secid;
 		break;
+	}
 #endif /* IPv6 */
 	default:
 		goto unlabel_getattr_nolabel;
-- 
cgit v1.2.3


From 370125f0a48a2584a2506fd567d690df6d87cf2c Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 12 Feb 2008 22:38:06 -0800
Subject: [NETLABEL]: Hide netlbl_unlabel_audit_addr6 under ifdef CONFIG_IPV6.

This one is called from under this config only, so move
it in the same place.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/netlabel_unlabeled.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 3587874d64e..3e745b72fde 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -180,6 +180,7 @@ static void netlbl_unlabel_audit_addr4(struct audit_buffer *audit_buf,
 	}
 }
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 /**
  * netlbl_unlabel_audit_addr6 - Audit an IPv6 address
  * @audit_buf: audit buffer
@@ -213,6 +214,7 @@ static void netlbl_unlabel_audit_addr6(struct audit_buffer *audit_buf,
 		audit_log_format(audit_buf, " src_prefixlen=%d", mask_len);
 	}
 }
+#endif /* IPv6 */
 
 /*
  * Unlabeled Connection Hash Table Functions
-- 
cgit v1.2.3


From 45b503548210fe6f23e92b856421c2a3f05fd034 Mon Sep 17 00:00:00 2001
From: Laszlo Attila Toth <panther@balabit.hu>
Date: Tue, 12 Feb 2008 22:42:09 -0800
Subject: [RTNETLINK]: Send a single notification on device state changes.

In do_setlink() a single notification is sent at the end of the
function if any modification occurred. If the address has been changed,
another notification is sent.

Both of them are required because originally only the NETDEV_CHANGEADDR
notification was sent and although device state change implies address
change, some programs may expect the original notification. It remains
for compatibility.

When set_operstate() is called from do_setlink(), it no longer sends a
notification itself; it still does so, as before, when called from
rtnl_create_link().

Signed-off-by: Laszlo Attila Toth <panther@balabit.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 36 ++++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 61ac8d06292..ecb02afd52d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -504,7 +504,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
 
 EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
 
-static void set_operstate(struct net_device *dev, unsigned char transition)
+static int set_operstate(struct net_device *dev, unsigned char transition, bool send_notification)
 {
 	unsigned char operstate = dev->operstate;
 
@@ -527,8 +527,12 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
 		write_lock_bh(&dev_base_lock);
 		dev->operstate = operstate;
 		write_unlock_bh(&dev_base_lock);
-		netdev_state_change(dev);
-	}
+
+		if (send_notification)
+			netdev_state_change(dev);
+		return 1;
+	} else
+		return 0;
 }
 
 static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
@@ -822,6 +826,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 	if (tb[IFLA_BROADCAST]) {
 		nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
 		send_addr_notify = 1;
+		modified = 1;
 	}
 
 	if (ifm->ifi_flags || ifm->ifi_change) {
@@ -834,16 +839,23 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		dev_change_flags(dev, flags);
 	}
 
-	if (tb[IFLA_TXQLEN])
-		dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
+	if (tb[IFLA_TXQLEN]) {
+		if (dev->tx_queue_len != nla_get_u32(tb[IFLA_TXQLEN])) {
+			dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
+			modified = 1;
+		}
+	}
 
 	if (tb[IFLA_OPERSTATE])
-		set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
+		modified |= set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]), false);
 
 	if (tb[IFLA_LINKMODE]) {
-		write_lock_bh(&dev_base_lock);
-		dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
-		write_unlock_bh(&dev_base_lock);
+		if (dev->link_mode != nla_get_u8(tb[IFLA_LINKMODE])) {
+			write_lock_bh(&dev_base_lock);
+			dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
+			write_unlock_bh(&dev_base_lock);
+			modified = 1;
+		}
 	}
 
 	err = 0;
@@ -857,6 +869,10 @@ errout:
 
 	if (send_addr_notify)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+
+	if (modified)
+		netdev_state_change(dev);
+
 	return err;
 }
 
@@ -974,7 +990,7 @@ struct net_device *rtnl_create_link(struct net *net, char *ifname,
 	if (tb[IFLA_TXQLEN])
 		dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
 	if (tb[IFLA_OPERSTATE])
-		set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
+		set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]), true);
 	if (tb[IFLA_LINKMODE])
 		dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
 
-- 
cgit v1.2.3


From b318e0e4ef4e85812c25afa19f75addccc834cd4 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 12 Feb 2008 22:50:35 -0800
Subject: [IPSEC]: Fix bogus usage of u64 on input sequence number

Al Viro spotted a bogus use of u64 on the input sequence number which
is big-endian.  This patch fixes it by giving the input sequence number
its own member in the xfrm_skb_cb structure.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ah4.c         | 2 +-
 net/ipv4/esp4.c        | 5 +++--
 net/ipv6/ah6.c         | 2 +-
 net/ipv6/esp6.c        | 5 +++--
 net/xfrm/xfrm_input.c  | 4 ++--
 net/xfrm/xfrm_output.c | 2 +-
 6 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 9d4555ec0b5..8219b7e0968 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -96,7 +96,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	ah->reserved = 0;
 	ah->spi = x->id.spi;
-	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq);
+	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output);
 
 	spin_lock_bh(&x->lock);
 	err = ah_mac_digest(ahp, skb, ah->auth_data);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 258d17631b4..091e6709f83 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -199,7 +199,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	}
 
 	esph->spi = x->id.spi;
-	esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq);
+	esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output);
 
 	sg_init_table(sg, nfrags);
 	skb_to_sgvec(skb, sg,
@@ -210,7 +210,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
 	aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
 	aead_givcrypt_set_assoc(req, asg, sizeof(*esph));
-	aead_givcrypt_set_giv(req, esph->enc_data, XFRM_SKB_CB(skb)->seq);
+	aead_givcrypt_set_giv(req, esph->enc_data,
+			      XFRM_SKB_CB(skb)->seq.output);
 
 	ESP_SKB_CB(skb)->tmp = tmp;
 	err = crypto_aead_givencrypt(req);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 379c8e04c36..2ff0c8233e4 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -283,7 +283,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	ah->reserved = 0;
 	ah->spi = x->id.spi;
-	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq);
+	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output);
 
 	spin_lock_bh(&x->lock);
 	err = ah_mac_digest(ahp, skb, ah->auth_data);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 8e0f1428c71..0ec1402320e 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -188,7 +188,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	*skb_mac_header(skb) = IPPROTO_ESP;
 
 	esph->spi = x->id.spi;
-	esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq);
+	esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output);
 
 	sg_init_table(sg, nfrags);
 	skb_to_sgvec(skb, sg,
@@ -199,7 +199,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
 	aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
 	aead_givcrypt_set_assoc(req, asg, sizeof(*esph));
-	aead_givcrypt_set_giv(req, esph->enc_data, XFRM_SKB_CB(skb)->seq);
+	aead_givcrypt_set_giv(req, esph->enc_data,
+			      XFRM_SKB_CB(skb)->seq.output);
 
 	ESP_SKB_CB(skb)->tmp = tmp;
 	err = crypto_aead_givencrypt(req);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 4d6ebc633a9..62188c6a06d 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -109,7 +109,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 	if (encap_type < 0) {
 		async = 1;
 		x = xfrm_input_state(skb);
-		seq = XFRM_SKB_CB(skb)->seq;
+		seq = XFRM_SKB_CB(skb)->seq.input;
 		goto resume;
 	}
 
@@ -175,7 +175,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
 		spin_unlock(&x->lock);
 
-		XFRM_SKB_CB(skb)->seq = seq;
+		XFRM_SKB_CB(skb)->seq.input = seq;
 
 		nexthdr = x->type->input(x, skb);
 
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index fc690368325..569d377932c 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -62,7 +62,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 		}
 
 		if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
-			XFRM_SKB_CB(skb)->seq = ++x->replay.oseq;
+			XFRM_SKB_CB(skb)->seq.output = ++x->replay.oseq;
 			if (unlikely(x->replay.oseq == 0)) {
 				XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATESEQERROR);
 				x->replay.oseq--;
-- 
cgit v1.2.3


From d8b2a4d21e0b37b9669b202867bfef19f68f786a Mon Sep 17 00:00:00 2001
From: Matti Linnanvuori <mattilinnanvuori@yahoo.com>
Date: Tue, 12 Feb 2008 23:10:11 -0800
Subject: [NET]: Fix race in dev_close(). (Bug 9750)

There is a race in Linux kernel file net/core/dev.c, function dev_close.
The function calls function dev_deactivate, which calls function
dev_watchdog_down that deletes the watchdog timer. However, after that, a
driver can call netif_carrier_on, which calls function
__netdev_watchdog_up that can add the watchdog timer again. Function
unregister_netdevice calls function dev_shutdown that traps the bug
!timer_pending(&dev->watchdog_timer). Moving dev_deactivate after
netif_running() has been cleared prevents function netif_carrier_on
from calling __netdev_watchdog_up and adding the watchdog timer again.

Signed-off-by: Matti Linnanvuori <mattilinnanvuori@yahoo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 9549417250b..6cfc1238c4a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1071,8 +1071,6 @@ int dev_close(struct net_device *dev)
 	 */
 	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
 
-	dev_deactivate(dev);
-
 	clear_bit(__LINK_STATE_START, &dev->state);
 
 	/* Synchronize to scheduled poll. We cannot touch poll list,
@@ -1083,6 +1081,8 @@ int dev_close(struct net_device *dev)
 	 */
 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
 
+	dev_deactivate(dev);
+
 	/*
 	 *	Call the device specific close. This cannot fail.
 	 *	Only if device is UP
-- 
cgit v1.2.3


From 74da4d34e4a452c3f448fe659fa9f4ba1fbe507e Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 13 Feb 2008 17:39:34 -0800
Subject: [INET]: Unexport __inet_hash_connect

This patch removes the unused EXPORT_SYMBOL_GPL(__inet_hash_connect).

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_hashtables.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 9cac6c034ab..e6a007260ce 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -494,7 +494,6 @@ out:
 		return ret;
 	}
 }
-EXPORT_SYMBOL_GPL(__inet_hash_connect);
 
 /*
  * Bind a port for a connect operation and hash it.
-- 
cgit v1.2.3


From 324b57619bdd151abbab73a48707c17cfb0e9ba4 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 13 Feb 2008 17:40:25 -0800
Subject: [INET]: Unexport inet_listen_wlock

This patch removes the no longer used EXPORT_SYMBOL(inet_listen_wlock).

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_hashtables.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e6a007260ce..1aba606f6bb 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -120,8 +120,6 @@ void inet_listen_wlock(struct inet_hashinfo *hashinfo)
 	}
 }
 
-EXPORT_SYMBOL(inet_listen_wlock);
-
 /*
  * Don't inline this cruft. Here are some nice properties to exploit here. The
  * BSD API does not allow a listening sock to specify the remote port nor the
-- 
cgit v1.2.3


From f51f5ec6909fad9ddfcaa962377f7892d7918302 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 13 Feb 2008 17:41:39 -0800
Subject: [NETFILTER]: make secmark_tg_destroy() static

This patch makes the needlessly global secmark_tg_destroy() static.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_SECMARK.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 7708e2084ce..c0284856ccd 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -111,7 +111,7 @@ secmark_tg_check(const char *tablename, const void *entry,
 	return true;
 }
 
-void secmark_tg_destroy(const struct xt_target *target, void *targinfo)
+static void secmark_tg_destroy(const struct xt_target *target, void *targinfo)
 {
 	switch (mode) {
 	case SECMARK_MODE_SEL:
-- 
cgit v1.2.3


From 0f4bda005fd685f7cbb2ad47b7bab1b155df2b86 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 14 Feb 2008 14:48:45 -0800
Subject: net: xfrm statistics depend on INET

net/built-in.o: In function `xfrm_policy_init':
/home/pmundt/devel/git/sh-2.6.25/net/xfrm/xfrm_policy.c:2338: undefined reference to `snmp_mib_init'

snmp_mib_init() is only built in if CONFIG_INET is set.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 8f9dbec319b..9201ef8ad90 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -38,7 +38,7 @@ config XFRM_MIGRATE
 
 config XFRM_STATISTICS
 	bool "Transformation statistics (EXPERIMENTAL)"
-	depends on XFRM && PROC_FS && EXPERIMENTAL
+	depends on INET && XFRM && PROC_FS && EXPERIMENTAL
 	---help---
 	  This statistics is not a SNMP/MIB specification but shows
 	  statistics about transformation error (or almost error) factor
-- 
cgit v1.2.3


From d0c1fd7a8f4cadb95b093d2600ad627f432c5edb Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Thu, 14 Feb 2008 14:50:21 -0800
Subject: [NETFILTER] nf_conntrack_proto_tcp.c: Mistyped state corrected.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 202d7fa0948..62567959b66 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -945,7 +945,7 @@ static int tcp_packet(struct nf_conn *ct,
 
 	ct->proto.tcp.state = new_state;
 	if (old_state != new_state
-	    && new_state == TCP_CONNTRACK_CLOSE)
+	    && new_state == TCP_CONNTRACK_FIN_WAIT)
 		ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
 	timeout = ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans
 		  && tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans
-- 
cgit v1.2.3


From a4d6b8af1e92daa872f55d06415b76c35f44d8bd Mon Sep 17 00:00:00 2001
From: Kazunori MIYAZAWA <kazunori@miyazawa.org>
Date: Thu, 14 Feb 2008 14:51:38 -0800
Subject: [AF_KEY]: Fix bug in spdadd

This patch fixes a BUG when adding spds which have the same selector.

Signed-off-by: Kazunori MIYAZAWA <kazunori@miyazawa.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/key/af_key.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/key/af_key.c b/net/key/af_key.c
index b3ac85e808a..1c853927810 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2291,6 +2291,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 	return 0;
 
 out:
+	xp->dead = 1;
 	xfrm_policy_destroy(xp);
 	return err;
 }
-- 
cgit v1.2.3


From 073a371987f9a9806a85329eed51dca1fc52a7a0 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Thu, 14 Feb 2008 14:52:38 -0800
Subject: [XFRM]: Avoid bogus BUG() when throwing new policy away.

From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>

When we destroy a new policy entry, we need to tell
xfrm_policy_destroy() explicitly that the entry is not
alive yet.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_user.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 78338079b7f..f971ca5645f 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1105,6 +1105,7 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p,
 	return xp;
  error:
 	*errp = err;
+	xp->dead = 1;
 	xfrm_policy_destroy(xp);
 	return NULL;
 }
-- 
cgit v1.2.3


From b5c15fc004ac83b7ad280acbe0fd4bbed7e2c8d4 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 14 Feb 2008 23:49:37 -0800
Subject: [IPV6]: Fix reversed local_df test in ip6_fragment

I managed to reverse the local_df test when forward-porting this
patch so it actually makes things worse by never fragmenting at
all.

Thanks to David Stevens for testing and reporting this bug.

Bill Fink pointed out that the local_df setting is also the wrong
way around.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4e9a2fe2f12..8b67ca07467 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -621,7 +621,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	 * or if the skb it not generated by a local socket.  (This last
 	 * check should be redundant, but it's free.)
 	 */
-	if (skb->local_df) {
+	if (!skb->local_df) {
 		skb->dev = skb->dst->dev;
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
 		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
@@ -1421,7 +1421,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	}
 
 	/* Allow local fragmentation. */
-	if (np->pmtudisc >= IPV6_PMTUDISC_DO)
+	if (np->pmtudisc < IPV6_PMTUDISC_DO)
 		skb->local_df = 1;
 
 	ipv6_addr_copy(final_dst, &fl->fl6_dst);
-- 
cgit v1.2.3


From 997b37da1515c1620692521786a74af271664eb7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <rdenis@simphalempin.com>
Date: Fri, 15 Feb 2008 02:35:45 -0800
Subject: [NET]: Make sure sockets implement splice_read
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes a segmentation fault when trying to splice from a non-TCP socket.

Signed-off-by: Rémi Denis-Courmont <rdenis@simphalempin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index 7651de00850..b6d35cd72a5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -701,6 +701,9 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
 {
 	struct socket *sock = file->private_data;
 
+	if (unlikely(!sock->ops->splice_read))
+		return -EINVAL;
+
 	return sock->ops->splice_read(sock, ppos, pipe, len, flags);
 }
 
-- 
cgit v1.2.3