From e4f8b5d4edc1edb0709531bd1a342655d5e8b98e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 11 Feb 2008 17:50:30 -0800 Subject: [IPV4]: Remove IP_TOS setting privilege checks. Various RFCs have all sorts of things to say about the CS field of the DSCP value. In particular they try to make the distinction between values that should be used by "user applications" and things like routing daemons. This seems to have influenced the CAP_NET_ADMIN check which exists for IP_TOS socket option settings, but in fact it has an off-by-one error so it wasn't allowing CS5 which is meant for "user applications" as well. Further adding to the inconsistency and brokenness here, IPV6 does not validate the DSCP values specified for the IPV6_TCLASS socket option. The real actual uses of these TOS values are system specific in the final analysis, and these RFC recommendations are just that, "a recommendation". In fact the standards very purposefully use "SHOULD" and "SHOULD NOT" when describing how these values can be used. In the final analysis the only clean way to provide consistency here is to remove the CAP_NET_ADMIN check. The alternatives just don't work out: 1) If we add the CAP_NET_ADMIN check to ipv6, this can break existing setups. 2) If we just fix the off-by-one error in the class comparison in IPV4, certain DSCP values can be used in IPV6 but not IPV4 by default. So people will just ask for a sysctl asking to override that. I checked several other freely available kernel trees and they do not make any privilege checks in this area like we do. For the BSD stacks, this goes back all the way to Stevens Volume 2 and beyond. Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 754b0a5bbfe..de0572c8885 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -514,11 +514,6 @@ static int do_ip_setsockopt(struct sock *sk, int level, val &= ~3; val |= inet->tos & 3; } - if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP && - !capable(CAP_NET_ADMIN)) { - err = -EPERM; - break; - } if (inet->tos != val) { inet->tos = val; sk->sk_priority = rt_tos2priority(val); -- cgit v1.2.3 From ec28cf738d899e9d0652108e1986101771aacb2e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 11 Feb 2008 21:12:49 -0800 Subject: fib_trie: handle empty tree This fixes possible problems when trie_firstleaf() returns NULL to trie_leafindex(). Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index f5fba3f71c0..2d895274b7f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1762,11 +1762,9 @@ static struct leaf *trie_leafindex(struct trie *t, int index) { struct leaf *l = trie_firstleaf(t); - while (index-- > 0) { + while (l && index-- > 0) l = trie_nextleaf(l); - if (!l) - break; - } + return l; } -- cgit v1.2.3 From 8315f5d80a90247bf92232f92ca49933ac49327b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 11 Feb 2008 21:14:39 -0800 Subject: fib_trie: /proc/net/route performance improvement Use key/offset caching to change /proc/net/route (use by iputils route) from O(n^2) to O(n). This improves performance from 30sec with 160,000 routes to 1sec. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 82 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2d895274b7f..1ff446d0fa8 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2459,6 +2459,84 @@ static const struct file_operations fib_trie_fops = { .release = seq_release_net, }; +struct fib_route_iter { + struct seq_net_private p; + struct trie *main_trie; + loff_t pos; + t_key key; +}; + +static struct leaf *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) +{ + struct leaf *l = NULL; + struct trie *t = iter->main_trie; + + /* use cache location of last found key */ + if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key))) + pos -= iter->pos; + else { + iter->pos = 0; + l = trie_firstleaf(t); + } + + while (l && pos-- > 0) { + iter->pos++; + l = trie_nextleaf(l); + } + + if (l) + iter->key = pos; /* remember it */ + else + iter->pos = 0; /* forget it */ + + return l; +} + +static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) +{ + struct fib_route_iter *iter = seq->private; + struct fib_table *tb; + + rcu_read_lock(); + tb = fib_get_table(iter->p.net, RT_TABLE_MAIN); + if (!tb) + return NULL; + + iter->main_trie = (struct trie *) tb->tb_data; + if (*pos == 0) + return SEQ_START_TOKEN; + else + return fib_route_get_idx(iter, *pos - 1); +} + +static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct fib_route_iter *iter = seq->private; + struct leaf *l = v; + + ++*pos; + if (v == SEQ_START_TOKEN) { + iter->pos = 0; + l = trie_firstleaf(iter->main_trie); + } else { + iter->pos++; + l = trie_nextleaf(l); + } + + if (l) + iter->key = l->key; + else + iter->pos = 0; + return l; +} + +static void fib_route_seq_stop(struct seq_file *seq, void *v) + __releases(RCU) +{ + rcu_read_unlock(); +} + static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) { static unsigned type2flags[RTN_MAX + 1] = { @@ -2482,7 +2560,6 @@ static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) */ static int fib_route_seq_show(struct seq_file *seq, void *v) { - const struct fib_trie_iter *iter = seq->private; struct leaf *l = v; struct leaf_info *li; struct hlist_node *node; @@ -2494,12 +2571,6 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) return 0; } - if (iter->trie == iter->trie_local) - return 0; - - if (IS_TNODE(l)) - return 0; - hlist_for_each_entry_rcu(li, node, &l->list, hlist) { struct fib_alias *fa; __be32 mask, prefix; @@ -2542,16 +2613,16 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) } static const struct seq_operations fib_route_seq_ops = { - .start = fib_trie_seq_start, - .next = fib_trie_seq_next, - .stop = fib_trie_seq_stop, + .start = fib_route_seq_start, + .next = fib_route_seq_next, + .stop = fib_route_seq_stop, .show = fib_route_seq_show, }; static int fib_route_seq_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &fib_route_seq_ops, - sizeof(struct fib_trie_iter)); + sizeof(struct fib_route_iter)); } static const struct file_operations fib_route_fops = { -- cgit v1.2.3 From 1105b5d1d44e6f00e31422dfcb0139bc8ae966a9 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 11 Feb 2008 21:24:56 -0800 Subject: [AX25] af_ax25: remove sock lock in ax25_info_show() This lockdep warning: > ======================================================= > [ INFO: possible circular locking dependency detected ] > 2.6.24 #3 > ------------------------------------------------------- > swapper/0 is trying to acquire lock: > (ax25_list_lock){-+..}, at: [] ax25_destroy_socket+0x171/0x1f0 [ax25] > > but task is already holding lock: > (slock-AF_AX25){-+..}, at: [] ax25_std_heartbeat_expiry+0x1c/0xe0 [ax25] > > which lock already depends on the new lock. ... shows that ax25_list_lock and slock-AF_AX25 are taken in different order: ax25_info_show() takes slock (bh_lock_sock(ax25->sk)) while ax25_list_lock is held, so reversely to other functions. To fix this the sock lock should be moved to ax25_info_start(), and there would be still problem with breaking ax25_list_lock (it seems this "proper" order isn't optimal yet). But, since it's only for reading proc info it seems this is not necessary (e.g. ax25_send_to_raw() does similar reading without this lock too). So, this patch removes sock lock to avoid deadlock possibility; there is also used sock_i_ino() function, which reads sk_socket under proper read lock. Additionally printf format of this i_ino is changed to %lu. Reported-by: Bernard Pidoux F6BVP Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 8fc64e3150a..5a4337a2909 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1928,12 +1928,10 @@ static int ax25_info_show(struct seq_file *seq, void *v) ax25->paclen); if (ax25->sk != NULL) { - bh_lock_sock(ax25->sk); - seq_printf(seq," %d %d %ld\n", + seq_printf(seq, " %d %d %lu\n", atomic_read(&ax25->sk->sk_wmem_alloc), atomic_read(&ax25->sk->sk_rmem_alloc), - ax25->sk->sk_socket != NULL ? SOCK_INODE(ax25->sk->sk_socket)->i_ino : 0L); - bh_unlock_sock(ax25->sk); + sock_i_ino(ax25->sk)); } else { seq_puts(seq, " * * *\n"); } -- cgit v1.2.3 From 4de211f1a279275c6c67d6e9b6b25513e46b0bb9 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 11 Feb 2008 21:26:43 -0800 Subject: [AX25] ax25_route: make ax25_route_lock BH safe > ================================= > [ INFO: inconsistent lock state ] > 2.6.24-dg8ngn-p02 #1 > --------------------------------- > inconsistent {softirq-on-W} -> {in-softirq-R} usage. > linuxnet/3046 [HC0[0]:SC1[2]:HE1:SE0] takes: > (ax25_route_lock){--.+}, at: [] ax25_get_route+0x18/0xb7 [ax25] > {softirq-on-W} state was registered at: ... This lockdep report shows that ax25_route_lock is taken for reading in softirq context, and for writing in process context with BHs enabled. So, to make this safe, all write_locks in ax25_route.c are changed to _bh versions. Reported-by: Jann Traschewski , Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/ax25/ax25_route.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 38c7f3087ec..8672cd84fdf 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -45,7 +45,7 @@ void ax25_rt_device_down(struct net_device *dev) { ax25_route *s, *t, *ax25_rt; - write_lock(&ax25_route_lock); + write_lock_bh(&ax25_route_lock); ax25_rt = ax25_route_list; while (ax25_rt != NULL) { s = ax25_rt; @@ -68,7 +68,7 @@ void ax25_rt_device_down(struct net_device *dev) } } } - write_unlock(&ax25_route_lock); + write_unlock_bh(&ax25_route_lock); } static int __must_check ax25_rt_add(struct ax25_routes_struct *route) @@ -82,7 +82,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if (route->digi_count > AX25_MAX_DIGIS) return -EINVAL; - write_lock(&ax25_route_lock); + write_lock_bh(&ax25_route_lock); ax25_rt = ax25_route_list; while (ax25_rt != NULL) { @@ -92,7 +92,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_rt->digipeat = NULL; if (route->digi_count != 0) { if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { - write_unlock(&ax25_route_lock); + write_unlock_bh(&ax25_route_lock); return -ENOMEM; } ax25_rt->digipeat->lastrepeat = -1; @@ -102,14 +102,14 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_rt->digipeat->calls[i] = route->digi_addr[i]; } } - write_unlock(&ax25_route_lock); + write_unlock_bh(&ax25_route_lock); return 0; } ax25_rt = ax25_rt->next; } if ((ax25_rt = kmalloc(sizeof(ax25_route), GFP_ATOMIC)) == NULL) { - write_unlock(&ax25_route_lock); + write_unlock_bh(&ax25_route_lock); return -ENOMEM; } @@ -120,7 +120,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_rt->ip_mode = ' '; if (route->digi_count != 0) { if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { - write_unlock(&ax25_route_lock); + write_unlock_bh(&ax25_route_lock); kfree(ax25_rt); return -ENOMEM; } @@ -133,7 +133,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) } ax25_rt->next = ax25_route_list; ax25_route_list = ax25_rt; - write_unlock(&ax25_route_lock); + write_unlock_bh(&ax25_route_lock); return 0; } @@ -152,7 +152,7 @@ static int ax25_rt_del(struct ax25_routes_struct *route) if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL) return -EINVAL; - write_lock(&ax25_route_lock); + write_lock_bh(&ax25_route_lock); ax25_rt = ax25_route_list; while (ax25_rt != NULL) { @@ -174,7 +174,7 @@ static int ax25_rt_del(struct ax25_routes_struct *route) } } } - write_unlock(&ax25_route_lock); + write_unlock_bh(&ax25_route_lock); return 0; } @@ -188,7 +188,7 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option) if ((ax25_dev = ax25_addr_ax25dev(&rt_option->port_addr)) == NULL) return -EINVAL; - write_lock(&ax25_route_lock); + write_lock_bh(&ax25_route_lock); ax25_rt = ax25_route_list; while (ax25_rt != NULL) { @@ -216,7 +216,7 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option) } out: - write_unlock(&ax25_route_lock); + write_unlock_bh(&ax25_route_lock); return err; } @@ -492,7 +492,7 @@ void __exit ax25_rt_free(void) { ax25_route *s, *ax25_rt = ax25_route_list; - write_lock(&ax25_route_lock); + write_lock_bh(&ax25_route_lock); while (ax25_rt != NULL) { s = ax25_rt; ax25_rt = ax25_rt->next; @@ -500,5 +500,5 @@ void __exit ax25_rt_free(void) kfree(s->digipeat); kfree(s); } - write_unlock(&ax25_route_lock); + write_unlock_bh(&ax25_route_lock); } -- cgit v1.2.3 From 21fab4a86a411c18c6b4d663ae710ca1f6206b3c Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 11 Feb 2008 21:36:39 -0800 Subject: [AX25] ax25_timer: use mod_timer instead of add_timer According to one of Jann's OOPS reports it looks like BUG_ON(timer_pending(timer)) triggers during add_timer() in ax25_start_t1timer(). This patch changes current use of: init_timer(), add_timer() and del_timer() to setup_timer() with mod_timer(), which should be safer anyway. Reported-by: Jann Traschewski Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 6 +----- net/ax25/ax25_timer.c | 60 ++++++++++++++++++--------------------------------- 2 files changed, 22 insertions(+), 44 deletions(-) (limited to 'net') diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 5a4337a2909..48bfcc741f2 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -510,11 +510,7 @@ ax25_cb *ax25_create_cb(void) skb_queue_head_init(&ax25->ack_queue); skb_queue_head_init(&ax25->reseq_queue); - init_timer(&ax25->timer); - init_timer(&ax25->t1timer); - init_timer(&ax25->t2timer); - init_timer(&ax25->t3timer); - init_timer(&ax25->idletimer); + ax25_setup_timers(ax25); ax25_fillin_cb(ax25, NULL); diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c index 72594867fab..db29ea71e80 100644 --- a/net/ax25/ax25_timer.c +++ b/net/ax25/ax25_timer.c @@ -40,63 +40,45 @@ static void ax25_t2timer_expiry(unsigned long); static void ax25_t3timer_expiry(unsigned long); static void ax25_idletimer_expiry(unsigned long); -void ax25_start_heartbeat(ax25_cb *ax25) +void ax25_setup_timers(ax25_cb *ax25) { - del_timer(&ax25->timer); - - ax25->timer.data = (unsigned long)ax25; - ax25->timer.function = &ax25_heartbeat_expiry; - ax25->timer.expires = jiffies + 5 * HZ; + setup_timer(&ax25->timer, ax25_heartbeat_expiry, (unsigned long)ax25); + setup_timer(&ax25->t1timer, ax25_t1timer_expiry, (unsigned long)ax25); + setup_timer(&ax25->t2timer, ax25_t2timer_expiry, (unsigned long)ax25); + setup_timer(&ax25->t3timer, ax25_t3timer_expiry, (unsigned long)ax25); + setup_timer(&ax25->idletimer, ax25_idletimer_expiry, + (unsigned long)ax25); +} - add_timer(&ax25->timer); +void ax25_start_heartbeat(ax25_cb *ax25) +{ + mod_timer(&ax25->timer, jiffies + 5 * HZ); } void ax25_start_t1timer(ax25_cb *ax25) { - del_timer(&ax25->t1timer); - - ax25->t1timer.data = (unsigned long)ax25; - ax25->t1timer.function = &ax25_t1timer_expiry; - ax25->t1timer.expires = jiffies + ax25->t1; - - add_timer(&ax25->t1timer); + mod_timer(&ax25->t1timer, jiffies + ax25->t1); } void ax25_start_t2timer(ax25_cb *ax25) { - del_timer(&ax25->t2timer); - - ax25->t2timer.data = (unsigned long)ax25; - ax25->t2timer.function = &ax25_t2timer_expiry; - ax25->t2timer.expires = jiffies + ax25->t2; - - add_timer(&ax25->t2timer); + mod_timer(&ax25->t2timer, jiffies + ax25->t2); } void ax25_start_t3timer(ax25_cb *ax25) { - del_timer(&ax25->t3timer); - - if (ax25->t3 > 0) { - ax25->t3timer.data = (unsigned long)ax25; - ax25->t3timer.function = &ax25_t3timer_expiry; - ax25->t3timer.expires = jiffies + ax25->t3; - - add_timer(&ax25->t3timer); - } + if (ax25->t3 > 0) + mod_timer(&ax25->t3timer, jiffies + ax25->t3); + else + del_timer(&ax25->t3timer); } void ax25_start_idletimer(ax25_cb *ax25) { - del_timer(&ax25->idletimer); - - if (ax25->idle > 0) { - ax25->idletimer.data = (unsigned long)ax25; - ax25->idletimer.function = &ax25_idletimer_expiry; - ax25->idletimer.expires = jiffies + ax25->idle; - - add_timer(&ax25->idletimer); - } + if (ax25->idle > 0) + mod_timer(&ax25->idletimer, jiffies + ax25->idle); + else + del_timer(&ax25->idletimer); } void ax25_stop_heartbeat(ax25_cb *ax25) -- cgit v1.2.3 From e848b583e03306f5f9b3a66a793c37e3649e04ca Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 11 Feb 2008 21:38:32 -0800 Subject: [AX25] ax25_ds_timer: use mod_timer instead of add_timer This patch changes current use of: init_timer(), add_timer() and del_timer() to setup_timer() with mod_timer(), which should be safer anyway. Reported-by: Jann Traschewski Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/ax25/ax25_dev.c | 2 +- net/ax25/ax25_ds_timer.c | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 528c874d982..a7a0e0c9698 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -82,7 +82,7 @@ void ax25_dev_device_up(struct net_device *dev) ax25_dev->values[AX25_VALUES_DS_TIMEOUT]= AX25_DEF_DS_TIMEOUT; #if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER) - init_timer(&ax25_dev->dama.slave_timer); + ax25_ds_setup_timer(ax25_dev); #endif spin_lock_bh(&ax25_dev_lock); diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index c4e3b025d21..2ce79df0068 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -40,13 +40,10 @@ static void ax25_ds_timeout(unsigned long); * 1/10th of a second. */ -static void ax25_ds_add_timer(ax25_dev *ax25_dev) +void ax25_ds_setup_timer(ax25_dev *ax25_dev) { - struct timer_list *t = &ax25_dev->dama.slave_timer; - t->data = (unsigned long) ax25_dev; - t->function = &ax25_ds_timeout; - t->expires = jiffies + HZ; - add_timer(t); + setup_timer(&ax25_dev->dama.slave_timer, ax25_ds_timeout, + (unsigned long)ax25_dev); } void ax25_ds_del_timer(ax25_dev *ax25_dev) @@ -60,10 +57,9 @@ void ax25_ds_set_timer(ax25_dev *ax25_dev) if (ax25_dev == NULL) /* paranoia */ return; - del_timer(&ax25_dev->dama.slave_timer); ax25_dev->dama.slave_timeout = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_DS_TIMEOUT]) / 10; - ax25_ds_add_timer(ax25_dev); + mod_timer(&ax25_dev->dama.slave_timer, jiffies + HZ); } /* -- cgit v1.2.3 From 69cc64d8d92bf852f933e90c888dfff083bd4fc9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 11 Feb 2008 21:45:44 -0800 Subject: [NDISC]: Fix race in generic address resolution Frank Blaschka provided the bug report and the initial suggested fix for this bug. He also validated this version of this fix. The problem is that the access to neigh->arp_queue is inconsistent, we grab references when dropping the lock lock to call neigh->ops->solicit() but this does not prevent other threads of control from trying to send out that packet at the same time causing corruptions because both code paths believe they have exclusive access to the skb. The best option seems to be to hold the write lock on neigh->lock during the ->solicit() call. I looked at all of the ndisc_ops implementations and this seems workable. The only case that needs special care is the IPV4 ARP implementation of arp_solicit(). It wants to take neigh->lock as a reader to protect the header entry in neigh->ha during the emission of the soliciation. We can simply remove the read lock calls to take care of that since holding the lock as a writer at the caller providers a superset of the protection afforded by the existing read locking. The rest of the ->solicit() implementations don't care whether the neigh is locked or not. Signed-off-by: David S. Miller --- net/core/neighbour.c | 12 +++--------- net/ipv4/arp.c | 3 --- 2 files changed, 3 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a16cf1ec5e5..7bb6a9a1256 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -834,18 +834,12 @@ static void neigh_timer_handler(unsigned long arg) } if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { struct sk_buff *skb = skb_peek(&neigh->arp_queue); - /* keep skb alive even if arp_queue overflows */ - if (skb) - skb_get(skb); - write_unlock(&neigh->lock); + neigh->ops->solicit(neigh, skb); atomic_inc(&neigh->probes); - if (skb) - kfree_skb(skb); - } else { -out: - write_unlock(&neigh->lock); } +out: + write_unlock(&neigh->lock); if (notify) neigh_update_notify(neigh); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 8e17f65f400..c663fa5339e 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -368,7 +368,6 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) if (!(neigh->nud_state&NUD_VALID)) printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n"); dst_ha = neigh->ha; - read_lock_bh(&neigh->lock); } else if ((probes -= neigh->parms->app_probes) < 0) { #ifdef CONFIG_ARPD neigh_app_ns(neigh); @@ -378,8 +377,6 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, dst_ha, dev->dev_addr, NULL); - if (dst_ha) - read_unlock_bh(&neigh->lock); } static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) -- cgit v1.2.3 From 28a89453b1e8de8d777ad96fa1eef27b5d1ce074 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Feb 2008 18:07:27 -0800 Subject: [IPV6]: Fix IPsec datagram fragmentation This is a long-standing bug in the IPsec IPv6 code that breaks when we emit a IPsec tunnel-mode datagram packet. The problem is that the code the emits the packet assumes the IPv6 stack will fragment it later, but the IPv6 stack assumes that whoever is emitting the packet is going to pre-fragment the packet. In the long term we need to fix both sides, e.g., to get the datagram code to pre-fragment as well as to get the IPv6 stack to fragment locally generated tunnel-mode packet. For now this patch does the second part which should make it work for the IPsec host case. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 6 +++++- net/ipv6/xfrm6_output.c | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 9ac6ca2521c..4e9a2fe2f12 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -621,7 +621,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) * or if the skb it not generated by a local socket. (This last * check should be redundant, but it's free.) */ - if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) { + if (skb->local_df) { skb->dev = skb->dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); @@ -1420,6 +1420,10 @@ int ip6_push_pending_frames(struct sock *sk) tmp_skb->sk = NULL; } + /* Allow local fragmentation. */ + if (np->pmtudisc >= IPV6_PMTUDISC_DO) + skb->local_df = 1; + ipv6_addr_copy(final_dst, &fl->fl6_dst); __skb_pull(skb, skb_network_header_len(skb)); if (opt && opt->opt_flen) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index b34c58c6565..79ccfb08073 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -36,7 +36,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (skb->len > mtu) { + if (!skb->local_df && skb->len > mtu) { skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); ret = -EMSGSIZE; -- cgit v1.2.3 From fee54fa517bef1de2c10a1a3e918228cc59dce90 Mon Sep 17 00:00:00 2001 From: Urs Thuermann Date: Tue, 12 Feb 2008 22:03:25 -0800 Subject: [NET]: Fix comment for skb_pull_rcsum Fix comment for skb_pull_rcsum Signed-off-by: Urs Thuermann Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4e354221ec2..40dddcc6dc3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2106,11 +2106,10 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, /** * skb_pull_rcsum - pull skb and update receive checksum * @skb: buffer to update - * @start: start of data before pull * @len: length of data pulled * * This function performs an skb_pull on the packet and updates - * update the CHECKSUM_COMPLETE checksum. It should be used on + * the CHECKSUM_COMPLETE checksum. It should be used on * receive path processing instead of skb_pull unless you know * that the checksum difference is zero (e.g., a valid IP header) * or you are setting ip_summed to CHECKSUM_NONE. -- cgit v1.2.3 From 4c3a0a254e5d706d3fe01bf42261534858d05586 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 12 Feb 2008 22:15:14 -0800 Subject: [NETLABEL]: Fix lookup logic of netlbl_domhsh_search_def. Currently, if the call to netlbl_domhsh_search succeeds the return result will still be NULL. Fix that, by returning the found entry (if any). Signed-off-by: Pavel Emelyanov Acked-by: Paul Moore Signed-off-by: David S. Miller --- net/netlabel/netlabel_domainhash.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 9a8ea0195c4..fd462313471 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -150,11 +150,11 @@ static struct netlbl_dom_map *netlbl_domhsh_search_def(const char *domain) entry = netlbl_domhsh_search(domain); if (entry == NULL) { entry = rcu_dereference(netlbl_domhsh_def); - if (entry != NULL && entry->valid) - return entry; + if (entry != NULL && !entry->valid) + entry = NULL; } - return NULL; + return entry; } /* -- cgit v1.2.3 From 910d6c320cac65c81d66e8fd30dca167092722eb Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 12 Feb 2008 22:16:33 -0800 Subject: [GENETLINK]: Relax dances with genl_lock. The genl_unregister_family() calls the genl_unregister_mc_groups(), which takes and releases the genl_lock and then locks and releases this lock itself. Relax this behavior, all the more so the genl_unregister_mc_groups() is called from genl_unregister_family() only. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 150579a2146..d16929c9b4b 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -230,10 +230,8 @@ static void genl_unregister_mc_groups(struct genl_family *family) { struct genl_multicast_group *grp, *tmp; - genl_lock(); list_for_each_entry_safe(grp, tmp, &family->mcast_groups, list) __genl_unregister_mc_group(family, grp); - genl_unlock(); } /** @@ -396,10 +394,10 @@ int genl_unregister_family(struct genl_family *family) { struct genl_family *rc; - genl_unregister_mc_groups(family); - genl_lock(); + genl_unregister_mc_groups(family); + list_for_each_entry(rc, genl_family_chain(family->id), family_list) { if (family->id != rc->id || strcmp(rc->name, family->name)) continue; -- cgit v1.2.3 From 94de7feb2dee6d0039ecbe98ae8b63bbb63808b6 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 12 Feb 2008 22:35:37 -0800 Subject: [NETLABEL]: Compilation for CONFIG_AUDIT=n case. The audit_log_start() will expand into an empty do { } while (0) construction and the audit_ctx becomes unused. The solution: push current->audit_context into audit_log_start() directly, since it is not required in any other place in the calling function. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/netlabel/netlabel_user.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index 85a96a3fdda..023fc8fe840 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -96,7 +96,6 @@ int netlbl_netlink_init(void) struct audit_buffer *netlbl_audit_start_common(int type, struct netlbl_audit *audit_info) { - struct audit_context *audit_ctx = current->audit_context; struct audit_buffer *audit_buf; char *secctx; u32 secctx_len; @@ -104,7 +103,7 @@ struct audit_buffer *netlbl_audit_start_common(int type, if (audit_enabled == 0) return NULL; - audit_buf = audit_log_start(audit_ctx, GFP_ATOMIC, type); + audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC, type); if (audit_buf == NULL) return NULL; -- cgit v1.2.3 From 56628b1d8964eb7ac924154d60b5d874bfb2b1e8 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 12 Feb 2008 22:37:19 -0800 Subject: [NETLABEL]: Don't produce unused variables when IPv6 is off. Some code declares variables on the stack, but uses them under #ifdef CONFIG_IPV6, so thay become unused when ipv6 is off. Fortunately, they are used in a switch's case branches, so the fix is rather simple. Is it OK from coding style POV to add braces inside "cases", or should I better avoid such style and rework the patch? Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/netlabel/netlabel_unlabeled.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 42e81fd8cc4..3587874d64e 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -617,8 +617,6 @@ static int netlbl_unlhsh_add(struct net *net, int ifindex; struct net_device *dev; struct netlbl_unlhsh_iface *iface; - struct in_addr *addr4, *mask4; - struct in6_addr *addr6, *mask6; struct audit_buffer *audit_buf = NULL; char *secctx = NULL; u32 secctx_len; @@ -651,7 +649,9 @@ static int netlbl_unlhsh_add(struct net *net, audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCADD, audit_info); switch (addr_len) { - case sizeof(struct in_addr): + case sizeof(struct in_addr): { + struct in_addr *addr4, *mask4; + addr4 = (struct in_addr *)addr; mask4 = (struct in_addr *)mask; ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid); @@ -661,8 +661,11 @@ static int netlbl_unlhsh_add(struct net *net, addr4->s_addr, mask4->s_addr); break; + } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case sizeof(struct in6_addr): + case sizeof(struct in6_addr): { + struct in6_addr *addr6, *mask6; + addr6 = (struct in6_addr *)addr; mask6 = (struct in6_addr *)mask; ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid); @@ -671,6 +674,7 @@ static int netlbl_unlhsh_add(struct net *net, dev_name, addr6, mask6); break; + } #endif /* IPv6 */ default: ret_val = -EINVAL; @@ -1741,10 +1745,6 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr) { - struct iphdr *hdr4; - struct ipv6hdr *hdr6; - struct netlbl_unlhsh_addr4 *addr4; - struct netlbl_unlhsh_addr6 *addr6; struct netlbl_unlhsh_iface *iface; rcu_read_lock(); @@ -1752,21 +1752,29 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb, if (iface == NULL) goto unlabel_getattr_nolabel; switch (family) { - case PF_INET: + case PF_INET: { + struct iphdr *hdr4; + struct netlbl_unlhsh_addr4 *addr4; + hdr4 = ip_hdr(skb); addr4 = netlbl_unlhsh_search_addr4(hdr4->saddr, iface); if (addr4 == NULL) goto unlabel_getattr_nolabel; secattr->attr.secid = addr4->secid; break; + } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case PF_INET6: + case PF_INET6: { + struct ipv6hdr *hdr6; + struct netlbl_unlhsh_addr6 *addr6; + hdr6 = ipv6_hdr(skb); addr6 = netlbl_unlhsh_search_addr6(&hdr6->saddr, iface); if (addr6 == NULL) goto unlabel_getattr_nolabel; secattr->attr.secid = addr6->secid; break; + } #endif /* IPv6 */ default: goto unlabel_getattr_nolabel; -- cgit v1.2.3 From 370125f0a48a2584a2506fd567d690df6d87cf2c Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 12 Feb 2008 22:38:06 -0800 Subject: [NETLABLE]: Hide netlbl_unlabel_audit_addr6 under ifdef CONFIG_IPV6. This one is called from under this config only, so move it in the same place. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/netlabel/netlabel_unlabeled.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 3587874d64e..3e745b72fde 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -180,6 +180,7 @@ static void netlbl_unlabel_audit_addr4(struct audit_buffer *audit_buf, } } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) /** * netlbl_unlabel_audit_addr6 - Audit an IPv6 address * @audit_buf: audit buffer @@ -213,6 +214,7 @@ static void netlbl_unlabel_audit_addr6(struct audit_buffer *audit_buf, audit_log_format(audit_buf, " src_prefixlen=%d", mask_len); } } +#endif /* IPv6 */ /* * Unlabeled Connection Hash Table Functions -- cgit v1.2.3 From 45b503548210fe6f23e92b856421c2a3f05fd034 Mon Sep 17 00:00:00 2001 From: Laszlo Attila Toth Date: Tue, 12 Feb 2008 22:42:09 -0800 Subject: [RTNETLINK]: Send a single notification on device state changes. In do_setlink() a single notification is sent at the end of the function if any modification occured. If the address has been changed, another notification is sent. Both of them is required because originally only the NETDEV_CHANGEADDR notification was sent and although device state change implies address change, some programs may expect the original notification. It remains for compatibity. If set_operstate() is called from do_setlink(), it doesn't send a notification, only if it is called from rtnl_create_link() as earlier. Signed-off-by: Laszlo Attila Toth Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 61ac8d06292..ecb02afd52d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -504,7 +504,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); -static void set_operstate(struct net_device *dev, unsigned char transition) +static int set_operstate(struct net_device *dev, unsigned char transition, bool send_notification) { unsigned char operstate = dev->operstate; @@ -527,8 +527,12 @@ static void set_operstate(struct net_device *dev, unsigned char transition) write_lock_bh(&dev_base_lock); dev->operstate = operstate; write_unlock_bh(&dev_base_lock); - netdev_state_change(dev); - } + + if (send_notification) + netdev_state_change(dev); + return 1; + } else + return 0; } static void copy_rtnl_link_stats(struct rtnl_link_stats *a, @@ -822,6 +826,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, if (tb[IFLA_BROADCAST]) { nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len); send_addr_notify = 1; + modified = 1; } if (ifm->ifi_flags || ifm->ifi_change) { @@ -834,16 +839,23 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, dev_change_flags(dev, flags); } - if (tb[IFLA_TXQLEN]) - dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); + if (tb[IFLA_TXQLEN]) { + if (dev->tx_queue_len != nla_get_u32(tb[IFLA_TXQLEN])) { + dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); + modified = 1; + } + } if (tb[IFLA_OPERSTATE]) - set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); + modified |= set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]), false); if (tb[IFLA_LINKMODE]) { - write_lock_bh(&dev_base_lock); - dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); - write_unlock_bh(&dev_base_lock); + if (dev->link_mode != nla_get_u8(tb[IFLA_LINKMODE])) { + write_lock_bh(&dev_base_lock); + dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + write_lock_bh(&dev_base_lock); + modified = 1; + } } err = 0; @@ -857,6 +869,10 @@ errout: if (send_addr_notify) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + + if (modified) + netdev_state_change(dev); + return err; } @@ -974,7 +990,7 @@ struct net_device *rtnl_create_link(struct net *net, char *ifname, if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); if (tb[IFLA_OPERSTATE]) - set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); + set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]), true); if (tb[IFLA_LINKMODE]) dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); -- cgit v1.2.3 From b318e0e4ef4e85812c25afa19f75addccc834cd4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Feb 2008 22:50:35 -0800 Subject: [IPSEC]: Fix bogus usage of u64 on input sequence number Al Viro spotted a bogus use of u64 on the input sequence number which is big-endian. This patch fixes it by giving the input sequence number its own member in the xfrm_skb_cb structure. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/ah4.c | 2 +- net/ipv4/esp4.c | 5 +++-- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 5 +++-- net/xfrm/xfrm_input.c | 4 ++-- net/xfrm/xfrm_output.c | 2 +- 6 files changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 9d4555ec0b5..8219b7e0968 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -96,7 +96,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) ah->reserved = 0; ah->spi = x->id.spi; - ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq); + ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); spin_lock_bh(&x->lock); err = ah_mac_digest(ahp, skb, ah->auth_data); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 258d17631b4..091e6709f83 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -199,7 +199,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) } esph->spi = x->id.spi; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq); + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, @@ -210,7 +210,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) aead_givcrypt_set_callback(req, 0, esp_output_done, skb); aead_givcrypt_set_crypt(req, sg, sg, clen, iv); aead_givcrypt_set_assoc(req, asg, sizeof(*esph)); - aead_givcrypt_set_giv(req, esph->enc_data, XFRM_SKB_CB(skb)->seq); + aead_givcrypt_set_giv(req, esph->enc_data, + XFRM_SKB_CB(skb)->seq.output); ESP_SKB_CB(skb)->tmp = tmp; err = crypto_aead_givencrypt(req); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 379c8e04c36..2ff0c8233e4 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -283,7 +283,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) ah->reserved = 0; ah->spi = x->id.spi; - ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq); + ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); spin_lock_bh(&x->lock); err = ah_mac_digest(ahp, skb, ah->auth_data); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 8e0f1428c71..0ec1402320e 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -188,7 +188,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) *skb_mac_header(skb) = IPPROTO_ESP; esph->spi = x->id.spi; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq); + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, @@ -199,7 +199,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) aead_givcrypt_set_callback(req, 0, esp_output_done, skb); aead_givcrypt_set_crypt(req, sg, sg, clen, iv); aead_givcrypt_set_assoc(req, asg, sizeof(*esph)); - aead_givcrypt_set_giv(req, esph->enc_data, XFRM_SKB_CB(skb)->seq); + aead_givcrypt_set_giv(req, esph->enc_data, + XFRM_SKB_CB(skb)->seq.output); ESP_SKB_CB(skb)->tmp = tmp; err = crypto_aead_givencrypt(req); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 4d6ebc633a9..62188c6a06d 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -109,7 +109,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) if (encap_type < 0) { async = 1; x = xfrm_input_state(skb); - seq = XFRM_SKB_CB(skb)->seq; + seq = XFRM_SKB_CB(skb)->seq.input; goto resume; } @@ -175,7 +175,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) spin_unlock(&x->lock); - XFRM_SKB_CB(skb)->seq = seq; + XFRM_SKB_CB(skb)->seq.input = seq; nexthdr = x->type->input(x, skb); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index fc690368325..569d377932c 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -62,7 +62,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) } if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { - XFRM_SKB_CB(skb)->seq = ++x->replay.oseq; + XFRM_SKB_CB(skb)->seq.output = ++x->replay.oseq; if (unlikely(x->replay.oseq == 0)) { XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATESEQERROR); x->replay.oseq--; -- cgit v1.2.3 From d8b2a4d21e0b37b9669b202867bfef19f68f786a Mon Sep 17 00:00:00 2001 From: Matti Linnanvuori Date: Tue, 12 Feb 2008 23:10:11 -0800 Subject: [NET]: Fix race in dev_close(). (Bug 9750) There is a race in Linux kernel file net/core/dev.c, function dev_close. The function calls function dev_deactivate, which calls function dev_watchdog_down that deletes the watchdog timer. However, after that, a driver can call netif_carrier_ok, which calls function __netdev_watchdog_up that can add the watchdog timer again. Function unregister_netdevice calls function dev_shutdown that traps the bug !timer_pending(&dev->watchdog_timer). Moving dev_deactivate after netif_running() has been cleared prevents function netif_carrier_on from calling __netdev_watchdog_up and adding the watchdog timer again. Signed-off-by: Matti Linnanvuori Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 9549417250b..6cfc1238c4a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1071,8 +1071,6 @@ int dev_close(struct net_device *dev) */ call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); - dev_deactivate(dev); - clear_bit(__LINK_STATE_START, &dev->state); /* Synchronize to scheduled poll. We cannot touch poll list, @@ -1083,6 +1081,8 @@ int dev_close(struct net_device *dev) */ smp_mb__after_clear_bit(); /* Commit netif_running(). */ + dev_deactivate(dev); + /* * Call the device specific close. This cannot fail. * Only if device is UP -- cgit v1.2.3 From 74da4d34e4a452c3f448fe659fa9f4ba1fbe507e Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 13 Feb 2008 17:39:34 -0800 Subject: [INET]: Unexport __inet_hash_connect This patch removes the unused EXPORT_SYMBOL_GPL(__inet_hash_connect). Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 9cac6c034ab..e6a007260ce 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -494,7 +494,6 @@ out: return ret; } } -EXPORT_SYMBOL_GPL(__inet_hash_connect); /* * Bind a port for a connect operation and hash it. -- cgit v1.2.3 From 324b57619bdd151abbab73a48707c17cfb0e9ba4 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 13 Feb 2008 17:40:25 -0800 Subject: [INET]: Unexport inet_listen_wlock This patch removes the no longer used EXPORT_SYMBOL(inet_listen_wlock). Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index e6a007260ce..1aba606f6bb 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -120,8 +120,6 @@ void inet_listen_wlock(struct inet_hashinfo *hashinfo) } } -EXPORT_SYMBOL(inet_listen_wlock); - /* * Don't inline this cruft. Here are some nice properties to exploit here. The * BSD API does not allow a listening sock to specify the remote port nor the -- cgit v1.2.3 From f51f5ec6909fad9ddfcaa962377f7892d7918302 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 13 Feb 2008 17:41:39 -0800 Subject: [NETFILTER]: make secmark_tg_destroy() static This patch makes the needlessly global secmark_tg_destroy() static. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- net/netfilter/xt_SECMARK.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 7708e2084ce..c0284856ccd 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -111,7 +111,7 @@ secmark_tg_check(const char *tablename, const void *entry, return true; } -void secmark_tg_destroy(const struct xt_target *target, void *targinfo) +static void secmark_tg_destroy(const struct xt_target *target, void *targinfo) { switch (mode) { case SECMARK_MODE_SEL: -- cgit v1.2.3 From 0f4bda005fd685f7cbb2ad47b7bab1b155df2b86 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 14 Feb 2008 14:48:45 -0800 Subject: net: xfrm statistics depend on INET net/built-in.o: In function `xfrm_policy_init': /home/pmundt/devel/git/sh-2.6.25/net/xfrm/xfrm_policy.c:2338: undefined reference to `snmp_mib_init' snmp_mib_init() is only built in if CONFIG_INET is set. Signed-off-by: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/xfrm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 8f9dbec319b..9201ef8ad90 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -38,7 +38,7 @@ config XFRM_MIGRATE config XFRM_STATISTICS bool "Transformation statistics (EXPERIMENTAL)" - depends on XFRM && PROC_FS && EXPERIMENTAL + depends on INET && XFRM && PROC_FS && EXPERIMENTAL ---help--- This statistics is not a SNMP/MIB specification but shows statistics about transformation error (or almost error) factor -- cgit v1.2.3 From d0c1fd7a8f4cadb95b093d2600ad627f432c5edb Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 14 Feb 2008 14:50:21 -0800 Subject: [NETFILTER] nf_conntrack_proto_tcp.c: Mistyped state corrected. Signed-off-by: Jozsef Kadlecsik Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_proto_tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 202d7fa0948..62567959b66 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -945,7 +945,7 @@ static int tcp_packet(struct nf_conn *ct, ct->proto.tcp.state = new_state; if (old_state != new_state - && new_state == TCP_CONNTRACK_CLOSE) + && new_state == TCP_CONNTRACK_FIN_WAIT) ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; timeout = ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans && tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans -- cgit v1.2.3 From a4d6b8af1e92daa872f55d06415b76c35f44d8bd Mon Sep 17 00:00:00 2001 From: Kazunori MIYAZAWA Date: Thu, 14 Feb 2008 14:51:38 -0800 Subject: [AF_KEY]: Fix bug in spdadd This patch fix a BUG when adding spds which have same selector. Signed-off-by: Kazunori MIYAZAWA Signed-off-by: David S. Miller --- net/key/af_key.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index b3ac85e808a..1c853927810 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2291,6 +2291,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h return 0; out: + xp->dead = 1; xfrm_policy_destroy(xp); return err; } -- cgit v1.2.3 From 073a371987f9a9806a85329eed51dca1fc52a7a0 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 14 Feb 2008 14:52:38 -0800 Subject: [XFRM]: Avoid bogus BUG() when throwing new policy away. From: YOSHIFUJI Hideaki When we destory a new policy entry, we need to tell xfrm_policy_destroy() explicitly that the entry is not alive yet. Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 78338079b7f..f971ca5645f 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1105,6 +1105,7 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, return xp; error: *errp = err; + xp->dead = 1; xfrm_policy_destroy(xp); return NULL; } -- cgit v1.2.3 From b5c15fc004ac83b7ad280acbe0fd4bbed7e2c8d4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 14 Feb 2008 23:49:37 -0800 Subject: [IPV6]: Fix reversed local_df test in ip6_fragment I managed to reverse the local_df test when forward-porting this patch so it actually makes things worse by never fragmenting at all. Thanks to David Stevens for testing and reporting this bug. Bill Fink pointed out that the local_df setting is also the wrong way around. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4e9a2fe2f12..8b67ca07467 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -621,7 +621,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) * or if the skb it not generated by a local socket. (This last * check should be redundant, but it's free.) */ - if (skb->local_df) { + if (!skb->local_df) { skb->dev = skb->dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); @@ -1421,7 +1421,7 @@ int ip6_push_pending_frames(struct sock *sk) } /* Allow local fragmentation. */ - if (np->pmtudisc >= IPV6_PMTUDISC_DO) + if (np->pmtudisc < IPV6_PMTUDISC_DO) skb->local_df = 1; ipv6_addr_copy(final_dst, &fl->fl6_dst); -- cgit v1.2.3 From 997b37da1515c1620692521786a74af271664eb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Fri, 15 Feb 2008 02:35:45 -0800 Subject: [NET]: Make sure sockets implement splice_read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes a segmentation fault when trying to splice from a non-TCP socket. Signed-off-by: RĂ©mi Denis-Courmont Signed-off-by: David S. Miller --- net/socket.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 7651de00850..b6d35cd72a5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -701,6 +701,9 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, { struct socket *sock = file->private_data; + if (unlikely(!sock->ops->splice_read)) + return -EINVAL; + return sock->ops->splice_read(sock, ppos, pipe, len, flags); } -- cgit v1.2.3