diff options
Diffstat (limited to 'net')
79 files changed, 1086 insertions, 619 deletions
diff --git a/net/atm/proc.c b/net/atm/proc.c index 3f95b0886a6..91fe5f53ff1 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -507,7 +507,7 @@ err_out: goto out; } -void __exit atm_proc_exit(void) +void atm_proc_exit(void) { atm_proc_dirs_remove(); } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 6ccd32b3080..864fbbc7b24 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -40,11 +40,15 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) else { #ifdef CONFIG_BRIDGE_NETFILTER /* ip_refrag calls ip_fragment, doesn't copy the MAC header. */ - nf_bridge_maybe_copy_header(skb); + if (nf_bridge_maybe_copy_header(skb)) + kfree_skb(skb); + else #endif - skb_push(skb, ETH_HLEN); + { + skb_push(skb, ETH_HLEN); - dev_queue_xmit(skb); + dev_queue_xmit(skb); + } } return 0; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index f55ef682ef8..b1211d5342f 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -386,12 +386,17 @@ void br_features_recompute(struct net_bridge *br) checksum = 0; if (feature & NETIF_F_GSO) - feature |= NETIF_F_TSO; + feature |= NETIF_F_GSO_SOFTWARE; feature |= NETIF_F_GSO; features &= feature; } + if (!(checksum & NETIF_F_ALL_CSUM)) + features &= ~NETIF_F_SG; + if (!(features & NETIF_F_SG)) + features &= ~NETIF_F_GSO_MASK; + br->dev->features = features | checksum | NETIF_F_LLTX | NETIF_F_GSO_ROBUST; } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 06abb6634f5..53086fb7508 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -85,7 +85,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) goto err_out; err = br_fill_ifinfo(skb, port, current->pid, 0, event, 0); - if (err) + if (err < 0) goto err_kfree; NETLINK_CB(skb).dst_group = RTNLGRP_LINK; diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 02693a230dc..9f950db3b76 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -74,6 +74,9 @@ static void ulog_send(unsigned int nlgroup) if (timer_pending(&ub->timer)) del_timer(&ub->timer); + if (!ub->skb) + return; + /* last nlmsg needs NLMSG_DONE */ if (ub->qlen > 1) ub->lastnlh->nlmsg_type = NLMSG_DONE; diff --git a/net/core/Makefile b/net/core/Makefile index e9bd2467d5a..2645ba428d4 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -7,7 +7,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o -obj-y += dev.o ethtool.o dev_mcast.o dst.o \ +obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o obj-$(CONFIG_XFRM) += flow.o diff --git a/net/core/dev.c b/net/core/dev.c index 4d2b5167d7f..d4a1ec3bded 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -116,6 +116,7 @@ #include <linux/audit.h> #include <linux/dmaengine.h> #include <linux/err.h> +#include <linux/ctype.h> /* * The list of packet types we will receive (as opposed to discard) @@ -632,14 +633,22 @@ struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mas * @name: name string * * Network device names need to be valid file names to - * to allow sysfs to work + * to allow sysfs to work. We also disallow any kind of + * whitespace. */ int dev_valid_name(const char *name) { - return !(*name == '\0' - || !strcmp(name, ".") - || !strcmp(name, "..") - || strchr(name, '/')); + if (*name == '\0') + return 0; + if (!strcmp(name, ".") || !strcmp(name, "..")) + return 0; + + while (*name) { + if (*name == '/' || isspace(*name)) + return 0; + name++; + } + return 1; } /** @@ -1166,11 +1175,6 @@ int skb_checksum_help(struct sk_buff *skb, int inward) goto out_set_summed; if (unlikely(skb_shinfo(skb)->gso_size)) { - static int warned; - - WARN_ON(!warned); - warned = 1; - /* Let GSO fix up the checksum. */ goto out_set_summed; } @@ -1220,11 +1224,6 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) __skb_pull(skb, skb->mac_len); if (unlikely(skb->ip_summed != CHECKSUM_HW)) { - static int warned; - - WARN_ON(!warned); - warned = 1; - if (skb_header_cloned(skb) && (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) return ERR_PTR(err); @@ -1629,26 +1628,10 @@ static inline struct net_device *skb_bond(struct sk_buff *skb) struct net_device *dev = skb->dev; if (dev->master) { - /* - * On bonding slaves other than the currently active - * slave, suppress duplicates except for 802.3ad - * ETH_P_SLOW and alb non-mcast/bcast. - */ - if (dev->priv_flags & IFF_SLAVE_INACTIVE) { - if (dev->master->priv_flags & IFF_MASTER_ALB) { - if (skb->pkt_type != PACKET_BROADCAST && - skb->pkt_type != PACKET_MULTICAST) - goto keep; - } - - if (dev->master->priv_flags & IFF_MASTER_8023AD && - skb->protocol == __constant_htons(ETH_P_SLOW)) - goto keep; - + if (skb_bond_should_drop(skb)) { kfree_skb(skb); return NULL; } -keep: skb->dev = dev->master; } @@ -3429,12 +3412,9 @@ static void net_dma_rebalance(void) unsigned int cpu, i, n; struct dma_chan *chan; - lock_cpu_hotplug(); - if (net_dma_count == 0) { for_each_online_cpu(cpu) - rcu_assign_pointer(per_cpu(softnet_data.net_dma, cpu), NULL); - unlock_cpu_hotplug(); + rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); return; } @@ -3447,15 +3427,13 @@ static void net_dma_rebalance(void) + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); while(n) { - per_cpu(softnet_data.net_dma, cpu) = chan; + per_cpu(softnet_data, cpu).net_dma = chan; cpu = next_cpu(cpu, cpu_online_map); n--; } i++; } rcu_read_unlock(); - - unlock_cpu_hotplug(); } /** diff --git a/net/core/dst.c b/net/core/dst.c index 470c05bc4cb..1a5e49da0e7 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -95,12 +95,11 @@ static void dst_run_gc(unsigned long dummy) dst_gc_timer_inc = DST_GC_INC; dst_gc_timer_expires = DST_GC_MIN; } - dst_gc_timer.expires = jiffies + dst_gc_timer_expires; #if RT_CACHE_DEBUG >= 2 printk("dst_total: %d/%d %ld\n", atomic_read(&dst_total), delayed, dst_gc_timer_expires); #endif - add_timer(&dst_gc_timer); + mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires); out: spin_unlock(&dst_lock); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 7ad681f5e71..5130d2efdbb 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -29,6 +29,7 @@ #include <net/neighbour.h> #include <net/dst.h> #include <net/sock.h> +#include <net/netevent.h> #include <linux/rtnetlink.h> #include <linux/random.h> #include <linux/string.h> @@ -754,6 +755,7 @@ static void neigh_timer_handler(unsigned long arg) neigh->nud_state = NUD_STALE; neigh->updated = jiffies; neigh_suspect(neigh); + notify = 1; } } else if (state & NUD_DELAY) { if (time_before_eq(now, @@ -762,6 +764,7 @@ static void neigh_timer_handler(unsigned long arg) neigh->nud_state = NUD_REACHABLE; neigh->updated = jiffies; neigh_connect(neigh); + notify = 1; next = neigh->confirmed + neigh->parms->reachable_time; } else { NEIGH_PRINTK2("neigh %p is probed.\n", neigh); @@ -819,6 +822,8 @@ static void neigh_timer_handler(unsigned long arg) out: write_unlock(&neigh->lock); } + if (notify) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); #ifdef CONFIG_ARPD if (notify && neigh->parms->app_probes) @@ -926,9 +931,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, { u8 old; int err; -#ifdef CONFIG_ARPD int notify = 0; -#endif struct net_device *dev; int update_isrouter = 0; @@ -948,9 +951,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh_suspect(neigh); neigh->nud_state = new; err = 0; -#ifdef CONFIG_ARPD notify = old & NUD_VALID; -#endif goto out; } @@ -1022,9 +1023,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, if (!(new & NUD_CONNECTED)) neigh->confirmed = jiffies - (neigh->parms->base_reachable_time << 1); -#ifdef CONFIG_ARPD notify = 1; -#endif } if (new == old) goto out; @@ -1056,6 +1055,9 @@ out: (neigh->flags & ~NTF_ROUTER); } write_unlock_bh(&neigh->lock); + + if (notify) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); #ifdef CONFIG_ARPD if (notify && neigh->parms->app_probes) neigh_app_notify(neigh); diff --git a/net/core/netevent.c b/net/core/netevent.c new file mode 100644 index 00000000000..35d02c38554 --- /dev/null +++ b/net/core/netevent.c @@ -0,0 +1,69 @@ +/* + * Network event notifiers + * + * Authors: + * Tom Tucker <tom@opengridcomputing.com> + * Steve Wise <swise@opengridcomputing.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + */ + +#include <linux/rtnetlink.h> +#include <linux/notifier.h> + +static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain); + +/** + * register_netevent_notifier - register a netevent notifier block + * @nb: notifier + * + * Register a notifier to be called when a netevent occurs. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. + */ +int register_netevent_notifier(struct notifier_block *nb) +{ + int err; + + err = atomic_notifier_chain_register(&netevent_notif_chain, nb); + return err; +} + +/** + * netevent_unregister_notifier - unregister a netevent notifier block + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_neigh_notifier(). The notifier is unlinked into the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + */ + +int unregister_netevent_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&netevent_notif_chain, nb); +} + +/** + * call_netevent_notifiers - call all netevent notifier blocks + * @val: value passed unmodified to notifier function + * @v: pointer passed unmodified to notifier function + * + * Call all neighbour notifier blocks. Parameters and return value + * are as for notifier_call_chain(). + */ + +int call_netevent_notifiers(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&netevent_notif_chain, val, v); +} + +EXPORT_SYMBOL_GPL(register_netevent_notifier); +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); +EXPORT_SYMBOL_GPL(call_netevent_notifiers); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 67ed14ddabd..6a7320b39ed 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2149,6 +2149,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32); skb->dev = odev; skb->pkt_type = PACKET_HOST; + skb->nh.iph = iph; + skb->h.uh = udph; if (pkt_dev->nfrags <= 0) pgh = (struct pktgen_hdr *)skb_put(skb, datalen); @@ -2460,6 +2462,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->protocol = protocol; skb->dev = odev; skb->pkt_type = PACKET_HOST; + skb->nh.ipv6h = iph; + skb->h.uh = udph; if (pkt_dev->nfrags <= 0) pgh = (struct pktgen_hdr *)skb_put(skb, datalen); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 20e5bb73f14..30cc1ba6ed5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -394,6 +394,9 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } if (ida[IFLA_ADDRESS - 1]) { + struct sockaddr *sa; + int len; + if (!dev->set_mac_address) { err = -EOPNOTSUPP; goto out; @@ -405,7 +408,17 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len)) goto out; - err = dev->set_mac_address(dev, RTA_DATA(ida[IFLA_ADDRESS - 1])); + len = sizeof(sa_family_t) + dev->addr_len; + sa = kmalloc(len, GFP_KERNEL); + if (!sa) { + err = -ENOMEM; + goto out; + } + sa->sa_family = dev->type; + memcpy(sa->sa_data, RTA_DATA(ida[IFLA_ADDRESS - 1]), + dev->addr_len); + err = dev->set_mac_address(dev, sa); + kfree(sa); if (err) goto out; send_addr_notify = 1; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 476aa397850..c54f3664bce 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -71,13 +71,6 @@ static kmem_cache_t *skbuff_head_cache __read_mostly; static kmem_cache_t *skbuff_fclone_cache __read_mostly; /* - * lockdep: lock class key used by skb_queue_head_init(): - */ -struct lock_class_key skb_queue_lock_key; - -EXPORT_SYMBOL(skb_queue_lock_key); - -/* * Keep out-of-line to prevent kernel bloat. * __builtin_return_address is not used because it is not always * reliable. @@ -256,6 +249,31 @@ nodata: goto out; } +/** + * __netdev_alloc_skb - allocate an skbuff for rx on a specific device + * @dev: network device to receive on + * @length: length to allocate + * @gfp_mask: get_free_pages mask, passed to alloc_skb + * + * Allocate a new &sk_buff and assign it a usage count of one. The + * buffer has unspecified headroom built in. Users should allocate + * the headroom they think they need without accounting for the + * built in space. The built in space is used for optimisations. + * + * %NULL is returned if there is no free memory. + */ +struct sk_buff *__netdev_alloc_skb(struct net_device *dev, + unsigned int length, gfp_t gfp_mask) +{ + struct sk_buff *skb; + + skb = alloc_skb(length + NET_SKB_PAD, gfp_mask); + if (likely(skb)) { + skb_reserve(skb, NET_SKB_PAD); + skb->dev = dev; + } + return skb; +} static void skb_drop_list(struct sk_buff **listp) { @@ -846,7 +864,11 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))) return err; - for (i = 0; i < nfrags; i++) { + i = 0; + if (offset >= len) + goto drop_pages; + + for (; i < nfrags; i++) { int end = offset + skb_shinfo(skb)->frags[i].size; if (end < len) { @@ -854,9 +876,9 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) continue; } - if (len > offset) - skb_shinfo(skb)->frags[i++].size = len - offset; + skb_shinfo(skb)->frags[i++].size = len - offset; +drop_pages: skb_shinfo(skb)->nr_frags = i; for (; i < nfrags; i++) @@ -864,7 +886,7 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) if (skb_shinfo(skb)->frag_list) skb_drop_fraglist(skb); - break; + goto done; } for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp); @@ -879,6 +901,7 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) return -ENOMEM; nfrag->next = frag->next; + kfree_skb(frag); frag = nfrag; *fragp = frag; } @@ -897,6 +920,7 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) break; } +done: if (len > skb_headlen(skb)) { skb->data_len -= skb->len - len; skb->len = len; @@ -2042,6 +2066,7 @@ EXPORT_SYMBOL(__kfree_skb); EXPORT_SYMBOL(kfree_skb); EXPORT_SYMBOL(__pskb_pull_tail); EXPORT_SYMBOL(__alloc_skb); +EXPORT_SYMBOL(__netdev_alloc_skb); EXPORT_SYMBOL(pskb_copy); EXPORT_SYMBOL(pskb_expand_head); EXPORT_SYMBOL(skb_checksum); diff --git a/net/core/utils.c b/net/core/utils.c index 4f96f389243..e31c90e0559 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -130,12 +130,13 @@ void __init net_random_init(void) static int net_random_reseed(void) { int i; - unsigned long seed[NR_CPUS]; + unsigned long seed; - get_random_bytes(seed, sizeof(seed)); for_each_possible_cpu(i) { struct nrnd_state *state = &per_cpu(net_rand_state,i); - __net_srandom(state, seed[i]); + + get_random_bytes(&seed, sizeof(seed)); + __net_srandom(state, seed); } return 0; } diff --git a/net/core/wireless.c b/net/core/wireless.c index d2bc72d318f..de0bde4b51d 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -82,6 +82,7 @@ #include <linux/init.h> /* for __init */ #include <linux/if_arp.h> /* ARPHRD_ETHER */ #include <linux/etherdevice.h> /* compare_ether_addr */ +#include <linux/interrupt.h> #include <linux/wireless.h> /* Pretty obvious */ #include <net/iw_handler.h> /* New driver API */ @@ -1842,6 +1843,18 @@ int wireless_rtnetlink_set(struct net_device * dev, */ #ifdef WE_EVENT_RTNETLINK +static struct sk_buff_head wireless_nlevent_queue; + +static void wireless_nlevent_process(unsigned long data) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&wireless_nlevent_queue))) + netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); +} + +static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0); + /* ---------------------------------------------------------------- */ /* * Fill a rtnetlink message with our event data. @@ -1904,8 +1917,17 @@ static inline void rtmsg_iwinfo(struct net_device * dev, return; } NETLINK_CB(skb).dst_group = RTNLGRP_LINK; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); + skb_queue_tail(&wireless_nlevent_queue, skb); + tasklet_schedule(&wireless_nlevent_tasklet); +} + +static int __init wireless_nlevent_init(void) +{ + skb_queue_head_init(&wireless_nlevent_queue); + return 0; } + +subsys_initcall(wireless_nlevent_init); #endif /* WE_EVENT_RTNETLINK */ /* ---------------------------------------------------------------- */ diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index c39bff706cf..090bc39e819 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -2,7 +2,7 @@ * net/dccp/ccids/ccid3.c * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005-6 Ian McDonald <imcdnzl@gmail.com> + * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz> * * An implementation of the DCCP protocol * @@ -342,6 +342,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, new_packet->dccphtx_ccval = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + timeval_add_usecs(&hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); } out: return rc; @@ -413,7 +415,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: if (len > 0) { - hctx->ccid3hctx_t_nom = now; + timeval_sub_usecs(&hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); ccid3_calc_new_t_ipi(hctx); ccid3_calc_new_delta(hctx); timeval_add_usecs(&hctx->ccid3hctx_t_nom, @@ -757,8 +760,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) } hcrx->ccid3hcrx_tstamp_last_feedback = now; - hcrx->ccid3hcrx_last_counter = packet->dccphrx_ccval; - hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno; + hcrx->ccid3hcrx_ccval_last_counter = packet->dccphrx_ccval; hcrx->ccid3hcrx_bytes_recv = 0; /* Convert to multiples of 10us */ @@ -782,7 +784,7 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) return 0; - DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; + DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter; if (dccp_packet_without_ack(skb)) return 0; @@ -854,6 +856,11 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) interval = 1; } found: + if (!tail) { + LIMIT_NETDEBUG(KERN_WARNING "%s: tail is null\n", + __FUNCTION__); + return ~0; + } rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval; ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", dccp_role(sk), sk, rtt); @@ -864,9 +871,20 @@ found: delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta); + if (x_recv == 0) + x_recv = hcrx->ccid3hcrx_x_recv; + tmp1 = (u64)x_recv * (u64)rtt; do_div(tmp1,10000000); tmp2 = (u32)tmp1; + + if (!tmp2) { + LIMIT_NETDEBUG(KERN_WARNING "tmp2 = 0 " + "%s: x_recv = %u, rtt =%u\n", + __FUNCTION__, x_recv, rtt); + return ~0; + } + fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; /* do not alter order above or you will get overflow on 32 bit */ p = tfrc_calc_x_reverse_lookup(fval); @@ -882,31 +900,101 @@ found: static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + struct dccp_li_hist_entry *next, *head; + u64 seq_temp; - if (seq_loss != DCCP_MAX_SEQNO + 1 && - list_empty(&hcrx->ccid3hcrx_li_hist)) { - struct dccp_li_hist_entry *li_tail; + if (list_empty(&hcrx->ccid3hcrx_li_hist)) { + if (!dccp_li_hist_interval_new(ccid3_li_hist, + &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss)) + return; - li_tail = dccp_li_hist_interval_new(ccid3_li_hist, - &hcrx->ccid3hcrx_li_hist, - seq_loss, win_loss); - if (li_tail == NULL) + next = (struct dccp_li_hist_entry *) + hcrx->ccid3hcrx_li_hist.next; + next->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); + } else { + struct dccp_li_hist_entry *entry; + struct list_head *tail; + + head = (struct dccp_li_hist_entry *) + hcrx->ccid3hcrx_li_hist.next; + /* FIXME win count check removed as was wrong */ + /* should make this check with receive history */ + /* and compare there as per section 10.2 of RFC4342 */ + + /* new loss event detected */ + /* calculate last interval length */ + seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); + entry = dccp_li_hist_entry_new(ccid3_li_hist, SLAB_ATOMIC); + + if (entry == NULL) { + printk(KERN_CRIT "%s: out of memory\n",__FUNCTION__); + dump_stack(); return; - li_tail->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); - } else - LIMIT_NETDEBUG(KERN_WARNING "%s: FIXME: find end of " - "interval\n", __FUNCTION__); + } + + list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist); + + tail = hcrx->ccid3hcrx_li_hist.prev; + list_del(tail); + kmem_cache_free(ccid3_li_hist->dccplih_slab, tail); + + /* Create the newest interval */ + entry->dccplih_seqno = seq_loss; + entry->dccplih_interval = seq_temp; + entry->dccplih_win_count = win_loss; + } } -static void ccid3_hc_rx_detect_loss(struct sock *sk) +static int ccid3_hc_rx_detect_loss(struct sock *sk, + struct dccp_rx_hist_entry *packet) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - u8 win_loss; - const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist, - &hcrx->ccid3hcrx_li_hist, - &win_loss); + struct dccp_rx_hist_entry *rx_hist = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); + u64 seqno = packet->dccphrx_seqno; + u64 tmp_seqno; + int loss = 0; + u8 ccval; + + + tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; + + if (!rx_hist || + follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { + hcrx->ccid3hcrx_seqno_nonloss = seqno; + hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval; + goto detect_out; + } + - ccid3_hc_rx_update_li(sk, seq_loss, win_loss); + while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno) + > TFRC_RECV_NUM_LATE_LOSS) { + loss = 1; + ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss, + hcrx->ccid3hcrx_ccval_nonloss); + tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; + dccp_inc_seqno(&tmp_seqno); + hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; + dccp_inc_seqno(&tmp_seqno); + while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist, + tmp_seqno, &ccval)) { + hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; + hcrx->ccid3hcrx_ccval_nonloss = ccval; + dccp_inc_seqno(&tmp_seqno); + } + } + + /* FIXME - this code could be simplified with above while */ + /* but works at moment */ + if (follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { + hcrx->ccid3hcrx_seqno_nonloss = seqno; + hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval; + } + +detect_out: + dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist, + &hcrx->ccid3hcrx_li_hist, packet, + hcrx->ccid3hcrx_seqno_nonloss); + return loss; } static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) @@ -916,8 +1004,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) struct dccp_rx_hist_entry *packet; struct timeval now; u8 win_count; - u32 p_prev, r_sample, t_elapsed; - int ins; + u32 p_prev, rtt_prev, r_sample, t_elapsed; + int loss; BUG_ON(hcrx == NULL || !(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || @@ -932,7 +1020,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_DATAACK: if (opt_recv->dccpor_timestamp_echo == 0) break; - p_prev = hcrx->ccid3hcrx_rtt; + rtt_prev = hcrx->ccid3hcrx_rtt; dccp_timestamp(sk, &now); timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10); r_sample = timeval_usecs(&now); @@ -951,8 +1039,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 + r_sample / 10; - if (p_prev != hcrx->ccid3hcrx_rtt) - ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n", + if (rtt_prev != hcrx->ccid3hcrx_rtt) + ccid3_pr_debug("%s, New RTT=%uus, elapsed time=%u\n", dccp_role(sk), hcrx->ccid3hcrx_rtt, opt_recv->dccpor_elapsed_time); break; @@ -973,8 +1061,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) win_count = packet->dccphrx_ccval; - ins = dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist, - &hcrx->ccid3hcrx_li_hist, packet); + loss = ccid3_hc_rx_detect_loss(sk, packet); if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) return; @@ -991,7 +1078,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) case TFRC_RSTATE_DATA: hcrx->ccid3hcrx_bytes_recv += skb->len - dccp_hdr(skb)->dccph_doff * 4; - if (ins != 0) + if (loss) break; dccp_timestamp(sk, &now); @@ -1012,7 +1099,6 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n", dccp_role(sk), sk, dccp_state_name(sk->sk_state)); - ccid3_hc_rx_detect_loss(sk); p_prev = hcrx->ccid3hcrx_p; /* Calculate loss event rate */ @@ -1022,6 +1108,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) /* Scaling up by 1000000 as fixed decimal */ if (i_mean != 0) hcrx->ccid3hcrx_p = 1000000 / i_mean; + } else { + printk(KERN_CRIT "%s: empty loss hist\n",__FUNCTION__); + dump_stack(); } if (hcrx->ccid3hcrx_p > p_prev) { @@ -1230,7 +1319,7 @@ static __exit void ccid3_module_exit(void) } module_exit(ccid3_module_exit); -MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, " +MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, " "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>"); MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID"); MODULE_LICENSE("GPL"); diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 5ade4f668b2..0a2cb7536d2 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -1,13 +1,13 @@ /* * net/dccp/ccids/ccid3.h * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. * * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND * research group. For further information please see http://www.wand.net.nz/ - * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: @@ -120,9 +120,10 @@ struct ccid3_hc_rx_sock { #define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv #define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt #define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p - u64 ccid3hcrx_seqno_last_counter:48, + u64 ccid3hcrx_seqno_nonloss:48, + ccid3hcrx_ccval_nonloss:4, ccid3hcrx_state:8, - ccid3hcrx_last_counter:4; + ccid3hcrx_ccval_last_counter:4; u32 ccid3hcrx_bytes_recv; struct timeval ccid3hcrx_tstamp_last_feedback; struct timeval ccid3hcrx_tstamp_last_ack; diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 5d7b7d86438..906c81ab9d4 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -2,7 +2,7 @@ * net/dccp/ccids/lib/loss_interval.c * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> + * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz> * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program is free software; you can redistribute it and/or modify @@ -12,6 +12,7 @@ */ #include <linux/module.h> +#include <net/sock.h> #include "loss_interval.h" @@ -90,13 +91,13 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list) u32 w_tot = 0; list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) { - if (i < DCCP_LI_HIST_IVAL_F_LENGTH) { + if (li_entry->dccplih_interval != ~0) { i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i]; w_tot += dccp_li_hist_w[i]; + if (i != 0) + i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1]; } - if (i != 0) - i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1]; if (++i > DCCP_LI_HIST_IVAL_F_LENGTH) break; @@ -107,37 +108,36 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list) i_tot = max(i_tot0, i_tot1); - /* FIXME: Why do we do this? -Ian McDonald */ - if (i_tot * 4 < w_tot) - i_tot = w_tot * 4; + if (!w_tot) { + LIMIT_NETDEBUG(KERN_WARNING "%s: w_tot = 0\n", __FUNCTION__); + return 1; + } - return i_tot * 4 / w_tot; + return i_tot / w_tot; } EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); -struct dccp_li_hist_entry *dccp_li_hist_interval_new(struct dccp_li_hist *hist, - struct list_head *list, - const u64 seq_loss, - const u8 win_loss) +int dccp_li_hist_interval_new(struct dccp_li_hist *hist, + struct list_head *list, const u64 seq_loss, const u8 win_loss) { - struct dccp_li_hist_entry *tail = NULL, *entry; + struct dccp_li_hist_entry *entry; int i; - for (i = 0; i <= DCCP_LI_HIST_IVAL_F_LENGTH; ++i) { + for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) { entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC); if (entry == NULL) { dccp_li_hist_purge(hist, list); - return NULL; + dump_stack(); + return 0; } - if (tail == NULL) - tail = entry; + entry->dccplih_interval = ~0; list_add(&entry->dccplih_node, list); } entry->dccplih_seqno = seq_loss; entry->dccplih_win_count = win_loss; - return tail; + return 1; } EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new); diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 43bf78269d1..0ae85f0340b 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -4,7 +4,7 @@ * net/dccp/ccids/lib/loss_interval.h * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> + * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz> * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program is free software; you can redistribute it and/or modify it @@ -52,9 +52,6 @@ extern void dccp_li_hist_purge(struct dccp_li_hist *hist, extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); -extern struct dccp_li_hist_entry * - dccp_li_hist_interval_new(struct dccp_li_hist *hist, - struct list_head *list, - const u64 seq_loss, - const u8 win_loss); +extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist, + struct list_head *list, const u64 seq_loss, const u8 win_loss); #endif /* _DCCP_LI_HIST_ */ diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index ad98d6a322e..b876c9c81c6 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -1,13 +1,13 @@ /* - * net/dccp/packet_history.h + * net/dccp/packet_history.c * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. * * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND * research group. For further information please see http://www.wand.net.nz/ - * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: @@ -112,64 +112,27 @@ struct dccp_rx_hist_entry * EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet); -int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, +void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, struct list_head *rx_list, struct list_head *li_list, - struct dccp_rx_hist_entry *packet) + struct dccp_rx_hist_entry *packet, + u64 nonloss_seqno) { - struct dccp_rx_hist_entry *entry, *next, *iter; + struct dccp_rx_hist_entry *entry, *next; u8 num_later = 0; - iter = dccp_rx_hist_head(rx_list); - if (iter == NULL) - dccp_rx_hist_add_entry(rx_list, packet); - else { - const u64 seqno = packet->dccphrx_seqno; - - if (after48(seqno, iter->dccphrx_seqno)) - dccp_rx_hist_add_entry(rx_list, packet); - else { - if (dccp_rx_hist_entry_data_packet(iter)) - num_later = 1; - - list_for_each_entry_continue(iter, rx_list, - dccphrx_node) { - if (after48(seqno, iter->dccphrx_seqno)) { - dccp_rx_hist_add_entry(&iter->dccphrx_node, - packet); - goto trim_history; - } - - if (dccp_rx_hist_entry_data_packet(iter)) - num_later++; + list_add(&packet->dccphrx_node, rx_list); - if (num_later == TFRC_RECV_NUM_LATE_LOSS) { - dccp_rx_hist_entry_delete(hist, packet); - return 1; - } - } - - if (num_later < TFRC_RECV_NUM_LATE_LOSS) - dccp_rx_hist_add_entry(rx_list, packet); - /* - * FIXME: else what? should we destroy the packet - * like above? - */ - } - } - -trim_history: - /* - * Trim history (remove all packets after the NUM_LATE_LOSS + 1 - * data packets) - */ num_later = TFRC_RECV_NUM_LATE_LOSS + 1; if (!list_empty(li_list)) { list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { if (num_later == 0) { - list_del_init(&entry->dccphrx_node); - dccp_rx_hist_entry_delete(hist, entry); + if (after48(nonloss_seqno, + entry->dccphrx_seqno)) { + list_del_init(&entry->dccphrx_node); + dccp_rx_hist_entry_delete(hist, entry); + } } else if (dccp_rx_hist_entry_data_packet(entry)) --num_later; } @@ -217,94 +180,10 @@ trim_history: --num_later; } } - - return 0; } EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet); -u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, - struct list_head *li_list, u8 *win_loss) -{ - struct dccp_rx_hist_entry *entry, *next, *packet; - struct dccp_rx_hist_entry *a_loss = NULL; - struct dccp_rx_hist_entry *b_loss = NULL; - u64 seq_loss = DCCP_MAX_SEQNO + 1; - u8 num_later = TFRC_RECV_NUM_LATE_LOSS; - - list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { - if (num_later == 0) { - b_loss = entry; - break; - } else if (dccp_rx_hist_entry_data_packet(entry)) - --num_later; - } - - if (b_loss == NULL) - goto out; - - num_later = 1; - list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) { - if (num_later == 0) { - a_loss = entry; - break; - } else if (dccp_rx_hist_entry_data_packet(entry)) - --num_later; - } - - if (a_loss == NULL) { - if (list_empty(li_list)) { - /* no loss event have occured yet */ - LIMIT_NETDEBUG("%s: TODO: find a lost data packet by " - "comparing to initial seqno\n", - __FUNCTION__); - goto out; - } else { - LIMIT_NETDEBUG("%s: Less than 4 data pkts in history!", - __FUNCTION__); - goto out; - } - } - - /* Locate a lost data packet */ - entry = packet = b_loss; - list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) { - u64 delta = dccp_delta_seqno(entry->dccphrx_seqno, - packet->dccphrx_seqno); - - if (delta != 0) { - if (dccp_rx_hist_entry_data_packet(packet)) - --delta; - /* - * FIXME: check this, probably this % usage is because - * in earlier drafts the ndp count was just 8 bits - * long, but now it cam be up to 24 bits long. - */ -#if 0 - if (delta % DCCP_NDP_LIMIT != - (packet->dccphrx_ndp - - entry->dccphrx_ndp) % DCCP_NDP_LIMIT) -#endif - if (delta != packet->dccphrx_ndp - entry->dccphrx_ndp) { - seq_loss = entry->dccphrx_seqno; - dccp_inc_seqno(&seq_loss); - } - } - packet = entry; - if (packet == a_loss) - break; - } -out: - if (seq_loss != DCCP_MAX_SEQNO + 1) - *win_loss = a_loss->dccphrx_ccval; - else - *win_loss = 0; /* Paranoia */ - - return seq_loss; -} - -EXPORT_SYMBOL_GPL(dccp_rx_hist_detect_loss); - struct dccp_tx_hist *dccp_tx_hist_new(const char *name) { struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); @@ -365,6 +244,25 @@ struct dccp_tx_hist_entry * EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); +int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, + u8 *ccval) +{ + struct dccp_rx_hist_entry *packet = NULL, *entry; + + list_for_each_entry(entry, list, dccphrx_node) + if (entry->dccphrx_seqno == seq) { + packet = entry; + break; + } + + if (packet) + *ccval = packet->dccphrx_ccval; + + return packet != NULL; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry); + void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, struct list_head *list, struct dccp_tx_hist_entry *packet) @@ -391,7 +289,7 @@ void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); -MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, " +MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, " "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>"); MODULE_DESCRIPTION("DCCP TFRC library"); MODULE_LICENSE("GPL"); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 673c209e4e8..067cf1c85a3 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -1,13 +1,13 @@ /* * net/dccp/packet_history.h * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. * * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND * research group. For further information please see http://www.wand.net.nz/ - * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: @@ -106,6 +106,8 @@ static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, extern struct dccp_tx_hist_entry * dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq); +extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, + u8 *ccval); static inline void dccp_tx_hist_add_entry(struct list_head *list, struct dccp_tx_hist_entry *entry) @@ -164,12 +166,6 @@ static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list); -static inline void dccp_rx_hist_add_entry(struct list_head *list, - struct dccp_rx_hist_entry *entry) -{ - list_add(&entry->dccphrx_node, list); -} - static inline struct dccp_rx_hist_entry * dccp_rx_hist_head(struct list_head *list) { @@ -188,10 +184,11 @@ static inline int entry->dccphrx_type == DCCP_PKT_DATAACK; } -extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, +extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, struct list_head *rx_list, struct list_head *li_list, - struct dccp_rx_hist_entry *packet); + struct dccp_rx_hist_entry *packet, + u64 nonloss_seqno); extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, struct list_head *li_list, u8 *win_loss); diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index 130c4c40cfe..45f30f59ea2 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -4,7 +4,7 @@ * net/dccp/ccids/lib/tfrc.h * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> + * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz> * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon * diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index 4fd2ebebf5a..44076e0c659 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -2,7 +2,7 @@ * net/dccp/ccids/lib/tfrc_equation.c * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> + * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz> * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon * diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index d00a2f4ee5d..a5c5475724c 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -5,7 +5,7 @@ * * An implementation of the DCCP protocol * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> - * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> + * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as @@ -81,6 +81,14 @@ static inline u64 max48(const u64 seq1, const u64 seq2) return after48(seq1, seq2) ? seq1 : seq2; } +/* is seq1 next seqno after seq2 */ +static inline int follows48(const u64 seq1, const u64 seq2) +{ + int diff = (seq1 & 0xFFFF) - (seq2 & 0xFFFF); + + return diff==1; +} + enum { DCCP_MIB_NUM = 0, DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9f3d4d7cd0b..610c722ac27 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -230,7 +230,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ipv6_addr_copy(&np->saddr, saddr); inet->rcv_saddr = LOOPBACK4_IPV6; - ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt != NULL) @@ -863,7 +863,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, * comment in that function for the gory details. -acme */ - ip6_dst_store(newsk, dst, NULL); + __ip6_dst_store(newsk, dst, NULL); newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newdp6 = (struct dccp6_sock *)newsk; diff --git a/net/dccp/options.c b/net/dccp/options.c index daf72bb671f..07a34696ac9 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -4,7 +4,7 @@ * An implementation of the DCCP protocol * Copyright (c) 2005 Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org> * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net> - * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> + * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 1355614ec11..743e9fcf7c5 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -925,8 +925,13 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old for(dev_out = dev_base; dev_out; dev_out = dev_out->next) { if (!dev_out->dn_ptr) continue; - if (dn_dev_islocal(dev_out, oldflp->fld_src)) - break; + if (!dn_dev_islocal(dev_out, oldflp->fld_src)) + continue; + if ((dev_out->flags & IFF_LOOPBACK) && + oldflp->fld_dst && + !dn_dev_islocal(dev_out, oldflp->fld_dst)) + continue; + break; } read_unlock(&dev_base_lock); if (dev_out == NULL) diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig index dbb08528ddf..f7e84e9d13a 100644 --- a/net/ieee80211/Kconfig +++ b/net/ieee80211/Kconfig @@ -58,6 +58,7 @@ config IEEE80211_CRYPT_TKIP depends on IEEE80211 && NET_RADIO select CRYPTO select CRYPTO_MICHAEL_MIC + select CRC32 ---help--- Include software based cipher suites in support of IEEE 802.11i (aka TGi, WPA, WPA2, WPA-PSK, etc.) for use with TKIP enabled diff --git a/net/ieee80211/softmac/ieee80211softmac_auth.c b/net/ieee80211/softmac/ieee80211softmac_auth.c index ebc33ca6e69..4cef39e171d 100644 --- a/net/ieee80211/softmac/ieee80211softmac_auth.c +++ b/net/ieee80211/softmac/ieee80211softmac_auth.c @@ -116,6 +116,16 @@ ieee80211softmac_auth_queue(void *data) kfree(auth); } +/* Sends a response to an auth challenge (for shared key auth). */ +static void +ieee80211softmac_auth_challenge_response(void *_aq) +{ + struct ieee80211softmac_auth_queue_item *aq = _aq; + + /* Send our response */ + ieee80211softmac_send_mgt_frame(aq->mac, aq->net, IEEE80211_STYPE_AUTH, aq->state); +} + /* Handle the auth response from the AP * This should be registered with ieee80211 as handle_auth */ @@ -197,24 +207,30 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) case IEEE80211SOFTMAC_AUTH_SHARED_CHALLENGE: /* Check to make sure we have a challenge IE */ data = (u8 *)auth->info_element; - if(*data++ != MFIE_TYPE_CHALLENGE){ + if (*data++ != MFIE_TYPE_CHALLENGE) { printkl(KERN_NOTICE PFX "Shared Key Authentication failed due to a missing challenge.\n"); break; } /* Save the challenge */ spin_lock_irqsave(&mac->lock, flags); net->challenge_len = *data++; - if(net->challenge_len > WLAN_AUTH_CHALLENGE_LEN) + if (net->challenge_len > WLAN_AUTH_CHALLENGE_LEN) net->challenge_len = WLAN_AUTH_CHALLENGE_LEN; - if(net->challenge != NULL) + if (net->challenge != NULL) kfree(net->challenge); net->challenge = kmalloc(net->challenge_len, GFP_ATOMIC); memcpy(net->challenge, data, net->challenge_len); aq->state = IEEE80211SOFTMAC_AUTH_SHARED_RESPONSE; - spin_unlock_irqrestore(&mac->lock, flags); - /* Send our response */ - ieee80211softmac_send_mgt_frame(mac, aq->net, IEEE80211_STYPE_AUTH, aq->state); + /* We reuse the work struct from the auth request here. + * It is safe to do so as each one is per-request, and + * at this point (dealing with authentication response) + * we have obviously already sent the initial auth + * request. */ + cancel_delayed_work(&aq->work); + INIT_WORK(&aq->work, &ieee80211softmac_auth_challenge_response, (void *)aq); + schedule_work(&aq->work); + spin_unlock_irqrestore(&mac->lock, flags); return 0; case IEEE80211SOFTMAC_AUTH_SHARED_PASS: kfree(net->challenge); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 9be53a8e72c..51738000f3d 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -159,7 +159,7 @@ void free_fib_info(struct fib_info *fi) void fib_release_info(struct fib_info *fi) { - write_lock(&fib_info_lock); + write_lock_bh(&fib_info_lock); if (fi && --fi->fib_treeref == 0) { hlist_del(&fi->fib_hash); if (fi->fib_prefsrc) @@ -172,7 +172,7 @@ void fib_release_info(struct fib_info *fi) fi->fib_dead = 1; fib_info_put(fi); } - write_unlock(&fib_info_lock); + write_unlock_bh(&fib_info_lock); } static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) @@ -598,7 +598,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash, unsigned int old_size = fib_hash_size; unsigned int i, bytes; - write_lock(&fib_info_lock); + write_lock_bh(&fib_info_lock); old_info_hash = fib_info_hash; old_laddrhash = fib_info_laddrhash; fib_hash_size = new_size; @@ -639,7 +639,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash, } fib_info_laddrhash = new_laddrhash; - write_unlock(&fib_info_lock); + write_unlock_bh(&fib_info_lock); bytes = old_size * sizeof(struct hlist_head *); fib_hash_free(old_info_hash, bytes); @@ -820,7 +820,7 @@ link_it: fi->fib_treeref++; atomic_inc(&fi->fib_clntref); - write_lock(&fib_info_lock); + write_lock_bh(&fib_info_lock); hlist_add_head(&fi->fib_hash, &fib_info_hash[fib_info_hashfn(fi)]); if (fi->fib_prefsrc) { @@ -839,7 +839,7 @@ link_it: head = &fib_info_devhash[hash]; hlist_add_head(&nh->nh_hash, head); } endfor_nexthops(fi) - write_unlock(&fib_info_lock); + write_unlock_bh(&fib_info_lock); return fi; err_inval: diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 9f4b752f5a3..8e8117c19e4 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1793,29 +1793,35 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) struct in_device *in_dev; u32 group = imr->imr_multiaddr.s_addr; u32 ifindex; + int ret = -EADDRNOTAVAIL; rtnl_lock(); in_dev = ip_mc_find_dev(imr); - if (!in_dev) { - rtnl_unlock(); - return -ENODEV; - } ifindex = imr->imr_ifindex; for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) { - if (iml->multi.imr_multiaddr.s_addr == group && - iml->multi.imr_ifindex == ifindex) { - (void) ip_mc_leave_src(sk, iml, in_dev); + if (iml->multi.imr_multiaddr.s_addr != group) + continue; + if (ifindex) { + if (iml->multi.imr_ifindex != ifindex) + continue; + } else if (imr->imr_address.s_addr && imr->imr_address.s_addr != + iml->multi.imr_address.s_addr) + continue; + + (void) ip_mc_leave_src(sk, iml, in_dev); - *imlp = iml->next; + *imlp = iml->next; + if (in_dev) ip_mc_dec_group(in_dev, group); - rtnl_unlock(); - sock_kfree_s(sk, iml, sizeof(*iml)); - return 0; - } + rtnl_unlock(); + sock_kfree_s(sk, iml, sizeof(*iml)); + return 0; } + if (!in_dev) + ret = -ENODEV; rtnl_unlock(); - return -EADDRNOTAVAIL; + return ret; } int ip_mc_source(int add, int omode, struct sock *sk, struct @@ -2199,13 +2205,13 @@ void ip_mc_drop_socket(struct sock *sk) struct in_device *in_dev; inet->mc_list = iml->next; - if ((in_dev = inetdev_by_index(iml->multi.imr_ifindex)) != NULL) { - (void) ip_mc_leave_src(sk, iml, in_dev); + in_dev = inetdev_by_index(iml->multi.imr_ifindex); + (void) ip_mc_leave_src(sk, iml, in_dev); + if (in_dev != NULL) { ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); in_dev_put(in_dev); } sock_kfree_s(sk, iml, sizeof(*iml)); - } rtnl_unlock(); } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7c9f9a6421b..4c20f554689 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -526,6 +526,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) err = output(skb); + if (!err) + IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); if (err || !frag) break; @@ -649,9 +651,6 @@ slow_path: /* * Put this fragment into the sending queue. */ - - IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); - iph->tot_len = htons(len + hlen); ip_send_check(iph); @@ -659,6 +658,8 @@ slow_path: err = output(skb2); if (err) goto fail; + + IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); } kfree_skb(skb); IP_INC_STATS(IPSTATS_MIB_FRAGOKS); @@ -946,7 +947,7 @@ alloc_new_skb: skb_prev->csum = csum_sub(skb_prev->csum, skb->csum); data += fraggap; - skb_trim(skb_prev, maxfraglen); + pskb_trim_unique(skb_prev, maxfraglen); } copy = datalen - transhdrlen - fraggap; @@ -1141,7 +1142,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, data, fraggap, 0); skb_prev->csum = csum_sub(skb_prev->csum, skb->csum); - skb_trim(skb_prev, maxfraglen); + pskb_trim_unique(skb_prev, maxfraglen); } /* diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 84f43a3c909..2d05c4133d3 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -112,14 +112,19 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) { char *secdata; - u32 seclen; + u32 seclen, secid; int err; - err = security_socket_getpeersec_dgram(skb, &secdata, &seclen); + err = security_socket_getpeersec_dgram(NULL, skb, &secid); + if (err) + return; + + err = security_secid_to_secctx(secid, &secdata, &seclen); if (err) return; put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata); + security_release_secctx(secdata, seclen); } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 80c73ca9011..8d1d7a6e72a 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -236,7 +236,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, struct arpt_entry *e, *back; const char *indev, *outdev; void *table_base; - struct xt_table_info *private = table->private; + struct xt_table_info *private; /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) + @@ -248,6 +248,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, outdev = out ? out->name : nulldevname; read_lock_bh(&table->lock); + private = table->private; table_base = (void *)private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); @@ -1170,21 +1171,34 @@ static int __init arp_tables_init(void) { int ret; - xt_proto_init(NF_ARP); + ret = xt_proto_init(NF_ARP); + if (ret < 0) + goto err1; /* Noone else will be downing sem now, so we won't sleep */ - xt_register_target(&arpt_standard_target); - xt_register_target(&arpt_error_target); + ret = xt_register_target(&arpt_standard_target); + if (ret < 0) + goto err2; + ret = xt_register_target(&arpt_error_target); + if (ret < 0) + goto err3; /* Register setsockopt */ ret = nf_register_sockopt(&arpt_sockopts); - if (ret < 0) { - duprintf("Unable to register sockopts.\n"); - return ret; - } + if (ret < 0) + goto err4; printk("arp_tables: (C) 2002 David S. Miller\n"); return 0; + +err4: + xt_unregister_target(&arpt_error_target); +err3: + xt_unregister_target(&arpt_standard_target); +err2: + xt_proto_fini(NF_ARP); +err1: + return ret; } static void __exit arp_tables_fini(void) diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 33891bb1fde..0d4cc92391f 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -415,21 +415,18 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0], *id); read_lock_bh(&ip_conntrack_lock); + last = (struct ip_conntrack *)cb->args[1]; for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++) { restart: - last = (struct ip_conntrack *)cb->args[1]; list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) { h = (struct ip_conntrack_tuple_hash *) i; if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = tuplehash_to_ctrack(h); - if (last != NULL) { - if (ct == last) { - ip_conntrack_put(last); - cb->args[1] = 0; - last = NULL; - } else + if (cb->args[1]) { + if (ct != last) continue; + cb->args[1] = 0; } if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, @@ -440,17 +437,17 @@ restart: goto out; } } - if (last != NULL) { - ip_conntrack_put(last); + if (cb->args[1]) { cb->args[1] = 0; goto restart; } } out: read_unlock_bh(&ip_conntrack_lock); + if (last) + ip_conntrack_put(last); DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); - return skb->len; } diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c index fc87ce0da40..4f222d6be00 100644 --- a/net/ipv4/netfilter/ip_conntrack_sip.c +++ b/net/ipv4/netfilter/ip_conntrack_sip.c @@ -442,7 +442,7 @@ static int __init init(void) sip[i].tuple.src.u.udp.port = htons(ports[i]); sip[i].mask.src.u.udp.port = 0xFFFF; sip[i].mask.dst.protonum = 0xFF; - sip[i].max_expected = 1; + sip[i].max_expected = 2; sip[i].timeout = 3 * 60; /* 3 minutes */ sip[i].me = THIS_MODULE; sip[i].help = sip_help; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index fc5bdd5eb7d..048514f15f2 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -230,7 +230,7 @@ ipt_do_table(struct sk_buff **pskb, const char *indev, *outdev; void *table_base; struct ipt_entry *e, *back; - struct xt_table_info *private = table->private; + struct xt_table_info *private; /* Initialization */ ip = (*pskb)->nh.iph; @@ -247,6 +247,7 @@ ipt_do_table(struct sk_buff **pskb, read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); + private = table->private; table_base = (void *)private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -2239,22 +2240,39 @@ static int __init ip_tables_init(void) { int ret; - xt_proto_init(AF_INET); + ret = xt_proto_init(AF_INET); + if (ret < 0) + goto err1; /* Noone else will be downing sem now, so we won't sleep */ - xt_register_target(&ipt_standard_target); - xt_register_target(&ipt_error_target); - xt_register_match(&icmp_matchstruct); + ret = xt_register_target(&ipt_standard_target); + if (ret < 0) + goto err2; + ret = xt_register_target(&ipt_error_target); + if (ret < 0) + goto err3; + ret = xt_register_match(&icmp_matchstruct); + if (ret < 0) + goto err4; /* Register setsockopt */ ret = nf_register_sockopt(&ipt_sockopts); - if (ret < 0) { - duprintf("Unable to register sockopts.\n"); - return ret; - } + if (ret < 0) + goto err5; printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n"); return 0; + +err5: + xt_unregister_match(&icmp_matchstruct); +err4: + xt_unregister_target(&ipt_error_target); +err3: + xt_unregister_target(&ipt_standard_target); +err2: + xt_proto_fini(AF_INET); +err1: + return ret; } static void __exit ip_tables_fini(void) diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index d7dd7fe7051..d46fd677fa1 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -115,6 +115,11 @@ static void ulog_send(unsigned int nlgroupnum) del_timer(&ub->timer); } + if (!ub->skb) { + DEBUGP("ipt_ULOG: ulog_send: nothing to send\n"); + return; + } + /* last nlmsg needs NLMSG_DONE */ if (ub->qlen > 1) ub->lastnlh->nlmsg_type = NLMSG_DONE; diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c index 92980ab8ce4..3bd2368e1fc 100644 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ b/net/ipv4/netfilter/ipt_hashlimit.c @@ -454,15 +454,12 @@ hashlimit_match(const struct sk_buff *skb, dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg * hinfo->cfg.burst); dh->rateinfo.cost = user2credits(hinfo->cfg.avg); - - spin_unlock_bh(&hinfo->lock); - return 1; + } else { + /* update expiration timeout */ + dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); + rateinfo_recalc(dh, now); } - /* update expiration timeout */ - dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); - - rateinfo_recalc(dh, now); if (dh->rateinfo.credit >= dh->rateinfo.cost) { /* We're underlimit. */ dh->rateinfo.credit -= dh->rateinfo.cost; @@ -508,6 +505,9 @@ hashlimit_checkentry(const char *tablename, if (!r->cfg.expire) return 0; + if (r->name[sizeof(r->name) - 1] != '\0') + return 0; + /* This is the best we've got: We cannot release and re-grab lock, * since checkentry() is called before ip_tables.c grabs ipt_mutex. * We also cannot grab the hashtable spinlock, since htable_create will diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2dc6dbb2846..b873cbcdd0b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -104,6 +104,7 @@ #include <net/icmp.h> #include <net/xfrm.h> #include <net/ip_mp_alg.h> +#include <net/netevent.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif @@ -1125,6 +1126,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, struct rtable *rth, **rthp; u32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; + struct netevent_redirect netevent; if (!in_dev) return; @@ -1216,6 +1218,11 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, rt_drop(rt); goto do_next; } + + netevent.old = &rth->u.dst; + netevent.new = &rt->u.dst; + call_netevent_notifiers(NETEVENT_REDIRECT, + &netevent); rt_del(hash, rth); if (!rt_intern_hash(hash, rt, &rt)) @@ -1452,6 +1459,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } dst->metrics[RTAX_MTU-1] = mtu; dst_set_expires(dst, ip_rt_mtu_expires); + call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } @@ -3149,7 +3157,7 @@ int __init ip_rt_init(void) rhash_entries, (num_physpages >= 128 * 1024) ? 15 : 17, - HASH_HIGHMEM, + 0, &rt_hash_log, &rt_hash_mask, 0); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f6a2d9223d0..934396bb137 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1132,7 +1132,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, tp->ucopy.dma_chan = NULL; preempt_disable(); if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && - !sysctl_tcp_low_latency && __get_cpu_var(softnet_data.net_dma)) { + !sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) { preempt_enable_no_resched(); tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len); } else @@ -1659,7 +1659,8 @@ adjudge_to_death: const int tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk)); + inet_csk_reset_keepalive_timer(sk, + tmo - TCP_TIMEWAIT_LEN); } else { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); goto out; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 738dad9f7d4..104af5d5bcb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3541,7 +3541,8 @@ void tcp_cwnd_application_limited(struct sock *sk) if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { /* Limited by application or receiver window. */ - u32 win_used = max(tp->snd_cwnd_used, 2U); + u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); + u32 win_used = max(tp->snd_cwnd_used, init_win); if (win_used < tp->snd_cwnd) { tp->snd_ssthresh = tcp_current_ssthresh(sk); tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f6f39e81429..4b04c3edd4a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -438,7 +438,6 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) It can f.e. if SYNs crossed. */ if (!sock_owned_by_user(sk)) { - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); sk->sk_err = err; sk->sk_error_report(sk); @@ -874,7 +873,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) drop_and_free: reqsk_free(req); drop: - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); return 0; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0ccb7cb22b1..624e2b2c7f5 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -589,8 +589,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, /* RFC793: "second check the RST bit" and * "fourth, check the SYN bit" */ - if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) + if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { + TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); goto embryonic_reset; + } /* ACK sequence verified above, just make sure ACK is * set. If ACK not set, just silently drop the packet. diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5c08ea20a18..b4f3ffe1b3b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -201,6 +201,7 @@ void tcp_select_initial_window(int __space, __u32 mss, * See RFC1323 for an explanation of the limit to 14 */ space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max); + space = min_t(u32, space, *window_clamp); while (space > 65535 && (*rcv_wscale) < 14) { space >>= 1; (*rcv_wscale)++; @@ -466,7 +467,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (skb->len != tcp_header_size) tcp_event_data_sent(tp, skb, sk); - TCP_INC_STATS(TCP_MIB_OUTSEGS); + if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) + TCP_INC_STATS(TCP_MIB_OUTSEGS); err = icsk->icsk_af_ops->queue_xmit(skb, 0); if (likely(err <= 0)) @@ -2157,10 +2159,9 @@ int tcp_connect(struct sock *sk) skb_shinfo(buff)->gso_size = 0; skb_shinfo(buff)->gso_type = 0; buff->csum = 0; + tp->snd_nxt = tp->write_seq; TCP_SKB_CB(buff)->seq = tp->write_seq++; TCP_SKB_CB(buff)->end_seq = tp->write_seq; - tp->snd_nxt = tp->write_seq; - tp->pushed_seq = tp->write_seq; /* Send it off. */ TCP_SKB_CB(buff)->when = tcp_time_stamp; @@ -2170,6 +2171,12 @@ int tcp_connect(struct sock *sk) sk_charge_skb(sk, buff); tp->packets_out += tcp_skb_pcount(buff); tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); + + /* We change tp->snd_nxt after the tcp_transmit_skb() call + * in order to make this packet get counted in tcpOutSegs. + */ + tp->snd_nxt = tp->write_seq; + tp->pushed_seq = tp->write_seq; TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. */ diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index d7d517a3a23..dab37d2f65f 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -114,7 +114,7 @@ static int tcpprobe_open(struct inode * inode, struct file * file) static ssize_t tcpprobe_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { - int error = 0, cnt; + int error = 0, cnt = 0; unsigned char *tbuf; if (!buf || len < 0) @@ -130,11 +130,12 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf, error = wait_event_interruptible(tcpw.wait, __kfifo_len(tcpw.fifo) != 0); if (error) - return error; + goto out_free; cnt = kfifo_get(tcpw.fifo, tbuf, len); error = copy_to_user(buf, tbuf, cnt); +out_free: vfree(tbuf); return error ? error : cnt; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2316a4315a1..0c5042e7380 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1869,15 +1869,21 @@ err_exit: /* * Manual configuration of address on an interface */ -static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen) +static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, + __u32 prefered_lft, __u32 valid_lft) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; + __u8 ifa_flags = 0; int scope; ASSERT_RTNL(); + /* check the lifetime */ + if (!valid_lft || prefered_lft > valid_lft) + return -EINVAL; + if ((dev = __dev_get_by_index(ifindex)) == NULL) return -ENODEV; @@ -1889,10 +1895,29 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen) scope = ipv6_addr_scope(pfx); - ifp = ipv6_add_addr(idev, pfx, plen, scope, IFA_F_PERMANENT); + if (valid_lft == INFINITY_LIFE_TIME) + ifa_flags |= IFA_F_PERMANENT; + else if (valid_lft >= 0x7FFFFFFF/HZ) + valid_lft = 0x7FFFFFFF/HZ; + + if (prefered_lft == 0) + ifa_flags |= IFA_F_DEPRECATED; + else if ((prefered_lft >= 0x7FFFFFFF/HZ) && + (prefered_lft != INFINITY_LIFE_TIME)) + prefered_lft = 0x7FFFFFFF/HZ; + + ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); + if (!IS_ERR(ifp)) { + spin_lock_bh(&ifp->lock); + ifp->valid_lft = valid_lft; + ifp->prefered_lft = prefered_lft; + ifp->tstamp = jiffies; + spin_unlock_bh(&ifp->lock); + addrconf_dad_start(ifp, 0); in6_ifa_put(ifp); + addrconf_verify(0); return 0; } @@ -1945,7 +1970,8 @@ int addrconf_add_ifaddr(void __user *arg) return -EFAULT; rtnl_lock(); - err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen); + err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen, + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); rtnl_unlock(); return err; } @@ -2771,12 +2797,16 @@ restart: ifp->idev->nd_parms->retrans_time / HZ; #endif - if (age >= ifp->valid_lft) { + if (ifp->valid_lft != INFINITY_LIFE_TIME && + age >= ifp->valid_lft) { spin_unlock(&ifp->lock); in6_ifa_hold(ifp); read_unlock(&addrconf_hash_lock); ipv6_del_addr(ifp); goto restart; + } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) { + spin_unlock(&ifp->lock); + continue; } else if (age >= ifp->prefered_lft) { /* jiffies - ifp->tsamp > age >= ifp->prefered_lft */ int deprecate = 0; @@ -2853,7 +2883,8 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) pfx = RTA_DATA(rta[IFA_ADDRESS-1]); } if (rta[IFA_LOCAL-1]) { - if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))) + if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) || + (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))) return -EINVAL; pfx = RTA_DATA(rta[IFA_LOCAL-1]); } @@ -2864,11 +2895,61 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } static int +inet6_addr_modify(int ifindex, struct in6_addr *pfx, + __u32 prefered_lft, __u32 valid_lft) +{ + struct inet6_ifaddr *ifp = NULL; + struct net_device *dev; + int ifa_flags = 0; + + if ((dev = __dev_get_by_index(ifindex)) == NULL) + return -ENODEV; + + if (!(dev->flags&IFF_UP)) + return -ENETDOWN; + + if (!valid_lft || (prefered_lft > valid_lft)) + return -EINVAL; + + ifp = ipv6_get_ifaddr(pfx, dev, 1); + if (ifp == NULL) + return -ENOENT; + + if (valid_lft == INFINITY_LIFE_TIME) + ifa_flags = IFA_F_PERMANENT; + else if (valid_lft >= 0x7FFFFFFF/HZ) + valid_lft = 0x7FFFFFFF/HZ; + + if (prefered_lft == 0) + ifa_flags = IFA_F_DEPRECATED; + else if ((prefered_lft >= 0x7FFFFFFF/HZ) && + (prefered_lft != INFINITY_LIFE_TIME)) + prefered_lft = 0x7FFFFFFF/HZ; + + spin_lock_bh(&ifp->lock); + ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED|IFA_F_PERMANENT)) | ifa_flags; + + ifp->tstamp = jiffies; + ifp->valid_lft = valid_lft; + ifp->prefered_lft = prefered_lft; + + spin_unlock_bh(&ifp->lock); + if (!(ifp->flags&IFA_F_TENTATIVE)) + ipv6_ifa_notify(0, ifp); + in6_ifa_put(ifp); + + addrconf_verify(0); + + return 0; +} + +static int inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct rtattr **rta = arg; struct ifaddrmsg *ifm = NLMSG_DATA(nlh); struct in6_addr *pfx; + __u32 valid_lft = INFINITY_LIFE_TIME, prefered_lft = INFINITY_LIFE_TIME; pfx = NULL; if (rta[IFA_ADDRESS-1]) { @@ -2877,14 +2958,34 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) pfx = RTA_DATA(rta[IFA_ADDRESS-1]); } if (rta[IFA_LOCAL-1]) { - if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))) + if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) || + (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))) return -EINVAL; pfx = RTA_DATA(rta[IFA_LOCAL-1]); } if (pfx == NULL) return -EINVAL; - return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen); + if (rta[IFA_CACHEINFO-1]) { + struct ifa_cacheinfo *ci; + if (RTA_PAYLOAD(rta[IFA_CACHEINFO-1]) < sizeof(*ci)) + return -EINVAL; + ci = RTA_DATA(rta[IFA_CACHEINFO-1]); + valid_lft = ci->ifa_valid; + prefered_lft = ci->ifa_prefered; + } + + if (nlh->nlmsg_flags & NLM_F_REPLACE) { + int ret; + ret = inet6_addr_modify(ifm->ifa_index, pfx, + prefered_lft, valid_lft); + if (ret == 0 || !(nlh->nlmsg_flags & NLM_F_CREATE)) + return ret; + } + + return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, + prefered_lft, valid_lft); + } /* Maximum length of ifa_cacheinfo attributes */ @@ -3121,6 +3222,62 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } +static int inet6_rtm_getaddr(struct sk_buff *in_skb, + struct nlmsghdr* nlh, void *arg) +{ + struct rtattr **rta = arg; + struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct in6_addr *addr = NULL; + struct net_device *dev = NULL; + struct inet6_ifaddr *ifa; + struct sk_buff *skb; + int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE); + int err; + + if (rta[IFA_ADDRESS-1]) { + if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*addr)) + return -EINVAL; + addr = RTA_DATA(rta[IFA_ADDRESS-1]); + } + if (rta[IFA_LOCAL-1]) { + if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*addr) || + (addr && memcmp(addr, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*addr)))) + return -EINVAL; + addr = RTA_DATA(rta[IFA_LOCAL-1]); + } + if (addr == NULL) + return -EINVAL; + + if (ifm->ifa_index) + dev = __dev_get_by_index(ifm->ifa_index); + + if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) + return -EADDRNOTAVAIL; + + if ((skb = alloc_skb(size, GFP_KERNEL)) == NULL) { + err = -ENOBUFS; + goto out; + } + + NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; + err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, + nlh->nlmsg_seq, RTM_NEWADDR, 0); + if (err < 0) { + err = -EMSGSIZE; + goto out_free; + } + + err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + if (err > 0) + err = 0; +out: + in6_ifa_put(ifa); + return err; +out_free: + kfree_skb(skb); + goto out; +} + static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) { struct sk_buff *skb; @@ -3363,7 +3520,8 @@ static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_GETLINK - RTM_BASE] = { .dumpit = inet6_dump_ifinfo, }, [RTM_NEWADDR - RTM_BASE] = { .doit = inet6_rtm_newaddr, }, [RTM_DELADDR - RTM_BASE] = { .doit = inet6_rtm_deladdr, }, - [RTM_GETADDR - RTM_BASE] = { .dumpit = inet6_dump_ifaddr, }, + [RTM_GETADDR - RTM_BASE] = { .doit = inet6_rtm_getaddr, + .dumpit = inet6_dump_ifaddr, }, [RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, }, [RTM_GETANYCAST - RTM_BASE] = { .dumpit = inet6_dump_ifacaddr, }, [RTM_NEWROUTE - RTM_BASE] = { .doit = inet6_rtm_newroute, }, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 5a0ba58b86c..ac85e9c532c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -658,7 +658,7 @@ int inet6_sk_rebuild_header(struct sock *sk) return err; } - ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL); } return 0; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 1044b6fce0d..3d6e9a35115 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -712,6 +712,11 @@ discard_it: return 0; } +/* + * Special lock-class for __icmpv6_socket: + */ +static struct lock_class_key icmpv6_socket_sk_dst_lock_key; + int __init icmpv6_init(struct net_proto_family *ops) { struct sock *sk; @@ -730,6 +735,14 @@ int __init icmpv6_init(struct net_proto_family *ops) sk = per_cpu(__icmpv6_socket, i)->sk; sk->sk_allocation = GFP_ATOMIC; + /* + * Split off their lock-class, because sk->sk_dst_lock + * gets used from softirqs, which is safe for + * __icmpv6_socket (because those never get directly used + * via userspace syscalls), but unsafe for normal sockets. + */ + lockdep_set_class(&sk->sk_dst_lock, + &icmpv6_socket_sk_dst_lock_key); /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 5c950cc79d8..bf491077b82 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -185,7 +185,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) return err; } - ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL); } skb->dst = dst_clone(dst); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3bc74ce7880..4fb47a25291 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -356,6 +356,7 @@ int ip6_forward(struct sk_buff *skb) skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); + IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -ETIMEDOUT; @@ -595,6 +596,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } err = output(skb); + if(!err) + IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); + if (err || !frag) break; @@ -706,12 +710,11 @@ slow_path: /* * Put this fragment into the sending queue. */ - - IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); - err = output(frag); if (err) goto fail; + + IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); } kfree_skb(skb); IP6_INC_STATS(IPSTATS_MIB_FRAGOKS); @@ -723,48 +726,51 @@ fail: return err; } -int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +static struct dst_entry *ip6_sk_dst_check(struct sock *sk, + struct dst_entry *dst, + struct flowi *fl) { - int err = 0; + struct ipv6_pinfo *np = inet6_sk(sk); + struct rt6_info *rt = (struct rt6_info *)dst; - *dst = NULL; - if (sk) { - struct ipv6_pinfo *np = inet6_sk(sk); - - *dst = sk_dst_check(sk, np->dst_cookie); - if (*dst) { - struct rt6_info *rt = (struct rt6_info*)*dst; - - /* Yes, checking route validity in not connected - * case is not very simple. Take into account, - * that we do not support routing by source, TOS, - * and MSG_DONTROUTE --ANK (980726) - * - * 1. If route was host route, check that - * cached destination is current. - * If it is network route, we still may - * check its validity using saved pointer - * to the last used address: daddr_cache. - * We do not want to save whole address now, - * (because main consumer of this service - * is tcp, which has not this problem), - * so that the last trick works only on connected - * sockets. - * 2. oif also should be the same. - */ - if (((rt->rt6i_dst.plen != 128 || - !ipv6_addr_equal(&fl->fl6_dst, - &rt->rt6i_dst.addr)) - && (np->daddr_cache == NULL || - !ipv6_addr_equal(&fl->fl6_dst, - np->daddr_cache))) - || (fl->oif && fl->oif != (*dst)->dev->ifindex)) { - dst_release(*dst); - *dst = NULL; - } - } + if (!dst) + goto out; + + /* Yes, checking route validity in not connected + * case is not very simple. Take into account, + * that we do not support routing by source, TOS, + * and MSG_DONTROUTE --ANK (980726) + * + * 1. If route was host route, check that + * cached destination is current. + * If it is network route, we still may + * check its validity using saved pointer + * to the last used address: daddr_cache. + * We do not want to save whole address now, + * (because main consumer of this service + * is tcp, which has not this problem), + * so that the last trick works only on connected + * sockets. + * 2. oif also should be the same. + */ + if (((rt->rt6i_dst.plen != 128 || + !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr)) + && (np->daddr_cache == NULL || + !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache))) + || (fl->oif && fl->oif != dst->dev->ifindex)) { + dst_release(dst); + dst = NULL; } +out: + return dst; +} + +static int ip6_dst_lookup_tail(struct sock *sk, + struct dst_entry **dst, struct flowi *fl) +{ + int err; + if (*dst == NULL) *dst = ip6_route_output(sk, fl); @@ -773,7 +779,6 @@ int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) if (ipv6_addr_any(&fl->fl6_src)) { err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src); - if (err) goto out_err_release; } @@ -786,8 +791,48 @@ out_err_release: return err; } +/** + * ip6_dst_lookup - perform route lookup on flow + * @sk: socket which provides route info + * @dst: pointer to dst_entry * for result + * @fl: flow to lookup + * + * This function performs a route lookup on the given flow. + * + * It returns zero on success, or a standard errno code on error. + */ +int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +{ + *dst = NULL; + return ip6_dst_lookup_tail(sk, dst, fl); +} EXPORT_SYMBOL_GPL(ip6_dst_lookup); +/** + * ip6_sk_dst_lookup - perform socket cached route lookup on flow + * @sk: socket which provides the dst cache and route info + * @dst: pointer to dst_entry * for result + * @fl: flow to lookup + * + * This function performs a route lookup on the given flow with the + * possibility of using the cached route in the socket if it is valid. + * It will take the socket dst lock when operating on the dst cache. + * As a result, this function can only be used in process context. + * + * It returns zero on success, or a standard errno code on error. + */ +int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +{ + *dst = NULL; + if (sk) { + *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); + *dst = ip6_sk_dst_check(sk, *dst, fl); + } + + return ip6_dst_lookup_tail(sk, dst, fl); +} +EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup); + static inline int ip6_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), @@ -1050,7 +1095,7 @@ alloc_new_skb: skb_prev->csum = csum_sub(skb_prev->csum, skb->csum); data += fraggap; - skb_trim(skb_prev, maxfraglen); + pskb_trim_unique(skb_prev, maxfraglen); } copy = datalen - transhdrlen - fraggap; if (copy < 0) { diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9d697d4dcff..639eb20c9f1 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -268,13 +268,14 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr) if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) { struct inet6_dev *idev = in6_dev_get(dev); + (void) ip6_mc_leave_src(sk, mc_lst, idev); if (idev) { - (void) ip6_mc_leave_src(sk,mc_lst,idev); __ipv6_dev_mc_dec(idev, &mc_lst->addr); in6_dev_put(idev); } dev_put(dev); - } + } else + (void) ip6_mc_leave_src(sk, mc_lst, NULL); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return 0; } @@ -334,13 +335,14 @@ void ipv6_sock_mc_close(struct sock *sk) if (dev) { struct inet6_dev *idev = in6_dev_get(dev); + (void) ip6_mc_leave_src(sk, mc_lst, idev); if (idev) { - (void) ip6_mc_leave_src(sk, mc_lst, idev); __ipv6_dev_mc_dec(idev, &mc_lst->addr); in6_dev_put(idev); } dev_put(dev); - } + } else + (void) ip6_mc_leave_src(sk, mc_lst, NULL); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index f26898b0034..c9d6b23cd3f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1398,23 +1398,39 @@ static int __init ip6_tables_init(void) { int ret; - xt_proto_init(AF_INET6); + ret = xt_proto_init(AF_INET6); + if (ret < 0) + goto err1; /* Noone else will be downing sem now, so we won't sleep */ - xt_register_target(&ip6t_standard_target); - xt_register_target(&ip6t_error_target); - xt_register_match(&icmp6_matchstruct); + ret = xt_register_target(&ip6t_standard_target); + if (ret < 0) + goto err2; + ret = xt_register_target(&ip6t_error_target); + if (ret < 0) + goto err3; + ret = xt_register_match(&icmp6_matchstruct); + if (ret < 0) + goto err4; /* Register setsockopt */ ret = nf_register_sockopt(&ip6t_sockopts); - if (ret < 0) { - duprintf("Unable to register sockopts.\n"); - xt_proto_fini(AF_INET6); - return ret; - } + if (ret < 0) + goto err5; printk("ip6_tables: (C) 2000-2006 Netfilter Core Team\n"); return 0; + +err5: + xt_unregister_match(&icmp6_matchstruct); +err4: + xt_unregister_target(&ip6t_error_target); +err3: + xt_unregister_target(&ip6t_standard_target); +err2: + xt_proto_fini(AF_INET6); +err1: + return ret; } static void __exit ip6_tables_fini(void) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 87c39c978cd..4b163711f3a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -53,6 +53,7 @@ #include <linux/rtnetlink.h> #include <net/dst.h> #include <net/xfrm.h> +#include <net/netevent.h> #include <asm/uaccess.h> @@ -742,6 +743,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; } dst->metrics[RTAX_MTU-1] = mtu; + call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } @@ -1155,6 +1157,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, struct rt6_info *rt, *nrt = NULL; int strict; struct fib6_node *fn; + struct netevent_redirect netevent; /* * Get the "current" route for this destination and @@ -1252,6 +1255,10 @@ restart: if (ip6_ins_rt(nrt, NULL, NULL, NULL)) goto out; + netevent.old = &rt->u.dst; + netevent.new = &nrt->u.dst; + call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); + if (rt->rt6i_flags&RTF_CACHE) { ip6_del_rt(rt, NULL, NULL, NULL); return; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 923989d0520..802a1a6b103 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -270,7 +270,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->rcv_saddr = LOOPBACK4_IPV6; sk->sk_gso_type = SKB_GSO_TCPV6; - ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt) @@ -427,7 +427,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, case TCP_SYN_RECV: /* Cannot happen. It can, it SYNs are crossed. --ANK */ if (!sock_owned_by_user(sk)) { - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); sk->sk_err = err; sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ @@ -831,7 +830,6 @@ drop: if (req) reqsk_free(req); - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); return 0; /* don't send reset */ } @@ -946,8 +944,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, * comment in that function for the gory details. -acme */ - sk->sk_gso_type = SKB_GSO_TCPV6; - ip6_dst_store(newsk, dst, NULL); + newsk->sk_gso_type = SKB_GSO_TCPV6; + __ip6_dst_store(newsk, dst, NULL); newtcp6sk = (struct tcp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ccc57f434cd..3d54f246411 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -782,7 +782,7 @@ do_udp_sendmsg: connected = 0; } - err = ip6_dst_lookup(sk, &dst, fl); + err = ip6_sk_dst_lookup(sk, &dst, fl); if (err) goto out; if (final_p) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 0eea60ea9eb..c8c8b44a0f5 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -125,7 +125,7 @@ static int xfrm6_output_finish(struct sk_buff *skb) if (!skb_is_gso(skb)) return xfrm6_output_finish2(skb); - skb->protocol = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IPV6); segs = skb_gso_segment(skb, 0); kfree_skb(skb); if (unlikely(IS_ERR(segs))) diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index aa34ff4b707..bef3f61569f 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1642,13 +1642,17 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) goto out; - ipx = ipx_hdr(skb); - ipx_pktsize = ntohs(ipx->ipx_pktsize); + if (!pskb_may_pull(skb, sizeof(struct ipxhdr))) + goto drop; + + ipx_pktsize = ntohs(ipx_hdr(skb)->ipx_pktsize); /* Too small or invalid header? */ - if (ipx_pktsize < sizeof(struct ipxhdr) || ipx_pktsize > skb->len) + if (ipx_pktsize < sizeof(struct ipxhdr) || + !pskb_may_pull(skb, ipx_pktsize)) goto drop; + ipx = ipx_hdr(skb); if (ipx->ipx_checksum != IPX_NO_CHECKSUM && ipx->ipx_checksum != ipx_cksum(ipx, ipx_pktsize)) goto drop; diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index d504eed416f..7e6bc41eeb2 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -238,11 +238,13 @@ int lapb_setparms(struct net_device *dev, struct lapb_parms_struct *parms) goto out_put; if (lapb->state == LAPB_STATE_0) { - if (((parms->mode & LAPB_EXTENDED) && - (parms->window < 1 || parms->window > 127)) || - (parms->window < 1 || parms->window > 7)) - goto out_put; - + if (parms->mode & LAPB_EXTENDED) { + if (parms->window < 1 || parms->window > 127) + goto out_put; + } else { + if (parms->window < 1 || parms->window > 7) + goto out_put; + } lapb->mode = parms->mode; lapb->window = parms->window; } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index d6cfe84d521..2652ead96c6 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -784,24 +784,20 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, copied += used; len -= used; - if (used + offset < skb->len) - continue; - if (!(flags & MSG_PEEK)) { sk_eat_skb(sk, skb, 0); *seq = 0; } + + /* For non stream protcols we get one packet per recvmsg call */ + if (sk->sk_type != SOCK_STREAM) + goto copy_uaddr; + + /* Partial read */ + if (used + offset < skb->len) + continue; } while (len > 0); - /* - * According to UNIX98, msg_name/msg_namelen are ignored - * on connected socket. -ANK - * But... af_llc still doesn't have separate sets of methods for - * SOCK_DGRAM and SOCK_STREAM :-( So we have to do this test, will - * eventually fix this tho :-) -acme - */ - if (sk->sk_type == SOCK_DGRAM) - goto copy_uaddr; out: release_sock(sk); return copied; diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 20c4eb5c1ac..61cb8cf7d15 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -51,10 +51,10 @@ void llc_save_primitive(struct sock *sk, struct sk_buff* skb, u8 prim) { struct sockaddr_llc *addr; - if (skb->sk->sk_type == SOCK_STREAM) /* See UNIX98 */ - return; /* save primitive for use by the user. */ addr = llc_ui_skb_cb(skb); + + memset(addr, 0, sizeof(*addr)); addr->sllc_family = sk->sk_family; addr->sllc_arphrd = skb->dev->type; addr->sllc_test = prim == LLC_TEST_PRIM; @@ -330,6 +330,9 @@ static void llc_sap_mcast(struct llc_sap *sap, if (llc->laddr.lsap != laddr->lsap) continue; + if (llc->dev != skb->dev) + continue; + skb1 = skb_clone(skb, GFP_ATOMIC); if (!skb1) break; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index af4845971f7..6527d4e048d 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -429,9 +429,9 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0], *id); read_lock_bh(&nf_conntrack_lock); + last = (struct nf_conn *)cb->args[1]; for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { restart: - last = (struct nf_conn *)cb->args[1]; list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) { h = (struct nf_conntrack_tuple_hash *) i; if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) @@ -442,13 +442,10 @@ restart: * then dump everything. */ if (l3proto && L3PROTO(ct) != l3proto) continue; - if (last != NULL) { - if (ct == last) { - nf_ct_put(last); - cb->args[1] = 0; - last = NULL; - } else + if (cb->args[1]) { + if (ct != last) continue; + cb->args[1] = 0; } if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, @@ -459,17 +456,17 @@ restart: goto out; } } - if (last != NULL) { - nf_ct_put(last); + if (cb->args[1]) { cb->args[1] = 0; goto restart; } } out: read_unlock_bh(&nf_conntrack_lock); + if (last) + nf_ct_put(last); DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); - return skb->len; } diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 61cdda4e5d3..b59d3b2bde2 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -366,6 +366,9 @@ __nfulnl_send(struct nfulnl_instance *inst) if (timer_pending(&inst->timer)) del_timer(&inst->timer); + if (!inst->skb) + return 0; + if (inst->qlen > 1) inst->lastnlh->nlmsg_type = NLMSG_DONE; diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index c2ce9c4011c..de9537ad9a7 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -57,6 +57,8 @@ static int checkentry_selinux(struct xt_secmark_target_info *info) { int err; struct xt_secmark_target_selinux_info *sel = &info->u.sel; + + sel->selctx[SECMARK_SELCTX_MAX - 1] = '\0'; err = selinux_string_to_sid(sel->selctx, &sel->selsid); if (err) { diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index a9f4f6f3c62..63a96546746 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/skbuff.h> +#include <linux/netfilter_bridge.h> #include <linux/netfilter/xt_physdev.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge.h> diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 0ebb6ac2c8c..275330fcdaa 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -37,7 +37,7 @@ static int match(const struct sk_buff *skb, return (skb_find_text((struct sk_buff *)skb, conf->from_offset, conf->to_offset, conf->config, &state) - != UINT_MAX) && !conf->invert; + != UINT_MAX) ^ conf->invert; } #define STRING_TEXT_PRIV(m) ((struct xt_string_info *) m) @@ -55,7 +55,10 @@ static int checkentry(const char *tablename, /* Damn, can't handle this case properly with iptables... */ if (conf->from_offset > conf->to_offset) return 0; - + if (conf->algo[XT_STRING_MAX_ALGO_NAME_SIZE - 1] != '\0') + return 0; + if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE) + return 0; ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen, GFP_KERNEL, TS_AUTOLOAD); if (IS_ERR(ts_conf)) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index eea36696674..0a6cfa0005b 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -796,7 +796,7 @@ static int __init init_u32(void) { printk("u32 classifier\n"); #ifdef CONFIG_CLS_U32_PERF - printk(" Perfomance counters on\n"); + printk(" Performance counters on\n"); #endif #ifdef CONFIG_NET_CLS_POLICE printk(" OLD policer on \n"); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c7844bacbbc..a19eff12cf7 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -430,7 +430,7 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) } #endif - err = -EINVAL; + err = -ENOENT; if (ops == NULL) goto err_out; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 4f11f585820..17b509282cf 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -806,38 +806,26 @@ no_mem: /* Helper to create ABORT with a SCTP_ERROR_USER_ABORT error. */ struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc, - const struct sctp_chunk *chunk, - const struct msghdr *msg) + const struct msghdr *msg, + size_t paylen) { struct sctp_chunk *retval; - void *payload = NULL, *payoff; - size_t paylen = 0; - struct iovec *iov = NULL; - int iovlen = 0; - - if (msg) { - iov = msg->msg_iov; - iovlen = msg->msg_iovlen; - paylen = get_user_iov_size(iov, iovlen); - } + void *payload = NULL; + int err; - retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen); + retval = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t) + paylen); if (!retval) goto err_chunk; if (paylen) { /* Put the msg_iov together into payload. */ - payload = kmalloc(paylen, GFP_ATOMIC); + payload = kmalloc(paylen, GFP_KERNEL); if (!payload) goto err_payload; - payoff = payload; - for (; iovlen > 0; --iovlen) { - if (copy_from_user(payoff, iov->iov_base,iov->iov_len)) - goto err_copy; - payoff += iov->iov_len; - iov++; - } + err = memcpy_fromiovec(payload, msg->msg_iov, paylen); + if (err < 0) + goto err_copy; } sctp_init_cause(retval, SCTP_ERROR_USER_ABORT, payload, paylen); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index ead3f1b0ea3..5b5ae795832 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4031,18 +4031,12 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( * from its upper layer, but retransmits data to the far end * if necessary to fill gaps. */ - struct msghdr *msg = arg; - struct sctp_chunk *abort; + struct sctp_chunk *abort = arg; sctp_disposition_t retval; retval = SCTP_DISPOSITION_CONSUME; - /* Generate ABORT chunk to send the peer. */ - abort = sctp_make_abort_user(asoc, NULL, msg); - if (!abort) - retval = SCTP_DISPOSITION_NOMEM; - else - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); /* Even if we can't send the ABORT due to low memory delete the * TCB. This is a departure from our typical NOMEM handling. @@ -4166,8 +4160,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( void *arg, sctp_cmd_seq_t *commands) { - struct msghdr *msg = arg; - struct sctp_chunk *abort; + struct sctp_chunk *abort = arg; sctp_disposition_t retval; /* Stop T1-init timer */ @@ -4175,12 +4168,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); retval = SCTP_DISPOSITION_CONSUME; - /* Generate ABORT chunk to send the peer */ - abort = sctp_make_abort_user(asoc, NULL, msg); - if (!abort) - retval = SCTP_DISPOSITION_NOMEM; - else - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 54722e622e6..fde3f55bfd4 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1520,8 +1520,16 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, goto out_unlock; } if (sinfo_flags & SCTP_ABORT) { + struct sctp_chunk *chunk; + + chunk = sctp_make_abort_user(asoc, msg, msg_len); + if (!chunk) { + err = -ENOMEM; + goto out_unlock; + } + SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc); - sctp_primitive_ABORT(asoc, msg); + sctp_primitive_ABORT(asoc, chunk); err = 0; goto out_unlock; } diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 4a9aa9393b9..ef1cf5b476c 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -718,8 +718,7 @@ gss_destroy(struct rpc_auth *auth) auth, auth->au_flavor); gss_auth = container_of(auth, struct gss_auth, rpc_auth); - rpc_unlink(gss_auth->path); - dput(gss_auth->dentry); + rpc_unlink(gss_auth->dentry); gss_auth->dentry = NULL; gss_mech_put(gss_auth->mech); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 7026b0866b7..00cb388ece0 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -71,7 +71,12 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, new = detail->alloc(); if (!new) return NULL; + /* must fully initialise 'new', else + * we might get lose if we need to + * cache_put it soon. + */ cache_init(new); + detail->init(new, key); write_lock(&detail->hash_lock); @@ -85,7 +90,6 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, return tmp; } } - detail->init(new, key); new->next = *head; *head = new; detail->entries++; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4ba271f892c..3e19d321067 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -183,8 +183,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, out_no_auth: if (!IS_ERR(clnt->cl_dentry)) { - rpc_rmdir(clnt->cl_pathname); - dput(clnt->cl_dentry); + rpc_rmdir(clnt->cl_dentry); rpc_put_mount(); } out_no_path: @@ -251,10 +250,8 @@ rpc_clone_client(struct rpc_clnt *clnt) new->cl_autobind = 0; new->cl_oneshot = 0; new->cl_dead = 0; - if (!IS_ERR(new->cl_dentry)) { + if (!IS_ERR(new->cl_dentry)) dget(new->cl_dentry); - rpc_get_mount(); - } rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); @@ -317,11 +314,15 @@ rpc_destroy_client(struct rpc_clnt *clnt) clnt->cl_auth = NULL; } if (clnt->cl_parent != clnt) { + if (!IS_ERR(clnt->cl_dentry)) + dput(clnt->cl_dentry); rpc_destroy_client(clnt->cl_parent); goto out_free; } - if (clnt->cl_pathname[0]) - rpc_rmdir(clnt->cl_pathname); + if (!IS_ERR(clnt->cl_dentry)) { + rpc_rmdir(clnt->cl_dentry); + rpc_put_mount(); + } if (clnt->cl_xprt) { xprt_destroy(clnt->cl_xprt); clnt->cl_xprt = NULL; @@ -331,10 +332,6 @@ rpc_destroy_client(struct rpc_clnt *clnt) out_free: rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; - if (!IS_ERR(clnt->cl_dentry)) { - dput(clnt->cl_dentry); - rpc_put_mount(); - } kfree(clnt); return 0; } @@ -921,26 +918,43 @@ call_transmit(struct rpc_task *task) task->tk_status = xprt_prepare_transmit(task); if (task->tk_status != 0) return; + task->tk_action = call_transmit_status; /* Encode here so that rpcsec_gss can use correct sequence number. */ if (rpc_task_need_encode(task)) { - task->tk_rqstp->rq_bytes_sent = 0; + BUG_ON(task->tk_rqstp->rq_bytes_sent != 0); call_encode(task); /* Did the encode result in an error condition? */ if (task->tk_status != 0) - goto out_nosend; + return; } - task->tk_action = call_transmit_status; xprt_transmit(task); if (task->tk_status < 0) return; - if (!task->tk_msg.rpc_proc->p_decode) { - task->tk_action = rpc_exit_task; - rpc_wake_up_task(task); - } - return; -out_nosend: - /* release socket write lock before attempting to handle error */ - xprt_abort_transmit(task); + /* + * On success, ensure that we call xprt_end_transmit() before sleeping + * in order to allow access to the socket to other RPC requests. + */ + call_transmit_status(task); + if (task->tk_msg.rpc_proc->p_decode != NULL) + return; + task->tk_action = rpc_exit_task; + rpc_wake_up_task(task); +} + +/* + * 5a. Handle cleanup after a transmission + */ +static void +call_transmit_status(struct rpc_task *task) +{ + task->tk_action = call_status; + /* + * Special case: if we've been waiting on the socket's write_space() + * callback, then don't call xprt_end_transmit(). + */ + if (task->tk_status == -EAGAIN) + return; + xprt_end_transmit(task); rpc_task_force_reencode(task); } @@ -992,18 +1006,7 @@ call_status(struct rpc_task *task) } /* - * 6a. Handle transmission errors. - */ -static void -call_transmit_status(struct rpc_task *task) -{ - if (task->tk_status != -EAGAIN) - rpc_task_force_reencode(task); - call_status(task); -} - -/* - * 6b. Handle RPC timeout + * 6a. Handle RPC timeout * We do not release the request slot, so we keep using the * same XID for all retransmits. */ @@ -1178,6 +1181,17 @@ call_verify(struct rpc_task *task) u32 *p = iov->iov_base, n; int error = -EACCES; + if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) { + /* RFC-1014 says that the representation of XDR data must be a + * multiple of four bytes + * - if it isn't pointer subtraction in the NFS client may give + * undefined results + */ + printk(KERN_WARNING + "call_verify: XDR representation not a multiple of" + " 4 bytes: 0x%x\n", task->tk_rqstp->rq_rcv_buf.len); + goto out_eio; + } if ((len -= 3) < 0) goto out_overflow; p += 1; /* skip XID */ diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index dc6cb93c883..0b1a1ac8a4b 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -539,6 +539,7 @@ repeat: rpc_close_pipes(dentry->d_inode); simple_unlink(dir, dentry); } + inode_dir_notify(dir, DN_DELETE); dput(dentry); } while (n); goto repeat; @@ -610,8 +611,8 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry) int error; shrink_dcache_parent(dentry); - if (dentry->d_inode) - rpc_close_pipes(dentry->d_inode); + if (d_unhashed(dentry)) + return 0; if ((error = simple_rmdir(dir, dentry)) != 0) return error; if (!error) { @@ -667,10 +668,11 @@ rpc_mkdir(char *path, struct rpc_clnt *rpc_client) RPCAUTH_info, RPCAUTH_EOF); if (error) goto err_depopulate; + dget(dentry); out: mutex_unlock(&dir->i_mutex); rpc_release_path(&nd); - return dget(dentry); + return dentry; err_depopulate: rpc_depopulate(dentry); __rpc_rmdir(dir, dentry); @@ -683,28 +685,20 @@ err_dput: } int -rpc_rmdir(char *path) +rpc_rmdir(struct dentry *dentry) { - struct nameidata nd; - struct dentry *dentry; + struct dentry *parent; struct inode *dir; int error; - if ((error = rpc_lookup_parent(path, &nd)) != 0) - return error; - dir = nd.dentry->d_inode; + parent = dget_parent(dentry); + dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len); - if (IS_ERR(dentry)) { - error = PTR_ERR(dentry); - goto out_release; - } rpc_depopulate(dentry); error = __rpc_rmdir(dir, dentry); dput(dentry); -out_release: mutex_unlock(&dir->i_mutex); - rpc_release_path(&nd); + dput(parent); return error; } @@ -731,10 +725,11 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) rpci->flags = flags; rpci->ops = ops; inode_dir_notify(dir, DN_CREATE); + dget(dentry); out: mutex_unlock(&dir->i_mutex); rpc_release_path(&nd); - return dget(dentry); + return dentry; err_dput: dput(dentry); dentry = ERR_PTR(-ENOMEM); @@ -744,32 +739,26 @@ err_dput: } int -rpc_unlink(char *path) +rpc_unlink(struct dentry *dentry) { - struct nameidata nd; - struct dentry *dentry; + struct dentry *parent; struct inode *dir; - int error; + int error = 0; - if ((error = rpc_lookup_parent(path, &nd)) != 0) - return error; - dir = nd.dentry->d_inode; + parent = dget_parent(dentry); + dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len); - if (IS_ERR(dentry)) { - error = PTR_ERR(dentry); - goto out_release; - } - d_drop(dentry); - if (dentry->d_inode) { - rpc_close_pipes(dentry->d_inode); - error = simple_unlink(dir, dentry); + if (!d_unhashed(dentry)) { + d_drop(dentry); + if (dentry->d_inode) { + rpc_close_pipes(dentry->d_inode); + error = simple_unlink(dir, dentry); + } + inode_dir_notify(dir, DN_DELETE); } dput(dentry); - inode_dir_notify(dir, DN_DELETE); -out_release: mutex_unlock(&dir->i_mutex); - rpc_release_path(&nd); + dput(parent); return error; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 313b68d892c..e8c2bc4977f 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -707,12 +707,9 @@ out_unlock: return err; } -void -xprt_abort_transmit(struct rpc_task *task) +void xprt_end_transmit(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; - - xprt_release_write(xprt, task); + xprt_release_write(task->tk_xprt, task); } /** @@ -761,8 +758,6 @@ void xprt_transmit(struct rpc_task *task) task->tk_status = -ENOTCONN; else if (!req->rq_received) rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); - - xprt->ops->release_xprt(xprt, task); spin_unlock_bh(&xprt->transport_lock); return; } @@ -772,18 +767,8 @@ void xprt_transmit(struct rpc_task *task) * schedq, and being picked up by a parallel run of rpciod(). */ task->tk_status = status; - - switch (status) { - case -ECONNREFUSED: + if (status == -ECONNREFUSED) rpc_sleep_on(&xprt->sending, task, NULL, NULL); - case -EAGAIN: - case -ENOTCONN: - return; - default: - break; - } - xprt_release_write(xprt, task); - return; } static inline void do_xprt_reserve(struct rpc_task *task) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ee678ed13b6..441bd53f5ec 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -414,6 +414,33 @@ static int xs_tcp_send_request(struct rpc_task *task) } /** + * xs_tcp_release_xprt - clean up after a tcp transmission + * @xprt: transport + * @task: rpc task + * + * This cleans up if an error causes us to abort the transmission of a request. + * In this case, the socket may need to be reset in order to avoid confusing + * the server. + */ +static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) +{ + struct rpc_rqst *req; + + if (task != xprt->snd_task) + return; + if (task == NULL) + goto out_release; + req = task->tk_rqstp; + if (req->rq_bytes_sent == 0) + goto out_release; + if (req->rq_bytes_sent == req->rq_snd_buf.len) + goto out_release; + set_bit(XPRT_CLOSE_WAIT, &task->tk_xprt->state); +out_release: + xprt_release_xprt(xprt, task); +} + +/** * xs_close - close a socket * @xprt: transport * @@ -1250,7 +1277,7 @@ static struct rpc_xprt_ops xs_udp_ops = { static struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, - .release_xprt = xprt_release_xprt, + .release_xprt = xs_tcp_release_xprt, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 6f290927926..de6ec519272 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -128,23 +128,17 @@ static atomic_t unix_nr_socks = ATOMIC_INIT(0); #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) #ifdef CONFIG_SECURITY_NETWORK -static void unix_get_peersec_dgram(struct sk_buff *skb) +static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) { - int err; - - err = security_socket_getpeersec_dgram(skb, UNIXSECDATA(skb), - UNIXSECLEN(skb)); - if (err) - *(UNIXSECDATA(skb)) = NULL; + memcpy(UNIXSID(skb), &scm->secid, sizeof(u32)); } static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) { - scm->secdata = *UNIXSECDATA(skb); - scm->seclen = *UNIXSECLEN(skb); + scm->secid = *UNIXSID(skb); } #else -static inline void unix_get_peersec_dgram(struct sk_buff *skb) +static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) { } static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) @@ -1322,8 +1316,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); if (siocb->scm->fp) unix_attach_fds(siocb->scm, skb); - - unix_get_peersec_dgram(skb); + unix_get_secdata(siocb->scm, skb); skb->h.raw = skb->data; err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f35bc676128..3da67ca2c3c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1134,12 +1134,33 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) } EXPORT_SYMBOL(__xfrm_route_forward); +/* Optimize later using cookies and generation ids. */ + static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) { - /* If it is marked obsolete, which is how we even get here, - * then we have purged it from the policy bundle list and we - * did that for a good reason. + /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete + * to "-1" to force all XFRM destinations to get validated by + * dst_ops->check on every use. We do this because when a + * normal route referenced by an XFRM dst is obsoleted we do + * not go looking around for all parent referencing XFRM dsts + * so that we can invalidate them. It is just too much work. + * Instead we make the checks here on every use. For example: + * + * XFRM dst A --> IPv4 dst X + * + * X is the "xdst->route" of A (X is also the "dst->path" of A + * in this example). If X is marked obsolete, "A" will not + * notice. That's what we are validating here via the + * stale_bundle() check. + * + * When a policy's bundle is pruned, we dst_free() the XFRM + * dst which causes it's ->obsolete field to be set to a + * positive non-zero integer. If an XFRM dst has been pruned + * like this, we want to force a new route lookup. */ + if (dst->obsolete < 0 && !stale_bundle(dst)) + return dst; + return NULL; } |