From d212f87b068c9d72065ef579d85b5ee6b8b59381 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 27 Jun 2007 00:47:37 -0700 Subject: [NET]: IPV6 checksum offloading in network devices The existing model for checksum offload does not correctly handle devices that can offload IPV4 and IPV6 only. The NETIF_F_HW_CSUM flag implies device can do any arbitrary protocol. This patch: * adds NETIF_F_IPV6_CSUM for those devices * fixes bnx2 and tg3 devices that need it * add NETIF_F_IPV6_CSUM to ipv6 output (incl GSO) * fixes assumptions about NETIF_F_ALL_CSUM in nat * adjusts bridge union of checksumming computation Signed-off-by: David S. Miller --- net/core/dev.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index ee051bb398a..a0a46e7ed13 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1509,9 +1509,11 @@ int dev_queue_xmit(struct sk_buff *skb) skb_set_transport_header(skb, skb->csum_start - skb_headroom(skb)); - if (!(dev->features & NETIF_F_GEN_CSUM) && - (!(dev->features & NETIF_F_IP_CSUM) || - skb->protocol != htons(ETH_P_IP))) + if (!(dev->features & NETIF_F_GEN_CSUM) + || ((dev->features & NETIF_F_IP_CSUM) + && skb->protocol == htons(ETH_P_IP)) + || ((dev->features & NETIF_F_IPV6_CSUM) + && skb->protocol == htons(ETH_P_IPV6))) if (skb_checksum_help(skb)) goto out_kfree_skb; } @@ -3107,6 +3109,22 @@ int register_netdevice(struct net_device *dev) } } + /* Fix illegal checksum combinations */ + if ((dev->features & NETIF_F_HW_CSUM) && + (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", + dev->name); + dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); + } + + if ((dev->features & NETIF_F_NO_CSUM) && + (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", + dev->name); + dev->features &= 
~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); + } + + /* Fix illegal SG+CSUM combinations. */ if ((dev->features & NETIF_F_SG) && !(dev->features & NETIF_F_ALL_CSUM)) { -- cgit v1.2.3 From bf742482d7a647c5c6f03f78eb35a862e159ecf5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 27 Jun 2007 01:26:19 -0700 Subject: [NET]: dev: introduce generic net_device address lists Introduce struct dev_addr_list and list maintenance functions based on dev_mc_list and the related functions. This will be used by follow-up patches for both multicast and secondary unicast addresses. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/dev.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index a0a46e7ed13..18759ccdf21 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2553,6 +2553,75 @@ void dev_set_allmulti(struct net_device *dev, int inc) dev_mc_upload(dev); } +int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen, + int glbl) +{ + struct dev_addr_list *da; + + for (; (da = *list) != NULL; list = &da->next) { + if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && + alen == da->da_addrlen) { + if (glbl) { + int old_glbl = da->da_gusers; + da->da_gusers = 0; + if (old_glbl == 0) + break; + } + if (--da->da_users) + return 0; + + *list = da->next; + kfree(da); + return 0; + } + } + return -ENOENT; +} + +int __dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int glbl) +{ + struct dev_addr_list *da; + + for (da = *list; da != NULL; da = da->next) { + if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && + da->da_addrlen == alen) { + if (glbl) { + int old_glbl = da->da_gusers; + da->da_gusers = 1; + if (old_glbl) + return 0; + } + da->da_users++; + return 0; + } + } + + da = kmalloc(sizeof(*da), GFP_ATOMIC); + if (da == NULL) + return -ENOMEM; + memcpy(da->da_addr, addr, alen); + 
da->da_addrlen = alen; + da->da_users = 1; + da->da_gusers = glbl ? 1 : 0; + da->next = *list; + *list = da; + return 0; +} + +void __dev_addr_discard(struct dev_addr_list **list) +{ + struct dev_addr_list *tmp; + + while (*list != NULL) { + tmp = *list; + *list = tmp->next; + if (tmp->da_users > tmp->da_gusers) + printk("__dev_addr_discard: address leakage! " + "da_users=%d\n", tmp->da_users); + kfree(tmp); + } +} + unsigned dev_get_flags(const struct net_device *dev) { unsigned flags; -- cgit v1.2.3 From 4417da668c0021903464f92db278ddae348e0299 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 27 Jun 2007 01:28:10 -0700 Subject: [NET]: dev: secondary unicast address support Add support for configuring secondary unicast addresses on network devices. To support this, devices capable of filtering multiple unicast addresses need to change their set_multicast_list function to configure unicast filters as well and assign it to dev->set_rx_mode instead of dev->set_multicast_list. Other devices are put into promiscuous mode when secondary unicast addresses are present. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/dev.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 127 insertions(+), 17 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 18759ccdf21..36e9bf8ec4a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -942,7 +942,7 @@ int dev_open(struct net_device *dev) /* * Initialize multicasting status */ - dev_mc_upload(dev); + dev_set_rx_mode(dev); /* * Wakeup transmit queue engine @@ -2498,17 +2498,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) return 0; } -/** - * dev_set_promiscuity - update promiscuity count on a device - * @dev: device - * @inc: modifier - * - * Add or remove promiscuity from a device. While the count in the device - * remains above zero the interface remains promiscuous. 
Once it hits zero - * the device reverts back to normal filtering operation. A negative inc - * value is used to drop promiscuity on the device. - */ -void dev_set_promiscuity(struct net_device *dev, int inc) +static void __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; @@ -2517,7 +2507,6 @@ void dev_set_promiscuity(struct net_device *dev, int inc) else dev->flags |= IFF_PROMISC; if (dev->flags != old_flags) { - dev_mc_upload(dev); printk(KERN_INFO "device %s %s promiscuous mode\n", dev->name, (dev->flags & IFF_PROMISC) ? "entered" : "left"); @@ -2530,6 +2519,25 @@ void dev_set_promiscuity(struct net_device *dev, int inc) } } +/** + * dev_set_promiscuity - update promiscuity count on a device + * @dev: device + * @inc: modifier + * + * Add or remove promiscuity from a device. While the count in the device + * remains above zero the interface remains promiscuous. Once it hits zero + * the device reverts back to normal filtering operation. A negative inc + * value is used to drop promiscuity on the device. + */ +void dev_set_promiscuity(struct net_device *dev, int inc) +{ + unsigned short old_flags = dev->flags; + + __dev_set_promiscuity(dev, inc); + if (dev->flags != old_flags) + dev_set_rx_mode(dev); +} + /** * dev_set_allmulti - update allmulti count on a device * @dev: device @@ -2550,7 +2558,48 @@ void dev_set_allmulti(struct net_device *dev, int inc) if ((dev->allmulti += inc) == 0) dev->flags &= ~IFF_ALLMULTI; if (dev->flags ^ old_flags) - dev_mc_upload(dev); + dev_set_rx_mode(dev); +} + +/* + * Upload unicast and multicast address lists to device and + * configure RX filtering. When the device doesn't support unicast + * filtering it is put in promiscous mode while unicast addresses + * are present. + */ +void __dev_set_rx_mode(struct net_device *dev) +{ + /* dev_open will call this function so the list will stay sane. 
*/ + if (!(dev->flags&IFF_UP)) + return; + + if (!netif_device_present(dev)) + return; + + if (dev->set_rx_mode) + dev->set_rx_mode(dev); + else { + /* Unicast addresses changes may only happen under the rtnl, + * therefore calling __dev_set_promiscuity here is safe. + */ + if (dev->uc_count > 0 && !dev->uc_promisc) { + __dev_set_promiscuity(dev, 1); + dev->uc_promisc = 1; + } else if (dev->uc_count == 0 && dev->uc_promisc) { + __dev_set_promiscuity(dev, -1); + dev->uc_promisc = 0; + } + + if (dev->set_multicast_list) + dev->set_multicast_list(dev); + } +} + +void dev_set_rx_mode(struct net_device *dev) +{ + netif_tx_lock_bh(dev); + __dev_set_rx_mode(dev); + netif_tx_unlock_bh(dev); } int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen, @@ -2622,6 +2671,66 @@ void __dev_addr_discard(struct dev_addr_list **list) } } +/** + * dev_unicast_delete - Release secondary unicast address. + * @dev: device + * + * Release reference to a secondary unicast address and remove it + * from the device if the reference count drop to zero. + * + * The caller must hold the rtnl_mutex. + */ +int dev_unicast_delete(struct net_device *dev, void *addr, int alen) +{ + int err; + + ASSERT_RTNL(); + + netif_tx_lock_bh(dev); + err = __dev_addr_delete(&dev->uc_list, addr, alen, 0); + if (!err) { + dev->uc_count--; + __dev_set_rx_mode(dev); + } + netif_tx_unlock_bh(dev); + return err; +} +EXPORT_SYMBOL(dev_unicast_delete); + +/** + * dev_unicast_add - add a secondary unicast address + * @dev: device + * + * Add a secondary unicast address to the device or increase + * the reference count if it already exists. + * + * The caller must hold the rtnl_mutex. 
+ */ +int dev_unicast_add(struct net_device *dev, void *addr, int alen) +{ + int err; + + ASSERT_RTNL(); + + netif_tx_lock_bh(dev); + err = __dev_addr_add(&dev->uc_list, addr, alen, 0); + if (!err) { + dev->uc_count++; + __dev_set_rx_mode(dev); + } + netif_tx_unlock_bh(dev); + return err; +} +EXPORT_SYMBOL(dev_unicast_add); + +static void dev_unicast_discard(struct net_device *dev) +{ + netif_tx_lock_bh(dev); + __dev_addr_discard(&dev->uc_list); + dev->uc_count = 0; + netif_tx_unlock_bh(dev); +} + unsigned dev_get_flags(const struct net_device *dev) { unsigned flags; @@ -2665,7 +2774,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Load in the correct multicast list now the flags have changed. */ - dev_mc_upload(dev); + dev_set_rx_mode(dev); /* * Have we downed the interface. We handle IFF_UP ourselves @@ -2678,7 +2787,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); if (!ret) - dev_mc_upload(dev); + dev_set_rx_mode(dev); } if (dev->flags & IFF_UP && @@ -3558,8 +3667,9 @@ void unregister_netdevice(struct net_device *dev) raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); /* - * Flush the multicast chain + * Flush the unicast and multicast chains */ + dev_unicast_discard(dev); dev_mc_discard(dev); if (dev->uninit) -- cgit v1.2.3 From a298830cd026b4c0cde45ef3679a5f68a17577e6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 28 Jun 2007 13:44:37 -0700 Subject: [NET]: Fix TX checksum feature check This patch fixes a boolean error in the new TX checksum check that causes bogus TSO packets to be generated. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/core/dev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 36e9bf8ec4a..6dce9d2d46f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1509,11 +1509,11 @@ int dev_queue_xmit(struct sk_buff *skb) skb_set_transport_header(skb, skb->csum_start - skb_headroom(skb)); - if (!(dev->features & NETIF_F_GEN_CSUM) - || ((dev->features & NETIF_F_IP_CSUM) - && skb->protocol == htons(ETH_P_IP)) - || ((dev->features & NETIF_F_IPV6_CSUM) - && skb->protocol == htons(ETH_P_IPV6))) + if (!(dev->features & NETIF_F_GEN_CSUM) && + !((dev->features & NETIF_F_IP_CSUM) && + skb->protocol == htons(ETH_P_IP)) && + !((dev->features & NETIF_F_IPV6_CSUM) && + skb->protocol == htons(ETH_P_IPV6))) if (skb_checksum_help(skb)) goto out_kfree_skb; } -- cgit v1.2.3 From f25f4e44808f0f6c9875d94ef1c41ef86c288eb2 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Fri, 6 Jul 2007 13:36:20 -0700 Subject: [CORE] Stack changes to add multiqueue hardware support API Add the multiqueue hardware device support API to the core network stack. Allow drivers to allocate multiple queues and manage them at the netdev level if they choose to do so. Added a new field to sk_buff, namely queue_mapping, for drivers to know which tx_ring to select based on OS classification of the flow. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/core/dev.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 6dce9d2d46f..7ddf66d0ad5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1429,7 +1429,9 @@ gso: skb->next = nskb; return rc; } - if (unlikely(netif_queue_stopped(dev) && skb->next)) + if (unlikely((netif_queue_stopped(dev) || + netif_subqueue_stopped(dev, skb->queue_mapping)) && + skb->next)) return NETDEV_TX_BUSY; } while (skb->next); @@ -1547,6 +1549,8 @@ gso: spin_lock(&dev->queue_lock); q = dev->qdisc; if (q->enqueue) { + /* reset queue_mapping to zero */ + skb->queue_mapping = 0; rc = q->enqueue(skb, q); qdisc_run(dev); spin_unlock(&dev->queue_lock); @@ -1576,7 +1580,8 @@ gso: HARD_TX_LOCK(dev, cpu); - if (!netif_queue_stopped(dev)) { + if (!netif_queue_stopped(dev) && + !netif_subqueue_stopped(dev, skb->queue_mapping)) { rc = 0; if (!dev_hard_start_xmit(skb, dev)) { HARD_TX_UNLOCK(dev); @@ -3539,16 +3544,18 @@ static struct net_device_stats *internal_stats(struct net_device *dev) } /** - * alloc_netdev - allocate network device + * alloc_netdev_mq - allocate network device * @sizeof_priv: size of private data to allocate space for * @name: device name format string * @setup: callback to initialize device + * @queue_count: the number of subqueues to allocate * * Allocates a struct net_device with private data area for driver use - * and performs basic initialization. + * and performs basic initialization. Also allocates subquue structs + * for each queue on the device at the end of the netdevice. 
*/ -struct net_device *alloc_netdev(int sizeof_priv, const char *name, - void (*setup)(struct net_device *)) +struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, + void (*setup)(struct net_device *), unsigned int queue_count) { void *p; struct net_device *dev; @@ -3557,7 +3564,9 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, BUG_ON(strlen(name) >= sizeof(dev->name)); /* ensure 32-byte alignment of both the device and private area */ - alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; + alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST + + (sizeof(struct net_device_subqueue) * queue_count)) & + ~NETDEV_ALIGN_CONST; alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; p = kzalloc(alloc_size, GFP_KERNEL); @@ -3570,15 +3579,22 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; - if (sizeof_priv) - dev->priv = netdev_priv(dev); + if (sizeof_priv) { + dev->priv = ((char *)dev + + ((sizeof(struct net_device) + + (sizeof(struct net_device_subqueue) * + queue_count) + NETDEV_ALIGN_CONST) + & ~NETDEV_ALIGN_CONST)); + } + + dev->egress_subqueue_count = queue_count; dev->get_stats = internal_stats; setup(dev); strcpy(dev->name, name); return dev; } -EXPORT_SYMBOL(alloc_netdev); +EXPORT_SYMBOL(alloc_netdev_mq); /** * free_netdev - free network device -- cgit v1.2.3 From 61cbc2fca6335be52788773b21efdc52a2750924 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 30 Jun 2007 13:35:52 -0700 Subject: [NET]: Fix secondary unicast/multicast address count maintenance When a reference to an existing address is increased or decreased without hitting zero, the address count is incorrectly adjusted. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/core/dev.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 7ddf66d0ad5..4221dcda88d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2607,8 +2607,8 @@ void dev_set_rx_mode(struct net_device *dev) netif_tx_unlock_bh(dev); } -int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen, - int glbl) +int __dev_addr_delete(struct dev_addr_list **list, int *count, + void *addr, int alen, int glbl) { struct dev_addr_list *da; @@ -2626,13 +2626,15 @@ int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen, *list = da->next; kfree(da); + (*count)--; return 0; } } return -ENOENT; } -int __dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int glbl) +int __dev_addr_add(struct dev_addr_list **list, int *count, + void *addr, int alen, int glbl) { struct dev_addr_list *da; @@ -2659,6 +2661,7 @@ int __dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int glbl) da->da_gusers = glbl ? 1 : 0; da->next = *list; *list = da; + (*count)++; return 0; } @@ -2692,11 +2695,9 @@ int dev_unicast_delete(struct net_device *dev, void *addr, int alen) ASSERT_RTNL(); netif_tx_lock_bh(dev); - err = __dev_addr_delete(&dev->uc_list, addr, alen, 0); - if (!err) { - dev->uc_count--; + err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0); + if (!err) __dev_set_rx_mode(dev); - } netif_tx_unlock_bh(dev); return err; } @@ -2718,11 +2719,9 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen) ASSERT_RTNL(); netif_tx_lock_bh(dev); - err = __dev_addr_add(&dev->uc_list, addr, alen, 0); - if (!err) { - dev->uc_count++; + err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0); + if (!err) __dev_set_rx_mode(dev); - } netif_tx_unlock_bh(dev); return err; } -- cgit v1.2.3