aboutsummaryrefslogtreecommitdiff
path: root/net/core/dev.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-10-13 11:05:51 +0200
committerIngo Molnar <mingo@elte.hu>2008-10-13 11:05:51 +0200
commitaccba5f3965d6a9d1bf7c1e1a7995d17e9d521b6 (patch)
tree8fb40782e79472ed882ff2098d4dd295557278ee /net/core/dev.c
parent6852fd9b86d05063c6ef49d2e12e061cc7f6a105 (diff)
parent4480f15b3306f43bbb0310d461142b4e897ca45b (diff)
Merge branch 'linus' into oprofile-v2
Conflicts: arch/x86/kernel/apic_32.c arch/x86/oprofile/nmi_int.c include/linux/pci_ids.h
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c281
1 files changed, 185 insertions, 96 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 53af7841018..1408a083fe4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -122,6 +122,7 @@
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
+#include <net/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
@@ -890,7 +891,7 @@ int dev_alloc_name(struct net_device *dev, const char *name)
* Change name of a device, can pass format strings "eth%d".
* for wildcarding.
*/
-int dev_change_name(struct net_device *dev, char *newname)
+int dev_change_name(struct net_device *dev, const char *newname)
{
char oldname[IFNAMSIZ];
int err = 0;
@@ -916,7 +917,6 @@ int dev_change_name(struct net_device *dev, char *newname)
err = dev_alloc_name(dev, newname);
if (err < 0)
return err;
- strcpy(newname, dev->name);
}
else if (__dev_get_by_name(net, newname))
return -EEXIST;
@@ -954,6 +954,38 @@ rollback:
}
/**
+ * dev_set_alias - change ifalias of a device
+ * @dev: device
+ * @alias: name up to IFALIASZ
+ * @len: limit of bytes to copy from info
+ *
+ * Set ifalias for a device,
+ */
+int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
+{
+ ASSERT_RTNL();
+
+ if (len >= IFALIASZ)
+ return -EINVAL;
+
+ if (!len) {
+ if (dev->ifalias) {
+ kfree(dev->ifalias);
+ dev->ifalias = NULL;
+ }
+ return 0;
+ }
+
+ dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
+ if (!dev->ifalias)
+ return -ENOMEM;
+
+ strlcpy(dev->ifalias, alias, len+1);
+ return len;
+}
+
+
+/**
* netdev_features_change - device changes features
* @dev: device to cause notification
*
@@ -1339,19 +1371,23 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
}
-void __netif_schedule(struct Qdisc *q)
+static inline void __netif_reschedule(struct Qdisc *q)
{
- if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
- struct softnet_data *sd;
- unsigned long flags;
+ struct softnet_data *sd;
+ unsigned long flags;
- local_irq_save(flags);
- sd = &__get_cpu_var(softnet_data);
- q->next_sched = sd->output_queue;
- sd->output_queue = q;
- raise_softirq_irqoff(NET_TX_SOFTIRQ);
- local_irq_restore(flags);
- }
+ local_irq_save(flags);
+ sd = &__get_cpu_var(softnet_data);
+ q->next_sched = sd->output_queue;
+ sd->output_queue = q;
+ raise_softirq_irqoff(NET_TX_SOFTIRQ);
+ local_irq_restore(flags);
+}
+
+void __netif_schedule(struct Qdisc *q)
+{
+ if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
+ __netif_reschedule(q);
}
EXPORT_SYMBOL(__netif_schedule);
@@ -1663,7 +1699,7 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
{
u32 addr1, addr2, ports;
u32 hash, ihl;
- u8 ip_proto;
+ u8 ip_proto = 0;
if (unlikely(!simple_tx_hashrnd_initialized)) {
get_random_bytes(&simple_tx_hashrnd, 4);
@@ -1671,13 +1707,14 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
}
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP):
- ip_proto = ip_hdr(skb)->protocol;
+ case htons(ETH_P_IP):
+ if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
+ ip_proto = ip_hdr(skb)->protocol;
addr1 = ip_hdr(skb)->saddr;
addr2 = ip_hdr(skb)->daddr;
ihl = ip_hdr(skb)->ihl;
break;
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
ip_proto = ipv6_hdr(skb)->nexthdr;
addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
@@ -1796,16 +1833,19 @@ gso:
skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
if (q->enqueue) {
- spinlock_t *root_lock = qdisc_root_lock(q);
+ spinlock_t *root_lock = qdisc_lock(q);
spin_lock(root_lock);
- rc = qdisc_enqueue_root(skb, q);
- qdisc_run(q);
-
+ if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+ kfree_skb(skb);
+ rc = NET_XMIT_DROP;
+ } else {
+ rc = qdisc_enqueue_root(skb, q);
+ qdisc_run(q);
+ }
spin_unlock(root_lock);
- rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
goto out;
}
@@ -1909,7 +1949,6 @@ int netif_rx(struct sk_buff *skb)
if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
if (queue->input_pkt_queue.qlen) {
enqueue:
- dev_hold(skb->dev);
__skb_queue_tail(&queue->input_pkt_queue, skb);
local_irq_restore(flags);
return NET_RX_SUCCESS;
@@ -1941,22 +1980,6 @@ int netif_rx_ni(struct sk_buff *skb)
EXPORT_SYMBOL(netif_rx_ni);
-static inline struct net_device *skb_bond(struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
-
- if (dev->master) {
- if (skb_bond_should_drop(skb)) {
- kfree_skb(skb);
- return NULL;
- }
- skb->dev = dev->master;
- }
-
- return dev;
-}
-
-
static void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = &__get_cpu_var(softnet_data);
@@ -1973,7 +1996,7 @@ static void net_tx_action(struct softirq_action *h)
struct sk_buff *skb = clist;
clist = clist->next;
- BUG_TRAP(!atomic_read(&skb->users));
+ WARN_ON(atomic_read(&skb->users));
__kfree_skb(skb);
}
}
@@ -1992,15 +2015,22 @@ static void net_tx_action(struct softirq_action *h)
head = head->next_sched;
- smp_mb__before_clear_bit();
- clear_bit(__QDISC_STATE_SCHED, &q->state);
-
- root_lock = qdisc_root_lock(q);
+ root_lock = qdisc_lock(q);
if (spin_trylock(root_lock)) {
+ smp_mb__before_clear_bit();
+ clear_bit(__QDISC_STATE_SCHED,
+ &q->state);
qdisc_run(q);
spin_unlock(root_lock);
} else {
- __netif_schedule(q);
+ if (!test_bit(__QDISC_STATE_DEACTIVATED,
+ &q->state)) {
+ __netif_reschedule(q);
+ } else {
+ smp_mb__before_clear_bit();
+ clear_bit(__QDISC_STATE_SCHED,
+ &q->state);
+ }
}
}
}
@@ -2100,9 +2130,10 @@ static int ing_filter(struct sk_buff *skb)
rxq = &dev->rx_queue;
q = rxq->qdisc;
- if (q) {
+ if (q != &noop_qdisc) {
spin_lock(qdisc_lock(q));
- result = qdisc_enqueue_root(skb, q);
+ if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
+ result = qdisc_enqueue_root(skb, q);
spin_unlock(qdisc_lock(q));
}
@@ -2113,7 +2144,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
struct packet_type **pt_prev,
int *ret, struct net_device *orig_dev)
{
- if (!skb->dev->rx_queue.qdisc)
+ if (skb->dev->rx_queue.qdisc == &noop_qdisc)
goto out;
if (*pt_prev) {
@@ -2183,6 +2214,7 @@ int netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
+ struct net_device *null_or_orig;
int ret = NET_RX_DROP;
__be16 type;
@@ -2196,10 +2228,14 @@ int netif_receive_skb(struct sk_buff *skb)
if (!skb->iif)
skb->iif = skb->dev->ifindex;
- orig_dev = skb_bond(skb);
-
- if (!orig_dev)
- return NET_RX_DROP;
+ null_or_orig = NULL;
+ orig_dev = skb->dev;
+ if (orig_dev->master) {
+ if (skb_bond_should_drop(skb))
+ null_or_orig = orig_dev; /* deliver only exact match */
+ else
+ skb->dev = orig_dev->master;
+ }
__get_cpu_var(netdev_rx_stat).total++;
@@ -2223,7 +2259,8 @@ int netif_receive_skb(struct sk_buff *skb)
#endif
list_for_each_entry_rcu(ptype, &ptype_all, list) {
- if (!ptype->dev || ptype->dev == skb->dev) {
+ if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
+ ptype->dev == orig_dev) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
@@ -2248,7 +2285,8 @@ ncls:
list_for_each_entry_rcu(ptype,
&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
if (ptype->type == type &&
- (!ptype->dev || ptype->dev == skb->dev)) {
+ (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
+ ptype->dev == orig_dev)) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
@@ -2270,6 +2308,20 @@ out:
return ret;
}
+/* Network device is going away, flush any packets still pending */
+static void flush_backlog(void *arg)
+{
+ struct net_device *dev = arg;
+ struct softnet_data *queue = &__get_cpu_var(softnet_data);
+ struct sk_buff *skb, *tmp;
+
+ skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
+ if (skb->dev == dev) {
+ __skb_unlink(skb, &queue->input_pkt_queue);
+ kfree_skb(skb);
+ }
+}
+
static int process_backlog(struct napi_struct *napi, int quota)
{
int work = 0;
@@ -2279,7 +2331,6 @@ static int process_backlog(struct napi_struct *napi, int quota)
napi->weight = weight_p;
do {
struct sk_buff *skb;
- struct net_device *dev;
local_irq_disable();
skb = __skb_dequeue(&queue->input_pkt_queue);
@@ -2288,14 +2339,9 @@ static int process_backlog(struct napi_struct *napi, int quota)
local_irq_enable();
break;
}
-
local_irq_enable();
- dev = skb->dev;
-
netif_receive_skb(skb);
-
- dev_put(dev);
} while (++work < quota && jiffies == start_time);
return work;
@@ -2903,6 +2949,12 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
return 0;
}
+static void dev_change_rx_flags(struct net_device *dev, int flags)
+{
+ if (dev->flags & IFF_UP && dev->change_rx_flags)
+ dev->change_rx_flags(dev, flags);
+}
+
static int __dev_set_promiscuity(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
@@ -2940,8 +2992,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
current->uid, current->gid,
audit_get_sessionid(current));
- if (dev->change_rx_flags)
- dev->change_rx_flags(dev, IFF_PROMISC);
+ dev_change_rx_flags(dev, IFF_PROMISC);
}
return 0;
}
@@ -3007,8 +3058,7 @@ int dev_set_allmulti(struct net_device *dev, int inc)
}
}
if (dev->flags ^ old_flags) {
- if (dev->change_rx_flags)
- dev->change_rx_flags(dev, IFF_ALLMULTI);
+ dev_change_rx_flags(dev, IFF_ALLMULTI);
dev_set_rx_mode(dev);
}
return 0;
@@ -3287,6 +3337,12 @@ static void dev_addr_discard(struct net_device *dev)
netif_addr_unlock_bh(dev);
}
+/**
+ * dev_get_flags - get flags reported to userspace
+ * @dev: device
+ *
+ * Get the combination of flag bits exported through APIs to userspace.
+ */
unsigned dev_get_flags(const struct net_device *dev)
{
unsigned flags;
@@ -3311,6 +3367,14 @@ unsigned dev_get_flags(const struct net_device *dev)
return flags;
}
+/**
+ * dev_change_flags - change device settings
+ * @dev: device
+ * @flags: device state flags
+ *
+ * Change settings on device based state flags. The flags are
+ * in the userspace exported format.
+ */
int dev_change_flags(struct net_device *dev, unsigned flags)
{
int ret, changes;
@@ -3332,8 +3396,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
* Load in the correct multicast list now the flags have changed.
*/
- if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST)
- dev->change_rx_flags(dev, IFF_MULTICAST);
+ if ((old_flags ^ flags) & IFF_MULTICAST)
+ dev_change_rx_flags(dev, IFF_MULTICAST);
dev_set_rx_mode(dev);
@@ -3380,6 +3444,13 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
return ret;
}
+/**
+ * dev_set_mtu - Change maximum transfer unit
+ * @dev: device
+ * @new_mtu: new transfer unit
+ *
+ * Change the maximum transfer size of the network device.
+ */
int dev_set_mtu(struct net_device *dev, int new_mtu)
{
int err;
@@ -3404,6 +3475,13 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
return err;
}
+/**
+ * dev_set_mac_address - Change Media Access Control Address
+ * @dev: device
+ * @sa: new address
+ *
+ * Change the hardware (MAC) address of the device
+ */
int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
{
int err;
@@ -3793,14 +3871,11 @@ static int dev_new_index(struct net *net)
}
/* Delayed registration/unregisteration */
-static DEFINE_SPINLOCK(net_todo_list_lock);
static LIST_HEAD(net_todo_list);
static void net_set_todo(struct net_device *dev)
{
- spin_lock(&net_todo_list_lock);
list_add_tail(&dev->todo_list, &net_todo_list);
- spin_unlock(&net_todo_list_lock);
}
static void rollback_registered(struct net_device *dev)
@@ -3847,7 +3922,7 @@ static void rollback_registered(struct net_device *dev)
dev->uninit(dev);
/* Notifier chain MUST detach us from master device. */
- BUG_TRAP(!dev->master);
+ WARN_ON(dev->master);
/* Remove entries from kobject tree */
netdev_unregister_kobject(dev);
@@ -3988,6 +4063,10 @@ int register_netdevice(struct net_device *dev)
}
}
+ /* Enable software GSO if SG is supported. */
+ if (dev->features & NETIF_F_SG)
+ dev->features |= NETIF_F_GSO;
+
netdev_initialize_kobject(dev);
ret = netdev_register_kobject(dev);
if (ret)
@@ -4123,33 +4202,24 @@ static void netdev_wait_allrefs(struct net_device *dev)
* free_netdev(y1);
* free_netdev(y2);
*
- * We are invoked by rtnl_unlock() after it drops the semaphore.
+ * We are invoked by rtnl_unlock().
* This allows us to deal with problems:
* 1) We can delete sysfs objects which invoke hotplug
* without deadlocking with linkwatch via keventd.
* 2) Since we run with the RTNL semaphore not held, we can sleep
* safely in order to wait for the netdev refcnt to drop to zero.
+ *
+ * We must not return until all unregister events added during
+ * the interval the lock was held have been completed.
*/
-static DEFINE_MUTEX(net_todo_run_mutex);
void netdev_run_todo(void)
{
struct list_head list;
- /* Need to guard against multiple cpu's getting out of order. */
- mutex_lock(&net_todo_run_mutex);
-
- /* Not safe to do outside the semaphore. We must not return
- * until all unregister events invoked by the local processor
- * have been completed (either by this todo run, or one on
- * another cpu).
- */
- if (list_empty(&net_todo_list))
- goto out;
-
/* Snapshot list, allow later requests */
- spin_lock(&net_todo_list_lock);
list_replace_init(&net_todo_list, &list);
- spin_unlock(&net_todo_list_lock);
+
+ __rtnl_unlock();
while (!list_empty(&list)) {
struct net_device *dev
@@ -4165,13 +4235,15 @@ void netdev_run_todo(void)
dev->reg_state = NETREG_UNREGISTERED;
+ on_each_cpu(flush_backlog, dev, 1);
+
netdev_wait_allrefs(dev);
/* paranoia */
BUG_ON(atomic_read(&dev->refcnt));
- BUG_TRAP(!dev->ip_ptr);
- BUG_TRAP(!dev->ip6_ptr);
- BUG_TRAP(!dev->dn_ptr);
+ WARN_ON(dev->ip_ptr);
+ WARN_ON(dev->ip6_ptr);
+ WARN_ON(dev->dn_ptr);
if (dev->destructor)
dev->destructor(dev);
@@ -4179,9 +4251,6 @@ void netdev_run_todo(void)
/* Free network device */
kobject_put(&dev->dev.kobj);
}
-
-out:
- mutex_unlock(&net_todo_run_mutex);
}
static struct net_device_stats *internal_stats(struct net_device *dev)
@@ -4200,6 +4269,7 @@ static void netdev_init_queues(struct net_device *dev)
{
netdev_init_one_queue(dev, &dev->rx_queue, NULL);
netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+ spin_lock_init(&dev->tx_global_lock);
}
/**
@@ -4300,7 +4370,12 @@ void free_netdev(struct net_device *dev)
put_device(&dev->dev);
}
-/* Synchronize with packet receive processing. */
+/**
+ * synchronize_net - Synchronize with packet receive processing
+ *
+ * Wait for packets currently being received to be done.
+ * Does not block later packets from starting.
+ */
void synchronize_net(void)
{
might_sleep();
@@ -4602,7 +4677,7 @@ netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
}
/**
- * netdev_dma_regiser - register the networking subsystem as a DMA client
+ * netdev_dma_register - register the networking subsystem as a DMA client
*/
static int __init netdev_dma_register(void)
{
@@ -4648,6 +4723,12 @@ int netdev_compute_features(unsigned long all, unsigned long one)
one |= NETIF_F_GSO_SOFTWARE;
one |= NETIF_F_GSO;
+ /*
+ * If even one device supports a GSO protocol with software fallback,
+ * enable it for all.
+ */
+ all |= one & NETIF_F_GSO_SOFTWARE;
+
/* If even one device supports robust GSO, enable it for all. */
if (one & NETIF_F_GSO_ROBUST)
all |= NETIF_F_GSO_ROBUST;
@@ -4697,10 +4778,18 @@ err_name:
return -ENOMEM;
}
-char *netdev_drivername(struct net_device *dev, char *buffer, int len)
+/**
+ * netdev_drivername - network driver for the device
+ * @dev: network device
+ * @buffer: buffer for resulting name
+ * @len: size of buffer
+ *
+ * Determine network driver for device.
+ */
+char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
{
- struct device_driver *driver;
- struct device *parent;
+ const struct device_driver *driver;
+ const struct device *parent;
if (len <= 0 || !buffer)
return buffer;