aboutsummaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-12-28 12:49:40 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2008-12-28 12:49:40 -0800
commit0191b625ca5a46206d2fb862bb08f36f2fcb3b31 (patch)
tree454d1842b1833d976da62abcbd5c47521ebe9bd7 /net/core
parent54a696bd07c14d3b1192d03ce7269bc59b45209a (diff)
parenteb56092fc168bf5af199d47af50c0d84a96db898 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1429 commits) net: Allow dependancies of FDDI & Tokenring to be modular. igb: Fix build warning when DCA is disabled. net: Fix warning fallout from recent NAPI interface changes. gro: Fix potential use after free sfc: If AN is enabled, always read speed/duplex from the AN advertising bits sfc: When disabling the NIC, close the device rather than unregistering it sfc: SFT9001: Add cable diagnostics sfc: Add support for multiple PHY self-tests sfc: Merge top-level functions for self-tests sfc: Clean up PHY mode management in loopback self-test sfc: Fix unreliable link detection in some loopback modes sfc: Generate unique names for per-NIC workqueues 802.3ad: use standard ethhdr instead of ad_header 802.3ad: generalize out mac address initializer 802.3ad: initialize ports LACPDU from const initializer 802.3ad: remove typedef around ad_system 802.3ad: turn ports is_individual into a bool 802.3ad: turn ports is_enabled into a bool 802.3ad: make ntt bool ixgbe: Fix set_ringparam in ixgbe to use the same memory pools. ... Fixed trivial IPv4/6 address printing conflicts in fs/cifs/connect.c due to the conversion to %pI (in this networking merge) and the addition of doing IPv6 addresses (from the earlier merge of CIFS).
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c5
-rw-r--r--net/core/dev.c458
-rw-r--r--net/core/dst.c6
-rw-r--r--net/core/ethtool.c53
-rw-r--r--net/core/fib_rules.c7
-rw-r--r--net/core/filter.c19
-rw-r--r--net/core/flow.c6
-rw-r--r--net/core/gen_estimator.c97
-rw-r--r--net/core/neighbour.c73
-rw-r--r--net/core/net-sysfs.c15
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/netpoll.c20
-rw-r--r--net/core/pktgen.c42
-rw-r--r--net/core/rtnetlink.c15
-rw-r--r--net/core/skbuff.c277
-rw-r--r--net/core/sock.c46
-rw-r--r--net/core/sysctl_net_core.c68
17 files changed, 923 insertions, 286 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index ee631843c2f..5e2ac0c4b07 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -209,7 +209,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
kfree_skb(skb);
- sk_mem_reclaim(sk);
+ sk_mem_reclaim_partial(sk);
}
/**
@@ -248,8 +248,7 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
spin_unlock_bh(&sk->sk_receive_queue.lock);
}
- kfree_skb(skb);
- sk_mem_reclaim(sk);
+ skb_free_datagram(sk, skb);
return err;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 89912ae6de6..446424027d2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -108,7 +108,6 @@
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
-#include <linux/kallsyms.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
@@ -130,6 +129,9 @@
#include "net-sysfs.h"
+/* Instead of increasing this, you should create a hash table. */
+#define MAX_GRO_SKBS 8
+
/*
* The list of packet types we will receive (as opposed to discard)
* and the routines to invoke.
@@ -281,8 +283,8 @@ static const unsigned short netdev_lock_type[] =
ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
- ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
- ARPHRD_NONE};
+ ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
+ ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};
static const char *netdev_lock_name[] =
{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
@@ -298,8 +300,8 @@ static const char *netdev_lock_name[] =
"_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
"_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
"_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
- "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
- "_xmit_NONE"};
+ "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
+ "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};
static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
@@ -924,10 +926,15 @@ int dev_change_name(struct net_device *dev, const char *newname)
strlcpy(dev->name, newname, IFNAMSIZ);
rollback:
- ret = device_rename(&dev->dev, dev->name);
- if (ret) {
- memcpy(dev->name, oldname, IFNAMSIZ);
- return ret;
+ /* For now only devices in the initial network namespace
+ * are in sysfs.
+ */
+ if (net == &init_net) {
+ ret = device_rename(&dev->dev, dev->name);
+ if (ret) {
+ memcpy(dev->name, oldname, IFNAMSIZ);
+ return ret;
+ }
}
write_lock_bh(&dev_base_lock);
@@ -1055,6 +1062,7 @@ void dev_load(struct net *net, const char *name)
*/
int dev_open(struct net_device *dev)
{
+ const struct net_device_ops *ops = dev->netdev_ops;
int ret = 0;
ASSERT_RTNL();
@@ -1077,11 +1085,11 @@ int dev_open(struct net_device *dev)
*/
set_bit(__LINK_STATE_START, &dev->state);
- if (dev->validate_addr)
- ret = dev->validate_addr(dev);
+ if (ops->ndo_validate_addr)
+ ret = ops->ndo_validate_addr(dev);
- if (!ret && dev->open)
- ret = dev->open(dev);
+ if (!ret && ops->ndo_open)
+ ret = ops->ndo_open(dev);
/*
* If it went open OK then:
@@ -1125,6 +1133,7 @@ int dev_open(struct net_device *dev)
*/
int dev_close(struct net_device *dev)
{
+ const struct net_device_ops *ops = dev->netdev_ops;
ASSERT_RTNL();
might_sleep();
@@ -1157,8 +1166,8 @@ int dev_close(struct net_device *dev)
* We allow it to be called even after a DETACH hot-plug
* event.
*/
- if (dev->stop)
- dev->stop(dev);
+ if (ops->ndo_stop)
+ ops->ndo_stop(dev);
/*
* Device is now down.
@@ -1527,8 +1536,6 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
__be16 type = skb->protocol;
int err;
- BUG_ON(skb_shinfo(skb)->frag_list);
-
skb_reset_mac_header(skb);
skb->mac_len = skb->network_header - skb->mac_header;
__skb_pull(skb, skb->mac_len);
@@ -1654,6 +1661,9 @@ static int dev_gso_segment(struct sk_buff *skb)
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ prefetch(&dev->netdev_ops->ndo_start_xmit);
if (likely(!skb->next)) {
if (!list_empty(&ptype_all))
dev_queue_xmit_nit(skb, dev);
@@ -1665,7 +1675,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
goto gso;
}
- return dev->hard_start_xmit(skb, dev);
+ return ops->ndo_start_xmit(skb, dev);
}
gso:
@@ -1675,7 +1685,7 @@ gso:
skb->next = nskb->next;
nskb->next = NULL;
- rc = dev->hard_start_xmit(nskb, dev);
+ rc = ops->ndo_start_xmit(nskb, dev);
if (unlikely(rc)) {
nskb->next = skb->next;
skb->next = nskb;
@@ -1749,10 +1759,11 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
static struct netdev_queue *dev_pick_tx(struct net_device *dev,
struct sk_buff *skb)
{
+ const struct net_device_ops *ops = dev->netdev_ops;
u16 queue_index = 0;
- if (dev->select_queue)
- queue_index = dev->select_queue(dev, skb);
+ if (ops->ndo_select_queue)
+ queue_index = ops->ndo_select_queue(dev, skb);
else if (dev->real_num_tx_queues > 1)
queue_index = simple_tx_hash(dev, skb);
@@ -2251,8 +2262,10 @@ int netif_receive_skb(struct sk_buff *skb)
rcu_read_lock();
/* Don't receive packets in an exiting network namespace */
- if (!net_alive(dev_net(skb->dev)))
+ if (!net_alive(dev_net(skb->dev))) {
+ kfree_skb(skb);
goto out;
+ }
#ifdef CONFIG_NET_CLS_ACT
if (skb->tc_verd & TC_NCLS) {
@@ -2325,6 +2338,125 @@ static void flush_backlog(void *arg)
}
}
+static int napi_gro_complete(struct sk_buff *skb)
+{
+ struct packet_type *ptype;
+ __be16 type = skb->protocol;
+ struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+ int err = -ENOENT;
+
+ if (!skb_shinfo(skb)->frag_list)
+ goto out;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, head, list) {
+ if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+ continue;
+
+ err = ptype->gro_complete(skb);
+ break;
+ }
+ rcu_read_unlock();
+
+ if (err) {
+ WARN_ON(&ptype->list == head);
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
+out:
+ __skb_push(skb, -skb_network_offset(skb));
+ return netif_receive_skb(skb);
+}
+
+void napi_gro_flush(struct napi_struct *napi)
+{
+ struct sk_buff *skb, *next;
+
+ for (skb = napi->gro_list; skb; skb = next) {
+ next = skb->next;
+ skb->next = NULL;
+ napi_gro_complete(skb);
+ }
+
+ napi->gro_list = NULL;
+}
+EXPORT_SYMBOL(napi_gro_flush);
+
+int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+ struct sk_buff **pp = NULL;
+ struct packet_type *ptype;
+ __be16 type = skb->protocol;
+ struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+ int count = 0;
+ int same_flow;
+ int mac_len;
+
+ if (!(skb->dev->features & NETIF_F_GRO))
+ goto normal;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, head, list) {
+ struct sk_buff *p;
+
+ if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+ continue;
+
+ skb_reset_network_header(skb);
+ mac_len = skb->network_header - skb->mac_header;
+ skb->mac_len = mac_len;
+ NAPI_GRO_CB(skb)->same_flow = 0;
+ NAPI_GRO_CB(skb)->flush = 0;
+
+ for (p = napi->gro_list; p; p = p->next) {
+ count++;
+ NAPI_GRO_CB(p)->same_flow =
+ p->mac_len == mac_len &&
+ !memcmp(skb_mac_header(p), skb_mac_header(skb),
+ mac_len);
+ NAPI_GRO_CB(p)->flush = 0;
+ }
+
+ pp = ptype->gro_receive(&napi->gro_list, skb);
+ break;
+ }
+ rcu_read_unlock();
+
+ if (&ptype->list == head)
+ goto normal;
+
+ same_flow = NAPI_GRO_CB(skb)->same_flow;
+
+ if (pp) {
+ struct sk_buff *nskb = *pp;
+
+ *pp = nskb->next;
+ nskb->next = NULL;
+ napi_gro_complete(nskb);
+ count--;
+ }
+
+ if (same_flow)
+ goto ok;
+
+ if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
+ __skb_push(skb, -skb_network_offset(skb));
+ goto normal;
+ }
+
+ NAPI_GRO_CB(skb)->count = 1;
+ skb->next = napi->gro_list;
+ napi->gro_list = skb;
+
+ok:
+ return NET_RX_SUCCESS;
+
+normal:
+ return netif_receive_skb(skb);
+}
+EXPORT_SYMBOL(napi_gro_receive);
+
static int process_backlog(struct napi_struct *napi, int quota)
{
int work = 0;
@@ -2344,9 +2476,11 @@ static int process_backlog(struct napi_struct *napi, int quota)
}
local_irq_enable();
- netif_receive_skb(skb);
+ napi_gro_receive(napi, skb);
} while (++work < quota && jiffies == start_time);
+ napi_gro_flush(napi);
+
return work;
}
@@ -2367,11 +2501,73 @@ void __napi_schedule(struct napi_struct *n)
}
EXPORT_SYMBOL(__napi_schedule);
+void __napi_complete(struct napi_struct *n)
+{
+ BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+ BUG_ON(n->gro_list);
+
+ list_del(&n->poll_list);
+ smp_mb__before_clear_bit();
+ clear_bit(NAPI_STATE_SCHED, &n->state);
+}
+EXPORT_SYMBOL(__napi_complete);
+
+void napi_complete(struct napi_struct *n)
+{
+ unsigned long flags;
+
+ /*
+ * don't let napi dequeue from the cpu poll list
+ * just in case its running on a different cpu
+ */
+ if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
+ return;
+
+ napi_gro_flush(n);
+ local_irq_save(flags);
+ __napi_complete(n);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(napi_complete);
+
+void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int), int weight)
+{
+ INIT_LIST_HEAD(&napi->poll_list);
+ napi->gro_list = NULL;
+ napi->poll = poll;
+ napi->weight = weight;
+ list_add(&napi->dev_list, &dev->napi_list);
+#ifdef CONFIG_NETPOLL
+ napi->dev = dev;
+ spin_lock_init(&napi->poll_lock);
+ napi->poll_owner = -1;
+#endif
+ set_bit(NAPI_STATE_SCHED, &napi->state);
+}
+EXPORT_SYMBOL(netif_napi_add);
+
+void netif_napi_del(struct napi_struct *napi)
+{
+ struct sk_buff *skb, *next;
+
+ list_del_init(&napi->dev_list);
+
+ for (skb = napi->gro_list; skb; skb = next) {
+ next = skb->next;
+ skb->next = NULL;
+ kfree_skb(skb);
+ }
+
+ napi->gro_list = NULL;
+}
+EXPORT_SYMBOL(netif_napi_del);
+
static void net_rx_action(struct softirq_action *h)
{
struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
- unsigned long start_time = jiffies;
+ unsigned long time_limit = jiffies + 2;
int budget = netdev_budget;
void *have;
@@ -2382,13 +2578,10 @@ static void net_rx_action(struct softirq_action *h)
int work, weight;
/* If softirq window is exhuasted then punt.
- *
- * Note that this is a slight policy change from the
- * previous NAPI code, which would allow up to 2
- * jiffies to pass before breaking out. The test
- * used to be "jiffies - start_time > 1".
+ * Allow this to run for 2 jiffies since which will allow
+ * an average latency of 1.5/HZ.
*/
- if (unlikely(budget <= 0 || jiffies != start_time))
+ if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
goto softnet_break;
local_irq_enable();
@@ -2615,7 +2808,7 @@ void dev_seq_stop(struct seq_file *seq, void *v)
static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
{
- struct net_device_stats *stats = dev->get_stats(dev);
+ const struct net_device_stats *stats = dev_get_stats(dev);
seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
"%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
@@ -2797,31 +2990,6 @@ static void ptype_seq_stop(struct seq_file *seq, void *v)
rcu_read_unlock();
}
-static void ptype_seq_decode(struct seq_file *seq, void *sym)
-{
-#ifdef CONFIG_KALLSYMS
- unsigned long offset = 0, symsize;
- const char *symname;
- char *modname;
- char namebuf[128];
-
- symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
- &modname, namebuf);
-
- if (symname) {
- char *delim = ":";
-
- if (!modname)
- modname = delim = "";
- seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
- symname, offset);
- return;
- }
-#endif
-
- seq_printf(seq, "[%p]", sym);
-}
-
static int ptype_seq_show(struct seq_file *seq, void *v)
{
struct packet_type *pt = v;
@@ -2834,10 +3002,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
else
seq_printf(seq, "%04x", ntohs(pt->type));
- seq_printf(seq, " %-8s ",
- pt->dev ? pt->dev->name : "");
- ptype_seq_decode(seq, pt->func);
- seq_putc(seq, '\n');
+ seq_printf(seq, " %-8s %pF\n",
+ pt->dev ? pt->dev->name : "", pt->func);
}
return 0;
@@ -2954,8 +3120,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
- if (dev->flags & IFF_UP && dev->change_rx_flags)
- dev->change_rx_flags(dev, flags);
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
+ ops->ndo_change_rx_flags(dev, flags);
}
static int __dev_set_promiscuity(struct net_device *dev, int inc)
@@ -3079,6 +3247,8 @@ int dev_set_allmulti(struct net_device *dev, int inc)
*/
void __dev_set_rx_mode(struct net_device *dev)
{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
/* dev_open will call this function so the list will stay sane. */
if (!(dev->flags&IFF_UP))
return;
@@ -3086,8 +3256,8 @@ void __dev_set_rx_mode(struct net_device *dev)
if (!netif_device_present(dev))
return;
- if (dev->set_rx_mode)
- dev->set_rx_mode(dev);
+ if (ops->ndo_set_rx_mode)
+ ops->ndo_set_rx_mode(dev);
else {
/* Unicast addresses changes may only happen under the rtnl,
* therefore calling __dev_set_promiscuity here is safe.
@@ -3100,8 +3270,8 @@ void __dev_set_rx_mode(struct net_device *dev)
dev->uc_promisc = 0;
}
- if (dev->set_multicast_list)
- dev->set_multicast_list(dev);
+ if (ops->ndo_set_multicast_list)
+ ops->ndo_set_multicast_list(dev);
}
}
@@ -3460,6 +3630,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
*/
int dev_set_mtu(struct net_device *dev, int new_mtu)
{
+ const struct net_device_ops *ops = dev->netdev_ops;
int err;
if (new_mtu == dev->mtu)
@@ -3473,10 +3644,11 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
return -ENODEV;
err = 0;
- if (dev->change_mtu)
- err = dev->change_mtu(dev, new_mtu);
+ if (ops->ndo_change_mtu)
+ err = ops->ndo_change_mtu(dev, new_mtu);
else
dev->mtu = new_mtu;
+
if (!err && dev->flags & IFF_UP)
call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
return err;
@@ -3491,15 +3663,16 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
*/
int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
{
+ const struct net_device_ops *ops = dev->netdev_ops;
int err;
- if (!dev->set_mac_address)
+ if (!ops->ndo_set_mac_address)
return -EOPNOTSUPP;
if (sa->sa_family != dev->type)
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
- err = dev->set_mac_address(dev, sa);
+ err = ops->ndo_set_mac_address(dev, sa);
if (!err)
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
return err;
@@ -3579,10 +3752,13 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
int err;
struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+ const struct net_device_ops *ops;
if (!dev)
return -ENODEV;
+ ops = dev->netdev_ops;
+
switch (cmd) {
case SIOCSIFFLAGS: /* Set interface flags */
return dev_change_flags(dev, ifr->ifr_flags);
@@ -3606,15 +3782,15 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
return 0;
case SIOCSIFMAP:
- if (dev->set_config) {
+ if (ops->ndo_set_config) {
if (!netif_device_present(dev))
return -ENODEV;
- return dev->set_config(dev, &ifr->ifr_map);
+ return ops->ndo_set_config(dev, &ifr->ifr_map);
}
return -EOPNOTSUPP;
case SIOCADDMULTI:
- if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
+ if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
return -EINVAL;
if (!netif_device_present(dev))
@@ -3623,7 +3799,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
dev->addr_len, 1);
case SIOCDELMULTI:
- if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
+ if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
return -EINVAL;
if (!netif_device_present(dev))
@@ -3661,10 +3837,9 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
cmd == SIOCBRDELIF ||
cmd == SIOCWANDEV) {
err = -EOPNOTSUPP;
- if (dev->do_ioctl) {
+ if (ops->ndo_do_ioctl) {
if (netif_device_present(dev))
- err = dev->do_ioctl(dev, ifr,
- cmd);
+ err = ops->ndo_do_ioctl(dev, ifr, cmd);
else
err = -ENODEV;
}
@@ -3925,8 +4100,8 @@ static void rollback_registered(struct net_device *dev)
*/
dev_addr_discard(dev);
- if (dev->uninit)
- dev->uninit(dev);
+ if (dev->netdev_ops->ndo_uninit)
+ dev->netdev_ops->ndo_uninit(dev);
/* Notifier chain MUST detach us from master device. */
WARN_ON(dev->master);
@@ -4016,7 +4191,7 @@ int register_netdevice(struct net_device *dev)
struct hlist_head *head;
struct hlist_node *p;
int ret;
- struct net *net;
+ struct net *net = dev_net(dev);
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
@@ -4025,8 +4200,7 @@ int register_netdevice(struct net_device *dev)
/* When net_device's are persistent, this will be fatal. */
BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
- BUG_ON(!dev_net(dev));
- net = dev_net(dev);
+ BUG_ON(!net);
spin_lock_init(&dev->addr_list_lock);
netdev_set_addr_lockdep_class(dev);
@@ -4034,9 +4208,46 @@ int register_netdevice(struct net_device *dev)
dev->iflink = -1;
+#ifdef CONFIG_COMPAT_NET_DEV_OPS
+ /* Netdevice_ops API compatiability support.
+ * This is temporary until all network devices are converted.
+ */
+ if (dev->netdev_ops) {
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ dev->init = ops->ndo_init;
+ dev->uninit = ops->ndo_uninit;
+ dev->open = ops->ndo_open;
+ dev->change_rx_flags = ops->ndo_change_rx_flags;
+ dev->set_rx_mode = ops->ndo_set_rx_mode;
+ dev->set_multicast_list = ops->ndo_set_multicast_list;
+ dev->set_mac_address = ops->ndo_set_mac_address;
+ dev->validate_addr = ops->ndo_validate_addr;
+ dev->do_ioctl = ops->ndo_do_ioctl;
+ dev->set_config = ops->ndo_set_config;
+ dev->change_mtu = ops->ndo_change_mtu;
+ dev->tx_timeout = ops->ndo_tx_timeout;
+ dev->get_stats = ops->ndo_get_stats;
+ dev->vlan_rx_register = ops->ndo_vlan_rx_register;
+ dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
+ dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ dev->poll_controller = ops->ndo_poll_controller;
+#endif
+ } else {
+ char drivername[64];
+ pr_info("%s (%s): not using net_device_ops yet\n",
+ dev->name, netdev_drivername(dev, drivername, 64));
+
+ /* This works only because net_device_ops and the
+ compatiablity structure are the same. */
+ dev->netdev_ops = (void *) &(dev->init);
+ }
+#endif
+
/* Init, if this function is available */
- if (dev->init) {
- ret = dev->init(dev);
+ if (dev->netdev_ops->ndo_init) {
+ ret = dev->netdev_ops->ndo_init(dev);
if (ret) {
if (ret > 0)
ret = -EIO;
@@ -4114,8 +4325,8 @@ out:
return ret;
err_uninit:
- if (dev->uninit)
- dev->uninit(dev);
+ if (dev->netdev_ops->ndo_uninit)
+ dev->netdev_ops->ndo_uninit(dev);
goto out;
}
@@ -4271,10 +4482,24 @@ void netdev_run_todo(void)
}
}
-static struct net_device_stats *internal_stats(struct net_device *dev)
-{
- return &dev->stats;
+/**
+ * dev_get_stats - get network device statistics
+ * @dev: device to get statistics from
+ *
+ * Get network statistics from device. The device driver may provide
+ * its own method by setting dev->netdev_ops->get_stats; otherwise
+ * the internal statistics structure is used.
+ */
+const struct net_device_stats *dev_get_stats(struct net_device *dev)
+ {
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (ops->ndo_get_stats)
+ return ops->ndo_get_stats(dev);
+ else
+ return &dev->stats;
}
+EXPORT_SYMBOL(dev_get_stats);
static void netdev_init_one_queue(struct net_device *dev,
struct netdev_queue *queue,
@@ -4343,18 +4568,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
dev->num_tx_queues = queue_count;
dev->real_num_tx_queues = queue_count;
- if (sizeof_priv) {
- dev->priv = ((char *)dev +
- ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
- & ~NETDEV_ALIGN_CONST));
- }
-
dev->gso_max_size = GSO_MAX_SIZE;
netdev_init_queues(dev);
- dev->get_stats = internal_stats;
- netpoll_netdev_init(dev);
+ INIT_LIST_HEAD(&dev->napi_list);
setup(dev);
strcpy(dev->name, name);
return dev;
@@ -4371,10 +4589,15 @@ EXPORT_SYMBOL(alloc_netdev_mq);
*/
void free_netdev(struct net_device *dev)
{
+ struct napi_struct *p, *n;
+
release_net(dev_net(dev));
kfree(dev->_tx);
+ list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
+ netif_napi_del(p);
+
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
kfree((char *)dev - dev->padded);
@@ -4467,6 +4690,15 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
if (dev->features & NETIF_F_NETNS_LOCAL)
goto out;
+#ifdef CONFIG_SYSFS
+ /* Don't allow real devices to be moved when sysfs
+ * is enabled.
+ */
+ err = -EINVAL;
+ if (dev->dev.parent)
+ goto out;
+#endif
+
/* Ensure the device has been registrered */
err = -EINVAL;
if (dev->reg_state != NETREG_REGISTERED)
@@ -4524,6 +4756,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
*/
dev_addr_discard(dev);
+ netdev_unregister_kobject(dev);
+
/* Actually switch the network namespace */
dev_net_set(dev, net);
@@ -4540,7 +4774,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
}
/* Fixup kobjects */
- netdev_unregister_kobject(dev);
err = netdev_register_kobject(dev);
WARN_ON(err);
@@ -4847,6 +5080,12 @@ static void __net_exit default_device_exit(struct net *net)
if (dev->features & NETIF_F_NETNS_LOCAL)
continue;
+ /* Delete virtual devices */
+ if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
+ dev->rtnl_link_ops->dellink(dev);
+ continue;
+ }
+
/* Push remaing network devices to init_net */
snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
err = dev_change_net_namespace(dev, &init_net, fb_name);
@@ -4893,9 +5132,6 @@ static int __init net_dev_init(void)
if (register_pernet_subsys(&netdev_net_ops))
goto out;
- if (register_pernet_device(&default_device_ops))
- goto out;
-
/*
* Initialise the packet receive queues.
*/
@@ -4910,12 +5146,28 @@ static int __init net_dev_init(void)
queue->backlog.poll = process_backlog;
queue->backlog.weight = weight_p;
+ queue->backlog.gro_list = NULL;
}
- netdev_dma_register();
-
dev_boot_phase = 0;
+ /* The loopback device is special if any other network devices
+ * is present in a network namespace the loopback device must
+ * be present. Since we now dynamically allocate and free the
+ * loopback device ensure this invariant is maintained by
+ * keeping the loopback device as the first device on the
+ * list of network devices. Ensuring the loopback devices
+ * is the first device that appears and the last network device
+ * that disappears.
+ */
+ if (register_pernet_device(&loopback_net_ops))
+ goto out;
+
+ if (register_pernet_device(&default_device_ops))
+ goto out;
+
+ netdev_dma_register();
+
open_softirq(NET_TX_SOFTIRQ, net_tx_action);
open_softirq(NET_RX_SOFTIRQ, net_rx_action);
diff --git a/net/core/dst.c b/net/core/dst.c
index 09c1530f468..57bc4d5b8d0 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -263,9 +263,11 @@ again:
void dst_release(struct dst_entry *dst)
{
if (dst) {
- WARN_ON(atomic_read(&dst->__refcnt) < 1);
+ int newrefcnt;
+
smp_mb__before_atomic_dec();
- atomic_dec(&dst->__refcnt);
+ newrefcnt = atomic_dec_return(&dst->__refcnt);
+ WARN_ON(newrefcnt < 0);
}
}
EXPORT_SYMBOL(dst_release);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 14ada537f89..947710a36ce 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -528,6 +528,22 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
return dev->ethtool_ops->set_tx_csum(dev, edata.data);
}
+static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
+{
+ struct ethtool_value edata;
+
+ if (!dev->ethtool_ops->set_rx_csum)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+
+ if (!edata.data && dev->ethtool_ops->set_sg)
+ dev->features &= ~NETIF_F_GRO;
+
+ return dev->ethtool_ops->set_rx_csum(dev, edata.data);
+}
+
static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
@@ -599,6 +615,34 @@ static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
return 0;
}
+static int ethtool_get_gro(struct net_device *dev, char __user *useraddr)
+{
+ struct ethtool_value edata = { ETHTOOL_GGRO };
+
+ edata.data = dev->features & NETIF_F_GRO;
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_set_gro(struct net_device *dev, char __user *useraddr)
+{
+ struct ethtool_value edata;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+
+ if (edata.data) {
+ if (!dev->ethtool_ops->get_rx_csum ||
+ !dev->ethtool_ops->get_rx_csum(dev))
+ return -EINVAL;
+ dev->features |= NETIF_F_GRO;
+ } else
+ dev->features &= ~NETIF_F_GRO;
+
+ return 0;
+}
+
static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
{
struct ethtool_test test;
@@ -932,8 +976,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
dev->ethtool_ops->get_rx_csum);
break;
case ETHTOOL_SRXCSUM:
- rc = ethtool_set_value(dev, useraddr,
- dev->ethtool_ops->set_rx_csum);
+ rc = ethtool_set_rx_csum(dev, useraddr);
break;
case ETHTOOL_GTXCSUM:
rc = ethtool_get_value(dev, useraddr, ethcmd,
@@ -1014,6 +1057,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SRXFH:
rc = ethtool_set_rxhash(dev, useraddr);
break;
+ case ETHTOOL_GGRO:
+ rc = ethtool_get_gro(dev, useraddr);
+ break;
+ case ETHTOOL_SGRO:
+ rc = ethtool_set_gro(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 79de3b14a8d..32b3a0152d7 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -664,17 +664,18 @@ static int __init fib_rules_init(void)
rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);
- err = register_netdevice_notifier(&fib_rules_notifier);
+ err = register_pernet_subsys(&fib_rules_net_ops);
if (err < 0)
goto fail;
- err = register_pernet_subsys(&fib_rules_net_ops);
+ err = register_netdevice_notifier(&fib_rules_notifier);
if (err < 0)
goto fail_unregister;
+
return 0;
fail_unregister:
- unregister_netdevice_notifier(&fib_rules_notifier);
+ unregister_pernet_subsys(&fib_rules_net_ops);
fail:
rtnl_unregister(PF_UNSPEC, RTM_NEWRULE);
rtnl_unregister(PF_UNSPEC, RTM_DELRULE);
diff --git a/net/core/filter.c b/net/core/filter.c
index df374435583..d1d779ca096 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -319,6 +319,25 @@ load_b:
A = 0;
continue;
}
+ case SKF_AD_NLATTR_NEST: {
+ struct nlattr *nla;
+
+ if (skb_is_nonlinear(skb))
+ return 0;
+ if (A > skb->len - sizeof(struct nlattr))
+ return 0;
+
+ nla = (struct nlattr *)&skb->data[A];
+ if (nla->nla_len > A - skb->len)
+ return 0;
+
+ nla = nla_find_nested(nla, X);
+ if (nla)
+ A = (void *)nla - (void *)skb->data;
+ else
+ A = 0;
+ continue;
+ }
default:
return 0;
}
diff --git a/net/core/flow.c b/net/core/flow.c
index 5cf81052d04..96015871ece 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -165,7 +165,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
return 0;
}
-void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
+void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
flow_resolve_t resolver)
{
struct flow_cache_entry *fle, **head;
@@ -225,7 +225,7 @@ nocache:
void *obj;
atomic_t *obj_ref;
- err = resolver(key, family, dir, &obj, &obj_ref);
+ err = resolver(net, key, family, dir, &obj, &obj_ref);
if (fle && !err) {
fle->genid = atomic_read(&flow_cache_genid);
@@ -307,7 +307,7 @@ void flow_cache_flush(void)
put_online_cpus();
}
-static void __devinit flow_cache_cpu_prepare(int cpu)
+static void __init flow_cache_cpu_prepare(int cpu)
{
struct tasklet_struct *tasklet;
unsigned long order;
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 57abe8266be..9cc9f95b109 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -31,6 +31,7 @@
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
+#include <linux/rbtree.h>
#include <net/sock.h>
#include <net/gen_stats.h>
@@ -89,6 +90,7 @@ struct gen_estimator
u32 avpps;
u32 avbps;
struct rcu_head e_rcu;
+ struct rb_node node;
};
struct gen_estimator_head
@@ -102,6 +104,9 @@ static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
/* Protects against NULL dereference */
static DEFINE_RWLOCK(est_lock);
+/* Protects against soft lockup during large deletion */
+static struct rb_root est_root = RB_ROOT;
+
static void est_timer(unsigned long arg)
{
int idx = (int)arg;
@@ -139,6 +144,46 @@ skip:
rcu_read_unlock();
}
+static void gen_add_node(struct gen_estimator *est)
+{
+ struct rb_node **p = &est_root.rb_node, *parent = NULL;
+
+ while (*p) {
+ struct gen_estimator *e;
+
+ parent = *p;
+ e = rb_entry(parent, struct gen_estimator, node);
+
+ if (est->bstats > e->bstats)
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+ rb_link_node(&est->node, parent, p);
+ rb_insert_color(&est->node, &est_root);
+}
+
+static
+struct gen_estimator *gen_find_node(const struct gnet_stats_basic *bstats,
+ const struct gnet_stats_rate_est *rate_est)
+{
+ struct rb_node *p = est_root.rb_node;
+
+ while (p) {
+ struct gen_estimator *e;
+
+ e = rb_entry(p, struct gen_estimator, node);
+
+ if (bstats > e->bstats)
+ p = p->rb_right;
+ else if (bstats < e->bstats || rate_est != e->rate_est)
+ p = p->rb_left;
+ else
+ return e;
+ }
+ return NULL;
+}
+
/**
* gen_new_estimator - create a new rate estimator
* @bstats: basic statistics
@@ -194,8 +239,11 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
list_add_rcu(&est->list, &elist[idx].list);
+ gen_add_node(est);
+
return 0;
}
+EXPORT_SYMBOL(gen_new_estimator);
static void __gen_kill_estimator(struct rcu_head *head)
{
@@ -209,36 +257,27 @@ static void __gen_kill_estimator(struct rcu_head *head)
* @bstats: basic statistics
* @rate_est: rate estimator statistics
*
- * Removes the rate estimator specified by &bstats and &rate_est
- * and deletes the timer.
+ * Removes the rate estimator specified by &bstats and &rate_est.
*
* NOTE: Called under rtnl_mutex
*/
void gen_kill_estimator(struct gnet_stats_basic *bstats,
- struct gnet_stats_rate_est *rate_est)
+ struct gnet_stats_rate_est *rate_est)
{
- int idx;
- struct gen_estimator *e, *n;
-
- for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
-
- /* Skip non initialized indexes */
- if (!elist[idx].timer.function)
- continue;
+ struct gen_estimator *e;
- list_for_each_entry_safe(e, n, &elist[idx].list, list) {
- if (e->rate_est != rate_est || e->bstats != bstats)
- continue;
+ while ((e = gen_find_node(bstats, rate_est))) {
+ rb_erase(&e->node, &est_root);
- write_lock_bh(&est_lock);
- e->bstats = NULL;
- write_unlock_bh(&est_lock);
+ write_lock_bh(&est_lock);
+ e->bstats = NULL;
+ write_unlock_bh(&est_lock);
- list_del_rcu(&e->list);
- call_rcu(&e->e_rcu, __gen_kill_estimator);
- }
+ list_del_rcu(&e->list);
+ call_rcu(&e->e_rcu, __gen_kill_estimator);
}
}
+EXPORT_SYMBOL(gen_kill_estimator);
/**
* gen_replace_estimator - replace rate estimator configuration
@@ -259,8 +298,20 @@ int gen_replace_estimator(struct gnet_stats_basic *bstats,
gen_kill_estimator(bstats, rate_est);
return gen_new_estimator(bstats, rate_est, stats_lock, opt);
}
+EXPORT_SYMBOL(gen_replace_estimator);
+/**
+ * gen_estimator_active - test if estimator is currently in use
+ * @bstats: basic statistics
+ * @rate_est: rate estimator statistics
+ *
+ * Returns true if estimator is active, and false if not.
+ */
+bool gen_estimator_active(const struct gnet_stats_basic *bstats,
+ const struct gnet_stats_rate_est *rate_est)
+{
+ ASSERT_RTNL();
-EXPORT_SYMBOL(gen_kill_estimator);
-EXPORT_SYMBOL(gen_new_estimator);
-EXPORT_SYMBOL(gen_replace_estimator);
+ return gen_find_node(bstats, rate_est) != NULL;
+}
+EXPORT_SYMBOL(gen_estimator_active);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1dc728b3858..9c3717a23cf 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -531,9 +531,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
if (!n)
goto out;
-#ifdef CONFIG_NET_NS
- n->net = hold_net(net);
-#endif
+ write_pnet(&n->net, hold_net(net));
memcpy(n->key, pkey, key_len);
n->dev = dev;
if (dev)
@@ -1329,9 +1327,9 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
struct neigh_table *tbl)
{
struct neigh_parms *p, *ref;
- struct net *net;
+ struct net *net = dev_net(dev);
+ const struct net_device_ops *ops = dev->netdev_ops;
- net = dev_net(dev);
ref = lookup_neigh_params(tbl, net, 0);
if (!ref)
return NULL;
@@ -1340,20 +1338,17 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
if (p) {
p->tbl = tbl;
atomic_set(&p->refcnt, 1);
- INIT_RCU_HEAD(&p->rcu_head);
p->reachable_time =
neigh_rand_reach_time(p->base_reachable_time);
- if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
+ if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
kfree(p);
return NULL;
}
dev_hold(dev);
p->dev = dev;
-#ifdef CONFIG_NET_NS
- p->net = hold_net(net);
-#endif
+ write_pnet(&p->net, hold_net(net));
p->sysctl_table = NULL;
write_lock_bh(&tbl->lock);
p->next = tbl->parms.next;
@@ -1408,11 +1403,8 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
unsigned long now = jiffies;
unsigned long phsize;
-#ifdef CONFIG_NET_NS
- tbl->parms.net = &init_net;
-#endif
+ write_pnet(&tbl->parms.net, &init_net);
atomic_set(&tbl->parms.refcnt, 1);
- INIT_RCU_HEAD(&tbl->parms.rcu_head);
tbl->parms.reachable_time =
neigh_rand_reach_time(tbl->parms.base_reachable_time);
@@ -1426,9 +1418,8 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
panic("cannot create neighbour cache statistics");
#ifdef CONFIG_PROC_FS
- tbl->pde = proc_create_data(tbl->id, 0, init_net.proc_net_stat,
- &neigh_stat_seq_fops, tbl);
- if (!tbl->pde)
+ if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
+ &neigh_stat_seq_fops, tbl))
panic("cannot create neighbour proc dir entry");
#endif
@@ -2568,128 +2559,128 @@ static struct neigh_sysctl_table {
.procname = "mcast_solicit",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
.ctl_name = NET_NEIGH_UCAST_SOLICIT,
.procname = "ucast_solicit",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
.ctl_name = NET_NEIGH_APP_SOLICIT,
.procname = "app_solicit",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
.procname = "retrans_time",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_userhz_jiffies,
+ .proc_handler = proc_dointvec_userhz_jiffies,
},
{
.ctl_name = NET_NEIGH_REACHABLE_TIME,
.procname = "base_reachable_time",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- .strategy = &sysctl_jiffies,
+ .proc_handler = proc_dointvec_jiffies,
+ .strategy = sysctl_jiffies,
},
{
.ctl_name = NET_NEIGH_DELAY_PROBE_TIME,
.procname = "delay_first_probe_time",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- .strategy = &sysctl_jiffies,
+ .proc_handler = proc_dointvec_jiffies,
+ .strategy = sysctl_jiffies,
},
{
.ctl_name = NET_NEIGH_GC_STALE_TIME,
.procname = "gc_stale_time",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- .strategy = &sysctl_jiffies,
+ .proc_handler = proc_dointvec_jiffies,
+ .strategy = sysctl_jiffies,
},
{
.ctl_name = NET_NEIGH_UNRES_QLEN,
.procname = "unres_qlen",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
.ctl_name = NET_NEIGH_PROXY_QLEN,
.procname = "proxy_qlen",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
.procname = "anycast_delay",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_userhz_jiffies,
+ .proc_handler = proc_dointvec_userhz_jiffies,
},
{
.procname = "proxy_delay",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_userhz_jiffies,
+ .proc_handler = proc_dointvec_userhz_jiffies,
},
{
.procname = "locktime",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_userhz_jiffies,
+ .proc_handler = proc_dointvec_userhz_jiffies,
},
{
.ctl_name = NET_NEIGH_RETRANS_TIME_MS,
.procname = "retrans_time_ms",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_ms_jiffies,
- .strategy = &sysctl_ms_jiffies,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ .strategy = sysctl_ms_jiffies,
},
{
.ctl_name = NET_NEIGH_REACHABLE_TIME_MS,
.procname = "base_reachable_time_ms",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_ms_jiffies,
- .strategy = &sysctl_ms_jiffies,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ .strategy = sysctl_ms_jiffies,
},
{
.ctl_name = NET_NEIGH_GC_INTERVAL,
.procname = "gc_interval",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- .strategy = &sysctl_jiffies,
+ .proc_handler = proc_dointvec_jiffies,
+ .strategy = sysctl_jiffies,
},
{
.ctl_name = NET_NEIGH_GC_THRESH1,
.procname = "gc_thresh1",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
.ctl_name = NET_NEIGH_GC_THRESH2,
.procname = "gc_thresh2",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
.ctl_name = NET_NEIGH_GC_THRESH3,
.procname = "gc_thresh3",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{},
},
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 92d6b946731..6ac29a46e23 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -270,7 +270,6 @@ static ssize_t netstat_show(const struct device *d,
unsigned long offset)
{
struct net_device *dev = to_net_dev(d);
- struct net_device_stats *stats;
ssize_t ret = -EINVAL;
WARN_ON(offset > sizeof(struct net_device_stats) ||
@@ -278,7 +277,7 @@ static ssize_t netstat_show(const struct device *d,
read_lock(&dev_base_lock);
if (dev_isalive(dev)) {
- stats = dev->get_stats(dev);
+ const struct net_device_stats *stats = dev_get_stats(dev);
ret = sprintf(buf, fmt_ulong,
*(unsigned long *)(((u8 *) stats) + offset));
}
@@ -428,6 +427,9 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
struct net_device *dev = to_net_dev(d);
int retval;
+ if (!net_eq(dev_net(dev), &init_net))
+ return 0;
+
/* pass interface to uevent. */
retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
if (retval)
@@ -476,6 +478,10 @@ void netdev_unregister_kobject(struct net_device * net)
struct device *dev = &(net->dev);
kobject_get(&dev->kobj);
+
+ if (dev_net(net) != &init_net)
+ return;
+
device_del(dev);
}
@@ -490,7 +496,7 @@ int netdev_register_kobject(struct net_device *net)
dev->groups = groups;
BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ);
- strlcpy(dev->bus_id, net->name, BUS_ID_SIZE);
+ dev_set_name(dev, net->name);
#ifdef CONFIG_SYSFS
*groups++ = &netstat_group;
@@ -501,6 +507,9 @@ int netdev_register_kobject(struct net_device *net)
#endif
#endif /* CONFIG_SYSFS */
+ if (dev_net(net) != &init_net)
+ return 0;
+
return device_add(dev);
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 1895a4ca9c4..55cffad2f32 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -47,7 +47,6 @@ static __net_init int setup_net(struct net *net)
goto out;
ng->len = INITIAL_NET_GEN_PTRS;
- INIT_RCU_HEAD(&ng->rcu);
rcu_assign_pointer(net->gen, ng);
error = 0;
@@ -478,7 +477,6 @@ int net_assign_generic(struct net *net, int id, void *data)
*/
ng->len = id;
- INIT_RCU_HEAD(&ng->rcu);
memcpy(&ng->ptr, &old_ng->ptr, old_ng->len);
rcu_assign_pointer(net->gen, ng);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index dadac6281f2..755414cd49d 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -58,6 +58,7 @@ static void queue_process(struct work_struct *work)
while ((skb = skb_dequeue(&npinfo->txq))) {
struct net_device *dev = skb->dev;
+ const struct net_device_ops *ops = dev->netdev_ops;
struct netdev_queue *txq;
if (!netif_device_present(dev) || !netif_running(dev)) {
@@ -71,7 +72,7 @@ static void queue_process(struct work_struct *work)
__netif_tx_lock(txq, smp_processor_id());
if (netif_tx_queue_stopped(txq) ||
netif_tx_queue_frozen(txq) ||
- dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
+ ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
skb_queue_head(&npinfo->txq, skb);
__netif_tx_unlock(txq);
local_irq_restore(flags);
@@ -174,12 +175,13 @@ static void service_arp_queue(struct netpoll_info *npi)
void netpoll_poll(struct netpoll *np)
{
struct net_device *dev = np->dev;
+ const struct net_device_ops *ops = dev->netdev_ops;
- if (!dev || !netif_running(dev) || !dev->poll_controller)
+ if (!dev || !netif_running(dev) || !ops->ndo_poll_controller)
return;
/* Process pending work on NIC */
- dev->poll_controller(dev);
+ ops->ndo_poll_controller(dev);
poll_napi(dev);
@@ -274,6 +276,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
int status = NETDEV_TX_BUSY;
unsigned long tries;
struct net_device *dev = np->dev;
+ const struct net_device_ops *ops = dev->netdev_ops;
struct netpoll_info *npinfo = np->dev->npinfo;
if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
@@ -294,7 +297,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
tries > 0; --tries) {
if (__netif_tx_trylock(txq)) {
if (!netif_tx_queue_stopped(txq))
- status = dev->hard_start_xmit(skb, dev);
+ status = ops->ndo_start_xmit(skb, dev);
__netif_tx_unlock(txq);
if (status == NETDEV_TX_OK)
@@ -345,7 +348,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
udph->check = csum_tcpudp_magic(htonl(np->local_ip),
htonl(np->remote_ip),
udp_len, IPPROTO_UDP,
- csum_partial((unsigned char *)udph, udp_len, 0));
+ csum_partial(udph, udp_len, 0));
if (udph->check == 0)
udph->check = CSUM_MANGLED_0;
@@ -555,7 +558,6 @@ out:
void netpoll_print_options(struct netpoll *np)
{
- DECLARE_MAC_BUF(mac);
printk(KERN_INFO "%s: local port %d\n",
np->name, np->local_port);
printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
@@ -566,8 +568,8 @@ void netpoll_print_options(struct netpoll *np)
np->name, np->remote_port);
printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
np->name, HIPQUAD(np->remote_ip));
- printk(KERN_INFO "%s: remote ethernet address %s\n",
- np->name, print_mac(mac, np->remote_mac));
+ printk(KERN_INFO "%s: remote ethernet address %pM\n",
+ np->name, np->remote_mac);
}
int netpoll_parse_options(struct netpoll *np, char *opt)
@@ -697,7 +699,7 @@ int netpoll_setup(struct netpoll *np)
atomic_inc(&npinfo->refcnt);
}
- if (!ndev->poll_controller) {
+ if (!ndev->netdev_ops->ndo_poll_controller) {
printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
np->name, np->dev_name);
err = -ENOTSUPP;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8997e912aaa..65498483325 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -422,6 +422,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
const char *ifname);
static int pktgen_device_event(struct notifier_block *, unsigned long, void *);
static void pktgen_run_all_threads(void);
+static void pktgen_reset_all_threads(void);
static void pktgen_stop_all_threads_ifs(void);
static int pktgen_stop_device(struct pktgen_dev *pkt_dev);
static void pktgen_stop(struct pktgen_thread *t);
@@ -480,6 +481,9 @@ static ssize_t pgctrl_write(struct file *file, const char __user * buf,
else if (!strcmp(data, "start"))
pktgen_run_all_threads();
+ else if (!strcmp(data, "reset"))
+ pktgen_reset_all_threads();
+
else
printk(KERN_WARNING "pktgen: Unknown command: %s\n", data);
@@ -509,7 +513,6 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
__u64 sa;
__u64 stopped;
__u64 now = getCurUs();
- DECLARE_MAC_BUF(mac);
seq_printf(seq,
"Params: count %llu min_pkt_size: %u max_pkt_size: %u\n",
@@ -554,12 +557,12 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
seq_puts(seq, " src_mac: ");
- seq_printf(seq, "%s ",
- print_mac(mac, is_zero_ether_addr(pkt_dev->src_mac) ?
- pkt_dev->odev->dev_addr : pkt_dev->src_mac));
+ seq_printf(seq, "%pM ",
+ is_zero_ether_addr(pkt_dev->src_mac) ?
+ pkt_dev->odev->dev_addr : pkt_dev->src_mac);
seq_printf(seq, "dst_mac: ");
- seq_printf(seq, "%s\n", print_mac(mac, pkt_dev->dst_mac));
+ seq_printf(seq, "%pM\n", pkt_dev->dst_mac);
seq_printf(seq,
" udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n",
@@ -2162,7 +2165,8 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
struct xfrm_state *x = pkt_dev->flows[flow].x;
if (!x) {
/*slow path: we dont already have xfrm_state*/
- x = xfrm_stateonly_find((xfrm_address_t *)&pkt_dev->cur_daddr,
+ x = xfrm_stateonly_find(&init_net,
+ (xfrm_address_t *)&pkt_dev->cur_daddr,
(xfrm_address_t *)&pkt_dev->cur_saddr,
AF_INET,
pkt_dev->ipsmode,
@@ -3169,6 +3173,24 @@ static void pktgen_run_all_threads(void)
pktgen_wait_all_threads_run();
}
+static void pktgen_reset_all_threads(void)
+{
+ struct pktgen_thread *t;
+
+ pr_debug("pktgen: entering pktgen_reset_all_threads.\n");
+
+ mutex_lock(&pktgen_thread_lock);
+
+ list_for_each_entry(t, &pktgen_threads, th_list)
+ t->control |= (T_REMDEVALL);
+
+ mutex_unlock(&pktgen_thread_lock);
+
+ schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */
+
+ pktgen_wait_all_threads_run();
+}
+
static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
{
__u64 total_us, bps, mbps, pps, idle;
@@ -3331,14 +3353,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
{
- struct net_device *odev = NULL;
+ struct net_device *odev = pkt_dev->odev;
+ int (*xmit)(struct sk_buff *, struct net_device *)
+ = odev->netdev_ops->ndo_start_xmit;
struct netdev_queue *txq;
__u64 idle_start = 0;
u16 queue_map;
int ret;
- odev = pkt_dev->odev;
-
if (pkt_dev->delay_us || pkt_dev->delay_ns) {
u64 now;
@@ -3419,7 +3441,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
atomic_inc(&(pkt_dev->skb->users));
retry_now:
- ret = odev->hard_start_xmit(pkt_dev->skb, odev);
+ ret = (*xmit)(pkt_dev->skb, odev);
if (likely(ret == NETDEV_TX_OK)) {
pkt_dev->last_ok = 1;
pkt_dev->sofar++;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4dfb6b4d455..790dd205bb5 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -551,7 +551,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
}
static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
- struct net_device_stats *b)
+ const struct net_device_stats *b)
{
a->rx_packets = b->rx_packets;
a->tx_packets = b->tx_packets;
@@ -609,7 +609,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq;
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
- struct net_device_stats *stats;
+ const struct net_device_stats *stats;
struct nlattr *attr;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -666,7 +666,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (attr == NULL)
goto nla_put_failure;
- stats = dev->get_stats(dev);
+ stats = dev_get_stats(dev);
copy_rtnl_link_stats(nla_data(attr), stats);
if (dev->rtnl_link_ops) {
@@ -762,6 +762,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
struct nlattr **tb, char *ifname, int modified)
{
+ const struct net_device_ops *ops = dev->netdev_ops;
int send_addr_notify = 0;
int err;
@@ -783,7 +784,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
struct rtnl_link_ifmap *u_map;
struct ifmap k_map;
- if (!dev->set_config) {
+ if (!ops->ndo_set_config) {
err = -EOPNOTSUPP;
goto errout;
}
@@ -801,7 +802,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
k_map.dma = (unsigned char) u_map->dma;
k_map.port = (unsigned char) u_map->port;
- err = dev->set_config(dev, &k_map);
+ err = ops->ndo_set_config(dev, &k_map);
if (err < 0)
goto errout;
@@ -812,7 +813,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
struct sockaddr *sa;
int len;
- if (!dev->set_mac_address) {
+ if (!ops->ndo_set_mac_address) {
err = -EOPNOTSUPP;
goto errout;
}
@@ -831,7 +832,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
sa->sa_family = dev->type;
memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
dev->addr_len);
- err = dev->set_mac_address(dev, sa);
+ err = ops->ndo_set_mac_address(dev, sa);
kfree(sa);
if (err)
goto errout;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 65f7757465b..b8d0abb2643 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -501,7 +501,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->network_header = old->network_header;
new->mac_header = old->mac_header;
new->dst = dst_clone(old->dst);
-#ifdef CONFIG_INET
+#ifdef CONFIG_XFRM
new->sp = secpath_get(old->sp);
#endif
memcpy(new->cb, old->cb, sizeof(old->cb));
@@ -556,6 +556,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
C(truesize);
#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
C(do_not_encrypt);
+ C(requeue);
#endif
atomic_set(&n->users, 1);
@@ -2017,6 +2018,148 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
skb_split_no_header(skb, skb1, len, pos);
}
+/* Shifting from/to a cloned skb is a no-go.
+ *
+ * Caller cannot keep skb_shinfo related pointers past calling here!
+ */
+static int skb_prepare_for_shift(struct sk_buff *skb)
+{
+ return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+}
+
+/**
+ * skb_shift - Shifts paged data partially from skb to another
+ * @tgt: buffer into which tail data gets added
+ * @skb: buffer from which the paged data comes from
+ * @shiftlen: shift up to this many bytes
+ *
+ * Attempts to shift up to shiftlen worth of bytes, which may be less than
+ * the length of the skb, from tgt to skb. Returns number bytes shifted.
+ * It's up to caller to free skb if everything was shifted.
+ *
+ * If @tgt runs out of frags, the whole operation is aborted.
+ *
+ * Skb cannot include anything else but paged data while tgt is allowed
+ * to have non-paged data as well.
+ *
+ * TODO: full sized shift could be optimized but that would need
+ * specialized skb free'er to handle frags without up-to-date nr_frags.
+ */
+int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
+{
+ int from, to, merge, todo;
+ struct skb_frag_struct *fragfrom, *fragto;
+
+ BUG_ON(shiftlen > skb->len);
+ BUG_ON(skb_headlen(skb)); /* Would corrupt stream */
+
+ todo = shiftlen;
+ from = 0;
+ to = skb_shinfo(tgt)->nr_frags;
+ fragfrom = &skb_shinfo(skb)->frags[from];
+
+ /* Actual merge is delayed until the point when we know we can
+ * commit all, so that we don't have to undo partial changes
+ */
+ if (!to ||
+ !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) {
+ merge = -1;
+ } else {
+ merge = to - 1;
+
+ todo -= fragfrom->size;
+ if (todo < 0) {
+ if (skb_prepare_for_shift(skb) ||
+ skb_prepare_for_shift(tgt))
+ return 0;
+
+ /* All previous frag pointers might be stale! */
+ fragfrom = &skb_shinfo(skb)->frags[from];
+ fragto = &skb_shinfo(tgt)->frags[merge];
+
+ fragto->size += shiftlen;
+ fragfrom->size -= shiftlen;
+ fragfrom->page_offset += shiftlen;
+
+ goto onlymerged;
+ }
+
+ from++;
+ }
+
+ /* Skip full, not-fitting skb to avoid expensive operations */
+ if ((shiftlen == skb->len) &&
+ (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to))
+ return 0;
+
+ if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt))
+ return 0;
+
+ while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) {
+ if (to == MAX_SKB_FRAGS)
+ return 0;
+
+ fragfrom = &skb_shinfo(skb)->frags[from];
+ fragto = &skb_shinfo(tgt)->frags[to];
+
+ if (todo >= fragfrom->size) {
+ *fragto = *fragfrom;
+ todo -= fragfrom->size;
+ from++;
+ to++;
+
+ } else {
+ get_page(fragfrom->page);
+ fragto->page = fragfrom->page;
+ fragto->page_offset = fragfrom->page_offset;
+ fragto->size = todo;
+
+ fragfrom->page_offset += todo;
+ fragfrom->size -= todo;
+ todo = 0;
+
+ to++;
+ break;
+ }
+ }
+
+ /* Ready to "commit" this state change to tgt */
+ skb_shinfo(tgt)->nr_frags = to;
+
+ if (merge >= 0) {
+ fragfrom = &skb_shinfo(skb)->frags[0];
+ fragto = &skb_shinfo(tgt)->frags[merge];
+
+ fragto->size += fragfrom->size;
+ put_page(fragfrom->page);
+ }
+
+ /* Reposition in the original skb */
+ to = 0;
+ while (from < skb_shinfo(skb)->nr_frags)
+ skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++];
+ skb_shinfo(skb)->nr_frags = to;
+
+ BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags);
+
+onlymerged:
+ /* Most likely the tgt won't ever need its checksum anymore, skb on
+ * the other hand might need it if it needs to be resent
+ */
+ tgt->ip_summed = CHECKSUM_PARTIAL;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+
+ /* Yak, is it really working this way? Some helper please? */
+ skb->len -= shiftlen;
+ skb->data_len -= shiftlen;
+ skb->truesize -= shiftlen;
+ tgt->len += shiftlen;
+ tgt->data_len += shiftlen;
+ tgt->truesize += shiftlen;
+
+ return shiftlen;
+}
+
/**
* skb_prepare_seq_read - Prepare a sequential read of skb data
* @skb: the buffer to read
@@ -2285,6 +2428,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
{
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
+ struct sk_buff *fskb = skb_shinfo(skb)->frag_list;
unsigned int mss = skb_shinfo(skb)->gso_size;
unsigned int doffset = skb->data - skb_mac_header(skb);
unsigned int offset = doffset;
@@ -2304,7 +2448,6 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
struct sk_buff *nskb;
skb_frag_t *frag;
int hsize;
- int k;
int size;
len = skb->len - offset;
@@ -2317,9 +2460,36 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
if (hsize > len || !sg)
hsize = len;
- nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC);
- if (unlikely(!nskb))
- goto err;
+ if (!hsize && i >= nfrags) {
+ BUG_ON(fskb->len != len);
+
+ pos += len;
+ nskb = skb_clone(fskb, GFP_ATOMIC);
+ fskb = fskb->next;
+
+ if (unlikely(!nskb))
+ goto err;
+
+ hsize = skb_end_pointer(nskb) - nskb->head;
+ if (skb_cow_head(nskb, doffset + headroom)) {
+ kfree_skb(nskb);
+ goto err;
+ }
+
+ nskb->truesize += skb_end_pointer(nskb) - nskb->head -
+ hsize;
+ skb_release_head_state(nskb);
+ __skb_push(nskb, doffset);
+ } else {
+ nskb = alloc_skb(hsize + doffset + headroom,
+ GFP_ATOMIC);
+
+ if (unlikely(!nskb))
+ goto err;
+
+ skb_reserve(nskb, headroom);
+ __skb_put(nskb, doffset);
+ }
if (segs)
tail->next = nskb;
@@ -2330,13 +2500,15 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
__copy_skb_header(nskb, skb);
nskb->mac_len = skb->mac_len;
- skb_reserve(nskb, headroom);
skb_reset_mac_header(nskb);
skb_set_network_header(nskb, skb->mac_len);
nskb->transport_header = (nskb->network_header +
skb_network_header_len(skb));
- skb_copy_from_linear_data(skb, skb_put(nskb, doffset),
- doffset);
+ skb_copy_from_linear_data(skb, nskb->data, doffset);
+
+ if (pos >= offset + len)
+ continue;
+
if (!sg) {
nskb->ip_summed = CHECKSUM_NONE;
nskb->csum = skb_copy_and_csum_bits(skb, offset,
@@ -2346,14 +2518,11 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
}
frag = skb_shinfo(nskb)->frags;
- k = 0;
skb_copy_from_linear_data_offset(skb, offset,
skb_put(nskb, hsize), hsize);
- while (pos < offset + len) {
- BUG_ON(i >= nfrags);
-
+ while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
get_page(frag->page);
size = frag->size;
@@ -2363,20 +2532,39 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
frag->size -= offset - pos;
}
- k++;
+ skb_shinfo(nskb)->nr_frags++;
if (pos + size <= offset + len) {
i++;
pos += size;
} else {
frag->size -= pos + size - (offset + len);
- break;
+ goto skip_fraglist;
}
frag++;
}
- skb_shinfo(nskb)->nr_frags = k;
+ if (pos < offset + len) {
+ struct sk_buff *fskb2 = fskb;
+
+ BUG_ON(pos + fskb->len != offset + len);
+
+ pos += fskb->len;
+ fskb = fskb->next;
+
+ if (fskb2->next) {
+ fskb2 = skb_clone(fskb2, GFP_ATOMIC);
+ if (!fskb2)
+ goto err;
+ } else
+ skb_get(fskb2);
+
+ BUG_ON(skb_shinfo(nskb)->frag_list);
+ skb_shinfo(nskb)->frag_list = fskb2;
+ }
+
+skip_fraglist:
nskb->data_len = len - hsize;
nskb->len += nskb->data_len;
nskb->truesize += nskb->data_len;
@@ -2394,6 +2582,65 @@ err:
EXPORT_SYMBOL_GPL(skb_segment);
+int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+{
+ struct sk_buff *p = *head;
+ struct sk_buff *nskb;
+ unsigned int headroom;
+ unsigned int hlen = p->data - skb_mac_header(p);
+
+ if (hlen + p->len + skb->len >= 65536)
+ return -E2BIG;
+
+ if (skb_shinfo(p)->frag_list)
+ goto merge;
+
+ headroom = skb_headroom(p);
+ nskb = netdev_alloc_skb(p->dev, headroom);
+ if (unlikely(!nskb))
+ return -ENOMEM;
+
+ __copy_skb_header(nskb, p);
+ nskb->mac_len = p->mac_len;
+
+ skb_reserve(nskb, headroom);
+
+ skb_set_mac_header(nskb, -hlen);
+ skb_set_network_header(nskb, skb_network_offset(p));
+ skb_set_transport_header(nskb, skb_transport_offset(p));
+
+ memcpy(skb_mac_header(nskb), skb_mac_header(p), hlen);
+
+ *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
+ skb_shinfo(nskb)->frag_list = p;
+ skb_header_release(p);
+ nskb->prev = p;
+
+ nskb->data_len += p->len;
+ nskb->truesize += p->len;
+ nskb->len += p->len;
+
+ *head = nskb;
+ nskb->next = p->next;
+ p->next = NULL;
+
+ p = nskb;
+
+merge:
+ NAPI_GRO_CB(p)->count++;
+ p->prev->next = skb;
+ p->prev = skb;
+ skb_header_release(skb);
+
+ p->data_len += skb->len;
+ p->truesize += skb->len;
+ p->len += skb->len;
+
+ NAPI_GRO_CB(skb)->same_flow = 1;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(skb_gro_receive);
+
void __init skb_init(void)
{
skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
diff --git a/net/core/sock.c b/net/core/sock.c
index edf7220889a..f3a0d08cbb4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1071,7 +1071,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
newsk->sk_sleep = NULL;
if (newsk->sk_prot->sockets_allocated)
- atomic_inc(newsk->sk_prot->sockets_allocated);
+ percpu_counter_inc(newsk->sk_prot->sockets_allocated);
}
out:
return newsk;
@@ -1463,8 +1463,12 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
}
if (prot->memory_pressure) {
- if (!*prot->memory_pressure ||
- prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) *
+ int alloc;
+
+ if (!*prot->memory_pressure)
+ return 1;
+ alloc = percpu_counter_read_positive(prot->sockets_allocated);
+ if (prot->sysctl_mem[2] > alloc *
sk_mem_pages(sk->sk_wmem_queued +
atomic_read(&sk->sk_rmem_alloc) +
sk->sk_forward_alloc))
@@ -2037,7 +2041,8 @@ int proto_register(struct proto *prot, int alloc_slab)
{
if (alloc_slab) {
prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ SLAB_HWCACHE_ALIGN | prot->slab_flags,
+ NULL);
if (prot->slab == NULL) {
printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
@@ -2076,7 +2081,9 @@ int proto_register(struct proto *prot, int alloc_slab)
prot->twsk_prot->twsk_slab =
kmem_cache_create(prot->twsk_prot->twsk_slab_name,
prot->twsk_prot->twsk_obj_size,
- 0, SLAB_HWCACHE_ALIGN,
+ 0,
+ SLAB_HWCACHE_ALIGN |
+ prot->slab_flags,
NULL);
if (prot->twsk_prot->twsk_slab == NULL)
goto out_free_timewait_sock_slab_name;
@@ -2164,7 +2171,7 @@ static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
proto->name,
proto->obj_size,
- proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
+ sock_prot_inuse_get(seq_file_net(seq), proto),
proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
proto->max_header,
@@ -2218,7 +2225,8 @@ static const struct seq_operations proto_seq_ops = {
static int proto_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &proto_seq_ops);
+ return seq_open_net(inode, file, &proto_seq_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations proto_seq_fops = {
@@ -2226,13 +2234,31 @@ static const struct file_operations proto_seq_fops = {
.open = proto_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
+};
+
+static __net_init int proto_init_net(struct net *net)
+{
+ if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static __net_exit void proto_exit_net(struct net *net)
+{
+ proc_net_remove(net, "protocols");
+}
+
+
+static __net_initdata struct pernet_operations proto_net_ops = {
+ .init = proto_init_net,
+ .exit = proto_exit_net,
};
static int __init proto_init(void)
{
- /* register /proc/net/protocols */
- return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
+ return register_pernet_subsys(&proto_net_ops);
}
subsys_initcall(proto_init);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index f686467ff12..83d3398559e 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -12,7 +12,6 @@
#include <linux/netdevice.h>
#include <linux/init.h>
#include <net/sock.h>
-#include <net/xfrm.h>
static struct ctl_table net_core_table[] = {
#ifdef CONFIG_NET
@@ -22,7 +21,7 @@ static struct ctl_table net_core_table[] = {
.data = &sysctl_wmem_max,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = proc_dointvec
},
{
.ctl_name = NET_CORE_RMEM_MAX,
@@ -30,7 +29,7 @@ static struct ctl_table net_core_table[] = {
.data = &sysctl_rmem_max,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = proc_dointvec
},
{
.ctl_name = NET_CORE_WMEM_DEFAULT,
@@ -38,7 +37,7 @@ static struct ctl_table net_core_table[] = {
.data = &sysctl_wmem_default,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = proc_dointvec
},
{
.ctl_name = NET_CORE_RMEM_DEFAULT,
@@ -46,7 +45,7 @@ static struct ctl_table net_core_table[] = {
.data = &sysctl_rmem_default,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = proc_dointvec
},
{
.ctl_name = NET_CORE_DEV_WEIGHT,
@@ -54,7 +53,7 @@ static struct ctl_table net_core_table[] = {
.data = &weight_p,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = proc_dointvec
},
{
.ctl_name = NET_CORE_MAX_BACKLOG,
@@ -62,7 +61,7 @@ static struct ctl_table net_core_table[] = {
.data = &netdev_max_backlog,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = proc_dointvec
},
{
.ctl_name = NET_CORE_MSG_COST,
@@ -70,8 +69,8 @@ static struct ctl_table net_core_table[] = {
.data = &net_ratelimit_state.interval,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- .strategy = &sysctl_jiffies,
+ .proc_handler = proc_dointvec_jiffies,
+ .strategy = sysctl_jiffies,
},
{
.ctl_name = NET_CORE_MSG_BURST,
@@ -79,7 +78,7 @@ static struct ctl_table net_core_table[] = {
.data = &net_ratelimit_state.burst,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
.ctl_name = NET_CORE_OPTMEM_MAX,
@@ -87,42 +86,8 @@ static struct ctl_table net_core_table[] = {
.data = &sysctl_optmem_max,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = proc_dointvec
},
-#ifdef CONFIG_XFRM
- {
- .ctl_name = NET_CORE_AEVENT_ETIME,
- .procname = "xfrm_aevent_etime",
- .data = &sysctl_xfrm_aevent_etime,
- .maxlen = sizeof(u32),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = NET_CORE_AEVENT_RSEQTH,
- .procname = "xfrm_aevent_rseqth",
- .data = &sysctl_xfrm_aevent_rseqth,
- .maxlen = sizeof(u32),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = CTL_UNNUMBERED,
- .procname = "xfrm_larval_drop",
- .data = &sysctl_xfrm_larval_drop,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = CTL_UNNUMBERED,
- .procname = "xfrm_acq_expires",
- .data = &sysctl_xfrm_acq_expires,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
-#endif /* CONFIG_XFRM */
#endif /* CONFIG_NET */
{
.ctl_name = NET_CORE_BUDGET,
@@ -130,7 +95,7 @@ static struct ctl_table net_core_table[] = {
.data = &netdev_budget,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = proc_dointvec
},
{
.ctl_name = NET_CORE_WARNINGS,
@@ -138,7 +103,7 @@ static struct ctl_table net_core_table[] = {
.data = &net_msg_warn,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = proc_dointvec
},
{ .ctl_name = 0 }
};
@@ -150,12 +115,12 @@ static struct ctl_table netns_core_table[] = {
.data = &init_net.core.sysctl_somaxconn,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = proc_dointvec
},
{ .ctl_name = 0 }
};
-static __net_initdata struct ctl_path net_core_path[] = {
+__net_initdata struct ctl_path net_core_path[] = {
{ .procname = "net", .ctl_name = CTL_NET, },
{ .procname = "core", .ctl_name = NET_CORE, },
{ },
@@ -207,8 +172,11 @@ static __net_initdata struct pernet_operations sysctl_core_ops = {
static __init int sysctl_core_init(void)
{
+ static struct ctl_table empty[1];
+
+ register_sysctl_paths(net_core_path, empty);
register_net_sysctl_rotable(net_core_path, net_core_table);
return register_pernet_subsys(&sysctl_core_ops);
}
-__initcall(sysctl_core_init);
+fs_initcall(sysctl_core_init);