about | summary | refs | log | tree | commit | diff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r-- net/core/Makefile 1
-rw-r--r-- net/core/datagram.c 6
-rw-r--r-- net/core/dev.c 79
-rw-r--r-- net/core/ethtool.c 49
-rw-r--r-- net/core/flow.c 2
-rw-r--r-- net/core/neighbour.c 15
-rw-r--r-- net/core/netfilter.c 648
-rw-r--r-- net/core/request_sock.c 28
-rw-r--r-- net/core/rtnetlink.c 9
-rw-r--r-- net/core/skbuff.c 158
-rw-r--r-- net/core/sock.c 133
-rw-r--r-- net/core/sysctl_net_core.c 9
-rw-r--r-- net/core/utils.c 2
-rw-r--r-- net/core/wireless.c 8
14 files changed, 370 insertions, 777 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index f5f5e58943e..630da0f0579 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -12,7 +12,6 @@ obj-y += dev.o ethtool.o dev_mcast.o dst.o \
obj-$(CONFIG_XFRM) += flow.o
obj-$(CONFIG_SYSFS) += net-sysfs.o
-obj-$(CONFIG_NETFILTER) += netfilter.o
obj-$(CONFIG_NET_DIVERT) += dv.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_NET_RADIO) += wireless.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index fcee054b6f7..da9bf71421a 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -43,7 +43,6 @@
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
-#include <linux/tcp.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
@@ -51,9 +50,10 @@
#include <net/protocol.h>
#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/checksum.h>
+#include <net/checksum.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
/*
* Is a socket 'connection oriented' ?
diff --git a/net/core/dev.c b/net/core/dev.c
index faf59b02c4b..c01511e3d0c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -267,10 +267,6 @@ void dev_add_pack(struct packet_type *pt)
spin_unlock_bh(&ptype_lock);
}
-extern void linkwatch_run_queue(void);
-
-
-
/**
* __dev_remove_pack - remove packet handler
* @pt: packet type declaration
@@ -1009,13 +1005,22 @@ void net_disable_timestamp(void)
atomic_dec(&netstamp_needed);
}
-static inline void net_timestamp(struct timeval *stamp)
+/*
+ * Stamp @skb with the current time of day: the value is read with
+ * do_gettimeofday() into a local timeval and stored in the buffer
+ * through skb_set_timestamp().
+ */
+void __net_timestamp(struct sk_buff *skb)
+{
+ struct timeval tv;
+
+ do_gettimeofday(&tv);
+ skb_set_timestamp(skb, &tv);
+}
+EXPORT_SYMBOL(__net_timestamp);
+
+static inline void net_timestamp(struct sk_buff *skb)
{
if (atomic_read(&netstamp_needed))
- do_gettimeofday(stamp);
+ __net_timestamp(skb);
else {
- stamp->tv_sec = 0;
- stamp->tv_usec = 0;
+ skb->tstamp.off_sec = 0;
+ skb->tstamp.off_usec = 0;
}
}
@@ -1027,7 +1032,8 @@ static inline void net_timestamp(struct timeval *stamp)
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
struct packet_type *ptype;
- net_timestamp(&skb->stamp);
+
+ net_timestamp(skb);
rcu_read_lock();
list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -1058,7 +1064,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
skb2->h.raw = skb2->nh.raw;
skb2->pkt_type = PACKET_OUTGOING;
- ptype->func(skb2, skb->dev, ptype);
+ ptype->func(skb2, skb->dev, ptype, skb->dev);
}
}
rcu_read_unlock();
@@ -1123,8 +1129,6 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
#define illegal_highdma(dev, skb) (0)
#endif
-extern void skb_release_data(struct sk_buff *);
-
/* Keep head the same: replace data */
int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask)
{
@@ -1379,8 +1383,8 @@ int netif_rx(struct sk_buff *skb)
if (netpoll_rx(skb))
return NET_RX_DROP;
- if (!skb->stamp.tv_sec)
- net_timestamp(&skb->stamp);
+ if (!skb->tstamp.off_sec)
+ net_timestamp(skb);
/*
* The code is rearranged so that the path is the most
@@ -1425,14 +1429,14 @@ int netif_rx_ni(struct sk_buff *skb)
EXPORT_SYMBOL(netif_rx_ni);
-static __inline__ void skb_bond(struct sk_buff *skb)
+static inline struct net_device *skb_bond(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
- if (dev->master) {
- skb->real_dev = skb->dev;
+ if (dev->master)
skb->dev = dev->master;
- }
+
+ return dev;
}
static void net_tx_action(struct softirq_action *h)
@@ -1482,10 +1486,11 @@ static void net_tx_action(struct softirq_action *h)
}
static __inline__ int deliver_skb(struct sk_buff *skb,
- struct packet_type *pt_prev)
+ struct packet_type *pt_prev,
+ struct net_device *orig_dev)
{
atomic_inc(&skb->users);
- return pt_prev->func(skb, skb->dev, pt_prev);
+ return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
@@ -1496,7 +1501,8 @@ struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
static __inline__ int handle_bridge(struct sk_buff **pskb,
- struct packet_type **pt_prev, int *ret)
+ struct packet_type **pt_prev, int *ret,
+ struct net_device *orig_dev)
{
struct net_bridge_port *port;
@@ -1505,14 +1511,14 @@ static __inline__ int handle_bridge(struct sk_buff **pskb,
return 0;
if (*pt_prev) {
- *ret = deliver_skb(*pskb, *pt_prev);
+ *ret = deliver_skb(*pskb, *pt_prev, orig_dev);
*pt_prev = NULL;
}
return br_handle_frame_hook(port, pskb);
}
#else
-#define handle_bridge(skb, pt_prev, ret) (0)
+#define handle_bridge(skb, pt_prev, ret, orig_dev) (0)
#endif
#ifdef CONFIG_NET_CLS_ACT
@@ -1534,17 +1540,14 @@ static int ing_filter(struct sk_buff *skb)
__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
if (MAX_RED_LOOP < ttl++) {
printk("Redir loop detected Dropping packet (%s->%s)\n",
- skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
+ skb->input_dev->name, skb->dev->name);
return TC_ACT_SHOT;
}
skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
- if (NULL == skb->input_dev) {
- skb->input_dev = skb->dev;
- printk("ing_filter: fixed %s out %s\n",skb->input_dev->name,skb->dev->name);
- }
+
spin_lock(&dev->ingress_lock);
if ((q = dev->qdisc_ingress) != NULL)
result = q->enqueue(skb, q);
@@ -1559,6 +1562,7 @@ static int ing_filter(struct sk_buff *skb)
int netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
+ struct net_device *orig_dev;
int ret = NET_RX_DROP;
unsigned short type;
@@ -1566,10 +1570,13 @@ int netif_receive_skb(struct sk_buff *skb)
if (skb->dev->poll && netpoll_rx(skb))
return NET_RX_DROP;
- if (!skb->stamp.tv_sec)
- net_timestamp(&skb->stamp);
+ if (!skb->tstamp.off_sec)
+ net_timestamp(skb);
+
+ if (!skb->input_dev)
+ skb->input_dev = skb->dev;
- skb_bond(skb);
+ orig_dev = skb_bond(skb);
__get_cpu_var(netdev_rx_stat).total++;
@@ -1590,14 +1597,14 @@ int netif_receive_skb(struct sk_buff *skb)
list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
- ret = deliver_skb(skb, pt_prev);
+ ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}
#ifdef CONFIG_NET_CLS_ACT
if (pt_prev) {
- ret = deliver_skb(skb, pt_prev);
+ ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL; /* noone else should process this after*/
} else {
skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
@@ -1616,7 +1623,7 @@ ncls:
handle_diverter(skb);
- if (handle_bridge(&skb, &pt_prev, &ret))
+ if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
goto out;
type = skb->protocol;
@@ -1624,13 +1631,13 @@ ncls:
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev)
- ret = deliver_skb(skb, pt_prev);
+ ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}
if (pt_prev) {
- ret = pt_prev->func(skb, skb->dev, pt_prev);
+ ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a3eeb88e1c8..289c1b5a8e4 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -81,6 +81,18 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data)
return 0;
}
+/*
+ * Copy dev->perm_addr into @data.
+ *
+ * The number of bytes copied is dev->addr_len.  If the caller's
+ * addr->size is smaller than that, nothing is copied and -ETOOSMALL is
+ * returned; otherwise addr->size is set to the copied length and 0 is
+ * returned.
+ */
+int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *addr, u8 *data)
+{
+ unsigned char len = dev->addr_len;
+ if ( addr->size < len )
+ return -ETOOSMALL;
+
+ addr->size = len;
+ memcpy(data, dev->perm_addr, len);
+ return 0;
+}
+
+
/* Handlers for each ethtool command */
static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
@@ -683,6 +695,39 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
return ret;
}
+/*
+ * ETHTOOL_GPERMADDR handler: fetch the device's permanent address through
+ * the driver's get_perm_addr operation and copy it to user space.
+ *
+ * @dev:      device being queried.
+ * @useraddr: user buffer that starts with a struct ethtool_perm_addr and is
+ *            followed directly by room for epaddr.size address bytes.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if the driver has no get_perm_addr
+ * operation, -EFAULT if a user copy fails, -ENOMEM if the temporary buffer
+ * cannot be allocated, or the error returned by get_perm_addr.
+ */
+static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_perm_addr epaddr;
+	u8 *data;
+	int ret;
+
+	if (!dev->ethtool_ops->get_perm_addr)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&epaddr, useraddr, sizeof(epaddr)))
+		return -EFAULT;
+
+	/* Temporary buffer sized by the caller-supplied epaddr.size. */
+	data = kmalloc(epaddr.size, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	/*
+	 * From here on every exit must pass through "out" so that data is
+	 * freed; returning directly on a driver error would leak it.
+	 */
+	ret = dev->ethtool_ops->get_perm_addr(dev, &epaddr, data);
+	if (ret)
+		goto out;
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &epaddr, sizeof(epaddr)))
+		goto out;
+	/* The address bytes follow the header in the user buffer. */
+	useraddr += sizeof(epaddr);
+	if (copy_to_user(useraddr, data, epaddr.size))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(data);
+	return ret;
+}
+
/* The main entry point in this file. Called from net/core/dev.c */
int dev_ethtool(struct ifreq *ifr)
@@ -806,6 +851,9 @@ int dev_ethtool(struct ifreq *ifr)
case ETHTOOL_GSTATS:
rc = ethtool_get_stats(dev, useraddr);
break;
+ case ETHTOOL_GPERMADDR:
+ rc = ethtool_get_perm_addr(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
@@ -826,6 +874,7 @@ int dev_ethtool(struct ifreq *ifr)
EXPORT_SYMBOL(dev_ethtool);
EXPORT_SYMBOL(ethtool_op_get_link);
+EXPORT_SYMBOL_GPL(ethtool_op_get_perm_addr);
EXPORT_SYMBOL(ethtool_op_get_sg);
EXPORT_SYMBOL(ethtool_op_get_tso);
EXPORT_SYMBOL(ethtool_op_get_tx_csum);
diff --git a/net/core/flow.c b/net/core/flow.c
index f289570b15a..7e95b39de9f 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -42,7 +42,7 @@ static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
#define flow_table(cpu) (per_cpu(flow_tables, cpu))
-static kmem_cache_t *flow_cachep;
+static kmem_cache_t *flow_cachep __read_mostly;
static int flow_lwm, flow_hwm;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1beb782ac41..39fc55edf69 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1217,7 +1217,7 @@ static void neigh_proxy_process(unsigned long arg)
while (skb != (struct sk_buff *)&tbl->proxy_queue) {
struct sk_buff *back = skb;
- long tdif = back->stamp.tv_usec - now;
+ long tdif = NEIGH_CB(back)->sched_next - now;
skb = skb->next;
if (tdif <= 0) {
@@ -1248,8 +1248,9 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
kfree_skb(skb);
return;
}
- skb->stamp.tv_sec = LOCALLY_ENQUEUED;
- skb->stamp.tv_usec = sched_next;
+
+ NEIGH_CB(skb)->sched_next = sched_next;
+ NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
spin_lock(&tbl->proxy_queue.lock);
if (del_timer(&tbl->proxy_timer)) {
@@ -2342,8 +2343,8 @@ void neigh_app_ns(struct neighbour *n)
}
nlh = (struct nlmsghdr *)skb->data;
nlh->nlmsg_flags = NLM_F_REQUEST;
- NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
- netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
+ NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH;
+ netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
}
static void neigh_app_notify(struct neighbour *n)
@@ -2360,8 +2361,8 @@ static void neigh_app_notify(struct neighbour *n)
return;
}
nlh = (struct nlmsghdr *)skb->data;
- NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
- netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
+ NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH;
+ netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
}
#endif /* CONFIG_ARPD */
diff --git a/net/core/netfilter.c b/net/core/netfilter.c
deleted file mode 100644
index 076c156d5ed..00000000000
--- a/net/core/netfilter.c
+++ /dev/null
@@ -1,648 +0,0 @@
-/* netfilter.c: look after the filters for various protocols.
- * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
- *
- * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
- * way.
- *
- * Rusty Russell (C)2000 -- This code is GPL.
- *
- * February 2000: Modified by James Morris to have 1 queue per protocol.
- * 15-Mar-2000: Added NF_REPEAT --RR.
- * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik.
- */
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/netfilter.h>
-#include <net/protocol.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/wait.h>
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/if.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/icmp.h>
-#include <net/sock.h>
-#include <net/route.h>
-#include <linux/ip.h>
-
-/* In this code, we can be waiting indefinitely for userspace to
- * service a packet if a hook returns NF_QUEUE. We could keep a count
- * of skbuffs queued for userspace, and not deregister a hook unless
- * this is zero, but that sucks. Now, we simply check when the
- * packets come back: if the hook is gone, the packet is discarded. */
-#ifdef CONFIG_NETFILTER_DEBUG
-#define NFDEBUG(format, args...) printk(format , ## args)
-#else
-#define NFDEBUG(format, args...)
-#endif
-
-/* Sockopts only registered and called from user context, so
- net locking would be overkill. Also, [gs]etsockopt calls may
- sleep. */
-static DECLARE_MUTEX(nf_sockopt_mutex);
-
-struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
-static LIST_HEAD(nf_sockopts);
-static DEFINE_SPINLOCK(nf_hook_lock);
-
-/*
- * A queue handler may be registered for each protocol. Each is protected by
- * long term mutex. The handler must provide an an outfn() to accept packets
- * for queueing and must reinject all packets it receives, no matter what.
- */
-static struct nf_queue_handler_t {
- nf_queue_outfn_t outfn;
- void *data;
-} queue_handler[NPROTO];
-static DEFINE_RWLOCK(queue_handler_lock);
-
-int nf_register_hook(struct nf_hook_ops *reg)
-{
- struct list_head *i;
-
- spin_lock_bh(&nf_hook_lock);
- list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
- if (reg->priority < ((struct nf_hook_ops *)i)->priority)
- break;
- }
- list_add_rcu(&reg->list, i->prev);
- spin_unlock_bh(&nf_hook_lock);
-
- synchronize_net();
- return 0;
-}
-
-void nf_unregister_hook(struct nf_hook_ops *reg)
-{
- spin_lock_bh(&nf_hook_lock);
- list_del_rcu(&reg->list);
- spin_unlock_bh(&nf_hook_lock);
-
- synchronize_net();
-}
-
-/* Do exclusive ranges overlap? */
-static inline int overlap(int min1, int max1, int min2, int max2)
-{
- return max1 > min2 && min1 < max2;
-}
-
-/* Functions to register sockopt ranges (exclusive). */
-int nf_register_sockopt(struct nf_sockopt_ops *reg)
-{
- struct list_head *i;
- int ret = 0;
-
- if (down_interruptible(&nf_sockopt_mutex) != 0)
- return -EINTR;
-
- list_for_each(i, &nf_sockopts) {
- struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
- if (ops->pf == reg->pf
- && (overlap(ops->set_optmin, ops->set_optmax,
- reg->set_optmin, reg->set_optmax)
- || overlap(ops->get_optmin, ops->get_optmax,
- reg->get_optmin, reg->get_optmax))) {
- NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
- ops->set_optmin, ops->set_optmax,
- ops->get_optmin, ops->get_optmax,
- reg->set_optmin, reg->set_optmax,
- reg->get_optmin, reg->get_optmax);
- ret = -EBUSY;
- goto out;
- }
- }
-
- list_add(&reg->list, &nf_sockopts);
-out:
- up(&nf_sockopt_mutex);
- return ret;
-}
-
-void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
-{
- /* No point being interruptible: we're probably in cleanup_module() */
- restart:
- down(&nf_sockopt_mutex);
- if (reg->use != 0) {
- /* To be woken by nf_sockopt call... */
- /* FIXME: Stuart Young's name appears gratuitously. */
- set_current_state(TASK_UNINTERRUPTIBLE);
- reg->cleanup_task = current;
- up(&nf_sockopt_mutex);
- schedule();
- goto restart;
- }
- list_del(&reg->list);
- up(&nf_sockopt_mutex);
-}
-
-/* Call get/setsockopt() */
-static int nf_sockopt(struct sock *sk, int pf, int val,
- char __user *opt, int *len, int get)
-{
- struct list_head *i;
- struct nf_sockopt_ops *ops;
- int ret;
-
- if (down_interruptible(&nf_sockopt_mutex) != 0)
- return -EINTR;
-
- list_for_each(i, &nf_sockopts) {
- ops = (struct nf_sockopt_ops *)i;
- if (ops->pf == pf) {
- if (get) {
- if (val >= ops->get_optmin
- && val < ops->get_optmax) {
- ops->use++;
- up(&nf_sockopt_mutex);
- ret = ops->get(sk, val, opt, len);
- goto out;
- }
- } else {
- if (val >= ops->set_optmin
- && val < ops->set_optmax) {
- ops->use++;
- up(&nf_sockopt_mutex);
- ret = ops->set(sk, val, opt, *len);
- goto out;
- }
- }
- }
- }
- up(&nf_sockopt_mutex);
- return -ENOPROTOOPT;
-
- out:
- down(&nf_sockopt_mutex);
- ops->use--;
- if (ops->cleanup_task)
- wake_up_process(ops->cleanup_task);
- up(&nf_sockopt_mutex);
- return ret;
-}
-
-int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
- int len)
-{
- return nf_sockopt(sk, pf, val, opt, &len, 0);
-}
-
-int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
-{
- return nf_sockopt(sk, pf, val, opt, len, 1);
-}
-
-static unsigned int nf_iterate(struct list_head *head,
- struct sk_buff **skb,
- int hook,
- const struct net_device *indev,
- const struct net_device *outdev,
- struct list_head **i,
- int (*okfn)(struct sk_buff *),
- int hook_thresh)
-{
- unsigned int verdict;
-
- /*
- * The caller must not block between calls to this
- * function because of risk of continuing from deleted element.
- */
- list_for_each_continue_rcu(*i, head) {
- struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
-
- if (hook_thresh > elem->priority)
- continue;
-
- /* Optimization: we don't need to hold module
- reference here, since function can't sleep. --RR */
- verdict = elem->hook(hook, skb, indev, outdev, okfn);
- if (verdict != NF_ACCEPT) {
-#ifdef CONFIG_NETFILTER_DEBUG
- if (unlikely(verdict > NF_MAX_VERDICT)) {
- NFDEBUG("Evil return from %p(%u).\n",
- elem->hook, hook);
- continue;
- }
-#endif
- if (verdict != NF_REPEAT)
- return verdict;
- *i = (*i)->prev;
- }
- }
- return NF_ACCEPT;
-}
-
-int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
-{
- int ret;
-
- write_lock_bh(&queue_handler_lock);
- if (queue_handler[pf].outfn)
- ret = -EBUSY;
- else {
- queue_handler[pf].outfn = outfn;
- queue_handler[pf].data = data;
- ret = 0;
- }
- write_unlock_bh(&queue_handler_lock);
-
- return ret;
-}
-
-/* The caller must flush their queue before this */
-int nf_unregister_queue_handler(int pf)
-{
- write_lock_bh(&queue_handler_lock);
- queue_handler[pf].outfn = NULL;
- queue_handler[pf].data = NULL;
- write_unlock_bh(&queue_handler_lock);
-
- return 0;
-}
-
-/*
- * Any packet that leaves via this function must come back
- * through nf_reinject().
- */
-static int nf_queue(struct sk_buff *skb,
- struct list_head *elem,
- int pf, unsigned int hook,
- struct net_device *indev,
- struct net_device *outdev,
- int (*okfn)(struct sk_buff *))
-{
- int status;
- struct nf_info *info;
-#ifdef CONFIG_BRIDGE_NETFILTER
- struct net_device *physindev = NULL;
- struct net_device *physoutdev = NULL;
-#endif
-
- /* QUEUE == DROP if noone is waiting, to be safe. */
- read_lock(&queue_handler_lock);
- if (!queue_handler[pf].outfn) {
- read_unlock(&queue_handler_lock);
- kfree_skb(skb);
- return 1;
- }
-
- info = kmalloc(sizeof(*info), GFP_ATOMIC);
- if (!info) {
- if (net_ratelimit())
- printk(KERN_ERR "OOM queueing packet %p\n",
- skb);
- read_unlock(&queue_handler_lock);
- kfree_skb(skb);
- return 1;
- }
-
- *info = (struct nf_info) {
- (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
-
- /* If it's going away, ignore hook. */
- if (!try_module_get(info->elem->owner)) {
- read_unlock(&queue_handler_lock);
- kfree(info);
- return 0;
- }
-
- /* Bump dev refs so they don't vanish while packet is out */
- if (indev) dev_hold(indev);
- if (outdev) dev_hold(outdev);
-
-#ifdef CONFIG_BRIDGE_NETFILTER
- if (skb->nf_bridge) {
- physindev = skb->nf_bridge->physindev;
- if (physindev) dev_hold(physindev);
- physoutdev = skb->nf_bridge->physoutdev;
- if (physoutdev) dev_hold(physoutdev);
- }
-#endif
-
- status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
- read_unlock(&queue_handler_lock);
-
- if (status < 0) {
- /* James M doesn't say fuck enough. */
- if (indev) dev_put(indev);
- if (outdev) dev_put(outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
- if (physindev) dev_put(physindev);
- if (physoutdev) dev_put(physoutdev);
-#endif
- module_put(info->elem->owner);
- kfree(info);
- kfree_skb(skb);
- return 1;
- }
- return 1;
-}
-
-/* Returns 1 if okfn() needs to be executed by the caller,
- * -EPERM for NF_DROP, 0 otherwise. */
-int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
- struct net_device *indev,
- struct net_device *outdev,
- int (*okfn)(struct sk_buff *),
- int hook_thresh)
-{
- struct list_head *elem;
- unsigned int verdict;
- int ret = 0;
-
- /* We may already have this, but read-locks nest anyway */
- rcu_read_lock();
-
- elem = &nf_hooks[pf][hook];
-next_hook:
- verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
- outdev, &elem, okfn, hook_thresh);
- if (verdict == NF_ACCEPT || verdict == NF_STOP) {
- ret = 1;
- goto unlock;
- } else if (verdict == NF_DROP) {
- kfree_skb(*pskb);
- ret = -EPERM;
- } else if (verdict == NF_QUEUE) {
- NFDEBUG("nf_hook: Verdict = QUEUE.\n");
- if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn))
- goto next_hook;
- }
-unlock:
- rcu_read_unlock();
- return ret;
-}
-
-void nf_reinject(struct sk_buff *skb, struct nf_info *info,
- unsigned int verdict)
-{
- struct list_head *elem = &info->elem->list;
- struct list_head *i;
-
- rcu_read_lock();
-
- /* Release those devices we held, or Alexey will kill me. */
- if (info->indev) dev_put(info->indev);
- if (info->outdev) dev_put(info->outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
- if (skb->nf_bridge) {
- if (skb->nf_bridge->physindev)
- dev_put(skb->nf_bridge->physindev);
- if (skb->nf_bridge->physoutdev)
- dev_put(skb->nf_bridge->physoutdev);
- }
-#endif
-
- /* Drop reference to owner of hook which queued us. */
- module_put(info->elem->owner);
-
- list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
- if (i == elem)
- break;
- }
-
- if (elem == &nf_hooks[info->pf][info->hook]) {
- /* The module which sent it to userspace is gone. */
- NFDEBUG("%s: module disappeared, dropping packet.\n",
- __FUNCTION__);
- verdict = NF_DROP;
- }
-
- /* Continue traversal iff userspace said ok... */
- if (verdict == NF_REPEAT) {
- elem = elem->prev;
- verdict = NF_ACCEPT;
- }
-
- if (verdict == NF_ACCEPT) {
- next_hook:
- verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
- &skb, info->hook,
- info->indev, info->outdev, &elem,
- info->okfn, INT_MIN);
- }
-
- switch (verdict) {
- case NF_ACCEPT:
- info->okfn(skb);
- break;
-
- case NF_QUEUE:
- if (!nf_queue(skb, elem, info->pf, info->hook,
- info->indev, info->outdev, info->okfn))
- goto next_hook;
- break;
- }
- rcu_read_unlock();
-
- if (verdict == NF_DROP)
- kfree_skb(skb);
-
- kfree(info);
- return;
-}
-
-#ifdef CONFIG_INET
-/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
-int ip_route_me_harder(struct sk_buff **pskb)
-{
- struct iphdr *iph = (*pskb)->nh.iph;
- struct rtable *rt;
- struct flowi fl = {};
- struct dst_entry *odst;
- unsigned int hh_len;
-
- /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
- * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
- */
- if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
- fl.nl_u.ip4_u.daddr = iph->daddr;
- fl.nl_u.ip4_u.saddr = iph->saddr;
- fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
- fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
-#ifdef CONFIG_IP_ROUTE_FWMARK
- fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
-#endif
- fl.proto = iph->protocol;
- if (ip_route_output_key(&rt, &fl) != 0)
- return -1;
-
- /* Drop old route. */
- dst_release((*pskb)->dst);
- (*pskb)->dst = &rt->u.dst;
- } else {
- /* non-local src, find valid iif to satisfy
- * rp-filter when calling ip_route_input. */
- fl.nl_u.ip4_u.daddr = iph->saddr;
- if (ip_route_output_key(&rt, &fl) != 0)
- return -1;
-
- odst = (*pskb)->dst;
- if (ip_route_input(*pskb, iph->daddr, iph->saddr,
- RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
- dst_release(&rt->u.dst);
- return -1;
- }
- dst_release(&rt->u.dst);
- dst_release(odst);
- }
-
- if ((*pskb)->dst->error)
- return -1;
-
- /* Change in oif may mean change in hh_len. */
- hh_len = (*pskb)->dst->dev->hard_header_len;
- if (skb_headroom(*pskb) < hh_len) {
- struct sk_buff *nskb;
-
- nskb = skb_realloc_headroom(*pskb, hh_len);
- if (!nskb)
- return -1;
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(ip_route_me_harder);
-
-int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len)
-{
- struct sk_buff *nskb;
-
- if (writable_len > (*pskb)->len)
- return 0;
-
- /* Not exclusive use of packet? Must copy. */
- if (skb_shared(*pskb) || skb_cloned(*pskb))
- goto copy_skb;
-
- return pskb_may_pull(*pskb, writable_len);
-
-copy_skb:
- nskb = skb_copy(*pskb, GFP_ATOMIC);
- if (!nskb)
- return 0;
- BUG_ON(skb_is_nonlinear(nskb));
-
- /* Rest of kernel will get very unhappy if we pass it a
- suddenly-orphaned skbuff */
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- return 1;
-}
-EXPORT_SYMBOL(skb_ip_make_writable);
-#endif /*CONFIG_INET*/
-
-/* Internal logging interface, which relies on the real
- LOG target modules */
-
-#define NF_LOG_PREFIXLEN 128
-
-static nf_logfn *nf_logging[NPROTO]; /* = NULL */
-static int reported = 0;
-static DEFINE_SPINLOCK(nf_log_lock);
-
-int nf_log_register(int pf, nf_logfn *logfn)
-{
- int ret = -EBUSY;
-
- /* Any setup of logging members must be done before
- * substituting pointer. */
- spin_lock(&nf_log_lock);
- if (!nf_logging[pf]) {
- rcu_assign_pointer(nf_logging[pf], logfn);
- ret = 0;
- }
- spin_unlock(&nf_log_lock);
- return ret;
-}
-
-void nf_log_unregister(int pf, nf_logfn *logfn)
-{
- spin_lock(&nf_log_lock);
- if (nf_logging[pf] == logfn)
- nf_logging[pf] = NULL;
- spin_unlock(&nf_log_lock);
-
- /* Give time to concurrent readers. */
- synchronize_net();
-}
-
-void nf_log_packet(int pf,
- unsigned int hooknum,
- const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const char *fmt, ...)
-{
- va_list args;
- char prefix[NF_LOG_PREFIXLEN];
- nf_logfn *logfn;
-
- rcu_read_lock();
- logfn = rcu_dereference(nf_logging[pf]);
- if (logfn) {
- va_start(args, fmt);
- vsnprintf(prefix, sizeof(prefix), fmt, args);
- va_end(args);
- /* We must read logging before nf_logfn[pf] */
- logfn(hooknum, skb, in, out, prefix);
- } else if (!reported) {
- printk(KERN_WARNING "nf_log_packet: can\'t log yet, "
- "no backend logging module loaded in!\n");
- reported++;
- }
- rcu_read_unlock();
-}
-EXPORT_SYMBOL(nf_log_register);
-EXPORT_SYMBOL(nf_log_unregister);
-EXPORT_SYMBOL(nf_log_packet);
-
-/* This does not belong here, but locally generated errors need it if connection
- tracking in use: without this, connection may not be in hash table, and hence
- manufactured ICMP or RST packets will not be associated with it. */
-void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
-
-void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
-{
- void (*attach)(struct sk_buff *, struct sk_buff *);
-
- if (skb->nfct && (attach = ip_ct_attach) != NULL) {
- mb(); /* Just to be sure: must be read before executing this */
- attach(new, skb);
- }
-}
-
-void __init netfilter_init(void)
-{
- int i, h;
-
- for (i = 0; i < NPROTO; i++) {
- for (h = 0; h < NF_MAX_HOOKS; h++)
- INIT_LIST_HEAD(&nf_hooks[i][h]);
- }
-}
-
-EXPORT_SYMBOL(ip_ct_attach);
-EXPORT_SYMBOL(nf_ct_attach);
-EXPORT_SYMBOL(nf_getsockopt);
-EXPORT_SYMBOL(nf_hook_slow);
-EXPORT_SYMBOL(nf_hooks);
-EXPORT_SYMBOL(nf_register_hook);
-EXPORT_SYMBOL(nf_register_queue_handler);
-EXPORT_SYMBOL(nf_register_sockopt);
-EXPORT_SYMBOL(nf_reinject);
-EXPORT_SYMBOL(nf_setsockopt);
-EXPORT_SYMBOL(nf_unregister_hook);
-EXPORT_SYMBOL(nf_unregister_queue_handler);
-EXPORT_SYMBOL(nf_unregister_sockopt);
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index bb55675f068..b8203de5ff0 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -32,7 +32,6 @@
* Further increasing requires to change hash table size.
*/
int sysctl_max_syn_backlog = 256;
-EXPORT_SYMBOL(sysctl_max_syn_backlog);
int reqsk_queue_alloc(struct request_sock_queue *queue,
const int nr_table_entries)
@@ -53,6 +52,8 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
rwlock_init(&queue->syn_wait_lock);
queue->rskq_accept_head = queue->rskq_accept_head = NULL;
+ queue->rskq_defer_accept = 0;
+ lopt->nr_table_entries = nr_table_entries;
write_lock_bh(&queue->syn_wait_lock);
queue->listen_opt = lopt;
@@ -62,3 +63,28 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
}
EXPORT_SYMBOL(reqsk_queue_alloc);
+
+/*
+ * Tear down the listen_sock attached to @queue.
+ *
+ * The listen_sock is taken from the queue with
+ * reqsk_queue_yank_listen_sk().  If it still counts pending requests
+ * (qlen != 0), every syn_table chain is walked: each request_sock is
+ * unlinked, qlen is decremented and the request is released with
+ * reqsk_free().  The listen_sock itself is then kfree()d.
+ */
+void reqsk_queue_destroy(struct request_sock_queue *queue)
+{
+ /* make all the listen_opt local to us */
+ struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
+
+ if (lopt->qlen != 0) {
+ int i;
+
+ for (i = 0; i < lopt->nr_table_entries; i++) {
+ struct request_sock *req;
+
+ while ((req = lopt->syn_table[i]) != NULL) {
+ lopt->syn_table[i] = req->dl_next;
+ lopt->qlen--;
+ reqsk_free(req);
+ }
+ }
+ }
+
+ /* Every counted request should have been freed by the loop above. */
+ BUG_TRAP(lopt->qlen == 0);
+ kfree(lopt);
+}
+
+EXPORT_SYMBOL(reqsk_queue_destroy);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4b1bb30e638..9bed7569ce3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -148,7 +148,7 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
{
int err = 0;
- NETLINK_CB(skb).dst_groups = group;
+ NETLINK_CB(skb).dst_group = group;
if (echo)
atomic_inc(&skb->users);
netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
@@ -458,8 +458,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
kfree_skb(skb);
return;
}
- NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
- netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL);
+ NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
+ netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL);
}
static int rtnetlink_done(struct netlink_callback *cb)
@@ -708,7 +708,8 @@ void __init rtnetlink_init(void)
if (!rta_buf)
panic("rtnetlink_init: cannot allocate rta_buf\n");
- rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv);
+ rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
+ THIS_MODULE);
if (rtnl == NULL)
panic("rtnetlink_init: cannot initialize rtnetlink\n");
netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7eab867ede5..f80a2878561 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -68,7 +68,10 @@
#include <asm/uaccess.h>
#include <asm/system.h>
-static kmem_cache_t *skbuff_head_cache;
+static kmem_cache_t *skbuff_head_cache __read_mostly;
+static kmem_cache_t *skbuff_fclone_cache __read_mostly;
+
+struct timeval __read_mostly skb_tv_base;
/*
* Keep out-of-line to prevent kernel bloat.
@@ -118,7 +121,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
*/
/**
- * alloc_skb - allocate a network buffer
+ * __alloc_skb - allocate a network buffer
* @size: size to allocate
* @gfp_mask: allocation mask
*
@@ -129,14 +132,20 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
* Buffers may only be allocated from interrupts using a @gfp_mask of
* %GFP_ATOMIC.
*/
-struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
+struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask,
+ int fclone)
{
struct sk_buff *skb;
u8 *data;
/* Get the HEAD */
- skb = kmem_cache_alloc(skbuff_head_cache,
- gfp_mask & ~__GFP_DMA);
+ if (fclone)
+ skb = kmem_cache_alloc(skbuff_fclone_cache,
+ gfp_mask & ~__GFP_DMA);
+ else
+ skb = kmem_cache_alloc(skbuff_head_cache,
+ gfp_mask & ~__GFP_DMA);
+
if (!skb)
goto out;
@@ -153,7 +162,15 @@ struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
skb->data = data;
skb->tail = data;
skb->end = data + size;
+ if (fclone) {
+ struct sk_buff *child = skb + 1;
+ atomic_t *fclone_ref = (atomic_t *) (child + 1);
+ skb->fclone = SKB_FCLONE_ORIG;
+ atomic_set(fclone_ref, 1);
+
+ child->fclone = SKB_FCLONE_UNAVAILABLE;
+ }
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
skb_shinfo(skb)->tso_size = 0;
@@ -266,8 +283,34 @@ void skb_release_data(struct sk_buff *skb)
*/
void kfree_skbmem(struct sk_buff *skb)
{
+ struct sk_buff *other;
+ atomic_t *fclone_ref;
+
skb_release_data(skb);
- kmem_cache_free(skbuff_head_cache, skb);
+ switch (skb->fclone) {
+ case SKB_FCLONE_UNAVAILABLE:
+ kmem_cache_free(skbuff_head_cache, skb);
+ break;
+
+ case SKB_FCLONE_ORIG:
+ fclone_ref = (atomic_t *) (skb + 2);
+ if (atomic_dec_and_test(fclone_ref))
+ kmem_cache_free(skbuff_fclone_cache, skb);
+ break;
+
+ case SKB_FCLONE_CLONE:
+ fclone_ref = (atomic_t *) (skb + 1);
+ other = skb - 1;
+
+ /* The clone portion is available for
+ * fast-cloning again.
+ */
+ skb->fclone = SKB_FCLONE_UNAVAILABLE;
+
+ if (atomic_dec_and_test(fclone_ref))
+ kmem_cache_free(skbuff_fclone_cache, other);
+ break;
+ };
}
/**
@@ -281,8 +324,6 @@ void kfree_skbmem(struct sk_buff *skb)
void __kfree_skb(struct sk_buff *skb)
{
- BUG_ON(skb->list != NULL);
-
dst_release(skb->dst);
#ifdef CONFIG_XFRM
secpath_put(skb->sp);
@@ -302,7 +343,6 @@ void __kfree_skb(struct sk_buff *skb)
skb->tc_index = 0;
#ifdef CONFIG_NET_CLS_ACT
skb->tc_verd = 0;
- skb->tc_classid = 0;
#endif
#endif
@@ -325,19 +365,27 @@ void __kfree_skb(struct sk_buff *skb)
struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
{
- struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
-
- if (!n)
- return NULL;
+ struct sk_buff *n;
+
+ n = skb + 1;
+ if (skb->fclone == SKB_FCLONE_ORIG &&
+ n->fclone == SKB_FCLONE_UNAVAILABLE) {
+ atomic_t *fclone_ref = (atomic_t *) (n + 1);
+ n->fclone = SKB_FCLONE_CLONE;
+ atomic_inc(fclone_ref);
+ } else {
+ n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+ if (!n)
+ return NULL;
+ n->fclone = SKB_FCLONE_UNAVAILABLE;
+ }
#define C(x) n->x = skb->x
n->next = n->prev = NULL;
- n->list = NULL;
n->sk = NULL;
- C(stamp);
+ C(tstamp);
C(dev);
- C(real_dev);
C(h);
C(nh);
C(mac);
@@ -361,7 +409,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
n->destructor = NULL;
#ifdef CONFIG_NETFILTER
C(nfmark);
- C(nfcache);
C(nfct);
nf_conntrack_get(skb->nfct);
C(nfctinfo);
@@ -370,9 +417,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
nf_bridge_get(skb->nf_bridge);
#endif
#endif /*CONFIG_NETFILTER*/
-#if defined(CONFIG_HIPPI)
- C(private);
-#endif
#ifdef CONFIG_NET_SCHED
C(tc_index);
#ifdef CONFIG_NET_CLS_ACT
@@ -380,7 +424,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
C(input_dev);
- C(tc_classid);
#endif
#endif
@@ -404,10 +447,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
*/
unsigned long offset = new->data - old->data;
- new->list = NULL;
new->sk = NULL;
new->dev = old->dev;
- new->real_dev = old->real_dev;
new->priority = old->priority;
new->protocol = old->protocol;
new->dst = dst_clone(old->dst);
@@ -419,12 +460,12 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->mac.raw = old->mac.raw + offset;
memcpy(new->cb, old->cb, sizeof(old->cb));
new->local_df = old->local_df;
+ new->fclone = SKB_FCLONE_UNAVAILABLE;
new->pkt_type = old->pkt_type;
- new->stamp = old->stamp;
+ new->tstamp = old->tstamp;
new->destructor = NULL;
#ifdef CONFIG_NETFILTER
new->nfmark = old->nfmark;
- new->nfcache = old->nfcache;
new->nfct = old->nfct;
nf_conntrack_get(old->nfct);
new->nfctinfo = old->nfctinfo;
@@ -1344,50 +1385,43 @@ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
__skb_queue_tail(list, newsk);
spin_unlock_irqrestore(&list->lock, flags);
}
+
/**
* skb_unlink - remove a buffer from a list
* @skb: buffer to remove
+ * @list: list to use
*
- * Place a packet after a given packet in a list. The list locks are taken
- * and this function is atomic with respect to other list locked calls
+ * Remove a packet from a list. The list locks are taken and this
+ * function is atomic with respect to other list locked calls
*
- * Works even without knowing the list it is sitting on, which can be
- * handy at times. It also means that THE LIST MUST EXIST when you
- * unlink. Thus a list must have its contents unlinked before it is
- * destroyed.
+ * You must know what list the SKB is on.
*/
-void skb_unlink(struct sk_buff *skb)
+void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
{
- struct sk_buff_head *list = skb->list;
-
- if (list) {
- unsigned long flags;
+ unsigned long flags;
- spin_lock_irqsave(&list->lock, flags);
- if (skb->list == list)
- __skb_unlink(skb, skb->list);
- spin_unlock_irqrestore(&list->lock, flags);
- }
+ spin_lock_irqsave(&list->lock, flags);
+ __skb_unlink(skb, list);
+ spin_unlock_irqrestore(&list->lock, flags);
}
-
/**
* skb_append - append a buffer
* @old: buffer to insert after
* @newsk: buffer to insert
+ * @list: list to use
*
* Place a packet after a given packet in a list. The list locks are taken
* and this function is atomic with respect to other list locked calls.
* A buffer cannot be placed on two lists at the same time.
*/
-
-void skb_append(struct sk_buff *old, struct sk_buff *newsk)
+void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
{
unsigned long flags;
- spin_lock_irqsave(&old->list->lock, flags);
- __skb_append(old, newsk);
- spin_unlock_irqrestore(&old->list->lock, flags);
+ spin_lock_irqsave(&list->lock, flags);
+ __skb_append(old, newsk, list);
+ spin_unlock_irqrestore(&list->lock, flags);
}
@@ -1395,19 +1429,21 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk)
* skb_insert - insert a buffer
* @old: buffer to insert before
* @newsk: buffer to insert
+ * @list: list to use
+ *
+ * Place a packet before a given packet in a list. The list locks are
+ * taken and this function is atomic with respect to other list locked
+ * calls.
*
- * Place a packet before a given packet in a list. The list locks are taken
- * and this function is atomic with respect to other list locked calls
* A buffer cannot be placed on two lists at the same time.
*/
-
-void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
+void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
{
unsigned long flags;
- spin_lock_irqsave(&old->list->lock, flags);
- __skb_insert(newsk, old->prev, old, old->list);
- spin_unlock_irqrestore(&old->list->lock, flags);
+ spin_lock_irqsave(&list->lock, flags);
+ __skb_insert(newsk, old->prev, old, list);
+ spin_unlock_irqrestore(&list->lock, flags);
}
#if 0
@@ -1663,12 +1699,23 @@ void __init skb_init(void)
NULL, NULL);
if (!skbuff_head_cache)
panic("cannot create skbuff cache");
+
+ skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+ (2*sizeof(struct sk_buff)) +
+ sizeof(atomic_t),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
+ if (!skbuff_fclone_cache)
+ panic("cannot create skbuff cache");
+
+ do_gettimeofday(&skb_tv_base);
}
EXPORT_SYMBOL(___pskb_trim);
EXPORT_SYMBOL(__kfree_skb);
EXPORT_SYMBOL(__pskb_pull_tail);
-EXPORT_SYMBOL(alloc_skb);
+EXPORT_SYMBOL(__alloc_skb);
EXPORT_SYMBOL(pskb_copy);
EXPORT_SYMBOL(pskb_expand_head);
EXPORT_SYMBOL(skb_checksum);
@@ -1696,3 +1743,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
EXPORT_SYMBOL(skb_seq_read);
EXPORT_SYMBOL(skb_abort_seq_read);
EXPORT_SYMBOL(skb_find_text);
+EXPORT_SYMBOL(skb_tv_base);
diff --git a/net/core/sock.c b/net/core/sock.c
index 12f6d9a2a52..ccd10fd6568 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -260,7 +260,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
if (val > sysctl_wmem_max)
val = sysctl_wmem_max;
-
+set_sndbuf:
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
if ((val * 2) < SOCK_MIN_SNDBUF)
sk->sk_sndbuf = SOCK_MIN_SNDBUF;
@@ -274,6 +274,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
sk->sk_write_space(sk);
break;
+ case SO_SNDBUFFORCE:
+ if (!capable(CAP_NET_ADMIN)) {
+ ret = -EPERM;
+ break;
+ }
+ goto set_sndbuf;
+
case SO_RCVBUF:
/* Don't error on this BSD doesn't and if you think
about it this is right. Otherwise apps have to
@@ -282,7 +289,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
if (val > sysctl_rmem_max)
val = sysctl_rmem_max;
-
+set_rcvbuf:
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
/* FIXME: is this lower bound the right one? */
if ((val * 2) < SOCK_MIN_RCVBUF)
@@ -291,6 +298,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
sk->sk_rcvbuf = val * 2;
break;
+ case SO_RCVBUFFORCE:
+ if (!capable(CAP_NET_ADMIN)) {
+ ret = -EPERM;
+ break;
+ }
+ goto set_rcvbuf;
+
case SO_KEEPALIVE:
#ifdef CONFIG_INET
if (sk->sk_protocol == IPPROTO_TCP)
@@ -686,6 +700,80 @@ void sk_free(struct sock *sk)
module_put(owner);
}
+struct sock *sk_clone(const struct sock *sk, const unsigned int __nocast priority)
+{
+ struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
+
+ if (newsk != NULL) {
+ struct sk_filter *filter;
+
+ memcpy(newsk, sk, sk->sk_prot->obj_size);
+
+ /* SANITY */
+ sk_node_init(&newsk->sk_node);
+ sock_lock_init(newsk);
+ bh_lock_sock(newsk);
+
+ atomic_set(&newsk->sk_rmem_alloc, 0);
+ atomic_set(&newsk->sk_wmem_alloc, 0);
+ atomic_set(&newsk->sk_omem_alloc, 0);
+ skb_queue_head_init(&newsk->sk_receive_queue);
+ skb_queue_head_init(&newsk->sk_write_queue);
+
+ rwlock_init(&newsk->sk_dst_lock);
+ rwlock_init(&newsk->sk_callback_lock);
+
+ newsk->sk_dst_cache = NULL;
+ newsk->sk_wmem_queued = 0;
+ newsk->sk_forward_alloc = 0;
+ newsk->sk_send_head = NULL;
+ newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
+ newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+
+ sock_reset_flag(newsk, SOCK_DONE);
+ skb_queue_head_init(&newsk->sk_error_queue);
+
+ filter = newsk->sk_filter;
+ if (filter != NULL)
+ sk_filter_charge(newsk, filter);
+
+ if (unlikely(xfrm_sk_clone_policy(newsk))) {
+ /* It is still a raw copy of the parent, so invalidate
+ * the destructor and do a plain sk_free() */
+ newsk->sk_destruct = NULL;
+ sk_free(newsk);
+ newsk = NULL;
+ goto out;
+ }
+
+ newsk->sk_err = 0;
+ newsk->sk_priority = 0;
+ atomic_set(&newsk->sk_refcnt, 2);
+
+ /*
+ * Increment the counter in the same struct proto as the master
+ * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
+ * is the same as sk->sk_prot->socks, as this field was copied
+ * with memcpy).
+ *
+ * This _changes_ the previous behaviour, where
+ * tcp_create_openreq_child was always incrementing the
+ * equivalent of tcp_prot->socks (inet_sock_nr), so this has
+ * to be taken into account in all callers. -acme
+ */
+ sk_refcnt_debug_inc(newsk);
+ newsk->sk_socket = NULL;
+ newsk->sk_sleep = NULL;
+
+ if (newsk->sk_prot->sockets_allocated)
+ atomic_inc(newsk->sk_prot->sockets_allocated);
+ }
+out:
+ return newsk;
+}
+
+EXPORT_SYMBOL_GPL(sk_clone);
+
void __init sk_init(void)
{
if (num_physpages <= 4096) {
@@ -1353,11 +1441,7 @@ void sk_common_release(struct sock *sk)
xfrm_sk_free_policy(sk);
-#ifdef INET_REFCNT_DEBUG
- if (atomic_read(&sk->sk_refcnt) != 1)
- printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n",
- sk, atomic_read(&sk->sk_refcnt));
-#endif
+ sk_refcnt_debug_release(sk);
sock_put(sk);
}
@@ -1368,7 +1452,8 @@ static LIST_HEAD(proto_list);
int proto_register(struct proto *prot, int alloc_slab)
{
- char *request_sock_slab_name;
+ char *request_sock_slab_name = NULL;
+ char *timewait_sock_slab_name;
int rc = -ENOBUFS;
if (alloc_slab) {
@@ -1399,6 +1484,23 @@ int proto_register(struct proto *prot, int alloc_slab)
goto out_free_request_sock_slab_name;
}
}
+
+ if (prot->twsk_obj_size) {
+ static const char mask[] = "tw_sock_%s";
+
+ timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
+
+ if (timewait_sock_slab_name == NULL)
+ goto out_free_request_sock_slab;
+
+ sprintf(timewait_sock_slab_name, mask, prot->name);
+ prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name,
+ prot->twsk_obj_size,
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
+ if (prot->twsk_slab == NULL)
+ goto out_free_timewait_sock_slab_name;
+ }
}
write_lock(&proto_list_lock);
@@ -1407,6 +1509,13 @@ int proto_register(struct proto *prot, int alloc_slab)
rc = 0;
out:
return rc;
+out_free_timewait_sock_slab_name:
+ kfree(timewait_sock_slab_name);
+out_free_request_sock_slab:
+ if (prot->rsk_prot && prot->rsk_prot->slab) {
+ kmem_cache_destroy(prot->rsk_prot->slab);
+ prot->rsk_prot->slab = NULL;
+ }
out_free_request_sock_slab_name:
kfree(request_sock_slab_name);
out_free_sock_slab:
@@ -1434,6 +1543,14 @@ void proto_unregister(struct proto *prot)
prot->rsk_prot->slab = NULL;
}
+ if (prot->twsk_slab != NULL) {
+ const char *name = kmem_cache_name(prot->twsk_slab);
+
+ kmem_cache_destroy(prot->twsk_slab);
+ kfree(name);
+ prot->twsk_slab = NULL;
+ }
+
list_del(&prot->node);
write_unlock(&proto_list_lock);
}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 8f817ad9f54..2f278c8e474 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -9,23 +9,18 @@
#include <linux/sysctl.h>
#include <linux/config.h>
#include <linux/module.h>
+#include <linux/socket.h>
+#include <net/sock.h>
#ifdef CONFIG_SYSCTL
extern int netdev_max_backlog;
-extern int netdev_budget;
extern int weight_p;
-extern int net_msg_cost;
-extern int net_msg_burst;
extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
-extern __u32 sysctl_wmem_default;
-extern __u32 sysctl_rmem_default;
extern int sysctl_core_destroy_delay;
-extern int sysctl_optmem_max;
-extern int sysctl_somaxconn;
#ifdef CONFIG_NET_DIVERT
extern char sysctl_divert_version[];
diff --git a/net/core/utils.c b/net/core/utils.c
index 88eb8b68e26..7b5970fc9e4 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -16,7 +16,9 @@
#include <linux/module.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
+#include <linux/inet.h>
#include <linux/mm.h>
+#include <linux/net.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/random.h>
diff --git a/net/core/wireless.c b/net/core/wireless.c
index 3ff5639c0b7..5caae2399f3 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -571,10 +571,6 @@ static int wireless_seq_show(struct seq_file *seq, void *v)
return 0;
}
-extern void *dev_seq_start(struct seq_file *seq, loff_t *pos);
-extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
-extern void dev_seq_stop(struct seq_file *seq, void *v);
-
static struct seq_operations wireless_seq_ops = {
.start = dev_seq_start,
.next = dev_seq_next,
@@ -1144,8 +1140,8 @@ static inline void rtmsg_iwinfo(struct net_device * dev,
kfree_skb(skb);
return;
}
- NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
- netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_ATOMIC);
+ NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
+ netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC);
}
#endif /* WE_EVENT_NETLINK */