Merge HEAD from /spare/repo/linux-2.6/.git

author: Jeff Garzik <jgarzik@pobox.com> 2005-08-30 13:32:29 -0400
committer: Jeff Garzik <jgarzik@pobox.com> 2005-08-30 13:32:29 -0400
commit: ed735ccbefaf7e5e3ef61418f7e209b8c59308a7 (patch)
tree: b8cc69814d2368b08d0a84c8da0c12028bd04867 /net/core
parent: 39fbe47377062200acc26ea0ccef223b4399a82c (diff)
parent: d8971fcb702e24d1e22c77fd1772f182ffee87e3 (diff)
14 files changed, 370 insertions, 777 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index f5f5e58943e..630da0f0579 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -12,7 +12,6 @@ obj-y		     += dev.o ethtool.o dev_mcast.o dst.o \
 
 obj-$(CONFIG_XFRM) += flow.o
 obj-$(CONFIG_SYSFS) += net-sysfs.o
-obj-$(CONFIG_NETFILTER) += netfilter.o
 obj-$(CONFIG_NET_DIVERT) += dv.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
 obj-$(CONFIG_NET_RADIO) += wireless.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index fcee054b6f7..da9bf71421a 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -43,7 +43,6 @@
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/inet.h>
-#include <linux/tcp.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
 #include <linux/poll.h>
@@ -51,9 +50,10 @@
 
 #include <net/protocol.h>
 #include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/checksum.h>
 
+#include <net/checksum.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
 
 /*
  *	Is a socket 'connection oriented' ?
diff --git a/net/core/dev.c b/net/core/dev.c
index faf59b02c4b..c01511e3d0c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -267,10 +267,6 @@ void dev_add_pack(struct packet_type *pt)
 	spin_unlock_bh(&ptype_lock);
 }
 
-extern void linkwatch_run_queue(void);
-
-
-
 /**
  *	__dev_remove_pack	 - remove packet handler
  *	@pt: packet type declaration
@@ -1009,13 +1005,22 @@ void net_disable_timestamp(void)
 	atomic_dec(&netstamp_needed);
 }
 
-static inline void net_timestamp(struct timeval *stamp)
+void __net_timestamp(struct sk_buff *skb)
+{
+	struct timeval tv;
+
+	do_gettimeofday(&tv);
+	skb_set_timestamp(skb, &tv);
+}
+EXPORT_SYMBOL(__net_timestamp);
+
+static inline void net_timestamp(struct sk_buff *skb)
 {
 	if (atomic_read(&netstamp_needed))
-		do_gettimeofday(stamp);
+		__net_timestamp(skb);
 	else {
-		stamp->tv_sec = 0;
-		stamp->tv_usec = 0;
+		skb->tstamp.off_sec = 0;
+		skb->tstamp.off_usec = 0;
 	}
 }
 
@@ -1027,7 +1032,8 @@ static inline void net_timestamp(struct timeval *stamp)
 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct packet_type *ptype;
-	net_timestamp(&skb->stamp);
+
+	net_timestamp(skb);
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -1058,7 +1064,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 
 			skb2->h.raw = skb2->nh.raw;
 			skb2->pkt_type = PACKET_OUTGOING;
-			ptype->func(skb2, skb->dev, ptype);
+			ptype->func(skb2, skb->dev, ptype, skb->dev);
 		}
 	}
 	rcu_read_unlock();
@@ -1123,8 +1129,6 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 #define illegal_highdma(dev, skb)	(0)
 #endif
 
-extern void skb_release_data(struct sk_buff *);
-
 /* Keep head the same: replace data */
 int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 {
@@ -1379,8 +1383,8 @@ int netif_rx(struct sk_buff *skb)
 	if (netpoll_rx(skb))
 		return NET_RX_DROP;
 
-	if (!skb->stamp.tv_sec)
-		net_timestamp(&skb->stamp);
+	if (!skb->tstamp.off_sec)
+		net_timestamp(skb);
 
 	/*
 	 * The code is rearranged so that the path is the most
@@ -1425,14 +1429,14 @@ int netif_rx_ni(struct sk_buff *skb)
 
 EXPORT_SYMBOL(netif_rx_ni);
 
-static __inline__ void skb_bond(struct sk_buff *skb)
+static inline struct net_device *skb_bond(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
 
-	if (dev->master) {
-		skb->real_dev = skb->dev;
+	if (dev->master)
 		skb->dev = dev->master;
-	}
+
+	return dev;
 }
 
 static void net_tx_action(struct softirq_action *h)
@@ -1482,10 +1486,11 @@ static void net_tx_action(struct softirq_action *h)
 }
 
 static __inline__ int deliver_skb(struct sk_buff *skb,
-				  struct packet_type *pt_prev)
+				  struct packet_type *pt_prev,
+				  struct net_device *orig_dev)
 {
 	atomic_inc(&skb->users);
-	return pt_prev->func(skb, skb->dev, pt_prev);
+	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
@@ -1496,7 +1501,8 @@ struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
 
 static __inline__ int handle_bridge(struct sk_buff **pskb,
-				    struct packet_type **pt_prev, int *ret)
+				    struct packet_type **pt_prev, int *ret,
+				    struct net_device *orig_dev)
 {
 	struct net_bridge_port *port;
 
@@ -1505,14 +1511,14 @@ static __inline__ int handle_bridge(struct sk_buff **pskb,
 		return 0;
 
 	if (*pt_prev) {
-		*ret = deliver_skb(*pskb, *pt_prev);
+		*ret = deliver_skb(*pskb, *pt_prev, orig_dev);
 		*pt_prev = NULL;
 	} 
 	
 	return br_handle_frame_hook(port, pskb);
 }
 #else
-#define handle_bridge(skb, pt_prev, ret)	(0)
+#define handle_bridge(skb, pt_prev, ret, orig_dev)	(0)
 #endif
 
 #ifdef CONFIG_NET_CLS_ACT
@@ -1534,17 +1540,14 @@ static int ing_filter(struct sk_buff *skb)
 		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
 		if (MAX_RED_LOOP < ttl++) {
 			printk("Redir loop detected Dropping packet (%s->%s)\n",
-				skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
+				skb->input_dev->name, skb->dev->name);
 			return TC_ACT_SHOT;
 		}
 
 		skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
 
 		skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
-		if (NULL == skb->input_dev) {
-			skb->input_dev = skb->dev;
-			printk("ing_filter:  fixed  %s out %s\n",skb->input_dev->name,skb->dev->name);
-		}
+
 		spin_lock(&dev->ingress_lock);
 		if ((q = dev->qdisc_ingress) != NULL)
 			result = q->enqueue(skb, q);
@@ -1559,6 +1562,7 @@ static int ing_filter(struct sk_buff *skb)
 int netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
+	struct net_device *orig_dev;
 	int ret = NET_RX_DROP;
 	unsigned short type;
 
@@ -1566,10 +1570,13 @@ int netif_receive_skb(struct sk_buff *skb)
 	if (skb->dev->poll && netpoll_rx(skb))
 		return NET_RX_DROP;
 
-	if (!skb->stamp.tv_sec)
-		net_timestamp(&skb->stamp);
+	if (!skb->tstamp.off_sec)
+		net_timestamp(skb);
+
+	if (!skb->input_dev)
+		skb->input_dev = skb->dev;
 
-	skb_bond(skb);
+	orig_dev = skb_bond(skb);
 
 	__get_cpu_var(netdev_rx_stat).total++;
 
@@ -1590,14 +1597,14 @@ int netif_receive_skb(struct sk_buff *skb)
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
 		if (!ptype->dev || ptype->dev == skb->dev) {
 			if (pt_prev) 
-				ret = deliver_skb(skb, pt_prev);
+				ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = ptype;
 		}
 	}
 
 #ifdef CONFIG_NET_CLS_ACT
 	if (pt_prev) {
-		ret = deliver_skb(skb, pt_prev);
+		ret = deliver_skb(skb, pt_prev, orig_dev);
 		pt_prev = NULL; /* noone else should process this after*/
 	} else {
 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
@@ -1616,7 +1623,7 @@ ncls:
 
 	handle_diverter(skb);
 
-	if (handle_bridge(&skb, &pt_prev, &ret))
+	if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
 		goto out;
 
 	type = skb->protocol;
@@ -1624,13 +1631,13 @@ ncls:
 		if (ptype->type == type &&
 		    (!ptype->dev || ptype->dev == skb->dev)) {
 			if (pt_prev) 
-				ret = deliver_skb(skb, pt_prev);
+				ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = ptype;
 		}
 	}
 
 	if (pt_prev) {
-		ret = pt_prev->func(skb, skb->dev, pt_prev);
+		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 	} else {
 		kfree_skb(skb);
 		/* Jamal, now you will not able to escape explaining
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a3eeb88e1c8..289c1b5a8e4 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -81,6 +81,18 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data)
 	return 0;
 }
 
+int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *addr, u8 *data)
+{
+	unsigned char len = dev->addr_len;
+	if ( addr->size < len )
+		return -ETOOSMALL;
+	
+	addr->size = len;
+	memcpy(data, dev->perm_addr, len);
+	return 0;
+}
+ 
+
 /* Handlers for each ethtool command */
 
 static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
@@ -683,6 +695,39 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
 	return ret;
 }
 
+static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_perm_addr epaddr;
+	u8 *data;
+	int ret;
+
+	if (!dev->ethtool_ops->get_perm_addr)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&epaddr,useraddr,sizeof(epaddr)))
+		return -EFAULT;
+	/* NOTE(review): epaddr.size is user-supplied and not bounded here. */
+	data = kmalloc(epaddr.size, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+	/* From here on every exit must go through 'out' to free 'data'. */
+	ret = dev->ethtool_ops->get_perm_addr(dev,&epaddr,data);
+	if (ret)
+		goto out;	/* was 'return ret', which leaked 'data' */
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &epaddr, sizeof(epaddr)))
+		goto out;
+	useraddr += sizeof(epaddr);	/* address bytes follow the header */
+	if (copy_to_user(useraddr, data, epaddr.size))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(data);
+	return ret;
+}
+
 /* The main entry point in this file.  Called from net/core/dev.c */
 
 int dev_ethtool(struct ifreq *ifr)
@@ -806,6 +851,9 @@ int dev_ethtool(struct ifreq *ifr)
 	case ETHTOOL_GSTATS:
 		rc = ethtool_get_stats(dev, useraddr);
 		break;
+	case ETHTOOL_GPERMADDR:
+		rc = ethtool_get_perm_addr(dev, useraddr);
+		break;
 	default:
 		rc =  -EOPNOTSUPP;
 	}
@@ -826,6 +874,7 @@ int dev_ethtool(struct ifreq *ifr)
 
 EXPORT_SYMBOL(dev_ethtool);
 EXPORT_SYMBOL(ethtool_op_get_link);
+EXPORT_SYMBOL_GPL(ethtool_op_get_perm_addr);
 EXPORT_SYMBOL(ethtool_op_get_sg);
 EXPORT_SYMBOL(ethtool_op_get_tso);
 EXPORT_SYMBOL(ethtool_op_get_tx_csum);
diff --git a/net/core/flow.c b/net/core/flow.c
index f289570b15a..7e95b39de9f 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -42,7 +42,7 @@ static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
 
 #define flow_table(cpu) (per_cpu(flow_tables, cpu))
 
-static kmem_cache_t *flow_cachep;
+static kmem_cache_t *flow_cachep __read_mostly;
 
 static int flow_lwm, flow_hwm;
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1beb782ac41..39fc55edf69 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1217,7 +1217,7 @@ static void neigh_proxy_process(unsigned long arg)
 
 	while (skb != (struct sk_buff *)&tbl->proxy_queue) {
 		struct sk_buff *back = skb;
-		long tdif = back->stamp.tv_usec - now;
+		long tdif = NEIGH_CB(back)->sched_next - now;
 
 		skb = skb->next;
 		if (tdif <= 0) {
@@ -1248,8 +1248,9 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
 		kfree_skb(skb);
 		return;
 	}
-	skb->stamp.tv_sec  = LOCALLY_ENQUEUED;
-	skb->stamp.tv_usec = sched_next;
+
+	NEIGH_CB(skb)->sched_next = sched_next;
+	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
 
 	spin_lock(&tbl->proxy_queue.lock);
 	if (del_timer(&tbl->proxy_timer)) {
@@ -2342,8 +2343,8 @@ void neigh_app_ns(struct neighbour *n)
 	}
 	nlh			   = (struct nlmsghdr *)skb->data;
 	nlh->nlmsg_flags	   = NLM_F_REQUEST;
-	NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
-	netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
+	NETLINK_CB(skb).dst_group  = RTNLGRP_NEIGH;
+	netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
 }
 
 static void neigh_app_notify(struct neighbour *n)
@@ -2360,8 +2361,8 @@ static void neigh_app_notify(struct neighbour *n)
 		return;
 	}
 	nlh			   = (struct nlmsghdr *)skb->data;
-	NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
-	netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
+	NETLINK_CB(skb).dst_group  = RTNLGRP_NEIGH;
+	netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
 }
 
 #endif /* CONFIG_ARPD */
diff --git a/net/core/netfilter.c b/net/core/netfilter.c
deleted file mode 100644
index 076c156d5ed..00000000000
--- a/net/core/netfilter.c
+++ /dev/null
@@ -1,648 +0,0 @@
-/* netfilter.c: look after the filters for various protocols. 
- * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
- *
- * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
- * way.
- *
- * Rusty Russell (C)2000 -- This code is GPL.
- *
- * February 2000: Modified by James Morris to have 1 queue per protocol.
- * 15-Mar-2000:   Added NF_REPEAT --RR.
- * 08-May-2003:	  Internal logging interface added by Jozsef Kadlecsik.
- */
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/netfilter.h>
-#include <net/protocol.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/wait.h>
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/if.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/icmp.h>
-#include <net/sock.h>
-#include <net/route.h>
-#include <linux/ip.h>
-
-/* In this code, we can be waiting indefinitely for userspace to
- * service a packet if a hook returns NF_QUEUE.  We could keep a count
- * of skbuffs queued for userspace, and not deregister a hook unless
- * this is zero, but that sucks.  Now, we simply check when the
- * packets come back: if the hook is gone, the packet is discarded. */
-#ifdef CONFIG_NETFILTER_DEBUG
-#define NFDEBUG(format, args...)  printk(format , ## args)
-#else
-#define NFDEBUG(format, args...)
-#endif
-
-/* Sockopts only registered and called from user context, so
-   net locking would be overkill.  Also, [gs]etsockopt calls may
-   sleep. */
-static DECLARE_MUTEX(nf_sockopt_mutex);
-
-struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
-static LIST_HEAD(nf_sockopts);
-static DEFINE_SPINLOCK(nf_hook_lock);
-
-/* 
- * A queue handler may be registered for each protocol.  Each is protected by
- * long term mutex.  The handler must provide an an outfn() to accept packets
- * for queueing and must reinject all packets it receives, no matter what.
- */
-static struct nf_queue_handler_t {
-	nf_queue_outfn_t outfn;
-	void *data;
-} queue_handler[NPROTO];
-static DEFINE_RWLOCK(queue_handler_lock);
-
-int nf_register_hook(struct nf_hook_ops *reg)
-{
-	struct list_head *i;
-
-	spin_lock_bh(&nf_hook_lock);
-	list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
-		if (reg->priority < ((struct nf_hook_ops *)i)->priority)
-			break;
-	}
-	list_add_rcu(&reg->list, i->prev);
-	spin_unlock_bh(&nf_hook_lock);
-
-	synchronize_net();
-	return 0;
-}
-
-void nf_unregister_hook(struct nf_hook_ops *reg)
-{
-	spin_lock_bh(&nf_hook_lock);
-	list_del_rcu(&reg->list);
-	spin_unlock_bh(&nf_hook_lock);
-
-	synchronize_net();
-}
-
-/* Do exclusive ranges overlap? */
-static inline int overlap(int min1, int max1, int min2, int max2)
-{
-	return max1 > min2 && min1 < max2;
-}
-
-/* Functions to register sockopt ranges (exclusive). */
-int nf_register_sockopt(struct nf_sockopt_ops *reg)
-{
-	struct list_head *i;
-	int ret = 0;
-
-	if (down_interruptible(&nf_sockopt_mutex) != 0)
-		return -EINTR;
-
-	list_for_each(i, &nf_sockopts) {
-		struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
-		if (ops->pf == reg->pf
-		    && (overlap(ops->set_optmin, ops->set_optmax, 
-				reg->set_optmin, reg->set_optmax)
-			|| overlap(ops->get_optmin, ops->get_optmax, 
-				   reg->get_optmin, reg->get_optmax))) {
-			NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
-				ops->set_optmin, ops->set_optmax, 
-				ops->get_optmin, ops->get_optmax, 
-				reg->set_optmin, reg->set_optmax,
-				reg->get_optmin, reg->get_optmax);
-			ret = -EBUSY;
-			goto out;
-		}
-	}
-
-	list_add(&reg->list, &nf_sockopts);
-out:
-	up(&nf_sockopt_mutex);
-	return ret;
-}
-
-void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
-{
-	/* No point being interruptible: we're probably in cleanup_module() */
- restart:
-	down(&nf_sockopt_mutex);
-	if (reg->use != 0) {
-		/* To be woken by nf_sockopt call... */
-		/* FIXME: Stuart Young's name appears gratuitously. */
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		reg->cleanup_task = current;
-		up(&nf_sockopt_mutex);
-		schedule();
-		goto restart;
-	}
-	list_del(&reg->list);
-	up(&nf_sockopt_mutex);
-}
-
-/* Call get/setsockopt() */
-static int nf_sockopt(struct sock *sk, int pf, int val, 
-		      char __user *opt, int *len, int get)
-{
-	struct list_head *i;
-	struct nf_sockopt_ops *ops;
-	int ret;
-
-	if (down_interruptible(&nf_sockopt_mutex) != 0)
-		return -EINTR;
-
-	list_for_each(i, &nf_sockopts) {
-		ops = (struct nf_sockopt_ops *)i;
-		if (ops->pf == pf) {
-			if (get) {
-				if (val >= ops->get_optmin
-				    && val < ops->get_optmax) {
-					ops->use++;
-					up(&nf_sockopt_mutex);
-					ret = ops->get(sk, val, opt, len);
-					goto out;
-				}
-			} else {
-				if (val >= ops->set_optmin
-				    && val < ops->set_optmax) {
-					ops->use++;
-					up(&nf_sockopt_mutex);
-					ret = ops->set(sk, val, opt, *len);
-					goto out;
-				}
-			}
-		}
-	}
-	up(&nf_sockopt_mutex);
-	return -ENOPROTOOPT;
-	
- out:
-	down(&nf_sockopt_mutex);
-	ops->use--;
-	if (ops->cleanup_task)
-		wake_up_process(ops->cleanup_task);
-	up(&nf_sockopt_mutex);
-	return ret;
-}
-
-int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
-		  int len)
-{
-	return nf_sockopt(sk, pf, val, opt, &len, 0);
-}
-
-int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
-{
-	return nf_sockopt(sk, pf, val, opt, len, 1);
-}
-
-static unsigned int nf_iterate(struct list_head *head,
-			       struct sk_buff **skb,
-			       int hook,
-			       const struct net_device *indev,
-			       const struct net_device *outdev,
-			       struct list_head **i,
-			       int (*okfn)(struct sk_buff *),
-			       int hook_thresh)
-{
-	unsigned int verdict;
-
-	/*
-	 * The caller must not block between calls to this
-	 * function because of risk of continuing from deleted element.
-	 */
-	list_for_each_continue_rcu(*i, head) {
-		struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
-
-		if (hook_thresh > elem->priority)
-			continue;
-
-		/* Optimization: we don't need to hold module
-                   reference here, since function can't sleep. --RR */
-		verdict = elem->hook(hook, skb, indev, outdev, okfn);
-		if (verdict != NF_ACCEPT) {
-#ifdef CONFIG_NETFILTER_DEBUG
-			if (unlikely(verdict > NF_MAX_VERDICT)) {
-				NFDEBUG("Evil return from %p(%u).\n",
-				        elem->hook, hook);
-				continue;
-			}
-#endif
-			if (verdict != NF_REPEAT)
-				return verdict;
-			*i = (*i)->prev;
-		}
-	}
-	return NF_ACCEPT;
-}
-
-int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
-{      
-	int ret;
-
-	write_lock_bh(&queue_handler_lock);
-	if (queue_handler[pf].outfn)
-		ret = -EBUSY;
-	else {
-		queue_handler[pf].outfn = outfn;
-		queue_handler[pf].data = data;
-		ret = 0;
-	}
-	write_unlock_bh(&queue_handler_lock);
-
-	return ret;
-}
-
-/* The caller must flush their queue before this */
-int nf_unregister_queue_handler(int pf)
-{
-	write_lock_bh(&queue_handler_lock);
-	queue_handler[pf].outfn = NULL;
-	queue_handler[pf].data = NULL;
-	write_unlock_bh(&queue_handler_lock);
-	
-	return 0;
-}
-
-/* 
- * Any packet that leaves via this function must come back 
- * through nf_reinject().
- */
-static int nf_queue(struct sk_buff *skb, 
-		    struct list_head *elem, 
-		    int pf, unsigned int hook,
-		    struct net_device *indev,
-		    struct net_device *outdev,
-		    int (*okfn)(struct sk_buff *))
-{
-	int status;
-	struct nf_info *info;
-#ifdef CONFIG_BRIDGE_NETFILTER
-	struct net_device *physindev = NULL;
-	struct net_device *physoutdev = NULL;
-#endif
-
-	/* QUEUE == DROP if noone is waiting, to be safe. */
-	read_lock(&queue_handler_lock);
-	if (!queue_handler[pf].outfn) {
-		read_unlock(&queue_handler_lock);
-		kfree_skb(skb);
-		return 1;
-	}
-
-	info = kmalloc(sizeof(*info), GFP_ATOMIC);
-	if (!info) {
-		if (net_ratelimit())
-			printk(KERN_ERR "OOM queueing packet %p\n",
-			       skb);
-		read_unlock(&queue_handler_lock);
-		kfree_skb(skb);
-		return 1;
-	}
-
-	*info = (struct nf_info) { 
-		(struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
-
-	/* If it's going away, ignore hook. */
-	if (!try_module_get(info->elem->owner)) {
-		read_unlock(&queue_handler_lock);
-		kfree(info);
-		return 0;
-	}
-
-	/* Bump dev refs so they don't vanish while packet is out */
-	if (indev) dev_hold(indev);
-	if (outdev) dev_hold(outdev);
-
-#ifdef CONFIG_BRIDGE_NETFILTER
-	if (skb->nf_bridge) {
-		physindev = skb->nf_bridge->physindev;
-		if (physindev) dev_hold(physindev);
-		physoutdev = skb->nf_bridge->physoutdev;
-		if (physoutdev) dev_hold(physoutdev);
-	}
-#endif
-
-	status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
-	read_unlock(&queue_handler_lock);
-
-	if (status < 0) {
-		/* James M doesn't say fuck enough. */
-		if (indev) dev_put(indev);
-		if (outdev) dev_put(outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
-		if (physindev) dev_put(physindev);
-		if (physoutdev) dev_put(physoutdev);
-#endif
-		module_put(info->elem->owner);
-		kfree(info);
-		kfree_skb(skb);
-		return 1;
-	}
-	return 1;
-}
-
-/* Returns 1 if okfn() needs to be executed by the caller,
- * -EPERM for NF_DROP, 0 otherwise. */
-int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
-		 struct net_device *indev,
-		 struct net_device *outdev,
-		 int (*okfn)(struct sk_buff *),
-		 int hook_thresh)
-{
-	struct list_head *elem;
-	unsigned int verdict;
-	int ret = 0;
-
-	/* We may already have this, but read-locks nest anyway */
-	rcu_read_lock();
-
-	elem = &nf_hooks[pf][hook];
-next_hook:
-	verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
-			     outdev, &elem, okfn, hook_thresh);
-	if (verdict == NF_ACCEPT || verdict == NF_STOP) {
-		ret = 1;
-		goto unlock;
-	} else if (verdict == NF_DROP) {
-		kfree_skb(*pskb);
-		ret = -EPERM;
-	} else if (verdict == NF_QUEUE) {
-		NFDEBUG("nf_hook: Verdict = QUEUE.\n");
-		if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn))
-			goto next_hook;
-	}
-unlock:
-	rcu_read_unlock();
-	return ret;
-}
-
-void nf_reinject(struct sk_buff *skb, struct nf_info *info,
-		 unsigned int verdict)
-{
-	struct list_head *elem = &info->elem->list;
-	struct list_head *i;
-
-	rcu_read_lock();
-
-	/* Release those devices we held, or Alexey will kill me. */
-	if (info->indev) dev_put(info->indev);
-	if (info->outdev) dev_put(info->outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
-	if (skb->nf_bridge) {
-		if (skb->nf_bridge->physindev)
-			dev_put(skb->nf_bridge->physindev);
-		if (skb->nf_bridge->physoutdev)
-			dev_put(skb->nf_bridge->physoutdev);
-	}
-#endif
-
-	/* Drop reference to owner of hook which queued us. */
-	module_put(info->elem->owner);
-
-	list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
-		if (i == elem) 
-  			break;
-  	}
-  
-	if (elem == &nf_hooks[info->pf][info->hook]) {
-		/* The module which sent it to userspace is gone. */
-		NFDEBUG("%s: module disappeared, dropping packet.\n",
-			__FUNCTION__);
-		verdict = NF_DROP;
-	}
-
-	/* Continue traversal iff userspace said ok... */
-	if (verdict == NF_REPEAT) {
-		elem = elem->prev;
-		verdict = NF_ACCEPT;
-	}
-
-	if (verdict == NF_ACCEPT) {
-	next_hook:
-		verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
-				     &skb, info->hook, 
-				     info->indev, info->outdev, &elem,
-				     info->okfn, INT_MIN);
-	}
-
-	switch (verdict) {
-	case NF_ACCEPT:
-		info->okfn(skb);
-		break;
-
-	case NF_QUEUE:
-		if (!nf_queue(skb, elem, info->pf, info->hook, 
-			      info->indev, info->outdev, info->okfn))
-			goto next_hook;
-		break;
-	}
-	rcu_read_unlock();
-
-	if (verdict == NF_DROP)
-		kfree_skb(skb);
-
-	kfree(info);
-	return;
-}
-
-#ifdef CONFIG_INET
-/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
-int ip_route_me_harder(struct sk_buff **pskb)
-{
-	struct iphdr *iph = (*pskb)->nh.iph;
-	struct rtable *rt;
-	struct flowi fl = {};
-	struct dst_entry *odst;
-	unsigned int hh_len;
-
-	/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
-	 * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
-	 */
-	if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
-		fl.nl_u.ip4_u.daddr = iph->daddr;
-		fl.nl_u.ip4_u.saddr = iph->saddr;
-		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
-		fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
-#ifdef CONFIG_IP_ROUTE_FWMARK
-		fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
-#endif
-		fl.proto = iph->protocol;
-		if (ip_route_output_key(&rt, &fl) != 0)
-			return -1;
-
-		/* Drop old route. */
-		dst_release((*pskb)->dst);
-		(*pskb)->dst = &rt->u.dst;
-	} else {
-		/* non-local src, find valid iif to satisfy
-		 * rp-filter when calling ip_route_input. */
-		fl.nl_u.ip4_u.daddr = iph->saddr;
-		if (ip_route_output_key(&rt, &fl) != 0)
-			return -1;
-
-		odst = (*pskb)->dst;
-		if (ip_route_input(*pskb, iph->daddr, iph->saddr,
-				   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
-			dst_release(&rt->u.dst);
-			return -1;
-		}
-		dst_release(&rt->u.dst);
-		dst_release(odst);
-	}
-	
-	if ((*pskb)->dst->error)
-		return -1;
-
-	/* Change in oif may mean change in hh_len. */
-	hh_len = (*pskb)->dst->dev->hard_header_len;
-	if (skb_headroom(*pskb) < hh_len) {
-		struct sk_buff *nskb;
-
-		nskb = skb_realloc_headroom(*pskb, hh_len);
-		if (!nskb) 
-			return -1;
-		if ((*pskb)->sk)
-			skb_set_owner_w(nskb, (*pskb)->sk);
-		kfree_skb(*pskb);
-		*pskb = nskb;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(ip_route_me_harder);
-
-int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len)
-{
-	struct sk_buff *nskb;
-
-	if (writable_len > (*pskb)->len)
-		return 0;
-
-	/* Not exclusive use of packet?  Must copy. */
-	if (skb_shared(*pskb) || skb_cloned(*pskb))
-		goto copy_skb;
-
-	return pskb_may_pull(*pskb, writable_len);
-
-copy_skb:
-	nskb = skb_copy(*pskb, GFP_ATOMIC);
-	if (!nskb)
-		return 0;
-	BUG_ON(skb_is_nonlinear(nskb));
-
-	/* Rest of kernel will get very unhappy if we pass it a
-	   suddenly-orphaned skbuff */
-	if ((*pskb)->sk)
-		skb_set_owner_w(nskb, (*pskb)->sk);
-	kfree_skb(*pskb);
-	*pskb = nskb;
-	return 1;
-}
-EXPORT_SYMBOL(skb_ip_make_writable);
-#endif /*CONFIG_INET*/
-
-/* Internal logging interface, which relies on the real 
-   LOG target modules */
-
-#define NF_LOG_PREFIXLEN		128
-
-static nf_logfn *nf_logging[NPROTO]; /* = NULL */
-static int reported = 0;
-static DEFINE_SPINLOCK(nf_log_lock);
-
-int nf_log_register(int pf, nf_logfn *logfn)
-{
-	int ret = -EBUSY;
-
-	/* Any setup of logging members must be done before
-	 * substituting pointer. */
-	spin_lock(&nf_log_lock);
-	if (!nf_logging[pf]) {
-		rcu_assign_pointer(nf_logging[pf], logfn);
-		ret = 0;
-	}
-	spin_unlock(&nf_log_lock);
-	return ret;
-}		
-
-void nf_log_unregister(int pf, nf_logfn *logfn)
-{
-	spin_lock(&nf_log_lock);
-	if (nf_logging[pf] == logfn)
-		nf_logging[pf] = NULL;
-	spin_unlock(&nf_log_lock);
-
-	/* Give time to concurrent readers. */
-	synchronize_net();
-}		
-
-void nf_log_packet(int pf,
-		   unsigned int hooknum,
-		   const struct sk_buff *skb,
-		   const struct net_device *in,
-		   const struct net_device *out,
-		   const char *fmt, ...)
-{
-	va_list args;
-	char prefix[NF_LOG_PREFIXLEN];
-	nf_logfn *logfn;
-	
-	rcu_read_lock();
-	logfn = rcu_dereference(nf_logging[pf]);
-	if (logfn) {
-		va_start(args, fmt);
-		vsnprintf(prefix, sizeof(prefix), fmt, args);
-		va_end(args);
-		/* We must read logging before nf_logfn[pf] */
-		logfn(hooknum, skb, in, out, prefix);
-	} else if (!reported) {
-		printk(KERN_WARNING "nf_log_packet: can\'t log yet, "
-		       "no backend logging module loaded in!\n");
-		reported++;
-	}
-	rcu_read_unlock();
-}
-EXPORT_SYMBOL(nf_log_register);
-EXPORT_SYMBOL(nf_log_unregister);
-EXPORT_SYMBOL(nf_log_packet);
-
-/* This does not belong here, but locally generated errors need it if connection
-   tracking in use: without this, connection may not be in hash table, and hence
-   manufactured ICMP or RST packets will not be associated with it. */
-void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
-
-void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
-{
-	void (*attach)(struct sk_buff *, struct sk_buff *);
-
-	if (skb->nfct && (attach = ip_ct_attach) != NULL) {
-		mb(); /* Just to be sure: must be read before executing this */
-		attach(new, skb);
-	}
-}
-
-void __init netfilter_init(void)
-{
-	int i, h;
-
-	for (i = 0; i < NPROTO; i++) {
-		for (h = 0; h < NF_MAX_HOOKS; h++)
-			INIT_LIST_HEAD(&nf_hooks[i][h]);
-	}
-}
-
-EXPORT_SYMBOL(ip_ct_attach);
-EXPORT_SYMBOL(nf_ct_attach);
-EXPORT_SYMBOL(nf_getsockopt);
-EXPORT_SYMBOL(nf_hook_slow);
-EXPORT_SYMBOL(nf_hooks);
-EXPORT_SYMBOL(nf_register_hook);
-EXPORT_SYMBOL(nf_register_queue_handler);
-EXPORT_SYMBOL(nf_register_sockopt);
-EXPORT_SYMBOL(nf_reinject);
-EXPORT_SYMBOL(nf_setsockopt);
-EXPORT_SYMBOL(nf_unregister_hook);
-EXPORT_SYMBOL(nf_unregister_queue_handler);
-EXPORT_SYMBOL(nf_unregister_sockopt);
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index bb55675f068..b8203de5ff0 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -32,7 +32,6 @@
  * Further increasing requires to change hash table size.
  */
 int sysctl_max_syn_backlog = 256;
-EXPORT_SYMBOL(sysctl_max_syn_backlog);
 
 int reqsk_queue_alloc(struct request_sock_queue *queue,
 		      const int nr_table_entries)
@@ -53,6 +52,8 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
 	get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
 	rwlock_init(&queue->syn_wait_lock);
 	queue->rskq_accept_head = queue->rskq_accept_head = NULL;
+	queue->rskq_defer_accept = 0;
+	lopt->nr_table_entries = nr_table_entries;
 
 	write_lock_bh(&queue->syn_wait_lock);
 	queue->listen_opt = lopt;
@@ -62,3 +63,28 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
 }
 
 EXPORT_SYMBOL(reqsk_queue_alloc);
+
+void reqsk_queue_destroy(struct request_sock_queue *queue)
+{
+	/* make all the listen_opt local to us */
+	struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
+
+	if (lopt->qlen != 0) {
+		int i;
+
+		for (i = 0; i < lopt->nr_table_entries; i++) {
+			struct request_sock *req;
+
+			while ((req = lopt->syn_table[i]) != NULL) {
+				lopt->syn_table[i] = req->dl_next;
+				lopt->qlen--;
+				reqsk_free(req);
+			}
+		}
+	}
+
+	BUG_TRAP(lopt->qlen == 0);
+	kfree(lopt);
+}
+
+EXPORT_SYMBOL(reqsk_queue_destroy);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4b1bb30e638..9bed7569ce3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -148,7 +148,7 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
 {
 	int err = 0;
 
-	NETLINK_CB(skb).dst_groups = group;
+	NETLINK_CB(skb).dst_group = group;
 	if (echo)
 		atomic_inc(&skb->users);
 	netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
@@ -458,8 +458,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 		kfree_skb(skb);
 		return;
 	}
-	NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
-	netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL);
+	NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
+	netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL);
 }
 
 static int rtnetlink_done(struct netlink_callback *cb)
@@ -708,7 +708,8 @@ void __init rtnetlink_init(void)
 	if (!rta_buf)
 		panic("rtnetlink_init: cannot allocate rta_buf\n");
 
-	rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv);
+	rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
+	                             THIS_MODULE);
 	if (rtnl == NULL)
 		panic("rtnetlink_init: cannot initialize rtnetlink\n");
 	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7eab867ede5..f80a2878561 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -68,7 +68,10 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 
-static kmem_cache_t *skbuff_head_cache;
+static kmem_cache_t *skbuff_head_cache __read_mostly;
+static kmem_cache_t *skbuff_fclone_cache __read_mostly;
+
+struct timeval __read_mostly skb_tv_base;
 
 /*
  *	Keep out-of-line to prevent kernel bloat.
@@ -118,7 +121,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  */
 
 /**
- *	alloc_skb	-	allocate a network buffer
+ *	__alloc_skb	-	allocate a network buffer
  *	@size: size to allocate
  *	@gfp_mask: allocation mask
  *
@@ -129,14 +132,20 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  *	Buffers may only be allocated from interrupts using a @gfp_mask of
  *	%GFP_ATOMIC.
  */
-struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
+struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask,
+			    int fclone)
 {
 	struct sk_buff *skb;
 	u8 *data;
 
 	/* Get the HEAD */
-	skb = kmem_cache_alloc(skbuff_head_cache,
-			       gfp_mask & ~__GFP_DMA);
+	if (fclone)
+		skb = kmem_cache_alloc(skbuff_fclone_cache,
+				       gfp_mask & ~__GFP_DMA);
+	else
+		skb = kmem_cache_alloc(skbuff_head_cache,
+				       gfp_mask & ~__GFP_DMA);
+
 	if (!skb)
 		goto out;
 
@@ -153,7 +162,15 @@ struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
 	skb->data = data;
 	skb->tail = data;
 	skb->end  = data + size;
+	if (fclone) {
+		struct sk_buff *child = skb + 1;
+		atomic_t *fclone_ref = (atomic_t *) (child + 1);
 
+		skb->fclone = SKB_FCLONE_ORIG;
+		atomic_set(fclone_ref, 1);
+
+		child->fclone = SKB_FCLONE_UNAVAILABLE;
+	}
 	atomic_set(&(skb_shinfo(skb)->dataref), 1);
 	skb_shinfo(skb)->nr_frags  = 0;
 	skb_shinfo(skb)->tso_size = 0;
@@ -266,8 +283,34 @@ void skb_release_data(struct sk_buff *skb)
  */
 void kfree_skbmem(struct sk_buff *skb)
 {
+	struct sk_buff *other;
+	atomic_t *fclone_ref;
+
 	skb_release_data(skb);
-	kmem_cache_free(skbuff_head_cache, skb);
+	switch (skb->fclone) {
+	case SKB_FCLONE_UNAVAILABLE:
+		kmem_cache_free(skbuff_head_cache, skb);
+		break;
+
+	case SKB_FCLONE_ORIG:
+		fclone_ref = (atomic_t *) (skb + 2);
+		if (atomic_dec_and_test(fclone_ref))
+			kmem_cache_free(skbuff_fclone_cache, skb);
+		break;
+
+	case SKB_FCLONE_CLONE:
+		fclone_ref = (atomic_t *) (skb + 1);
+		other = skb - 1;
+
+		/* The clone portion is available for
+		 * fast-cloning again.
+		 */
+		skb->fclone = SKB_FCLONE_UNAVAILABLE;
+
+		if (atomic_dec_and_test(fclone_ref))
+			kmem_cache_free(skbuff_fclone_cache, other);
+		break;
+	};
 }
 
 /**
@@ -281,8 +324,6 @@ void kfree_skbmem(struct sk_buff *skb)
 
 void __kfree_skb(struct sk_buff *skb)
 {
-	BUG_ON(skb->list != NULL);
-
 	dst_release(skb->dst);
 #ifdef CONFIG_XFRM
 	secpath_put(skb->sp);
@@ -302,7 +343,6 @@ void __kfree_skb(struct sk_buff *skb)
 	skb->tc_index = 0;
 #ifdef CONFIG_NET_CLS_ACT
 	skb->tc_verd = 0;
-	skb->tc_classid = 0;
 #endif
 #endif
 
@@ -325,19 +365,27 @@ void __kfree_skb(struct sk_buff *skb)
 
 struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 {
-	struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
-
-	if (!n) 
-		return NULL;
+	struct sk_buff *n;
+
+	n = skb + 1;
+	if (skb->fclone == SKB_FCLONE_ORIG &&
+	    n->fclone == SKB_FCLONE_UNAVAILABLE) {
+		atomic_t *fclone_ref = (atomic_t *) (n + 1);
+		n->fclone = SKB_FCLONE_CLONE;
+		atomic_inc(fclone_ref);
+	} else {
+		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+		if (!n)
+			return NULL;
+		n->fclone = SKB_FCLONE_UNAVAILABLE;
+	}
 
 #define C(x) n->x = skb->x
 
 	n->next = n->prev = NULL;
-	n->list = NULL;
 	n->sk = NULL;
-	C(stamp);
+	C(tstamp);
 	C(dev);
-	C(real_dev);
 	C(h);
 	C(nh);
 	C(mac);
@@ -361,7 +409,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 	n->destructor = NULL;
 #ifdef CONFIG_NETFILTER
 	C(nfmark);
-	C(nfcache);
 	C(nfct);
 	nf_conntrack_get(skb->nfct);
 	C(nfctinfo);
@@ -370,9 +417,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 	nf_bridge_get(skb->nf_bridge);
 #endif
 #endif /*CONFIG_NETFILTER*/
-#if defined(CONFIG_HIPPI)
-	C(private);
-#endif
 #ifdef CONFIG_NET_SCHED
 	C(tc_index);
 #ifdef CONFIG_NET_CLS_ACT
@@ -380,7 +424,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 	n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
 	n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
 	C(input_dev);
-	C(tc_classid);
 #endif
 
 #endif
@@ -404,10 +447,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	 */
 	unsigned long offset = new->data - old->data;
 
-	new->list	= NULL;
 	new->sk		= NULL;
 	new->dev	= old->dev;
-	new->real_dev	= old->real_dev;
 	new->priority	= old->priority;
 	new->protocol	= old->protocol;
 	new->dst	= dst_clone(old->dst);
@@ -419,12 +460,12 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->mac.raw	= old->mac.raw + offset;
 	memcpy(new->cb, old->cb, sizeof(old->cb));
 	new->local_df	= old->local_df;
+	new->fclone	= SKB_FCLONE_UNAVAILABLE;
 	new->pkt_type	= old->pkt_type;
-	new->stamp	= old->stamp;
+	new->tstamp	= old->tstamp;
 	new->destructor = NULL;
 #ifdef CONFIG_NETFILTER
 	new->nfmark	= old->nfmark;
-	new->nfcache	= old->nfcache;
 	new->nfct	= old->nfct;
 	nf_conntrack_get(old->nfct);
 	new->nfctinfo	= old->nfctinfo;
@@ -1344,50 +1385,43 @@ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
 	__skb_queue_tail(list, newsk);
 	spin_unlock_irqrestore(&list->lock, flags);
 }
+
 /**
  *	skb_unlink	-	remove a buffer from a list
  *	@skb: buffer to remove
+ *	@list: list to use
  *
- *	Place a packet after a given packet in a list. The list locks are taken
- *	and this function is atomic with respect to other list locked calls
+ *	Remove a packet from a list. The list locks are taken and this
+ *	function is atomic with respect to other list locked calls
  *
- *	Works even without knowing the list it is sitting on, which can be
- *	handy at times. It also means that THE LIST MUST EXIST when you
- *	unlink. Thus a list must have its contents unlinked before it is
- *	destroyed.
+ *	You must know what list the SKB is on.
  */
-void skb_unlink(struct sk_buff *skb)
+void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
 {
-	struct sk_buff_head *list = skb->list;
-
-	if (list) {
-		unsigned long flags;
+	unsigned long flags;
 
-		spin_lock_irqsave(&list->lock, flags);
-		if (skb->list == list)
-			__skb_unlink(skb, skb->list);
-		spin_unlock_irqrestore(&list->lock, flags);
-	}
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_unlink(skb, list);
+	spin_unlock_irqrestore(&list->lock, flags);
 }
 
-
 /**
  *	skb_append	-	append a buffer
  *	@old: buffer to insert after
  *	@newsk: buffer to insert
+ *	@list: list to use
  *
  *	Place a packet after a given packet in a list. The list locks are taken
  *	and this function is atomic with respect to other list locked calls.
  *	A buffer cannot be placed on two lists at the same time.
  */
-
-void skb_append(struct sk_buff *old, struct sk_buff *newsk)
+void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&old->list->lock, flags);
-	__skb_append(old, newsk);
-	spin_unlock_irqrestore(&old->list->lock, flags);
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_append(old, newsk, list);
+	spin_unlock_irqrestore(&list->lock, flags);
 }
 
 
@@ -1395,19 +1429,21 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk)
  *	skb_insert	-	insert a buffer
  *	@old: buffer to insert before
  *	@newsk: buffer to insert
+ *	@list: list to use
+ *
+ *	Place a packet before a given packet in a list. The list locks are
+ *	taken and this function is atomic with respect to other list locked
+ *	calls.
  *
- *	Place a packet before a given packet in a list. The list locks are taken
- *	and this function is atomic with respect to other list locked calls
  *	A buffer cannot be placed on two lists at the same time.
  */
-
-void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
+void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&old->list->lock, flags);
-	__skb_insert(newsk, old->prev, old, old->list);
-	spin_unlock_irqrestore(&old->list->lock, flags);
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_insert(newsk, old->prev, old, list);
+	spin_unlock_irqrestore(&list->lock, flags);
 }
 
 #if 0
@@ -1663,12 +1699,23 @@ void __init skb_init(void)
 					      NULL, NULL);
 	if (!skbuff_head_cache)
 		panic("cannot create skbuff cache");
+
+	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+						(2*sizeof(struct sk_buff)) +
+						sizeof(atomic_t),
+						0,
+						SLAB_HWCACHE_ALIGN,
+						NULL, NULL);
+	if (!skbuff_fclone_cache)
+		panic("cannot create skbuff cache");
+
+	do_gettimeofday(&skb_tv_base);
 }
 
 EXPORT_SYMBOL(___pskb_trim);
 EXPORT_SYMBOL(__kfree_skb);
 EXPORT_SYMBOL(__pskb_pull_tail);
-EXPORT_SYMBOL(alloc_skb);
+EXPORT_SYMBOL(__alloc_skb);
 EXPORT_SYMBOL(pskb_copy);
 EXPORT_SYMBOL(pskb_expand_head);
 EXPORT_SYMBOL(skb_checksum);
@@ -1696,3 +1743,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
 EXPORT_SYMBOL(skb_seq_read);
 EXPORT_SYMBOL(skb_abort_seq_read);
 EXPORT_SYMBOL(skb_find_text);
+EXPORT_SYMBOL(skb_tv_base);
diff --git a/net/core/sock.c b/net/core/sock.c
index 12f6d9a2a52..ccd10fd6568 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -260,7 +260,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 			   
 			if (val > sysctl_wmem_max)
 				val = sysctl_wmem_max;
-
+set_sndbuf:
 			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
 			if ((val * 2) < SOCK_MIN_SNDBUF)
 				sk->sk_sndbuf = SOCK_MIN_SNDBUF;
@@ -274,6 +274,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 			sk->sk_write_space(sk);
 			break;
 
+		case SO_SNDBUFFORCE:
+			if (!capable(CAP_NET_ADMIN)) {
+				ret = -EPERM;
+				break;
+			}
+			goto set_sndbuf;
+
 		case SO_RCVBUF:
 			/* Don't error on this BSD doesn't and if you think
 			   about it this is right. Otherwise apps have to
@@ -282,7 +289,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 			  
 			if (val > sysctl_rmem_max)
 				val = sysctl_rmem_max;
-
+set_rcvbuf:
 			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
 			/* FIXME: is this lower bound the right one? */
 			if ((val * 2) < SOCK_MIN_RCVBUF)
@@ -291,6 +298,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 				sk->sk_rcvbuf = val * 2;
 			break;
 
+		case SO_RCVBUFFORCE:
+			if (!capable(CAP_NET_ADMIN)) {
+				ret = -EPERM;
+				break;
+			}
+			goto set_rcvbuf;
+
 		case SO_KEEPALIVE:
 #ifdef CONFIG_INET
 			if (sk->sk_protocol == IPPROTO_TCP)
@@ -686,6 +700,80 @@ void sk_free(struct sock *sk)
 	module_put(owner);
 }
 
+struct sock *sk_clone(const struct sock *sk, const unsigned int __nocast priority)
+{
+	struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
+
+	if (newsk != NULL) {
+		struct sk_filter *filter;
+
+		memcpy(newsk, sk, sk->sk_prot->obj_size);
+
+		/* SANITY */
+		sk_node_init(&newsk->sk_node);
+		sock_lock_init(newsk);
+		bh_lock_sock(newsk);
+
+		atomic_set(&newsk->sk_rmem_alloc, 0);
+		atomic_set(&newsk->sk_wmem_alloc, 0);
+		atomic_set(&newsk->sk_omem_alloc, 0);
+		skb_queue_head_init(&newsk->sk_receive_queue);
+		skb_queue_head_init(&newsk->sk_write_queue);
+
+		rwlock_init(&newsk->sk_dst_lock);
+		rwlock_init(&newsk->sk_callback_lock);
+
+		newsk->sk_dst_cache	= NULL;
+		newsk->sk_wmem_queued	= 0;
+		newsk->sk_forward_alloc = 0;
+		newsk->sk_send_head	= NULL;
+		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
+		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+
+		sock_reset_flag(newsk, SOCK_DONE);
+		skb_queue_head_init(&newsk->sk_error_queue);
+
+		filter = newsk->sk_filter;
+		if (filter != NULL)
+			sk_filter_charge(newsk, filter);
+
+		if (unlikely(xfrm_sk_clone_policy(newsk))) {
+			/* It is still a raw copy of the parent, so invalidate
+			 * the destructor and do a plain sk_free() */
+			newsk->sk_destruct = NULL;
+			sk_free(newsk);
+			newsk = NULL;
+			goto out;
+		}
+
+		newsk->sk_err	   = 0;
+		newsk->sk_priority = 0;
+		atomic_set(&newsk->sk_refcnt, 2);
+
+		/*
+		 * Increment the counter in the same struct proto as the master
+		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
+		 * is the same as sk->sk_prot->socks, as this field was copied
+		 * with memcpy).
+		 *
+		 * This _changes_ the previous behaviour, where
+		 * tcp_create_openreq_child was always incrementing the
+		 * equivalent of tcp_prot->socks (inet_sock_nr), so this has
+		 * to be taken into account in all callers. -acme
+		 */
+		sk_refcnt_debug_inc(newsk);
+		newsk->sk_socket = NULL;
+		newsk->sk_sleep	 = NULL;
+
+		if (newsk->sk_prot->sockets_allocated)
+			atomic_inc(newsk->sk_prot->sockets_allocated);
+	}
+out:
+	return newsk;
+}
+
+EXPORT_SYMBOL_GPL(sk_clone);
+
 void __init sk_init(void)
 {
 	if (num_physpages <= 4096) {
@@ -1353,11 +1441,7 @@ void sk_common_release(struct sock *sk)
 
 	xfrm_sk_free_policy(sk);
 
-#ifdef INET_REFCNT_DEBUG
-	if (atomic_read(&sk->sk_refcnt) != 1)
-		printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n",
-		       sk, atomic_read(&sk->sk_refcnt));
-#endif
+	sk_refcnt_debug_release(sk);
 	sock_put(sk);
 }
 
@@ -1368,7 +1452,8 @@ static LIST_HEAD(proto_list);
 
 int proto_register(struct proto *prot, int alloc_slab)
 {
-	char *request_sock_slab_name;
+	char *request_sock_slab_name = NULL;
+	char *timewait_sock_slab_name;
 	int rc = -ENOBUFS;
 
 	if (alloc_slab) {
@@ -1399,6 +1484,23 @@ int proto_register(struct proto *prot, int alloc_slab)
 				goto out_free_request_sock_slab_name;
 			}
 		}
+
+		if (prot->twsk_obj_size) {
+			static const char mask[] = "tw_sock_%s";
+
+			timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
+
+			if (timewait_sock_slab_name == NULL)
+				goto out_free_request_sock_slab;
+
+			sprintf(timewait_sock_slab_name, mask, prot->name);
+			prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name,
+							    prot->twsk_obj_size,
+							    0, SLAB_HWCACHE_ALIGN,
+							    NULL, NULL);
+			if (prot->twsk_slab == NULL)
+				goto out_free_timewait_sock_slab_name;
+		}
 	}
 
 	write_lock(&proto_list_lock);
@@ -1407,6 +1509,13 @@ int proto_register(struct proto *prot, int alloc_slab)
 	rc = 0;
 out:
 	return rc;
+out_free_timewait_sock_slab_name:
+	kfree(timewait_sock_slab_name);
+out_free_request_sock_slab:
+	if (prot->rsk_prot && prot->rsk_prot->slab) {
+		kmem_cache_destroy(prot->rsk_prot->slab);
+		prot->rsk_prot->slab = NULL;
+	}
 out_free_request_sock_slab_name:
 	kfree(request_sock_slab_name);
 out_free_sock_slab:
@@ -1434,6 +1543,14 @@ void proto_unregister(struct proto *prot)
 		prot->rsk_prot->slab = NULL;
 	}
 
+	if (prot->twsk_slab != NULL) {
+		const char *name = kmem_cache_name(prot->twsk_slab);
+
+		kmem_cache_destroy(prot->twsk_slab);
+		kfree(name);
+		prot->twsk_slab = NULL;
+	}
+
 	list_del(&prot->node);
 	write_unlock(&proto_list_lock);
 }
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 8f817ad9f54..2f278c8e474 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -9,23 +9,18 @@
 #include <linux/sysctl.h>
 #include <linux/config.h>
 #include <linux/module.h>
+#include <linux/socket.h>
+#include <net/sock.h>
 
 #ifdef CONFIG_SYSCTL
 
 extern int netdev_max_backlog;
-extern int netdev_budget;
 extern int weight_p;
-extern int net_msg_cost;
-extern int net_msg_burst;
 
 extern __u32 sysctl_wmem_max;
 extern __u32 sysctl_rmem_max;
-extern __u32 sysctl_wmem_default;
-extern __u32 sysctl_rmem_default;
 
 extern int sysctl_core_destroy_delay;
-extern int sysctl_optmem_max;
-extern int sysctl_somaxconn;
 
 #ifdef CONFIG_NET_DIVERT
 extern char sysctl_divert_version[];
diff --git a/net/core/utils.c b/net/core/utils.c
index 88eb8b68e26..7b5970fc9e4 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -16,7 +16,9 @@
 #include <linux/module.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
+#include <linux/inet.h>
 #include <linux/mm.h>
+#include <linux/net.h>
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/random.h>
diff --git a/net/core/wireless.c b/net/core/wireless.c
index 3ff5639c0b7..5caae2399f3 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -571,10 +571,6 @@ static int wireless_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-extern void *dev_seq_start(struct seq_file *seq, loff_t *pos);
-extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
-extern void dev_seq_stop(struct seq_file *seq, void *v);
-
 static struct seq_operations wireless_seq_ops = {
 	.start = dev_seq_start,
 	.next  = dev_seq_next,
@@ -1144,8 +1140,8 @@ static inline void rtmsg_iwinfo(struct net_device *	dev,
 		kfree_skb(skb);
 		return;
 	}
-	NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
-	netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_ATOMIC);
+	NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
+	netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC);
 }
 #endif	/* WE_EVENT_NETLINK */
author	Jeff Garzik <jgarzik@pobox.com>	2005-08-30 13:32:29 -0400
committer	Jeff Garzik <jgarzik@pobox.com>	2005-08-30 13:32:29 -0400
commit	ed735ccbefaf7e5e3ef61418f7e209b8c59308a7 (patch)
tree	b8cc69814d2368b08d0a84c8da0c12028bd04867 /net/core
parent	39fbe47377062200acc26ea0ccef223b4399a82c (diff)
parent	d8971fcb702e24d1e22c77fd1772f182ffee87e3 (diff)