8 files changed, 117 insertions, 63 deletions
diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h
index 65f5a7b2646..d075725bf44 100644
--- a/include/asm-powerpc/system.h
+++ b/include/asm-powerpc/system.h
@@ -365,8 +365,11 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
  * powers of 2 writes until it reaches sufficient alignment).
  *
  * Based on this we disable the IP header alignment in network drivers.
+ * We also modify NET_SKB_PAD to be a cacheline in size, thus maintaining
+ * cacheline alignment of buffers.
  */
-#define NET_IP_ALIGN   0
+#define NET_IP_ALIGN	0
+#define NET_SKB_PAD	L1_CACHE_BYTES
 #endif
 
 #define arch_align_stack(x) (x)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 950dc55e519..40ccf8cc423 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -598,20 +598,7 @@ DECLARE_PER_CPU(struct softnet_data,softnet_data);
 
 #define HAVE_NETIF_QUEUE
 
-static inline void __netif_schedule(struct net_device *dev)
-{
-	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
-		unsigned long flags;
-		struct softnet_data *sd;
-
-		local_irq_save(flags);
-		sd = &__get_cpu_var(softnet_data);
-		dev->next_sched = sd->output_queue;
-		sd->output_queue = dev;
-		raise_softirq_irqoff(NET_TX_SOFTIRQ);
-		local_irq_restore(flags);
-	}
-}
+extern void __netif_schedule(struct net_device *dev);
 
 static inline void netif_schedule(struct net_device *dev)
 {
@@ -675,13 +662,7 @@ static inline void dev_kfree_skb_irq(struct sk_buff *skb)
 /* Use this variant in places where it could be invoked
  * either from interrupt or non-interrupt context.
  */
-static inline void dev_kfree_skb_any(struct sk_buff *skb)
-{
-	if (in_irq() || irqs_disabled())
-		dev_kfree_skb_irq(skb);
-	else
-		dev_kfree_skb(skb);
-}
+extern void dev_kfree_skb_any(struct sk_buff *skb);
 
 #define HAVE_NETIF_RX 1
 extern int		netif_rx(struct sk_buff *skb);
@@ -768,22 +749,9 @@ static inline int netif_device_present(struct net_device *dev)
 	return test_bit(__LINK_STATE_PRESENT, &dev->state);
 }
 
-static inline void netif_device_detach(struct net_device *dev)
-{
-	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
-	    netif_running(dev)) {
-		netif_stop_queue(dev);
-	}
-}
+extern void netif_device_detach(struct net_device *dev);
 
-static inline void netif_device_attach(struct net_device *dev)
-{
-	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
-	    netif_running(dev)) {
-		netif_wake_queue(dev);
- 		__netdev_watchdog_up(dev);
-	}
-}
+extern void netif_device_attach(struct net_device *dev);
 
 /*
  * Network interface message level settings
@@ -851,20 +819,7 @@ static inline int netif_rx_schedule_prep(struct net_device *dev)
  * already been called and returned 1.
  */
 
-static inline void __netif_rx_schedule(struct net_device *dev)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	dev_hold(dev);
-	list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
-	if (dev->quota < 0)
-		dev->quota += dev->weight;
-	else
-		dev->quota = dev->weight;
-	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
-	local_irq_restore(flags);
-}
+extern void __netif_rx_schedule(struct net_device *dev);
 
 /* Try to reschedule poll. Called by irq handler. */
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 613b9513f8b..c4619a428d9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -941,6 +941,25 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 #define NET_IP_ALIGN	2
 #endif
 
+/*
+ * The networking layer reserves some headroom in skb data (via
+ * dev_alloc_skb). This is used to avoid having to reallocate skb data when
+ * the header has to grow. In the default case, if the header has to grow
+ * 16 bytes or less we avoid the reallocation.
+ *
+ * Unfortunately this headroom changes the DMA alignment of the resulting
+ * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive
+ * on some architectures. An architecture can override this value,
+ * perhaps setting it to a cacheline in size (since that will maintain
+ * cacheline alignment of the DMA). It must be a power of 2.
+ *
+ * Various parts of the networking layer expect at least 16 bytes of
+ * headroom, you should not reduce this.
+ */
+#ifndef NET_SKB_PAD
+#define NET_SKB_PAD	16
+#endif
+
 extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
 
 static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
@@ -1030,9 +1049,9 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
 static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
 					      gfp_t gfp_mask)
 {
-	struct sk_buff *skb = alloc_skb(length + 16, gfp_mask);
+	struct sk_buff *skb = alloc_skb(length + NET_SKB_PAD, gfp_mask);
 	if (likely(skb))
-		skb_reserve(skb, 16);
+		skb_reserve(skb, NET_SKB_PAD);
 	return skb;
 }
 #else
@@ -1070,13 +1089,15 @@ static inline struct sk_buff *dev_alloc_skb(unsigned int length)
  */
 static inline int skb_cow(struct sk_buff *skb, unsigned int headroom)
 {
-	int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
+	int delta = (headroom > NET_SKB_PAD ? headroom : NET_SKB_PAD) -
+			skb_headroom(skb);
 
 	if (delta < 0)
 		delta = 0;
 
 	if (delta || skb_cloned(skb))
-		return pskb_expand_head(skb, (delta + 15) & ~15, 0, GFP_ATOMIC);
+		return pskb_expand_head(skb, (delta + (NET_SKB_PAD-1)) &
+				~(NET_SKB_PAD-1), 0, GFP_ATOMIC);
 	return 0;
 }
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9418f4d1afb..3c989db8a7a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -405,9 +405,6 @@ extern int			tcp_disconnect(struct sock *sk, int flags);
 
 extern void			tcp_unhash(struct sock *sk);
 
-extern int			tcp_v4_hash_connecting(struct sock *sk);
-
-
 /* From syncookies.c */
 extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, 
 				    struct ip_options *opt);
diff --git a/net/core/dev.c b/net/core/dev.c
index a3ab11f3415..434220d093a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1080,6 +1080,70 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_unlock();
 }
 
+
+void __netif_schedule(struct net_device *dev)
+{
+	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
+		unsigned long flags;
+		struct softnet_data *sd;
+
+		local_irq_save(flags);
+		sd = &__get_cpu_var(softnet_data);
+		dev->next_sched = sd->output_queue;
+		sd->output_queue = dev;
+		raise_softirq_irqoff(NET_TX_SOFTIRQ);
+		local_irq_restore(flags);
+	}
+}
+EXPORT_SYMBOL(__netif_schedule);
+
+void __netif_rx_schedule(struct net_device *dev)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	dev_hold(dev);
+	list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
+	if (dev->quota < 0)
+		dev->quota += dev->weight;
+	else
+		dev->quota = dev->weight;
+	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(__netif_rx_schedule);
+
+void dev_kfree_skb_any(struct sk_buff *skb)
+{
+	if (in_irq() || irqs_disabled())
+		dev_kfree_skb_irq(skb);
+	else
+		dev_kfree_skb(skb);
+}
+EXPORT_SYMBOL(dev_kfree_skb_any);
+
+
+/* Hot-plugging. */
+void netif_device_detach(struct net_device *dev)
+{
+	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
+	    netif_running(dev)) {
+		netif_stop_queue(dev);
+	}
+}
+EXPORT_SYMBOL(netif_device_detach);
+
+void netif_device_attach(struct net_device *dev)
+{
+	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
+	    netif_running(dev)) {
+		netif_wake_queue(dev);
+ 		__netdev_watchdog_up(dev);
+	}
+}
+EXPORT_SYMBOL(netif_device_attach);
+
+
 /*
  * Invalidate hardware checksum when packet is to be mangled, and
  * complete checksum manually on outgoing path.
diff --git a/net/core/sock.c b/net/core/sock.c
index a96ea7dd0fc..ed2afdb9ea2 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -385,7 +385,21 @@ set_sndbuf:
 				val = sysctl_rmem_max;
 set_rcvbuf:
 			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-			/* FIXME: is this lower bound the right one? */
+			/*
+			 * We double it on the way in to account for
+			 * "struct sk_buff" etc. overhead.   Applications
+			 * assume that the SO_RCVBUF setting they make will
+			 * allow that much actual data to be received on that
+			 * socket.
+			 *
+			 * Applications are unaware that "struct sk_buff" and
+			 * other overheads allocate from the receive buffer
+			 * during socket buffer allocation.
+			 *
+			 * And after considering the possible alternatives,
+			 * returning the value we actually used in getsockopt
+			 * is the most desirable behavior.
+			 */
 			if ((val * 2) < SOCK_MIN_RCVBUF)
 				sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
 			else
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index e3dd30d36c8..b39e2a59788 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -204,7 +204,7 @@ static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
 	if (rc) {
 		kfree(opt->dccpop_sc->dccpoc_val);
 		kfree(opt->dccpop_sc);
-		opt->dccpop_sc = 0;
+		opt->dccpop_sc = NULL;
 		return rc;
 	}
 
@@ -322,7 +322,7 @@ static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk,
 	opt->dccpop_type = type == DCCPO_CHANGE_L ? DCCPO_CONFIRM_R :
 						    DCCPO_CONFIRM_L;
 	opt->dccpop_feat = feature;
-	opt->dccpop_val	 = 0;
+	opt->dccpop_val	 = NULL;
 	opt->dccpop_len	 = 0;
 
 	/* change feature */
@@ -523,7 +523,7 @@ int dccp_feat_clone(struct sock *oldsk, struct sock *newsk)
 		 * once...
 		 */
 		/* the master socket no longer needs to worry about confirms */
-		opt->dccpop_sc = 0; /* it's not a memleak---new socket has it */
+		opt->dccpop_sc = NULL; /* it's not a memleak---new socket has it */
 
 		/* reset state for a new socket */
 		opt->dccpop_conf = 0;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index d2ae9893ca1..a26ff9f4457 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -620,7 +620,7 @@ int dn_dev_set_default(struct net_device *dev, int force)
 	}
 	write_unlock(&dndev_lock);
 	if (old)
-		dev_put(dev);
+		dev_put(old);
 	return rv;
 }