From 30cfd0baf0a0c4329fff1ef4b622919297969ec8 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Wed, 25 Jul 2007 23:49:34 -0700
Subject: [TCP]: congestion control API pass RTT in microseconds

This patch changes the API for the callback that is done after an ACK is
received. It solves a couple of issues:

  * Some congestion controls want higher resolution value of RTT
    (controlled by TCP_CONG_RTT_SAMPLE flag). These don't really want a ktime, but
    all compute a RTT in microseconds.

  * Other congestion control could use RTT at jiffies resolution.

To keep API consistent the units should be the same for both cases, just the
resolution should change.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_bic.c      |  2 +-
 net/ipv4/tcp_cubic.c    |  2 +-
 net/ipv4/tcp_htcp.c     |  2 +-
 net/ipv4/tcp_illinois.c |  8 +++-----
 net/ipv4/tcp_input.c    | 21 ++++++++++++++++-----
 net/ipv4/tcp_lp.c       |  6 +++---
 net/ipv4/tcp_vegas.c    |  6 +++---
 net/ipv4/tcp_vegas.h    |  2 +-
 net/ipv4/tcp_veno.c     |  6 +++---
 net/ipv4/tcp_westwood.c |  7 ++++---
 net/ipv4/tcp_yeah.c     |  4 ++--
 11 files changed, 38 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 519de091a94..4586211e375 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -206,7 +206,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
 /* Track delayed acknowledgment ratio using sliding window
  * ratio = (15*ratio + sample) / 16
  */
-static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
+static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index d17da30d82d..0c44bb67a67 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -334,7 +334,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
 /* Track delayed acknowledgment ratio using sliding window
  * ratio = (15*ratio + sample) / 16
  */
-static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
+static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 08a02e6045c..fa61663ace3 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -98,7 +98,7 @@ static inline void measure_rtt(struct sock *sk)
 	}
 }
 
-static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, ktime_t last)
+static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, s32 rtt)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index cc5de6f69d4..64f1cbaf96e 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -83,18 +83,16 @@ static void tcp_illinois_init(struct sock *sk)
 }
 
 /* Measure RTT for each ack. */
-static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
+static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, s32 rtt)
 {
 	struct illinois *ca = inet_csk_ca(sk);
-	u32 rtt;
 
 	ca->acked = pkts_acked;
 
-	if (ktime_equal(last, net_invalid_timestamp()))
+	/* dup ack, no rtt sample */
+	if (rtt < 0)
 		return;
 
-	rtt = ktime_to_us(net_timedelta(last));
-
 	/* ignore bogus values, this prevents wraparound in alpha math */
 	if (rtt > RTT_MAX)
 		rtt = RTT_MAX;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fec8a7a4dba..4b255fe999d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2490,12 +2490,23 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 		tcp_ack_update_rtt(sk, acked, seq_rtt);
 		tcp_ack_packets_out(sk);
 
-		/* Is the ACK triggering packet unambiguous? */
-		if (acked & FLAG_RETRANS_DATA_ACKED)
-			last_ackt = net_invalid_timestamp();
+		if (ca_ops->pkts_acked) {
+			s32 rtt_us = -1;
+
+			/* Is the ACK triggering packet unambiguous? */
+			if (!(acked & FLAG_RETRANS_DATA_ACKED)) {
+				/* High resolution needed and available? */
+				if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
+				    !ktime_equal(last_ackt,
+						 net_invalid_timestamp()))
+					rtt_us = ktime_us_delta(ktime_get_real(),
+								last_ackt);
+				else if (seq_rtt > 0)
+					rtt_us = jiffies_to_usecs(seq_rtt);
+			}
 
-		if (ca_ops->pkts_acked)
-			ca_ops->pkts_acked(sk, pkts_acked, last_ackt);
+			ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
+		}
 	}
 
 #if FASTRETRANS_DEBUG > 0
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 80e140e3ec2..e7f5ef92cbd 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -260,13 +260,13 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt)
  * newReno in increase case.
  * We work it out by following the idea from TCP-LP's paper directly
  */
-static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, ktime_t last)
+static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct lp *lp = inet_csk_ca(sk);
 
-	if (!ktime_equal(last, net_invalid_timestamp()))
-		tcp_lp_rtt_sample(sk,  ktime_to_us(net_timedelta(last)));
+	if (rtt_us > 0)
+		tcp_lp_rtt_sample(sk, rtt_us);
 
 	/* calc inference */
 	if (tcp_time_stamp > tp->rx_opt.rcv_tsecr)
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 914e0307f7a..b49dedcda52 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -112,16 +112,16 @@ EXPORT_SYMBOL_GPL(tcp_vegas_init);
  *   o min-filter RTT samples from a much longer window (forever for now)
  *     to find the propagation delay (baseRTT)
  */
-void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
+void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us)
 {
 	struct vegas *vegas = inet_csk_ca(sk);
 	u32 vrtt;
 
-	if (ktime_equal(last, net_invalid_timestamp()))
+	if (rtt_us < 0)
 		return;
 
 	/* Never allow zero rtt or baseRTT */
-	vrtt = ktime_to_us(net_timedelta(last)) + 1;
+	vrtt = rtt_us + 1;
 
 	/* Filter to find propagation delay: */
 	if (vrtt < vegas->baseRTT)
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
index 502fa818363..6c0eea2f824 100644
--- a/net/ipv4/tcp_vegas.h
+++ b/net/ipv4/tcp_vegas.h
@@ -17,7 +17,7 @@ struct vegas {
 
 extern void tcp_vegas_init(struct sock *sk);
 extern void tcp_vegas_state(struct sock *sk, u8 ca_state);
-extern void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last);
+extern void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us);
 extern void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
 extern void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb);
 
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 7a55ddf8603..8fb2aee0b1a 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -69,16 +69,16 @@ static void tcp_veno_init(struct sock *sk)
 }
 
 /* Do rtt sampling needed for Veno. */
-static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
+static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us)
 {
 	struct veno *veno = inet_csk_ca(sk);
 	u32 vrtt;
 
-	if (ktime_equal(last, net_invalid_timestamp()))
+	if (rtt_us < 0)
 		return;
 
 	/* Never allow zero rtt or baseRTT */
-	vrtt = ktime_to_us(net_timedelta(last)) + 1;
+	vrtt = rtt_us + 1;
 
 	/* Filter to find propagation delay: */
 	if (vrtt < veno->basertt)
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index e61e09dd513..20151d6a624 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -100,11 +100,12 @@ static void westwood_filter(struct westwood *w, u32 delta)
  * Called after processing group of packets.
  * but all westwood needs is the last sample of srtt.
  */
-static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
+static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, s32 rtt)
 {
 	struct westwood *w = inet_csk_ca(sk);
-	if (cnt > 0)
-		w->rtt = tcp_sk(sk)->srtt >> 3;
+
+	if (rtt > 0)
+		w->rtt = usecs_to_jiffies(rtt);
 }
 
 /*
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index c04b7c6ec70..c107fba7430 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -58,7 +58,7 @@ static void tcp_yeah_init(struct sock *sk)
 }
 
 
-static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
+static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct yeah *yeah = inet_csk_ca(sk);
@@ -66,7 +66,7 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
 	if (icsk->icsk_ca_state == TCP_CA_Open)
 		yeah->pkts_acked = pkts_acked;
 
-	tcp_vegas_pkts_acked(sk, pkts_acked, last);
+	tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us);
 }
 
 static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
-- 
cgit v1.2.3


From e7d0c88586a66cf03e70750a8119d984fdedf2aa Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Wed, 25 Jul 2007 23:50:06 -0700
Subject: [TCP]: cubic - eliminate use of receive time stamp

Remove use of received timestamp option value from RTT calculation in Cubic.
A hostile receiver may be returning a larger timestamp option than the original
value. This would cause the sender to believe the malevolent receiver had
a larger RTT and because Cubic tries to provide some RTT friendliness, the
sender would then favor the liar.

Instead, use the jiffie resolutionRTT value already computed and
passed back after ack.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cubic.c | 46 ++++++++++++++++++----------------------------
 1 file changed, 18 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 0c44bb67a67..485d7ea35f7 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -246,38 +246,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 		ca->cnt = 1;
 }
 
-
-/* Keep track of minimum rtt */
-static inline void measure_delay(struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	struct bictcp *ca = inet_csk_ca(sk);
-	u32 delay;
-
-	/* No time stamp */
-	if (!(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) ||
-	     /* Discard delay samples right after fast recovery */
-	    (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
-		return;
-
-	delay = (tcp_time_stamp - tp->rx_opt.rcv_tsecr)<<3;
-	if (delay == 0)
-		delay = 1;
-
-	/* first time call or link delay decreases */
-	if (ca->delay_min == 0 || ca->delay_min > delay)
-		ca->delay_min = delay;
-}
-
 static void bictcp_cong_avoid(struct sock *sk, u32 ack,
 			      u32 in_flight, int data_acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
 
-	if (data_acked)
-		measure_delay(sk);
-
 	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
@@ -337,14 +311,30 @@ static void bictcp_state(struct sock *sk, u8 new_state)
 static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct bictcp *ca = inet_csk_ca(sk);
+	u32 delay;
 
 	if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) {
-		struct bictcp *ca = inet_csk_ca(sk);
 		cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
 		ca->delayed_ack += cnt;
 	}
-}
 
+	/* Some calls are for duplicates without timetamps */
+	if (rtt_us < 0)
+		return;
+
+	/* Discard delay samples right after fast recovery */
+	if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ)
+		return;
+
+	delay = usecs_to_jiffies(rtt_us) << 3;
+	if (delay == 0)
+		delay = 1;
+
+	/* first time call or link delay decreases */
+	if (ca->delay_min == 0 || ca->delay_min > delay)
+		ca->delay_min = delay;
+}
 
 static struct tcp_congestion_ops cubictcp = {
 	.init		= bictcp_init,
-- 
cgit v1.2.3


From 113bbbd8d2da61b50417a1dd06d8e7c19047e54b Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Wed, 25 Jul 2007 23:50:28 -0700
Subject: [TCP]: htcp - use measured rtt

Change HTCP to use measured RTT rather than smooth RTT.
Srtt is computed using the TCP receive timestamp
options, so it is vulnerable to hostile receivers. To avoid any problems
this might cause use the measured RTT instead.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_htcp.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index fa61663ace3..b66556c0a5b 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -76,12 +76,11 @@ static u32 htcp_cwnd_undo(struct sock *sk)
 	return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta);
 }
 
-static inline void measure_rtt(struct sock *sk)
+static inline void measure_rtt(struct sock *sk, u32 srtt)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct htcp *ca = inet_csk_ca(sk);
-	u32 srtt = tp->srtt >> 3;
 
 	/* keep track of minimum RTT seen so far, minRTT is zero at first */
 	if (ca->minRTT > srtt || !ca->minRTT)
@@ -108,6 +107,9 @@ static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, s32 rtt
 	if (icsk->icsk_ca_state == TCP_CA_Open)
 		ca->pkts_acked = pkts_acked;
 
+	if (rtt > 0)
+		measure_rtt(sk, usecs_to_jiffies(rtt));
+
 	if (!use_bandwidth_switch)
 		return;
 
@@ -237,8 +239,6 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack,
 	if (tp->snd_cwnd <= tp->snd_ssthresh)
 		tcp_slow_start(tp);
 	else {
-		measure_rtt(sk);
-
 		/* In dangerous area, increase slowly.
 		 * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
 		 */
-- 
cgit v1.2.3


From 0ed72ec4afb9fbd584e03763707d3db0f62ee1be Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 26 Jul 2007 00:03:29 -0700
Subject: [NET]: kernel-doc fixes

Fix kernel-doc omissions in net/:

Warning(linux-2.6.23-rc1//net/core/dev.c:2728): No description found for parameter 'addr'
Warning(linux-2.6.23-rc1//net/core/dev.c:2752): No description found for parameter 'addr'
Warning(linux-2.6.23-rc1//net/core/dev.c:3839): No description found for parameter 'net_dma'
Warning(linux-2.6.23-rc1//net/core/dev.c:3877): No description found for parameter 'state'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index ee4035571c2..8c9518e946f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2718,9 +2718,11 @@ int __dev_addr_add(struct dev_addr_list **list, int *count,
 /**
  *	dev_unicast_delete	- Release secondary unicast address.
  *	@dev: device
+ *	@addr: address to delete
+ *	@alen: length of @addr
  *
  *	Release reference to a secondary unicast address and remove it
- *	from the device if the reference count drop to zero.
+ *	from the device if the reference count drops to zero.
  *
  * 	The caller must hold the rtnl_mutex.
  */
@@ -2742,6 +2744,8 @@ EXPORT_SYMBOL(dev_unicast_delete);
 /**
  *	dev_unicast_add		- add a secondary unicast address
  *	@dev: device
+ *	@addr: address to delete
+ *	@alen: length of @addr
  *
  *	Add a secondary unicast address to the device or increase
  *	the reference count if it already exists.
@@ -3830,9 +3834,11 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 
 #ifdef CONFIG_NET_DMA
 /**
- * net_dma_rebalance -
- * This is called when the number of channels allocated to the net_dma_client
- * changes.  The net_dma_client tries to have one DMA channel per CPU.
+ * net_dma_rebalance - try to maintain one DMA channel per CPU
+ * @net_dma: DMA client and associated data (lock, channels, channel_mask)
+ *
+ * This is called when the number of channels allocated to the net_dma client
+ * changes.  The net_dma client tries to have one DMA channel per CPU.
  */
 
 static void net_dma_rebalance(struct net_dma *net_dma)
@@ -3869,7 +3875,7 @@ static void net_dma_rebalance(struct net_dma *net_dma)
  * netdev_dma_event - event callback for the net_dma_client
  * @client: should always be net_dma_client
  * @chan: DMA channel for the event
- * @event: event type
+ * @state: DMA state to be handled
  */
 static enum dma_state_client
 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
-- 
cgit v1.2.3


From 94571065757a4f2619c48ab4e36cafdc635028ce Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 26 Jul 2007 00:05:07 -0700
Subject: [TIPC]: fix tipc_link_create error handling

if printbuf allocation or tipc_node_attach_link() fails, invalid
references to the link are left in the associated node and bearer
structures.
Fix by allocating printbuf early and moving timer initialization
and the addition of the new link to the b_ptr->links list after
tipc_node_attach_link() succeeded.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 5adfdfd49d6..1d674e0848f 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -423,6 +423,17 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
 		return NULL;
 	}
 
+	if (LINK_LOG_BUF_SIZE) {
+		char *pb = kmalloc(LINK_LOG_BUF_SIZE, GFP_ATOMIC);
+
+		if (!pb) {
+			kfree(l_ptr);
+			warn("Link creation failed, no memory for print buffer\n");
+			return NULL;
+		}
+		tipc_printbuf_init(&l_ptr->print_buf, pb, LINK_LOG_BUF_SIZE);
+	}
+
 	l_ptr->addr = peer;
 	if_name = strchr(b_ptr->publ.name, ':') + 1;
 	sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:",
@@ -432,8 +443,6 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
 		tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
 		/* note: peer i/f is appended to link name by reset/activate */
 	memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr));
-	k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr);
-	list_add_tail(&l_ptr->link_list, &b_ptr->links);
 	l_ptr->checkpoint = 1;
 	l_ptr->b_ptr = b_ptr;
 	link_set_supervision_props(l_ptr, b_ptr->media->tolerance);
@@ -459,21 +468,14 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
 
 	l_ptr->owner = tipc_node_attach_link(l_ptr);
 	if (!l_ptr->owner) {
+		if (LINK_LOG_BUF_SIZE)
+			kfree(l_ptr->print_buf.buf);
 		kfree(l_ptr);
 		return NULL;
 	}
 
-	if (LINK_LOG_BUF_SIZE) {
-		char *pb = kmalloc(LINK_LOG_BUF_SIZE, GFP_ATOMIC);
-
-		if (!pb) {
-			kfree(l_ptr);
-			warn("Link creation failed, no memory for print buffer\n");
-			return NULL;
-		}
-		tipc_printbuf_init(&l_ptr->print_buf, pb, LINK_LOG_BUF_SIZE);
-	}
-
+	k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr);
+	list_add_tail(&l_ptr->link_list, &b_ptr->links);
 	tipc_k_signal((Handler)tipc_link_start, (unsigned long)l_ptr);
 
 	dbg("tipc_link_create(): tolerance = %u,cont intv = %u, abort_limit = %u\n",
-- 
cgit v1.2.3


From 566cfd8f0e049a0647f94714f913e2a975dc464f Mon Sep 17 00:00:00 2001
From: Simon Arlott <simon@fire.lp0.eu>
Date: Thu, 26 Jul 2007 00:09:55 -0700
Subject: [IPV6]: Don't update ADVMSS on routes where the MTU is not also
 updated

The ADVMSS value was incorrectly updated for ALL routes when the MTU
is updated because it's outside the effect of the if statement's
condition.

Signed-off-by: Simon Arlott <simon@fire.lp0.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 919de682b33..55ea80fac60 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1983,9 +1983,10 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
 	    (dst_mtu(&rt->u.dst) > arg->mtu ||
 	     (dst_mtu(&rt->u.dst) < arg->mtu &&
-	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
+	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
-	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
+		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
+	}
 	return 0;
 }
 
-- 
cgit v1.2.3


From 09c7d8293a2d1317d16ef4ddb9f6dd2553d0694e Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Thu, 26 Jul 2007 00:12:25 -0700
Subject: [IRDA]: Fix rfcomm use-after-free

Adrian Bunk wrote:
> Commit 8de0a15483b357d0f0b821330ec84d1660cadc4e added the following
> use-after-free in net/bluetooth/rfcomm/tty.c:
>
> <--  snip  -->
>
> ...
> static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc)
> {
> ...
>         if (IS_ERR(dev->tty_dev)) {
>                 list_del(&dev->list);
>                 kfree(dev);
>                 return PTR_ERR(dev->tty_dev);
>         }
> ...
>
> <--  snip  -->
>
> Spotted by the Coverity checker.

really good catch. I fully overlooked that one. The attached patch
should fix it.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/rfcomm/tty.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 23ba61a13bd..22a832098d4 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -267,7 +267,7 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc)
 out:
 	write_unlock_bh(&rfcomm_dev_lock);
 
-	if (err) {
+	if (err < 0) {
 		kfree(dev);
 		return err;
 	}
@@ -275,9 +275,10 @@ out:
 	dev->tty_dev = tty_register_device(rfcomm_tty_driver, dev->id, NULL);
 
 	if (IS_ERR(dev->tty_dev)) {
+		err = PTR_ERR(dev->tty_dev);
 		list_del(&dev->list);
 		kfree(dev);
-		return PTR_ERR(dev->tty_dev);
+		return err;
 	}
 
 	return dev->id;
-- 
cgit v1.2.3


From 6dc0c2082b8acf30c9239fbbcc051eebdaf7ecff Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 29 Jul 2007 00:15:18 -0700
Subject: [BLUETOOTH]: Fix endianness bug in l2cap_sock_listen()

We loop through psm values, calling __l2cap_get_sock_by_addr(psm, ...)
until we get NULL; then we set ->psm of our socket to htobs(psm).
IOW, we find unused psm value and put it into our socket.  So far, so
good, but...  __l2cap_get_sock_by_addr() compares its argument with
->psm of sockets.  IOW, the entire thing works correctly only on
little-endian.  On big-endian we'll get "no socket with such psm"
on the first iteration, since we won't find a socket with ->psm == 0x1001.
We will happily conclude that 0x1001 is unused and slap htobs(0x1001)
(i.e. 0x110) into ->psm of our socket.  Of course, the next time around
the same thing will repeat and we'll just get a fsckload of sockets
with the same ->psm assigned.

Fix: pass htobs(psm) to __l2cap_get_sock_by_addr() there.  All other
callers are already passing little-endian values and all places that
store something in ->psm are storing little-endian.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/l2cap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 670ff95ca64..b82cbdd1fcb 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -748,7 +748,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog)
 		write_lock_bh(&l2cap_sk_list.lock);
 
 		for (psm = 0x1001; psm < 0x1100; psm += 2)
-			if (!__l2cap_get_sock_by_addr(psm, src)) {
+			if (!__l2cap_get_sock_by_addr(htobs(psm), src)) {
 				l2cap_pi(sk)->psm   = htobs(psm);
 				l2cap_pi(sk)->sport = htobs(psm);
 				err = 0;
-- 
cgit v1.2.3


From 8e036fc3143646dc304356fa50314681d654363f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 29 Jul 2007 00:16:36 -0700
Subject: [BLUETOOTH] l2cap: endianness annotations

no code changes, just documenting existing types

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/l2cap.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index b82cbdd1fcb..09126bf0684 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -353,7 +353,7 @@ static inline int l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16
 }
 
 /* ---- Socket interface ---- */
-static struct sock *__l2cap_get_sock_by_addr(u16 psm, bdaddr_t *src)
+static struct sock *__l2cap_get_sock_by_addr(__le16 psm, bdaddr_t *src)
 {
 	struct sock *sk;
 	struct hlist_node *node;
@@ -368,7 +368,7 @@ found:
 /* Find socket with psm and source bdaddr.
  * Returns closest match.
  */
-static struct sock *__l2cap_get_sock_by_psm(int state, u16 psm, bdaddr_t *src)
+static struct sock *__l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src)
 {
 	struct sock *sk = NULL, *sk1 = NULL;
 	struct hlist_node *node;
@@ -392,7 +392,7 @@ static struct sock *__l2cap_get_sock_by_psm(int state, u16 psm, bdaddr_t *src)
 
 /* Find socket with given address (psm, src).
  * Returns locked socket */
-static inline struct sock *l2cap_get_sock_by_psm(int state, u16 psm, bdaddr_t *src)
+static inline struct sock *l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src)
 {
 	struct sock *s;
 	read_lock(&l2cap_sk_list.lock);
@@ -586,7 +586,7 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_
 		goto done;
 	}
 
-	if (la->l2_psm > 0 && btohs(la->l2_psm) < 0x1001 &&
+	if (la->l2_psm && btohs(la->l2_psm) < 0x1001 &&
 				!capable(CAP_NET_BIND_SERVICE)) {
 		err = -EACCES;
 		goto done;
@@ -873,7 +873,7 @@ static inline int l2cap_do_send(struct sock *sk, struct msghdr *msg, int len)
 	lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
 
 	if (sk->sk_type == SOCK_DGRAM)
-		put_unaligned(l2cap_pi(sk)->psm, (u16 *) skb_put(skb, 2));
+		put_unaligned(l2cap_pi(sk)->psm, (__le16 *) skb_put(skb, 2));
 
 	if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count)) {
 		err = -EFAULT;
@@ -1256,11 +1256,11 @@ static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen, unsigned
 		break;
 
 	case 2:
-		*val = __le16_to_cpu(*((u16 *)opt->val));
+		*val = __le16_to_cpu(*((__le16 *)opt->val));
 		break;
 
 	case 4:
-		*val = __le32_to_cpu(*((u32 *)opt->val));
+		*val = __le32_to_cpu(*((__le32 *)opt->val));
 		break;
 
 	default:
@@ -1287,11 +1287,11 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
 		break;
 
 	case 2:
-		*((u16 *) opt->val) = cpu_to_le16(val);
+		*((__le16 *) opt->val) = cpu_to_le16(val);
 		break;
 
 	case 4:
-		*((u32 *) opt->val) = cpu_to_le32(val);
+		*((__le32 *) opt->val) = cpu_to_le32(val);
 		break;
 
 	default:
@@ -1406,7 +1406,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd
 	int result = 0, status = 0;
 
 	u16 dcid = 0, scid = __le16_to_cpu(req->scid);
-	u16 psm  = req->psm;
+	__le16 psm  = req->psm;
 
 	BT_DBG("psm 0x%2.2x scid 0x%4.4x", psm, scid);
 
@@ -1863,7 +1863,7 @@ done:
 	return 0;
 }
 
-static inline int l2cap_conless_channel(struct l2cap_conn *conn, u16 psm, struct sk_buff *skb)
+static inline int l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, struct sk_buff *skb)
 {
 	struct sock *sk;
 
@@ -1893,7 +1893,8 @@ done:
 static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)
 {
 	struct l2cap_hdr *lh = (void *) skb->data;
-	u16 cid, psm, len;
+	u16 cid, len;
+	__le16 psm;
 
 	skb_pull(skb, L2CAP_HDR_SIZE);
 	cid = __le16_to_cpu(lh->cid);
@@ -1907,7 +1908,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)
 		break;
 
 	case 0x0002:
-		psm = get_unaligned((u16 *) skb->data);
+		psm = get_unaligned((__le16 *) skb->data);
 		skb_pull(skb, 2);
 		l2cap_conless_channel(conn, psm, skb);
 		break;
-- 
cgit v1.2.3


From 88219a0f65ae14ba744fa424604c965b6f1c1a8d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 29 Jul 2007 00:17:25 -0700
Subject: [BLUETOOTH]: pass (host-endian) cmd length as explicit argument to
 l2cap_conf_req()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/l2cap.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 09126bf0684..03309d29d30 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1530,7 +1530,7 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd
 	return 0;
 }
 
-static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
+static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data)
 {
 	struct l2cap_conf_req *req = (struct l2cap_conf_req *) data;
 	u16 dcid, flags;
@@ -1550,7 +1550,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 		goto unlock;
 
 	/* Reject if config buffer is too small. */
-	len = cmd->len - sizeof(*req);
+	len = cmd_len - sizeof(*req);
 	if (l2cap_pi(sk)->conf_len + len > sizeof(l2cap_pi(sk)->conf_req)) {
 		l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP,
 				l2cap_build_conf_rsp(sk, rsp,
@@ -1748,15 +1748,17 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk
 	l2cap_raw_recv(conn, skb);
 
 	while (len >= L2CAP_CMD_HDR_SIZE) {
+		u16 cmd_len;
 		memcpy(&cmd, data, L2CAP_CMD_HDR_SIZE);
 		data += L2CAP_CMD_HDR_SIZE;
 		len  -= L2CAP_CMD_HDR_SIZE;
 
-		cmd.len = __le16_to_cpu(cmd.len);
+		cmd_len = le16_to_cpu(cmd.len);
+		cmd.len = cmd_len;
 
-		BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd.code, cmd.len, cmd.ident);
+		BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd.code, cmd_len, cmd.ident);
 
-		if (cmd.len > len || !cmd.ident) {
+		if (cmd_len > len || !cmd.ident) {
 			BT_DBG("corrupted command");
 			break;
 		}
@@ -1775,7 +1777,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk
 			break;
 
 		case L2CAP_CONF_REQ:
-			err = l2cap_config_req(conn, &cmd, data);
+			err = l2cap_config_req(conn, &cmd, cmd_len, data);
 			break;
 
 		case L2CAP_CONF_RSP:
@@ -1791,7 +1793,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk
 			break;
 
 		case L2CAP_ECHO_REQ:
-			l2cap_send_cmd(conn, cmd.ident, L2CAP_ECHO_RSP, cmd.len, data);
+			l2cap_send_cmd(conn, cmd.ident, L2CAP_ECHO_RSP, cmd_len, data);
 			break;
 
 		case L2CAP_ECHO_RSP:
@@ -1820,8 +1822,8 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk
 			l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej);
 		}
 
-		data += cmd.len;
-		len  -= cmd.len;
+		data += cmd_len;
+		len  -= cmd_len;
 	}
 
 	kfree_skb(skb);
-- 
cgit v1.2.3


From d83852822cb58f0beaa2d06b7c1e9e005e0c4f94 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 29 Jul 2007 00:18:23 -0700
Subject: [BLUETOOTH] l2cap: don't mangle cmd.len

Since nobody uses it after we convert it to host-endian,
no need to do that at all.  At that point l2cap is endian-clean.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/l2cap.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 03309d29d30..c4e4ce4ebb2 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1754,7 +1754,6 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk
 		len  -= L2CAP_CMD_HDR_SIZE;
 
 		cmd_len = le16_to_cpu(cmd.len);
-		cmd.len = cmd_len;
 
 		BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd.code, cmd_len, cmd.ident);
 
-- 
cgit v1.2.3


From 25a8b2545bfb2238f6f66da8032d768c6870c863 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Mon, 30 Jul 2007 16:11:48 -0700
Subject: [PKTGEN]: Add missing KERN_* tags to printk()s.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 103 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 57 insertions(+), 46 deletions(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index bca787fdbc5..be985f068e7 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -567,7 +567,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user * buf,
 		pktgen_run_all_threads();
 
 	else
-		printk("pktgen: Unknown command: %s\n", data);
+		printk(KERN_WARNING "pktgen: Unknown command: %s\n", data);
 
 	err = count;
 
@@ -908,14 +908,14 @@ static ssize_t pktgen_if_write(struct file *file,
 	pg_result = &(pkt_dev->result[0]);
 
 	if (count < 1) {
-		printk("pktgen: wrong command format\n");
+		printk(KERN_WARNING "pktgen: wrong command format\n");
 		return -EINVAL;
 	}
 
 	max = count - i;
 	tmp = count_trail_chars(&user_buffer[i], max);
 	if (tmp < 0) {
-		printk("pktgen: illegal format\n");
+		printk(KERN_WARNING "pktgen: illegal format\n");
 		return tmp;
 	}
 	i += tmp;
@@ -943,7 +943,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		if (copy_from_user(tb, user_buffer, count))
 			return -EFAULT;
 		tb[count] = 0;
-		printk("pktgen: %s,%lu  buffer -:%s:-\n", name,
+		printk(KERN_DEBUG "pktgen: %s,%lu  buffer -:%s:-\n", name,
 		       (unsigned long)count, tb);
 	}
 
@@ -1248,7 +1248,7 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->cur_daddr = pkt_dev->daddr_min;
 		}
 		if (debug)
-			printk("pktgen: dst_min set to: %s\n",
+			printk(KERN_DEBUG "pktgen: dst_min set to: %s\n",
 			       pkt_dev->dst_min);
 		i += len;
 		sprintf(pg_result, "OK: dst_min=%s", pkt_dev->dst_min);
@@ -1271,7 +1271,7 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->cur_daddr = pkt_dev->daddr_max;
 		}
 		if (debug)
-			printk("pktgen: dst_max set to: %s\n",
+			printk(KERN_DEBUG "pktgen: dst_max set to: %s\n",
 			       pkt_dev->dst_max);
 		i += len;
 		sprintf(pg_result, "OK: dst_max=%s", pkt_dev->dst_max);
@@ -1294,7 +1294,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr);
 
 		if (debug)
-			printk("pktgen: dst6 set to: %s\n", buf);
+			printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf);
 
 		i += len;
 		sprintf(pg_result, "OK: dst6=%s", buf);
@@ -1317,7 +1317,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		ipv6_addr_copy(&pkt_dev->cur_in6_daddr,
 			       &pkt_dev->min_in6_daddr);
 		if (debug)
-			printk("pktgen: dst6_min set to: %s\n", buf);
+			printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf);
 
 		i += len;
 		sprintf(pg_result, "OK: dst6_min=%s", buf);
@@ -1338,7 +1338,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		fmt_ip6(buf, pkt_dev->max_in6_daddr.s6_addr);
 
 		if (debug)
-			printk("pktgen: dst6_max set to: %s\n", buf);
+			printk(KERN_DEBUG "pktgen: dst6_max set to: %s\n", buf);
 
 		i += len;
 		sprintf(pg_result, "OK: dst6_max=%s", buf);
@@ -1361,7 +1361,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr);
 
 		if (debug)
-			printk("pktgen: src6 set to: %s\n", buf);
+			printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf);
 
 		i += len;
 		sprintf(pg_result, "OK: src6=%s", buf);
@@ -1382,7 +1382,7 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->cur_saddr = pkt_dev->saddr_min;
 		}
 		if (debug)
-			printk("pktgen: src_min set to: %s\n",
+			printk(KERN_DEBUG "pktgen: src_min set to: %s\n",
 			       pkt_dev->src_min);
 		i += len;
 		sprintf(pg_result, "OK: src_min=%s", pkt_dev->src_min);
@@ -1403,7 +1403,7 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->cur_saddr = pkt_dev->saddr_max;
 		}
 		if (debug)
-			printk("pktgen: src_max set to: %s\n",
+			printk(KERN_DEBUG "pktgen: src_max set to: %s\n",
 			       pkt_dev->src_max);
 		i += len;
 		sprintf(pg_result, "OK: src_max=%s", pkt_dev->src_max);
@@ -1533,7 +1533,7 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->svlan_id = 0xffff;
 
 			if (debug)
-				printk("pktgen: VLAN/SVLAN auto turned off\n");
+				printk(KERN_DEBUG "pktgen: VLAN/SVLAN auto turned off\n");
 		}
 		return count;
 	}
@@ -1548,10 +1548,10 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->vlan_id = value;  /* turn on VLAN */
 
 			if (debug)
-				printk("pktgen: VLAN turned on\n");
+				printk(KERN_DEBUG "pktgen: VLAN turned on\n");
 
 			if (debug && pkt_dev->nr_labels)
-				printk("pktgen: MPLS auto turned off\n");
+				printk(KERN_DEBUG "pktgen: MPLS auto turned off\n");
 
 			pkt_dev->nr_labels = 0;    /* turn off MPLS */
 			sprintf(pg_result, "OK: vlan_id=%u", pkt_dev->vlan_id);
@@ -1560,7 +1560,7 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->svlan_id = 0xffff;
 
 			if (debug)
-				printk("pktgen: VLAN/SVLAN turned off\n");
+				printk(KERN_DEBUG "pktgen: VLAN/SVLAN turned off\n");
 		}
 		return count;
 	}
@@ -1605,10 +1605,10 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->svlan_id = value;  /* turn on SVLAN */
 
 			if (debug)
-				printk("pktgen: SVLAN turned on\n");
+				printk(KERN_DEBUG "pktgen: SVLAN turned on\n");
 
 			if (debug && pkt_dev->nr_labels)
-				printk("pktgen: MPLS auto turned off\n");
+				printk(KERN_DEBUG "pktgen: MPLS auto turned off\n");
 
 			pkt_dev->nr_labels = 0;    /* turn off MPLS */
 			sprintf(pg_result, "OK: svlan_id=%u", pkt_dev->svlan_id);
@@ -1617,7 +1617,7 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->svlan_id = 0xffff;
 
 			if (debug)
-				printk("pktgen: VLAN/SVLAN turned off\n");
+				printk(KERN_DEBUG "pktgen: VLAN/SVLAN turned off\n");
 		}
 		return count;
 	}
@@ -1777,10 +1777,11 @@ static ssize_t pktgen_thread_write(struct file *file,
 	i += len;
 
 	if (debug)
-		printk("pktgen: t=%s, count=%lu\n", name, (unsigned long)count);
+		printk(KERN_DEBUG "pktgen: t=%s, count=%lu\n",
+		       name, (unsigned long)count);
 
 	if (!t) {
-		printk("pktgen: ERROR: No thread\n");
+		printk(KERN_ERR "pktgen: ERROR: No thread\n");
 		ret = -EINVAL;
 		goto out;
 	}
@@ -1891,8 +1892,8 @@ static void pktgen_mark_device(const char *ifname)
 		mutex_lock(&pktgen_thread_lock);
 
 		if (++i >= max_tries) {
-			printk("pktgen_mark_device: timed out after waiting "
-			       "%d msec for device %s to be removed\n",
+			printk(KERN_ERR "pktgen_mark_device: timed out after "
+			       "waiting %d msec for device %s to be removed\n",
 			       msec_per_try * i, ifname);
 			break;
 		}
@@ -1962,15 +1963,15 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
 
 	odev = dev_get_by_name(ifname);
 	if (!odev) {
-		printk("pktgen: no such netdevice: \"%s\"\n", ifname);
+		printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname);
 		return -ENODEV;
 	}
 
 	if (odev->type != ARPHRD_ETHER) {
-		printk("pktgen: not an ethernet device: \"%s\"\n", ifname);
+		printk(KERN_ERR "pktgen: not an ethernet device: \"%s\"\n", ifname);
 		err = -EINVAL;
 	} else if (!netif_running(odev)) {
-		printk("pktgen: device is down: \"%s\"\n", ifname);
+		printk(KERN_ERR "pktgen: device is down: \"%s\"\n", ifname);
 		err = -ENETDOWN;
 	} else {
 		pkt_dev->odev = odev;
@@ -1987,7 +1988,8 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
 static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 {
 	if (!pkt_dev->odev) {
-		printk("pktgen: ERROR: pkt_dev->odev == NULL in setup_inject.\n");
+		printk(KERN_ERR "pktgen: ERROR: pkt_dev->odev == NULL in "
+		       "setup_inject.\n");
 		sprintf(pkt_dev->result,
 			"ERROR: pkt_dev->odev == NULL in setup_inject.\n");
 		return;
@@ -2049,7 +2051,8 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 			}
 			rcu_read_unlock();
 			if (err)
-				printk("pktgen: ERROR: IPv6 link address not availble.\n");
+				printk(KERN_ERR "pktgen: ERROR: IPv6 link "
+				       "address not availble.\n");
 		}
 #endif
 	} else {
@@ -2441,7 +2444,8 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev,
 			if (nhead >0) {
 				ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
 				if (ret < 0) {
-					printk("Error expanding ipsec packet %d\n",ret);
+					printk(KERN_ERR "Error expanding "
+					       "ipsec packet %d\n",ret);
 					return 0;
 				}
 			}
@@ -2450,7 +2454,8 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev,
 			skb_pull(skb, ETH_HLEN);
 			ret = pktgen_output_ipsec(skb, pkt_dev);
 			if (ret) {
-				printk("Error creating ipsec packet %d\n",ret);
+				printk(KERN_ERR "Error creating ipsec "
+				       "packet %d\n",ret);
 				kfree_skb(skb);
 				return 0;
 			}
@@ -3184,8 +3189,8 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
 	int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1;
 
 	if (!pkt_dev->running) {
-		printk("pktgen: interface: %s is already stopped\n",
-		       pkt_dev->odev->name);
+		printk(KERN_WARNING "pktgen: interface: %s is already "
+		       "stopped\n", pkt_dev->odev->name);
 		return -EINVAL;
 	}
 
@@ -3360,7 +3365,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 
 			pkt_dev->skb = fill_packet(odev, pkt_dev);
 			if (pkt_dev->skb == NULL) {
-				printk("pktgen: ERROR: couldn't allocate skb in fill_packet.\n");
+				printk(KERN_ERR "pktgen: ERROR: couldn't "
+				       "allocate skb in fill_packet.\n");
 				schedule();
 				pkt_dev->clone_count--;	/* back out increment, OOM */
 				goto out;
@@ -3565,7 +3571,8 @@ static int add_dev_to_thread(struct pktgen_thread *t,
 	if_lock(t);
 
 	if (pkt_dev->pg_thread) {
-		printk("pktgen: ERROR:  already assigned to a thread.\n");
+		printk(KERN_ERR "pktgen: ERROR: already assigned "
+		       "to a thread.\n");
 		rv = -EBUSY;
 		goto out;
 	}
@@ -3590,7 +3597,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
 
 	pkt_dev = __pktgen_NN_threads(ifname, FIND);
 	if (pkt_dev) {
-		printk("pktgen: ERROR: interface already used.\n");
+		printk(KERN_ERR "pktgen: ERROR: interface already used.\n");
 		return -EBUSY;
 	}
 
@@ -3632,7 +3639,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
 
 	pkt_dev->entry = create_proc_entry(ifname, 0600, pg_proc_dir);
 	if (!pkt_dev->entry) {
-		printk("pktgen: cannot create %s/%s procfs entry.\n",
+		printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n",
 		       PG_PROC_DIR, ifname);
 		err = -EINVAL;
 		goto out2;
@@ -3665,7 +3672,8 @@ static int __init pktgen_create_thread(int cpu)
 
 	t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL);
 	if (!t) {
-		printk("pktgen: ERROR: out of memory, can't create new thread.\n");
+		printk(KERN_ERR "pktgen: ERROR: out of memory, can't "
+		       "create new thread.\n");
 		return -ENOMEM;
 	}
 
@@ -3678,7 +3686,8 @@ static int __init pktgen_create_thread(int cpu)
 
 	p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
 	if (IS_ERR(p)) {
-		printk("pktgen: kernel_thread() failed for cpu %d\n", t->cpu);
+		printk(KERN_ERR "pktgen: kernel_thread() failed "
+		       "for cpu %d\n", t->cpu);
 		list_del(&t->th_list);
 		kfree(t);
 		return PTR_ERR(p);
@@ -3688,7 +3697,7 @@ static int __init pktgen_create_thread(int cpu)
 
 	pe = create_proc_entry(t->tsk->comm, 0600, pg_proc_dir);
 	if (!pe) {
-		printk("pktgen: cannot create %s/%s procfs entry.\n",
+		printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n",
 		       PG_PROC_DIR, t->tsk->comm);
 		kthread_stop(p);
 		list_del(&t->th_list);
@@ -3727,7 +3736,8 @@ static int pktgen_remove_device(struct pktgen_thread *t,
 	pr_debug("pktgen: remove_device pkt_dev=%p\n", pkt_dev);
 
 	if (pkt_dev->running) {
-		printk("pktgen:WARNING: trying to remove a running interface, stopping it now.\n");
+		printk(KERN_WARNING "pktgen: WARNING: trying to remove a "
+		       "running interface, stopping it now.\n");
 		pktgen_stop_device(pkt_dev);
 	}
 
@@ -3759,7 +3769,7 @@ static int __init pg_init(void)
 	int cpu;
 	struct proc_dir_entry *pe;
 
-	printk(version);
+	printk(KERN_INFO "%s", version);
 
 	pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net);
 	if (!pg_proc_dir)
@@ -3768,8 +3778,8 @@ static int __init pg_init(void)
 
 	pe = create_proc_entry(PGCTRL, 0600, pg_proc_dir);
 	if (pe == NULL) {
-		printk("pktgen: ERROR: cannot create %s procfs entry.\n",
-		       PGCTRL);
+		printk(KERN_ERR "pktgen: ERROR: cannot create %s "
+		       "procfs entry.\n", PGCTRL);
 		proc_net_remove(PG_PROC_DIR);
 		return -EINVAL;
 	}
@@ -3785,12 +3795,13 @@ static int __init pg_init(void)
 
 		err = pktgen_create_thread(cpu);
 		if (err)
-			printk("pktgen: WARNING: Cannot create thread for cpu %d (%d)\n",
-					cpu, err);
+			printk(KERN_WARNING "pktgen: WARNING: Cannot create "
+			       "thread for cpu %d (%d)\n", cpu, err);
 	}
 
 	if (list_empty(&pktgen_threads)) {
-		printk("pktgen: ERROR: Initialization failed for all threads\n");
+		printk(KERN_ERR "pktgen: ERROR: Initialization failed for "
+		       "all threads\n");
 		unregister_netdevice_notifier(&pktgen_notifier_block);
 		remove_proc_entry(PGCTRL, pg_proc_dir);
 		proc_net_remove(PG_PROC_DIR);
-- 
cgit v1.2.3


From ccc7911fbd1c5af9b60453d3a8cca0a36402fee5 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 30 Jul 2007 16:20:12 -0700
Subject: [IPVS]: Use skb_forward_csum

As a path that forwards packets, IPVS should be using
skb_forward_csum instead of directly setting ip_summed
to CHECKSUM_NONE.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipvs/ip_vs_xmit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 900ce29db38..666e080a74a 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -128,7 +128,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
 #define IP_VS_XMIT(skb, rt)				\
 do {							\
 	(skb)->ipvs_property = 1;			\
-	(skb)->ip_summed = CHECKSUM_NONE;		\
+	skb_forward_csum(skb);				\
 	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL,	\
 		(rt)->u.dst.dev, dst_output);		\
 } while (0)
-- 
cgit v1.2.3


From 7ce1b0edcb11f90f6fc5e0ceecff467f329889a0 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 30 Jul 2007 16:29:40 -0700
Subject: [NET]: Call uninit if necessary in register_netdevice

This patch makes register_netdevice call dev->uninit if the regsitration
fails after dev->init has completed successfully.  Very few drivers use
the init/uninit calls but at least one (drivers/net/wan/sealevel.c) may
leak without this change.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 8c9518e946f..7bfea5e9030 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3337,7 +3337,7 @@ int register_netdevice(struct net_device *dev)
 
 	if (!dev_valid_name(dev->name)) {
 		ret = -EINVAL;
-		goto out;
+		goto err_uninit;
 	}
 
 	dev->ifindex = dev_new_index();
@@ -3351,7 +3351,7 @@ int register_netdevice(struct net_device *dev)
 			= hlist_entry(p, struct net_device, name_hlist);
 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
 			ret = -EEXIST;
-			goto out;
+			goto err_uninit;
 		}
 	}
 
@@ -3411,7 +3411,7 @@ int register_netdevice(struct net_device *dev)
 
 	ret = netdev_register_sysfs(dev);
 	if (ret)
-		goto out;
+		goto err_uninit;
 	dev->reg_state = NETREG_REGISTERED;
 
 	/*
@@ -3436,6 +3436,11 @@ int register_netdevice(struct net_device *dev)
 
 out:
 	return ret;
+
+err_uninit:
+	if (dev->uninit)
+		dev->uninit(dev);
+	goto out;
 }
 
 /**
-- 
cgit v1.2.3


From 7f988eab57bd22884bbc452fb04c6c18738666b3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 30 Jul 2007 16:35:46 -0700
Subject: [NET]: Take dev_base_lock when moving device name hash list entry

When we added name-based hashing the dev_base_lock was designated as the
lock to take when changing the name hash list.  Unfortunately, because
it was a preexisting lock that just happened to be taken in the right
spots we neglected to take it in dev_change_name.

The race can affect calles of __dev_get_by_name that do so without taking
the RTNL.  They may end up walking down the wrong hash chain and end up
missing the device that they're looking for.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 7bfea5e9030..346cbf66534 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -839,8 +839,12 @@ int dev_change_name(struct net_device *dev, char *newname)
 		strlcpy(dev->name, newname, IFNAMSIZ);
 
 	device_rename(&dev->dev, dev->name);
+
+	write_lock_bh(&dev_base_lock);
 	hlist_del(&dev->name_hlist);
 	hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
+	write_unlock_bh(&dev_base_lock);
+
 	raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
 
 	return err;
-- 
cgit v1.2.3


From fcc5a03ac42564e9e255c1134dda47442289e466 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 30 Jul 2007 17:03:38 -0700
Subject: [NET]: Allow netdev REGISTER/CHANGENAME events to fail

This patch adds code to allow errors to be passed up from event
handlers of NETDEV_REGISTER and NETDEV_CHANGENAME.  It also adds
the notifier_from_errno/notifier_to_errnor helpers to pass the
errno value up to the notifier caller.

If an error is detected when a device is registered, it causes
that operation to fail.  A NETDEV_UNREGISTER will be sent to
all event handlers.

Similarly if NETDEV_CHANGENAME fails the original name is restored
and a new NETDEV_CHANGENAME event is sent.

As such all event handlers must be idempotent with respect to
these events.

When an event handler is registered NETDEV_REGISTER events are
sent for all devices currently registered.  Should any of them
fail, we will send NETDEV_GOING_DOWN/NETDEV_DOWN/NETDEV_UNREGISTER
events to that handler for the devices which have already been
registered with it.  The handler registration itself will fail.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 52 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 346cbf66534..6cc8a70350a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -817,7 +817,9 @@ int dev_alloc_name(struct net_device *dev, const char *name)
  */
 int dev_change_name(struct net_device *dev, char *newname)
 {
+	char oldname[IFNAMSIZ];
 	int err = 0;
+	int ret;
 
 	ASSERT_RTNL();
 
@@ -827,6 +829,8 @@ int dev_change_name(struct net_device *dev, char *newname)
 	if (!dev_valid_name(newname))
 		return -EINVAL;
 
+	memcpy(oldname, dev->name, IFNAMSIZ);
+
 	if (strchr(newname, '%')) {
 		err = dev_alloc_name(dev, newname);
 		if (err < 0)
@@ -838,6 +842,7 @@ int dev_change_name(struct net_device *dev, char *newname)
 	else
 		strlcpy(dev->name, newname, IFNAMSIZ);
 
+rollback:
 	device_rename(&dev->dev, dev->name);
 
 	write_lock_bh(&dev_base_lock);
@@ -845,7 +850,20 @@ int dev_change_name(struct net_device *dev, char *newname)
 	hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
 	write_unlock_bh(&dev_base_lock);
 
-	raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+	ret = raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+	ret = notifier_to_errno(ret);
+
+	if (ret) {
+		if (err) {
+			printk(KERN_ERR
+			       "%s: name change rollback failed: %d.\n",
+			       dev->name, ret);
+		} else {
+			err = ret;
+			memcpy(dev->name, oldname, IFNAMSIZ);
+			goto rollback;
+		}
+	}
 
 	return err;
 }
@@ -1058,20 +1076,43 @@ int dev_close(struct net_device *dev)
 int register_netdevice_notifier(struct notifier_block *nb)
 {
 	struct net_device *dev;
+	struct net_device *last;
 	int err;
 
 	rtnl_lock();
 	err = raw_notifier_chain_register(&netdev_chain, nb);
-	if (!err) {
-		for_each_netdev(dev) {
-			nb->notifier_call(nb, NETDEV_REGISTER, dev);
+	if (err)
+		goto unlock;
 
-			if (dev->flags & IFF_UP)
-				nb->notifier_call(nb, NETDEV_UP, dev);
-		}
+	for_each_netdev(dev) {
+		err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
+		err = notifier_to_errno(err);
+		if (err)
+			goto rollback;
+
+		if (!(dev->flags & IFF_UP))
+			continue;
+
+		nb->notifier_call(nb, NETDEV_UP, dev);
 	}
+
+unlock:
 	rtnl_unlock();
 	return err;
+
+rollback:
+	last = dev;
+	for_each_netdev(dev) {
+		if (dev == last)
+			break;
+
+		if (dev->flags & IFF_UP) {
+			nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
+			nb->notifier_call(nb, NETDEV_DOWN, dev);
+		}
+		nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+	}
+	goto unlock;
 }
 
 /**
@@ -3434,9 +3475,10 @@ int register_netdevice(struct net_device *dev)
 	write_unlock_bh(&dev_base_lock);
 
 	/* Notify protocols, that a new device appeared. */
-	raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
-
-	ret = 0;
+	ret = raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
+	ret = notifier_to_errno(ret);
+	if (ret)
+		unregister_netdevice(dev);
 
 out:
 	return ret;
-- 
cgit v1.2.3


From b217d616a15fcfb3caf2a72c1a071c6d3f182f8d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 30 Jul 2007 17:04:52 -0700
Subject: [IPV4/IPV6]: Fail registration if inet device construction fails

Now that netdev notifications can fail, we can use this to signal
errors during registration for IPv4/IPv6.  In particular, if we
fail to allocate memory for the inet device, we can fail the netdev
registration.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c  | 5 ++---
 net/ipv6/addrconf.c | 8 +++++---
 2 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index abf6352f990..5b77bdaa57d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1056,10 +1056,9 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 	if (!in_dev) {
 		if (event == NETDEV_REGISTER) {
 			in_dev = inetdev_init(dev);
+			if (!in_dev)
+				return notifier_from_errno(-ENOMEM);
 			if (dev == &loopback_dev) {
-				if (!in_dev)
-					panic("devinet: "
-					      "Failed to create loopback\n");
 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
 			}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 06012920912..91ef3be5aba 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2256,14 +2256,14 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 	struct net_device *dev = (struct net_device *) data;
 	struct inet6_dev *idev = __in6_dev_get(dev);
 	int run_pending = 0;
+	int err;
 
 	switch(event) {
 	case NETDEV_REGISTER:
 		if (!idev && dev->mtu >= IPV6_MIN_MTU) {
 			idev = ipv6_add_dev(dev);
 			if (!idev)
-				printk(KERN_WARNING "IPv6: add_dev failed for %s\n",
-					dev->name);
+				return notifier_from_errno(-ENOMEM);
 		}
 		break;
 	case NETDEV_UP:
@@ -2373,7 +2373,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 					      NULL);
 			addrconf_sysctl_register(idev, &idev->cnf);
 #endif
-			snmp6_register_dev(idev);
+			err = snmp6_register_dev(idev);
+			if (err)
+				return notifier_from_errno(err);
 		}
 		break;
 	}
-- 
cgit v1.2.3


From bdba91ec70fb5ccbdeb1c7068319adc6ea9e1a7d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 30 Jul 2007 17:07:14 -0700
Subject: [NET_SCHED]: Fix prio/ingress classification logic error

Fix handling of empty or completely non-matching filter chains. In
that case -1 is returned and tcf_result is uninitialized, the
qdisc should fall back to default classification in that case.

Noticed by PJ Waskiewicz <peter.p.waskiewicz.jr@intel.com>.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_ingress.c | 3 +--
 net/sched/sch_prio.c    | 7 +++----
 2 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 51f16b0af19..2d32fd27496 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -158,9 +158,8 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 			break;
 		case TC_ACT_RECLASSIFY:
 		case TC_ACT_OK:
-		case TC_ACT_UNSPEC:
-		default:
 			skb->tc_index = TC_H_MIN(res.classid);
+		default:
 			result = TC_ACT_OK;
 			break;
 	}
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 2d8c08493d6..71bafde353a 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -38,9 +38,11 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 	struct prio_sched_data *q = qdisc_priv(sch);
 	u32 band = skb->priority;
 	struct tcf_result res;
+	int err;
 
 	*qerr = NET_XMIT_BYPASS;
 	if (TC_H_MAJ(skb->priority) != sch->handle) {
+		err = tc_classify(skb, q->filter_list, &res);
 #ifdef CONFIG_NET_CLS_ACT
 		switch (tc_classify(skb, q->filter_list, &res)) {
 		case TC_ACT_STOLEN:
@@ -49,11 +51,8 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		case TC_ACT_SHOT:
 			return NULL;
 		}
-
-		if (!q->filter_list ) {
-#else
-		if (!q->filter_list || tc_classify(skb, q->filter_list, &res)) {
 #endif
+		if (!q->filter_list || err < 0) {
 			if (TC_H_MAJ(band))
 				band = 0;
 			band = q->prio2band[band&TC_PRIO_MAX];
-- 
cgit v1.2.3


From ffc8fefaf289fa485bc5c33e71572e6ce559d569 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <trash@kaber.net>
Date: Mon, 30 Jul 2007 17:11:50 -0700
Subject: [NET]: Fix sch_api to properly set sch->parent on the root.

Fix sch_api to correctly set sch->parent for both ingress and egress
qdiscs in qdisc_create().

Signed-off-by: Patrick McHardy <trash@kaber.net>
Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 13c09bc32aa..dee0d5fb39c 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -380,6 +380,10 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
 		return;
 	while ((parentid = sch->parent)) {
 		sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
+		if (sch == NULL) {
+			WARN_ON(parentid != TC_H_ROOT);
+			return;
+		}
 		cops = sch->ops->cl_ops;
 		if (cops->qlen_notify) {
 			cl = cops->get(sch, parentid);
@@ -420,8 +424,6 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 			unsigned long cl = cops->get(parent, classid);
 			if (cl) {
 				err = cops->graft(parent, cl, new, old);
-				if (new)
-					new->parent = classid;
 				cops->put(parent, cl);
 			}
 		}
@@ -436,7 +438,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
  */
 
 static struct Qdisc *
-qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
+qdisc_create(struct net_device *dev, u32 parent, u32 handle,
+	   struct rtattr **tca, int *errp)
 {
 	int err;
 	struct rtattr *kind = tca[TCA_KIND-1];
@@ -482,6 +485,8 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
 		goto err_out2;
 	}
 
+	sch->parent = parent;
+
 	if (handle == TC_H_INGRESS) {
 		sch->flags |= TCQ_F_INGRESS;
 		sch->stats_lock = &dev->ingress_lock;
@@ -758,9 +763,11 @@ create_n_graft:
 	if (!(n->nlmsg_flags&NLM_F_CREATE))
 		return -ENOENT;
 	if (clid == TC_H_INGRESS)
-		q = qdisc_create(dev, tcm->tcm_parent, tca, &err);
+		q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_parent,
+				 tca, &err);
 	else
-		q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
+		q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_handle,
+				 tca, &err);
 	if (q == NULL) {
 		if (err == -EAGAIN)
 			goto replay;
-- 
cgit v1.2.3


From 0773192b0f8914222cd27e682c49f978a6c7860a Mon Sep 17 00:00:00 2001
From: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Date: Mon, 30 Jul 2007 17:13:45 -0700
Subject: [NET]: Fix prio_tune() handling of root qdisc.

Fix the check in prio_tune() to see if sch->parent is TC_H_ROOT instead of
sch->handle to load or reject the qdisc for multiqueue devices.

Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_prio.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 71bafde353a..4a49db65772 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -238,11 +238,13 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
 	/* If we're multiqueue, make sure the number of incoming bands
 	 * matches the number of queues on the device we're associating with.
 	 * If the number of bands requested is zero, then set q->bands to
-	 * dev->egress_subqueue_count.
+	 * dev->egress_subqueue_count.  Also, the root qdisc must be the
+	 * only one that is enabled for multiqueue, since it's the only one
+	 * that interacts with the underlying device.
 	 */
 	q->mq = RTA_GET_FLAG(tb[TCA_PRIO_MQ - 1]);
 	if (q->mq) {
-		if (sch->handle != TC_H_ROOT)
+		if (sch->parent != TC_H_ROOT)
 			return -EINVAL;
 		if (netif_is_multiqueue(sch->dev)) {
 			if (q->bands == 0)
-- 
cgit v1.2.3


From c61a7d10efbd187ab9bb54871238ebd1dfcacd44 Mon Sep 17 00:00:00 2001
From: Dave Johnson <djohnson@sw.starentnetworks.com>
Date: Mon, 30 Jul 2007 17:19:31 -0700
Subject: [IPV6]: ipv6_addr_type() doesn't know about RFC4193 addresses.

ipv6_addr_type() doesn't check for 'Unique Local IPv6 Unicast
Addresses' (RFC4193) and returns IPV6_ADDR_RESERVED for that range.

SCTP uses this function and will fail bind() and connect() calls that
use RFC4193 addresses, SCTP will also ignore inbound connections from
RFC4193 addresses if listening on IPV6_ADDR_ANY.

There may be other users of ipv6_addr_type() that could also have
problems.

Signed-off-by: Dave Johnson <djohnson@sw.starentnetworks.com>
Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf_core.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index faaefb69229..3f82e9542ed 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -50,6 +50,9 @@ int __ipv6_addr_type(const struct in6_addr *addr)
 	if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000))
 		return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST |
 			IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL));		/* addr-select 3.1 */
+	if ((st & htonl(0xFE000000)) == htonl(0xFC000000))
+		return (IPV6_ADDR_UNICAST |
+			IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL));			/* RFC 4193 */
 
 	if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) {
 		if (addr->s6_addr32[2] == 0) {
-- 
cgit v1.2.3


From fea1ab0fcf117bc1e9c9285af6463d31d9cc0ac6 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 30 Jul 2007 18:04:09 -0700
Subject: [PKTGEN]: make get_ipsec_sa() static and non-inline

Non-static inline code usually doesn't makes sense.

In this case making is static and non-inline is the correct solution.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index be985f068e7..7bae576ac11 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2159,8 +2159,7 @@ static inline int f_pick(struct pktgen_dev *pkt_dev)
 /* If there was already an IPSEC SA, we keep it as is, else
  * we go look for it ...
 */
-inline
-void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
+static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
 {
 	struct xfrm_state *x = pkt_dev->flows[flow].x;
 	if (!x) {
-- 
cgit v1.2.3


From 1a3a206f7f2aa50545cc3d056405ad7bc3c9bca8 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 30 Jul 2007 18:04:57 -0700
Subject: [NETFILTER]: Make nf_ct_ipv6_skip_exthdr() static.

nf_ct_ipv6_skip_exthdr() can now become static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 36df2218b66..3153e15e0f7 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -86,8 +86,8 @@ static int ipv6_print_conntrack(struct seq_file *s,
  *        - Note also special handling of AUTH header. Thanks to IPsec wizards.
  */
 
-int nf_ct_ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
-			   int len)
+static int nf_ct_ipv6_skip_exthdr(const struct sk_buff *skb, int start,
+				  u8 *nexthdrp, int len)
 {
 	u8 nexthdr = *nexthdrp;
 
-- 
cgit v1.2.3


From 131116989b5c349f9b1ab9ee083d37cc73160ac8 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 30 Jul 2007 18:05:45 -0700
Subject: [AF_UNIX]: Make code static.

The following code can now become static:
- struct unix_socket_table
- unix_table_lock

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 65ebccc0a69..a05c34260e7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -118,14 +118,40 @@
 
 int sysctl_unix_max_dgram_qlen __read_mostly = 10;
 
-struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
-DEFINE_SPINLOCK(unix_table_lock);
+static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
+static DEFINE_SPINLOCK(unix_table_lock);
 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
 
 #define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
 
 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
 
+static struct sock *first_unix_socket(int *i)
+{
+	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
+		if (!hlist_empty(&unix_socket_table[*i]))
+			return __sk_head(&unix_socket_table[*i]);
+	}
+	return NULL;
+}
+
+static struct sock *next_unix_socket(int *i, struct sock *s)
+{
+	struct sock *next = sk_next(s);
+	/* More in this chain? */
+	if (next)
+		return next;
+	/* Look for next non-empty chain. */
+	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
+		if (!hlist_empty(&unix_socket_table[*i]))
+			return __sk_head(&unix_socket_table[*i]);
+	}
+	return NULL;
+}
+
+#define forall_unix_sockets(i, s) \
+	for (s = first_unix_socket(&(i)); s; s = next_unix_socket(&(i),(s)))
+
 #ifdef CONFIG_SECURITY_NETWORK
 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 {
-- 
cgit v1.2.3


From 1e757f9996114f713a79d3fbcd08739efcfc5c34 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Mon, 30 Jul 2007 19:48:37 -0700
Subject: [TCP]: Fix ratehalving with bidirectional flows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Actually, the ratehalving seems to work too well, as cwnd is
reduced on every second ACK even though the packets in flight
remains unchanged. Recoveries in a bidirectional flows suffer
quite badly because of this, both NewReno and SACK are affected.

After this patch, rate halving is performed for ACK only if
packets in flight was supposedly changed too.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4b255fe999d..41163ddc312 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1851,19 +1851,22 @@ static inline u32 tcp_cwnd_min(const struct sock *sk)
 }
 
 /* Decrease cwnd each second ack. */
-static void tcp_cwnd_down(struct sock *sk)
+static void tcp_cwnd_down(struct sock *sk, int flag)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int decr = tp->snd_cwnd_cnt + 1;
 
-	tp->snd_cwnd_cnt = decr&1;
-	decr >>= 1;
+	if ((flag&FLAG_FORWARD_PROGRESS) ||
+	    (IsReno(tp) && !(flag&FLAG_NOT_DUP))) {
+		tp->snd_cwnd_cnt = decr&1;
+		decr >>= 1;
 
-	if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
-		tp->snd_cwnd -= decr;
+		if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
+			tp->snd_cwnd -= decr;
 
-	tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1);
-	tp->snd_cwnd_stamp = tcp_time_stamp;
+		tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1);
+		tp->snd_cwnd_stamp = tcp_time_stamp;
+	}
 }
 
 /* Nothing was retransmitted or returned timestamp is less
@@ -2060,7 +2063,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
 		}
 		tcp_moderate_cwnd(tp);
 	} else {
-		tcp_cwnd_down(sk);
+		tcp_cwnd_down(sk, flag);
 	}
 }
 
@@ -2260,7 +2263,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 
 	if (is_dupack || tcp_head_timedout(sk))
 		tcp_update_scoreboard(sk);
-	tcp_cwnd_down(sk);
+	tcp_cwnd_down(sk, flag);
 	tcp_xmit_retransmit_queue(sk);
 }
 
-- 
cgit v1.2.3


From b8ed601cefe7a4014b93560bd846caf44f25b1c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Mon, 30 Jul 2007 19:51:12 -0700
Subject: [TCP]: Bidir flow must not disregard SACK blocks for lost marking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It's possible that new SACK blocks that should trigger new LOST
markings arrive with new data (which previously made is_dupack
false). In addition, I think this fixes a case where we get
a cumulative ACK with enough SACK blocks to trigger the fast
recovery (is_dupack would be false there too).

I'm not completely pleased with this solution because readability
of the code is somewhat questionable as 'is_dupack' in SACK case
is no longer about dupacks only but would mean something like
'lost_marker_work_todo' too... But because of Eifel stuff done
in CA_Recovery, the FLAG_DATA_SACKED check cannot be placed to
the if statement which seems attractive solution. Nevertheless,
I didn't like adding another variable just for that either... :-)

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 41163ddc312..378ca8a086a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2112,7 +2112,10 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	int is_dupack = (tp->snd_una == prior_snd_una && !(flag&FLAG_NOT_DUP));
+	int is_dupack = (tp->snd_una == prior_snd_una &&
+			 (!(flag&FLAG_NOT_DUP) ||
+			  ((flag&FLAG_DATA_SACKED) &&
+			   (tp->fackets_out > tp->reordering))));
 
 	/* Some technical things:
 	 * 1. Reno does not count dupacks (sacked_out) automatically. */
-- 
cgit v1.2.3


From 196b003620f1ee8d0fc63f13f341187d63c1dc0a Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 31 Jul 2007 02:04:32 -0700
Subject: [IPSEC]: Ensure that state inner family is set

Similar to the issue we had with template families which
specified the inner families of policies, we need to set
the inner families of states as the main xfrm user Openswan
leaves it as zero.

af_key is unaffected because the inner family is set by it
and not the KM.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_user.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index c06883bf620..61339e17a0f 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -322,6 +322,13 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *
 	x->props.family = p->family;
 	memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr));
 	x->props.flags = p->flags;
+
+	/*
+	 * Set inner address family if the KM left it as zero.
+	 * See comment in validate_tmpl.
+	 */
+	if (!x->sel.family)
+		x->sel.family = p->family;
 }
 
 /*
-- 
cgit v1.2.3


From 48b8d78315bf2aef4b6b4fb41c2c94e0b6600234 Mon Sep 17 00:00:00 2001
From: Joakim Koskela <jookos@gmail.com>
Date: Thu, 26 Jul 2007 00:08:42 -0700
Subject: [XFRM]: State selection update to use inner addresses.

This patch modifies the xfrm state selection logic to use the inner
addresses where the outer have been (incorrectly) used. This is
required for beet mode in general and interfamily setups in both
tunnel and beet mode.

Signed-off-by: Joakim Koskela <jookos@gmail.com>
Signed-off-by: Herbert Xu     <herbert@gondor.apana.org.au>
Signed-off-by: Diego Beltrami <diego.beltrami@gmail.com>
Signed-off-by: Miika Komu     <miika@iki.fi>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 3 ++-
 net/xfrm/xfrm_state.c  | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c3a4b0a1868..95a47304336 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1299,7 +1299,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
 		xfrm_address_t *local  = saddr;
 		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
 
-		if (tmpl->mode == XFRM_MODE_TUNNEL) {
+		if (tmpl->mode == XFRM_MODE_TUNNEL ||
+		    tmpl->mode == XFRM_MODE_BEET) {
 			remote = &tmpl->id.daddr;
 			local = &tmpl->saddr;
 			family = tmpl->encap_family;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 38f90ca75b1..31be405efb5 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -611,7 +611,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 			      selector.
 			 */
 			if (x->km.state == XFRM_STATE_VALID) {
-				if (!xfrm_selector_match(&x->sel, fl, family) ||
+				if (!xfrm_selector_match(&x->sel, fl, x->sel.family) ||
 				    !security_xfrm_state_pol_flow_match(x, pol, fl))
 					continue;
 				if (!best ||
@@ -623,7 +623,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 				acquire_in_progress = 1;
 			} else if (x->km.state == XFRM_STATE_ERROR ||
 				   x->km.state == XFRM_STATE_EXPIRED) {
-				if (xfrm_selector_match(&x->sel, fl, family) &&
+				if (xfrm_selector_match(&x->sel, fl, x->sel.family) &&
 				    security_xfrm_state_pol_flow_match(x, pol, fl))
 					error = -ESRCH;
 			}
-- 
cgit v1.2.3


From 61a44b9c4b20d40c41fd1b70a4ceb13b75ea79a4 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Tue, 31 Jul 2007 14:00:02 -0700
Subject: [NET]: ethtool ops are the only way

During the transition to the ethtool_ops way of doing things, we supported
calling the device's ->do_ioctl method to allow unconverted drivers to
continue working.  Those days are long behind us, all in-tree drivers
use the ethtool_ops way, and so we no longer need to support this.

The bonding driver is the biggest beneficiary of this; it no longer
needs to call ioctl() as a fallback if ethtool_ops aren't supported.

Also put a proper copyright statement on ethtool.c.

Signed-off-by: Matthew Wilcox <matthew@wil.cx>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c |  3 ---
 net/bridge/br_if.c   | 41 +++++++++++++++--------------------------
 net/core/ethtool.c   | 21 +++++++--------------
 3 files changed, 22 insertions(+), 43 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 4d2aa4dd42a..4bab322c9f8 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -668,9 +668,6 @@ int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		if (real_dev->do_ioctl && netif_device_present(real_dev))
 			err = real_dev->do_ioctl(real_dev, &ifrr, cmd);
 		break;
-
-	case SIOCETHTOOL:
-		err = dev_ethtool(&ifrr);
 	}
 
 	if (!err)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 7b4ce9113be..b40dada002b 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -29,35 +29,24 @@
  * Determine initial path cost based on speed.
  * using recommendations from 802.1d standard
  *
- * Need to simulate user ioctl because not all device's that support
- * ethtool, use ethtool_ops.  Also, since driver might sleep need to
- * not be holding any locks.
+ * Since driver might sleep need to not be holding any locks.
  */
 static int port_cost(struct net_device *dev)
 {
-	struct ethtool_cmd ecmd = { ETHTOOL_GSET };
-	struct ifreq ifr;
-	mm_segment_t old_fs;
-	int err;
-
-	strncpy(ifr.ifr_name, dev->name, IFNAMSIZ);
-	ifr.ifr_data = (void __user *) &ecmd;
-
-	old_fs = get_fs();
-	set_fs(KERNEL_DS);
-	err = dev_ethtool(&ifr);
-	set_fs(old_fs);
-
-	if (!err) {
-		switch(ecmd.speed) {
-		case SPEED_100:
-			return 19;
-		case SPEED_1000:
-			return 4;
-		case SPEED_10000:
-			return 2;
-		case SPEED_10:
-			return 100;
+	if (dev->ethtool_ops->get_settings) {
+		struct ethtool_cmd ecmd = { ETHTOOL_GSET };
+		int err = dev->ethtool_ops->get_settings(dev, &ecmd);
+		if (!err) {
+			switch(ecmd.speed) {
+			case SPEED_100:
+				return 19;
+			case SPEED_1000:
+				return 4;
+			case SPEED_10000:
+				return 2;
+			case SPEED_10:
+				return 100;
+			}
 		}
 	}
 
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 0b531e98ec3..2bf565e8d0b 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -3,10 +3,12 @@
  * Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx>
  *
  * This file is where we call all the ethtool_ops commands to get
- * the information ethtool needs.  We fall back to calling do_ioctl()
- * for drivers which haven't been converted to ethtool_ops yet.
+ * the information ethtool needs.
  *
- * It's GPL, stupid.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -821,7 +823,7 @@ int dev_ethtool(struct ifreq *ifr)
 		return -ENODEV;
 
 	if (!dev->ethtool_ops)
-		goto ioctl;
+		return -EOPNOTSUPP;
 
 	if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
 		return -EFAULT;
@@ -960,7 +962,7 @@ int dev_ethtool(struct ifreq *ifr)
 		rc = ethtool_set_gso(dev, useraddr);
 		break;
 	default:
-		rc =  -EOPNOTSUPP;
+		rc = -EOPNOTSUPP;
 	}
 
 	if (dev->ethtool_ops->complete)
@@ -970,15 +972,6 @@ int dev_ethtool(struct ifreq *ifr)
 		netdev_features_change(dev);
 
 	return rc;
-
- ioctl:
-	/* Keep existing behaviour for the moment.	 */
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	if (dev->do_ioctl)
-		return dev->do_ioctl(dev, ifr, SIOCETHTOOL);
-	return -EOPNOTSUPP;
 }
 
 EXPORT_SYMBOL(dev_ethtool);
-- 
cgit v1.2.3


From 313674afa8fdced2fe79f50f38e1c387b63d8790 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Tue, 31 Jul 2007 14:00:29 -0700
Subject: [NET]: ethtool_perm_addr only has one implementation

All drivers implement ethtool get_perm_addr the same way -- by calling
the generic function.  So we can inline the generic function into the
caller and avoid going through the drivers.

Signed-off-by: Matthew Wilcox <matthew@wil.cx>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/ethtool.c | 43 ++++++++-----------------------------------
 1 file changed, 8 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 2bf565e8d0b..2ab0a60046a 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -95,18 +95,6 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data)
 	return 0;
 }
 
-int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *addr, u8 *data)
-{
-	unsigned char len = dev->addr_len;
-	if ( addr->size < len )
-		return -ETOOSMALL;
-
-	addr->size = len;
-	memcpy(data, dev->perm_addr, len);
-	return 0;
-}
-
-
 u32 ethtool_op_get_ufo(struct net_device *dev)
 {
 	return (dev->features & NETIF_F_UFO) != 0;
@@ -779,34 +767,20 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
 static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_perm_addr epaddr;
-	u8 *data;
-	int ret;
-
-	if (!dev->ethtool_ops->get_perm_addr)
-		return -EOPNOTSUPP;
 
-	if (copy_from_user(&epaddr,useraddr,sizeof(epaddr)))
+	if (copy_from_user(&epaddr, useraddr, sizeof(epaddr)))
 		return -EFAULT;
 
-	data = kmalloc(epaddr.size, GFP_USER);
-	if (!data)
-		return -ENOMEM;
-
-	ret = dev->ethtool_ops->get_perm_addr(dev,&epaddr,data);
-	if (ret)
-		return ret;
+	if (epaddr.size < dev->addr_len)
+		return -ETOOSMALL;
+	epaddr.size = dev->addr_len;
 
-	ret = -EFAULT;
 	if (copy_to_user(useraddr, &epaddr, sizeof(epaddr)))
-		goto out;
+		return -EFAULT;
 	useraddr += sizeof(epaddr);
-	if (copy_to_user(useraddr, data, epaddr.size))
-		goto out;
-	ret = 0;
-
- out:
-	kfree(data);
-	return ret;
+	if (copy_to_user(useraddr, dev->perm_addr, epaddr.size))
+		return -EFAULT;
+	return 0;
 }
 
 /* The main entry point in this file.  Called from net/core/dev.c */
@@ -976,7 +950,6 @@ int dev_ethtool(struct ifreq *ifr)
 
 EXPORT_SYMBOL(dev_ethtool);
 EXPORT_SYMBOL(ethtool_op_get_link);
-EXPORT_SYMBOL_GPL(ethtool_op_get_perm_addr);
 EXPORT_SYMBOL(ethtool_op_get_sg);
 EXPORT_SYMBOL(ethtool_op_get_tso);
 EXPORT_SYMBOL(ethtool_op_get_tx_csum);
-- 
cgit v1.2.3


From f87966541ef15d28572c8a3caaec359801625228 Mon Sep 17 00:00:00 2001
From: Mariusz Kozlowski <m.kozlowski@tuxland.pl>
Date: Tue, 31 Jul 2007 14:05:56 -0700
Subject: [DECNET]: kmalloc + memset conversion to kzalloc

Signed-off-by: Mariusz Kozlowski <m.kozlowski@tuxland.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_route.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index f2a61ef2af9..a4a620971ef 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1737,8 +1737,9 @@ static int dn_rt_cache_seq_open(struct inode *inode, struct file *file)
 {
 	struct seq_file *seq;
 	int rc = -ENOMEM;
-	struct dn_rt_cache_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+	struct dn_rt_cache_iter_state *s;
 
+	s = kzalloc(sizeof(*s), GFP_KERNEL);
 	if (!s)
 		goto out;
 	rc = seq_open(file, &dn_rt_cache_seq_ops);
@@ -1746,7 +1747,6 @@ static int dn_rt_cache_seq_open(struct inode *inode, struct file *file)
 		goto out_kfree;
 	seq		= file->private_data;
 	seq->private	= s;
-	memset(s, 0, sizeof(*s));
 out:
 	return rc;
 out_kfree:
-- 
cgit v1.2.3


From 376407039c26caacc3e433437d25516ba8f3adc9 Mon Sep 17 00:00:00 2001
From: Mariusz Kozlowski <m.kozlowski@tuxland.pl>
Date: Tue, 31 Jul 2007 14:06:45 -0700
Subject: [IPV4] ip_options.c: kmalloc + memset conversion to kzalloc

Signed-off-by: Mariusz Kozlowski <m.kozlowski@tuxland.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_options.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 251346828cb..2f14745a9e1 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -513,11 +513,8 @@ void ip_options_undo(struct ip_options * opt)
 
 static struct ip_options *ip_options_get_alloc(const int optlen)
 {
-	struct ip_options *opt = kmalloc(sizeof(*opt) + ((optlen + 3) & ~3),
-					 GFP_KERNEL);
-	if (opt)
-		memset(opt, 0, sizeof(*opt));
-	return opt;
+	return kzalloc(sizeof(struct ip_options) + ((optlen + 3) & ~3),
+		       GFP_KERNEL);
 }
 
 static int ip_options_get_finish(struct ip_options **optp,
-- 
cgit v1.2.3


From 8072f085d79a0a73cc5a0333ffa7f0c5d35f76e0 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 31 Jul 2007 14:13:50 -0700
Subject: [RTNETLINK]: Fix warning for !CONFIG_KMOD

replay label is unused otherwise.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 06eccca8cb5..4756d5857ab 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -952,7 +952,9 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct nlattr *linkinfo[IFLA_INFO_MAX+1];
 	int err;
 
+#ifdef CONFIG_KMOD
 replay:
+#endif
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
 	if (err < 0)
 		return err;
-- 
cgit v1.2.3