Merge branch 'dccp' of git://eden-feed.erg.abdn.ac.uk/dccp_exp

Conflicts: net/dccp/input.c net/dccp/options.c
author: David S. Miller <davem@davemloft.net> 2008-09-08 17:28:59 -0700
committer: David S. Miller <davem@davemloft.net> 2008-09-08 17:28:59 -0700
commit: 0a68a20cc3eafa73bb54097c28b921147d7d3685 (patch)
tree: 8e5f315226b618cb8e050a0c7653c8ec134501e3 /net/dccp/ccids/lib
parent: 17dce5dfe38ae2fb359b61e855f5d8a3a8b7892b (diff)
parent: a3cbdde8e9c38b66b4f13ac5d6ff1939ded0ff20 (diff)
6 files changed, 271 insertions, 168 deletions
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 5b3ce0688c5..b1ae8f8259e 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -86,21 +86,26 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
 
 /**
  * tfrc_lh_update_i_mean  -  Update the `open' loss interval I_0
- * For recomputing p: returns `true' if p > p_prev  <=>  1/p < 1/p_prev
+ * This updates I_mean as the sequence numbers increase. As a consequence, the
+ * open loss interval I_0 increases, hence p = W_tot/max(I_tot0, I_tot1)
+ * decreases, and thus there is no need to send renewed feedback.
  */
-u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
+void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
 {
 	struct tfrc_loss_interval *cur = tfrc_lh_peek(lh);
-	u32 old_i_mean = lh->i_mean;
 	s64 len;
 
 	if (cur == NULL)			/* not initialised */
-		return 0;
+		return;
+
+	/* FIXME: should probably also count non-data packets (RFC 4342, 6.1) */
+	if (!dccp_data_packet(skb))
+		return;
 
 	len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1;
 
 	if (len - (s64)cur->li_length <= 0)	/* duplicate or reordered */
-		return 0;
+		return;
 
 	if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4)
 		/*
@@ -114,14 +119,11 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
 		cur->li_is_closed = 1;
 
 	if (tfrc_lh_length(lh) == 1)		/* due to RFC 3448, 6.3.1 */
-		return 0;
+		return;
 
 	cur->li_length = len;
 	tfrc_lh_calc_i_mean(lh);
-
-	return (lh->i_mean < old_i_mean);
 }
-EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean);
 
 /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
 static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
@@ -138,18 +140,18 @@ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
  * @sk:		   Used by @calc_first_li in caller-specific way (subtyping)
  * Updates I_mean and returns 1 if a new interval has in fact been added to @lh.
  */
-int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
-			 u32 (*calc_first_li)(struct sock *), struct sock *sk)
+bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
+			  u32 (*calc_first_li)(struct sock *), struct sock *sk)
 {
 	struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new;
 
 	if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh)))
-		return 0;
+		return false;
 
 	new = tfrc_lh_demand_next(lh);
 	if (unlikely(new == NULL)) {
 		DCCP_CRIT("Cannot allocate/add loss record.");
-		return 0;
+		return false;
 	}
 
 	new->li_seqno	  = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno;
@@ -167,7 +169,7 @@ int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
 
 		tfrc_lh_calc_i_mean(lh);
 	}
-	return 1;
+	return true;
 }
 EXPORT_SYMBOL_GPL(tfrc_lh_interval_add);
 
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 246018a3b26..d08a226db43 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -67,9 +67,9 @@ static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh)
 
 struct tfrc_rx_hist;
 
-extern int  tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
+extern bool tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
 				 u32 (*first_li)(struct sock *), struct sock *);
-extern u8   tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
+extern void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
 extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh);
 
 #endif /* _DCCP_LI_HIST_ */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 6cc108afdc3..cce9f03bda3 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -40,18 +40,6 @@
 #include "packet_history.h"
 #include "../../dccp.h"
 
-/**
- *  tfrc_tx_hist_entry  -  Simple singly-linked TX history list
- *  @next:  next oldest entry (LIFO order)
- *  @seqno: sequence number of this entry
- *  @stamp: send time of packet with sequence number @seqno
- */
-struct tfrc_tx_hist_entry {
-	struct tfrc_tx_hist_entry *next;
-	u64			  seqno;
-	ktime_t			  stamp;
-};
-
 /*
  * Transmitter History Routines
  */
@@ -73,15 +61,6 @@ void tfrc_tx_packet_history_exit(void)
 	}
 }
 
-static struct tfrc_tx_hist_entry *
-	tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
-{
-	while (head != NULL && head->seqno != seqno)
-		head = head->next;
-
-	return head;
-}
-
 int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
 {
 	struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
@@ -111,25 +90,6 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
 }
 EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge);
 
-u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
-		     const ktime_t now)
-{
-	u32 rtt = 0;
-	struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);
-
-	if (packet != NULL) {
-		rtt = ktime_us_delta(now, packet->stamp);
-		/*
-		 * Garbage-collect older (irrelevant) entries:
-		 */
-		tfrc_tx_hist_purge(&packet->next);
-	}
-
-	return rtt;
-}
-EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
-
-
 /*
  * 	Receiver History Routines
  */
@@ -191,14 +151,31 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
 
+
+static void __tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
+{
+	struct tfrc_rx_hist_entry *tmp = h->ring[a];
+
+	h->ring[a] = h->ring[b];
+	h->ring[b] = tmp;
+}
+
 static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
 {
-	const u8 idx_a = tfrc_rx_hist_index(h, a),
-		 idx_b = tfrc_rx_hist_index(h, b);
-	struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];
+	__tfrc_rx_hist_swap(h, tfrc_rx_hist_index(h, a),
+			       tfrc_rx_hist_index(h, b));
+}
 
-	h->ring[idx_a] = h->ring[idx_b];
-	h->ring[idx_b] = tmp;
+/**
+ * tfrc_rx_hist_resume_rtt_sampling  -  Prepare RX history for RTT sampling
+ * This is called after loss detection has finished, when the history entry
+ * with the index of `loss_count' holds the highest-received sequence number.
+ * RTT sampling requires this information at ring[0] (tfrc_rx_hist_sample_rtt).
+ */
+static inline void tfrc_rx_hist_resume_rtt_sampling(struct tfrc_rx_hist *h)
+{
+	__tfrc_rx_hist_swap(h, 0, tfrc_rx_hist_index(h, h->loss_count));
+	h->loss_count = h->loss_start = 0;
 }
 
 /*
@@ -215,10 +192,8 @@ static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1)
 	u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
 	    s1 = DCCP_SKB_CB(skb)->dccpd_seq;
 
-	if (!dccp_loss_free(s0, s1, n1)) {	/* gap between S0 and S1 */
+	if (!dccp_loss_free(s0, s1, n1))	/* gap between S0 and S1 */
 		h->loss_count = 1;
-		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1);
-	}
 }
 
 static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2)
@@ -240,8 +215,7 @@ static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2
 
 		if (dccp_loss_free(s2, s1, n1)) {
 			/* hole is filled: S0, S2, and S1 are consecutive */
-			h->loss_count = 0;
-			h->loss_start = tfrc_rx_hist_index(h, 1);
+			tfrc_rx_hist_resume_rtt_sampling(h);
 		} else
 			/* gap between S2 and S1: just update loss_prev */
 			tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2);
@@ -294,8 +268,7 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
 
 			if (dccp_loss_free(s1, s2, n2)) {
 				/* entire hole filled by S0, S3, S1, S2 */
-				h->loss_start = tfrc_rx_hist_index(h, 2);
-				h->loss_count = 0;
+				tfrc_rx_hist_resume_rtt_sampling(h);
 			} else {
 				/* gap remains between S1 and S2 */
 				h->loss_start = tfrc_rx_hist_index(h, 1);
@@ -339,8 +312,7 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
 
 		if (dccp_loss_free(s2, s3, n3)) {
 			/* no gap between S2 and S3: entire hole is filled */
-			h->loss_start = tfrc_rx_hist_index(h, 3);
-			h->loss_count = 0;
+			tfrc_rx_hist_resume_rtt_sampling(h);
 		} else {
 			/* gap between S2 and S3 */
 			h->loss_start = tfrc_rx_hist_index(h, 2);
@@ -354,13 +326,13 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
 }
 
 /**
- *  tfrc_rx_handle_loss  -  Loss detection and further processing
- *  @h:		    The non-empty RX history object
- *  @lh:	    Loss Intervals database to update
- *  @skb:	    Currently received packet
- *  @ndp:	    The NDP count belonging to @skb
- *  @calc_first_li: Caller-dependent computation of first loss interval in @lh
- *  @sk:	    Used by @calc_first_li (see tfrc_lh_interval_add)
+ *  tfrc_rx_congestion_event  -  Loss detection and further processing
+ *  @h:		The non-empty RX history object
+ *  @lh:	Loss Intervals database to update
+ *  @skb:	Currently received packet
+ *  @ndp:	The NDP count belonging to @skb
+ *  @first_li:	Caller-dependent computation of first loss interval in @lh
+ *  @sk:	Used by @calc_first_li (see tfrc_lh_interval_add)
  *  Chooses action according to pending loss, updates LI database when a new
  *  loss was detected, and does required post-processing. Returns 1 when caller
  *  should send feedback, 0 otherwise.
@@ -368,15 +340,20 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
  *  records accordingly, the caller should not perform any more RX history
  *  operations when loss_count is greater than 0 after calling this function.
  */
-int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
-			struct tfrc_loss_hist *lh,
-			struct sk_buff *skb, const u64 ndp,
-			u32 (*calc_first_li)(struct sock *), struct sock *sk)
+bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
+			      struct tfrc_loss_hist *lh,
+			      struct sk_buff *skb, const u64 ndp,
+			      u32 (*first_li)(struct sock *), struct sock *sk)
 {
-	int is_new_loss = 0;
+	bool new_event = false;
+
+	if (tfrc_rx_hist_duplicate(h, skb))
+		return 0;
 
 	if (h->loss_count == 0) {
 		__do_track_loss(h, skb, ndp);
+		tfrc_rx_hist_sample_rtt(h, skb);
+		tfrc_rx_hist_add_packet(h, skb, ndp);
 	} else if (h->loss_count == 1) {
 		__one_after_loss(h, skb, ndp);
 	} else if (h->loss_count != 2) {
@@ -385,34 +362,57 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
 		/*
 		 * Update Loss Interval database and recycle RX records
 		 */
-		is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk);
+		new_event = tfrc_lh_interval_add(lh, h, first_li, sk);
 		__three_after_loss(h);
 	}
-	return is_new_loss;
+
+	/*
+	 * Update moving-average of `s' and the sum of received payload bytes.
+	 */
+	if (dccp_data_packet(skb)) {
+		const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
+
+		h->packet_size = tfrc_ewma(h->packet_size, payload, 9);
+		h->bytes_recvd += payload;
+	}
+
+	/* RFC 3448, 6.1: update I_0, whose growth implies p <= p_prev */
+	if (!new_event)
+		tfrc_lh_update_i_mean(lh, skb);
+
+	return new_event;
 }
-EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss);
+EXPORT_SYMBOL_GPL(tfrc_rx_congestion_event);
 
-int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
+/* Compute the sending rate X_recv measured between feedback intervals */
+u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv)
 {
-	int i;
+	u64 bytes = h->bytes_recvd, last_rtt = h->rtt_estimate;
+	s64 delta = ktime_to_us(net_timedelta(h->bytes_start));
 
-	for (i = 0; i <= TFRC_NDUPACK; i++) {
-		h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
-		if (h->ring[i] == NULL)
-			goto out_free;
-	}
+	WARN_ON(delta <= 0);
+	/*
+	 * Ensure that the sampling interval for X_recv is at least one RTT,
+	 * by extending the sampling interval backwards in time, over the last
+	 * R_(m-1) seconds, as per rfc3448bis-06, 6.2.
+	 * To reduce noise (e.g. when the RTT changes often), this is only
+	 * done when delta is smaller than RTT/2.
+	 */
+	if (last_x_recv > 0 && delta < last_rtt/2) {
+		tfrc_pr_debug("delta < RTT ==> %ld us < %u us\n",
+			      (long)delta, (unsigned)last_rtt);
 
-	h->loss_count = h->loss_start = 0;
-	return 0;
+		delta = (bytes ? delta : 0) + last_rtt;
+		bytes += div_u64((u64)last_x_recv * last_rtt, USEC_PER_SEC);
+	}
 
-out_free:
-	while (i-- != 0) {
-		kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
-		h->ring[i] = NULL;
+	if (unlikely(bytes == 0)) {
+		DCCP_WARN("X_recv == 0, using old value of %u\n", last_x_recv);
+		return last_x_recv;
 	}
-	return -ENOBUFS;
+	return scaled_div32(bytes, delta);
 }
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc);
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_x_recv);
 
 void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
 {
@@ -426,73 +426,81 @@ void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
 
-/**
- * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
- */
-static inline struct tfrc_rx_hist_entry *
-			tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h)
+static int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
 {
-	return h->ring[0];
+	int i;
+
+	memset(h, 0, sizeof(*h));
+
+	for (i = 0; i <= TFRC_NDUPACK; i++) {
+		h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
+		if (h->ring[i] == NULL) {
+			tfrc_rx_hist_purge(h);
+			return -ENOBUFS;
+		}
+	}
+	return 0;
 }
 
-/**
- * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry
- */
-static inline struct tfrc_rx_hist_entry *
-			tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
+int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk)
 {
-	return h->ring[h->rtt_sample_prev];
+	if (tfrc_rx_hist_alloc(h))
+		return -ENOBUFS;
+	/*
+	 * Initialise first entry with GSR to start loss detection as early as
+	 * possible. Code using this must not use any other fields. The entry
+	 * will be overwritten once the CCID updates its received packets.
+	 */
+	tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno = dccp_sk(sk)->dccps_gsr;
+	return 0;
 }
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_init);
 
 /**
  * tfrc_rx_hist_sample_rtt  -  Sample RTT from timestamp / CCVal
- * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
- * to compute a sample with given data - calling function should check this.
+ * Based on ideas presented in RFC 4342, 8.1. This function expects that no loss
+ * is pending and uses the following history entries (via rtt_sample_prev):
+ * - h->ring[0]  contains the most recent history entry prior to @skb;
+ * - h->ring[1]  is an unused `dummy' entry when the current difference is 0;
  */
-u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
+void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
 {
-	u32 sample = 0,
-	    delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
-			    tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
-
-	if (delta_v < 1 || delta_v > 4) {	/* unsuitable CCVal delta */
-		if (h->rtt_sample_prev == 2) {	/* previous candidate stored */
-			sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
-				       tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
-			if (sample)
-				sample = 4 / sample *
-				         ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp,
-							tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp);
-			else    /*
-				 * FIXME: This condition is in principle not
-				 * possible but occurs when CCID is used for
-				 * two-way data traffic. I have tried to trace
-				 * it, but the cause does not seem to be here.
-				 */
-				DCCP_BUG("please report to dccp@vger.kernel.org"
-					 " => prev = %u, last = %u",
-					 tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
-					 tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
-		} else if (delta_v < 1) {
-			h->rtt_sample_prev = 1;
-			goto keep_ref_for_next_time;
-		}
+	struct tfrc_rx_hist_entry *last = h->ring[0];
+	u32 sample, delta_v;
 
-	} else if (delta_v == 4) /* optimal match */
-		sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp));
-	else {			 /* suboptimal match */
-		h->rtt_sample_prev = 2;
-		goto keep_ref_for_next_time;
-	}
+	/*
+	 * When not to sample:
+	 * - on non-data packets
+	 *   (RFC 4342, 8.1: CCVal only fully defined for data packets);
+	 * - when no data packets have been received yet
+	 *   (FIXME: using sampled packet size as indicator here);
+	 * - as long as there are gaps in the sequence space (pending loss).
+	 */
+	if (!dccp_data_packet(skb) || h->packet_size == 0 ||
+	    tfrc_rx_hist_loss_pending(h))
+		return;
 
-	if (unlikely(sample > DCCP_SANE_RTT_MAX)) {
-		DCCP_WARN("RTT sample %u too large, using max\n", sample);
-		sample = DCCP_SANE_RTT_MAX;
+	h->rtt_sample_prev = 0;		/* reset previous candidate */
+
+	delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, last->tfrchrx_ccval);
+	if (delta_v == 0) {		/* less than RTT/4 difference */
+		h->rtt_sample_prev = 1;
+		return;
 	}
+	sample = dccp_sane_rtt(ktime_to_us(net_timedelta(last->tfrchrx_tstamp)));
 
-	h->rtt_sample_prev = 0;	       /* use current entry as next reference */
-keep_ref_for_next_time:
+	if (delta_v <= 4)		/* between RTT/4 and RTT */
+		sample *= 4 / delta_v;
+	else if (!(sample < h->rtt_estimate && sample > h->rtt_estimate/2))
+		/*
+		* Optimisation: CCVal difference is greater than 1 RTT, yet the
+		* sample is less than the local RTT estimate; which means that
+		* the RTT estimate is too high.
+		* To avoid noise, it is not done if the sample is below RTT/2.
+		*/
+		return;
 
-	return sample;
+	/* Use a lower weight than usual to increase responsiveness */
+	h->rtt_estimate = tfrc_ewma(h->rtt_estimate, sample, 5);
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 461cc91cce8..555e65cd73a 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -40,12 +40,28 @@
 #include <linux/slab.h>
 #include "tfrc.h"
 
-struct tfrc_tx_hist_entry;
+/**
+ *  tfrc_tx_hist_entry  -  Simple singly-linked TX history list
+ *  @next:  next oldest entry (LIFO order)
+ *  @seqno: sequence number of this entry
+ *  @stamp: send time of packet with sequence number @seqno
+ */
+struct tfrc_tx_hist_entry {
+	struct tfrc_tx_hist_entry *next;
+	u64			  seqno;
+	ktime_t			  stamp;
+};
+
+static inline struct tfrc_tx_hist_entry *
+	tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
+{
+	while (head != NULL && head->seqno != seqno)
+		head = head->next;
+	return head;
+}
 
 extern int  tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
 extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
-extern u32  tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
-			     const u64 seqno, const ktime_t now);
 
 /* Subtraction a-b modulo-16, respects circular wrap-around */
 #define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
@@ -75,12 +91,22 @@ struct tfrc_rx_hist_entry {
  * @loss_count:		Number of entries in circular history
  * @loss_start:		Movable index (for loss detection)
  * @rtt_sample_prev:	Used during RTT sampling, points to candidate entry
+ * @rtt_estimate:	Receiver RTT estimate
+ * @packet_size:	Packet size in bytes (as per RFC 3448, 3.1)
+ * @bytes_recvd:	Number of bytes received since @bytes_start
+ * @bytes_start:	Start time for counting @bytes_recvd
  */
 struct tfrc_rx_hist {
 	struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1];
 	u8			  loss_count:2,
 				  loss_start:2;
+	/* Receiver RTT sampling */
 #define rtt_sample_prev		  loss_start
+	u32			  rtt_estimate;
+	/* Receiver sampling of application payload lengths */
+	u32			  packet_size,
+				  bytes_recvd;
+	ktime_t			  bytes_start;
 };
 
 /**
@@ -124,20 +150,50 @@ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
 	return h->loss_count > 0;
 }
 
+/*
+ * Accessor functions to retrieve parameters sampled by the RX history
+ */
+static inline u32 tfrc_rx_hist_packet_size(const struct tfrc_rx_hist *h)
+{
+	if (h->packet_size == 0) {
+		DCCP_WARN("No sample for s, using fallback\n");
+		return TCP_MIN_RCVMSS;
+	}
+	return h->packet_size;
+
+}
+static inline u32 tfrc_rx_hist_rtt(const struct tfrc_rx_hist *h)
+{
+	if (h->rtt_estimate == 0) {
+		DCCP_WARN("No RTT estimate available, using fallback RTT\n");
+		return  DCCP_FALLBACK_RTT;
+	}
+	return h->rtt_estimate;
+}
+
+static inline void tfrc_rx_hist_restart_byte_counter(struct tfrc_rx_hist *h)
+{
+	h->bytes_recvd = 0;
+	h->bytes_start = ktime_get_real();
+}
+
+extern u32  tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv);
+
+
 extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 				    const struct sk_buff *skb, const u64 ndp);
 
 extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
 
 struct tfrc_loss_hist;
-extern int  tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
-				struct tfrc_loss_hist *lh,
-				struct sk_buff *skb, const u64 ndp,
-				u32 (*first_li)(struct sock *sk),
-				struct sock *sk);
-extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
-				   const struct sk_buff *skb);
-extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
+extern bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
+				     struct tfrc_loss_hist *lh,
+				     struct sk_buff *skb, const u64 ndp,
+				     u32 (*first_li)(struct sock *sk),
+				     struct sock *sk);
+extern void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
+				    const struct sk_buff *skb);
+extern int  tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk);
 extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
 
 #endif /* _DCCP_PKT_HIST_ */
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index ed9857527ac..ede12f53de5 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -48,6 +48,21 @@ static inline u32 scaled_div32(u64 a, u64 b)
 }
 
 /**
+ * tfrc_scaled_sqrt  -  Compute scaled integer sqrt(x) for 0 < x < 2^22-1
+ * Uses scaling to improve accuracy of the integer approximation of sqrt(). The
+ * scaling factor of 2^10 limits the maximum @sample to 4e6; this is okay for
+ * clamped RTT samples (dccp_sample_rtt).
+ * Should best be used for expressions of type sqrt(x)/sqrt(y), since then the
+ * scaling factor is neutralised. For this purpose, it avoids returning zero.
+ */
+static inline u16 tfrc_scaled_sqrt(const u32 sample)
+{
+	const unsigned long non_zero_sample = sample ? : 1;
+
+	return int_sqrt(non_zero_sample << 10);
+}
+
+/**
  * tfrc_ewma  -  Exponentially weighted moving average
  * @weight: Weight to be used as damping factor, in units of 1/10
  */
@@ -58,6 +73,7 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
 
 extern u32  tfrc_calc_x(u16 s, u32 R, u32 p);
 extern u32  tfrc_calc_x_reverse_lookup(u32 fvalue);
+extern u32  tfrc_invert_loss_event_rate(u32 loss_event_rate);
 
 extern int  tfrc_tx_packet_history_init(void);
 extern void tfrc_tx_packet_history_exit(void);
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 2f20a29cffe..38239c4d5e1 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -632,8 +632,16 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
 
 	if (p <= TFRC_CALC_X_SPLIT) 		{     /* 0.0000 < p <= 0.05   */
 		if (p < TFRC_SMALLEST_P) {	      /* 0.0000 < p <  0.0001 */
-			DCCP_WARN("Value of p (%d) below resolution. "
-				  "Substituting %d\n", p, TFRC_SMALLEST_P);
+			/*
+			 * In the congestion-avoidance phase p decays towards 0
+			 * when there are no further losses, so this case is
+			 * natural. Truncating to p_min = 0.01% means that the
+			 * maximum achievable throughput is limited to about
+			 * X_calc_max = 122.4 * s/RTT (see RFC 3448, 3.1); e.g.
+			 * with s=1500 bytes, RTT=0.01 s: X_calc_max = 147 Mbps.
+			 */
+			tfrc_pr_debug("Value of p (%d) below resolution. "
+				      "Substituting %d\n", p, TFRC_SMALLEST_P);
 			index = 0;
 		} else 				      /* 0.0001 <= p <= 0.05  */
 			index =  p/TFRC_SMALLEST_P - 1;
@@ -658,7 +666,6 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
 	result = scaled_div(s, R);
 	return scaled_div32(result, f);
 }
-
 EXPORT_SYMBOL_GPL(tfrc_calc_x);
 
 /**
@@ -693,5 +700,19 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
 	index = tfrc_binsearch(fvalue, 0);
 	return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
 }
-
 EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
+
+/**
+ * tfrc_invert_loss_event_rate  -  Compute p so that 10^6 corresponds to 100%
+ * When @loss_event_rate is large, there is a chance that p is truncated to 0.
+ * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0.
+ */
+u32 tfrc_invert_loss_event_rate(u32 loss_event_rate)
+{
+	if (loss_event_rate == UINT_MAX)		/* see RFC 4342, 8.5 */
+		return 0;
+	if (unlikely(loss_event_rate == 0))		/* map 1/0 into 100% */
+		return 1000000;
+	return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P);
+}
+EXPORT_SYMBOL_GPL(tfrc_invert_loss_event_rate);
author	David S. Miller <davem@davemloft.net>	2008-09-08 17:28:59 -0700
committer	David S. Miller <davem@davemloft.net>	2008-09-08 17:28:59 -0700
commit	0a68a20cc3eafa73bb54097c28b921147d7d3685 (patch)
tree	8e5f315226b618cb8e050a0c7653c8ec134501e3 /net/dccp/ccids/lib
parent	17dce5dfe38ae2fb359b61e855f5d8a3a8b7892b (diff)
parent	a3cbdde8e9c38b66b4f13ac5d6ff1939ded0ff20 (diff)