aboutsummaryrefslogtreecommitdiff
path: root/net/dccp/ccids/lib
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2008-09-08 17:28:59 -0700
committer David S. Miller <davem@davemloft.net> 2008-09-08 17:28:59 -0700
commit 0a68a20cc3eafa73bb54097c28b921147d7d3685 (patch)
tree 8e5f315226b618cb8e050a0c7653c8ec134501e3 /net/dccp/ccids/lib
parent 17dce5dfe38ae2fb359b61e855f5d8a3a8b7892b (diff)
parent a3cbdde8e9c38b66b4f13ac5d6ff1939ded0ff20 (diff)
Merge branch 'dccp' of git://eden-feed.erg.abdn.ac.uk/dccp_exp
Conflicts: net/dccp/input.c net/dccp/options.c
Diffstat (limited to 'net/dccp/ccids/lib')
-rw-r--r-- net/dccp/ccids/lib/loss_interval.c 30
-rw-r--r-- net/dccp/ccids/lib/loss_interval.h 4
-rw-r--r-- net/dccp/ccids/lib/packet_history.c 282
-rw-r--r-- net/dccp/ccids/lib/packet_history.h 78
-rw-r--r-- net/dccp/ccids/lib/tfrc.h 16
-rw-r--r-- net/dccp/ccids/lib/tfrc_equation.c 29
6 files changed, 271 insertions, 168 deletions
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 5b3ce0688c5..b1ae8f8259e 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -86,21 +86,26 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
/**
* tfrc_lh_update_i_mean - Update the `open' loss interval I_0
- * For recomputing p: returns `true' if p > p_prev <=> 1/p < 1/p_prev
+ * This updates I_mean as the sequence numbers increase. As a consequence, the
+ * open loss interval I_0 increases, hence p = W_tot/max(I_tot0, I_tot1)
+ * decreases, and thus there is no need to send renewed feedback.
*/
-u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
+void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
{
struct tfrc_loss_interval *cur = tfrc_lh_peek(lh);
- u32 old_i_mean = lh->i_mean;
s64 len;
if (cur == NULL) /* not initialised */
- return 0;
+ return;
+
+ /* FIXME: should probably also count non-data packets (RFC 4342, 6.1) */
+ if (!dccp_data_packet(skb))
+ return;
len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1;
if (len - (s64)cur->li_length <= 0) /* duplicate or reordered */
- return 0;
+ return;
if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4)
/*
@@ -114,14 +119,11 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
cur->li_is_closed = 1;
if (tfrc_lh_length(lh) == 1) /* due to RFC 3448, 6.3.1 */
- return 0;
+ return;
cur->li_length = len;
tfrc_lh_calc_i_mean(lh);
-
- return (lh->i_mean < old_i_mean);
}
-EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean);
/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
@@ -138,18 +140,18 @@ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
* @sk: Used by @calc_first_li in caller-specific way (subtyping)
* Updates I_mean and returns 1 if a new interval has in fact been added to @lh.
*/
-int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
- u32 (*calc_first_li)(struct sock *), struct sock *sk)
+bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
+ u32 (*calc_first_li)(struct sock *), struct sock *sk)
{
struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new;
if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh)))
- return 0;
+ return false;
new = tfrc_lh_demand_next(lh);
if (unlikely(new == NULL)) {
DCCP_CRIT("Cannot allocate/add loss record.");
- return 0;
+ return false;
}
new->li_seqno = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno;
@@ -167,7 +169,7 @@ int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
tfrc_lh_calc_i_mean(lh);
}
- return 1;
+ return true;
}
EXPORT_SYMBOL_GPL(tfrc_lh_interval_add);
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 246018a3b26..d08a226db43 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -67,9 +67,9 @@ static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh)
struct tfrc_rx_hist;
-extern int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
+extern bool tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
u32 (*first_li)(struct sock *), struct sock *);
-extern u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
+extern void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh);
#endif /* _DCCP_LI_HIST_ */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 6cc108afdc3..cce9f03bda3 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -40,18 +40,6 @@
#include "packet_history.h"
#include "../../dccp.h"
-/**
- * tfrc_tx_hist_entry - Simple singly-linked TX history list
- * @next: next oldest entry (LIFO order)
- * @seqno: sequence number of this entry
- * @stamp: send time of packet with sequence number @seqno
- */
-struct tfrc_tx_hist_entry {
- struct tfrc_tx_hist_entry *next;
- u64 seqno;
- ktime_t stamp;
-};
-
/*
* Transmitter History Routines
*/
@@ -73,15 +61,6 @@ void tfrc_tx_packet_history_exit(void)
}
}
-static struct tfrc_tx_hist_entry *
- tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
-{
- while (head != NULL && head->seqno != seqno)
- head = head->next;
-
- return head;
-}
-
int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
{
struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
@@ -111,25 +90,6 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
}
EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge);
-u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
- const ktime_t now)
-{
- u32 rtt = 0;
- struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);
-
- if (packet != NULL) {
- rtt = ktime_us_delta(now, packet->stamp);
- /*
- * Garbage-collect older (irrelevant) entries:
- */
- tfrc_tx_hist_purge(&packet->next);
- }
-
- return rtt;
-}
-EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
-
-
/*
* Receiver History Routines
*/
@@ -191,14 +151,31 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
+
+static void __tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
+{
+ struct tfrc_rx_hist_entry *tmp = h->ring[a];
+
+ h->ring[a] = h->ring[b];
+ h->ring[b] = tmp;
+}
+
static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
{
- const u8 idx_a = tfrc_rx_hist_index(h, a),
- idx_b = tfrc_rx_hist_index(h, b);
- struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];
+ __tfrc_rx_hist_swap(h, tfrc_rx_hist_index(h, a),
+ tfrc_rx_hist_index(h, b));
+}
- h->ring[idx_a] = h->ring[idx_b];
- h->ring[idx_b] = tmp;
+/**
+ * tfrc_rx_hist_resume_rtt_sampling - Prepare RX history for RTT sampling
+ * This is called after loss detection has finished, when the history entry
+ * with the index of `loss_count' holds the highest-received sequence number.
+ * RTT sampling requires this information at ring[0] (tfrc_rx_hist_sample_rtt).
+ */
+static inline void tfrc_rx_hist_resume_rtt_sampling(struct tfrc_rx_hist *h)
+{
+ __tfrc_rx_hist_swap(h, 0, tfrc_rx_hist_index(h, h->loss_count));
+ h->loss_count = h->loss_start = 0;
}
/*
@@ -215,10 +192,8 @@ static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1)
u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
s1 = DCCP_SKB_CB(skb)->dccpd_seq;
- if (!dccp_loss_free(s0, s1, n1)) { /* gap between S0 and S1 */
+ if (!dccp_loss_free(s0, s1, n1)) /* gap between S0 and S1 */
h->loss_count = 1;
- tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1);
- }
}
static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2)
@@ -240,8 +215,7 @@ static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2
if (dccp_loss_free(s2, s1, n1)) {
/* hole is filled: S0, S2, and S1 are consecutive */
- h->loss_count = 0;
- h->loss_start = tfrc_rx_hist_index(h, 1);
+ tfrc_rx_hist_resume_rtt_sampling(h);
} else
/* gap between S2 and S1: just update loss_prev */
tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2);
@@ -294,8 +268,7 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
if (dccp_loss_free(s1, s2, n2)) {
/* entire hole filled by S0, S3, S1, S2 */
- h->loss_start = tfrc_rx_hist_index(h, 2);
- h->loss_count = 0;
+ tfrc_rx_hist_resume_rtt_sampling(h);
} else {
/* gap remains between S1 and S2 */
h->loss_start = tfrc_rx_hist_index(h, 1);
@@ -339,8 +312,7 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
if (dccp_loss_free(s2, s3, n3)) {
/* no gap between S2 and S3: entire hole is filled */
- h->loss_start = tfrc_rx_hist_index(h, 3);
- h->loss_count = 0;
+ tfrc_rx_hist_resume_rtt_sampling(h);
} else {
/* gap between S2 and S3 */
h->loss_start = tfrc_rx_hist_index(h, 2);
@@ -354,13 +326,13 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
}
/**
- * tfrc_rx_handle_loss - Loss detection and further processing
- * @h: The non-empty RX history object
- * @lh: Loss Intervals database to update
- * @skb: Currently received packet
- * @ndp: The NDP count belonging to @skb
- * @calc_first_li: Caller-dependent computation of first loss interval in @lh
- * @sk: Used by @calc_first_li (see tfrc_lh_interval_add)
+ * tfrc_rx_congestion_event - Loss detection and further processing
+ * @h: The non-empty RX history object
+ * @lh: Loss Intervals database to update
+ * @skb: Currently received packet
+ * @ndp: The NDP count belonging to @skb
+ * @first_li: Caller-dependent computation of first loss interval in @lh
+ * @sk: Used by @first_li (see tfrc_lh_interval_add)
* Chooses action according to pending loss, updates LI database when a new
* loss was detected, and does required post-processing. Returns 1 when caller
* should send feedback, 0 otherwise.
@@ -368,15 +340,20 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
* records accordingly, the caller should not perform any more RX history
* operations when loss_count is greater than 0 after calling this function.
*/
-int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
- struct tfrc_loss_hist *lh,
- struct sk_buff *skb, const u64 ndp,
- u32 (*calc_first_li)(struct sock *), struct sock *sk)
+bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
+ struct tfrc_loss_hist *lh,
+ struct sk_buff *skb, const u64 ndp,
+ u32 (*first_li)(struct sock *), struct sock *sk)
{
- int is_new_loss = 0;
+ bool new_event = false;
+
+ if (tfrc_rx_hist_duplicate(h, skb))
+ return 0;
if (h->loss_count == 0) {
__do_track_loss(h, skb, ndp);
+ tfrc_rx_hist_sample_rtt(h, skb);
+ tfrc_rx_hist_add_packet(h, skb, ndp);
} else if (h->loss_count == 1) {
__one_after_loss(h, skb, ndp);
} else if (h->loss_count != 2) {
@@ -385,34 +362,57 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
/*
* Update Loss Interval database and recycle RX records
*/
- is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk);
+ new_event = tfrc_lh_interval_add(lh, h, first_li, sk);
__three_after_loss(h);
}
- return is_new_loss;
+
+ /*
+ * Update moving-average of `s' and the sum of received payload bytes.
+ */
+ if (dccp_data_packet(skb)) {
+ const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
+
+ h->packet_size = tfrc_ewma(h->packet_size, payload, 9);
+ h->bytes_recvd += payload;
+ }
+
+ /* RFC 3448, 6.1: update I_0, whose growth implies p <= p_prev */
+ if (!new_event)
+ tfrc_lh_update_i_mean(lh, skb);
+
+ return new_event;
}
-EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss);
+EXPORT_SYMBOL_GPL(tfrc_rx_congestion_event);
-int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
+/* Compute the sending rate X_recv measured between feedback intervals */
+u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv)
{
- int i;
+ u64 bytes = h->bytes_recvd, last_rtt = h->rtt_estimate;
+ s64 delta = ktime_to_us(net_timedelta(h->bytes_start));
- for (i = 0; i <= TFRC_NDUPACK; i++) {
- h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
- if (h->ring[i] == NULL)
- goto out_free;
- }
+ WARN_ON(delta <= 0);
+ /*
+ * Ensure that the sampling interval for X_recv is at least one RTT,
+ * by extending the sampling interval backwards in time, over the last
+ * R_(m-1) seconds, as per rfc3448bis-06, 6.2.
+ * To reduce noise (e.g. when the RTT changes often), this is only
+ * done when delta is smaller than RTT/2.
+ */
+ if (last_x_recv > 0 && delta < last_rtt/2) {
+ tfrc_pr_debug("delta < RTT ==> %ld us < %u us\n",
+ (long)delta, (unsigned)last_rtt);
- h->loss_count = h->loss_start = 0;
- return 0;
+ delta = (bytes ? delta : 0) + last_rtt;
+ bytes += div_u64((u64)last_x_recv * last_rtt, USEC_PER_SEC);
+ }
-out_free:
- while (i-- != 0) {
- kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
- h->ring[i] = NULL;
+ if (unlikely(bytes == 0)) {
+ DCCP_WARN("X_recv == 0, using old value of %u\n", last_x_recv);
+ return last_x_recv;
}
- return -ENOBUFS;
+ return scaled_div32(bytes, delta);
}
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc);
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_x_recv);
void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
{
@@ -426,73 +426,81 @@ void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
}
EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
-/**
- * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
- */
-static inline struct tfrc_rx_hist_entry *
- tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h)
+static int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
{
- return h->ring[0];
+ int i;
+
+ memset(h, 0, sizeof(*h));
+
+ for (i = 0; i <= TFRC_NDUPACK; i++) {
+ h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
+ if (h->ring[i] == NULL) {
+ tfrc_rx_hist_purge(h);
+ return -ENOBUFS;
+ }
+ }
+ return 0;
}
-/**
- * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry
- */
-static inline struct tfrc_rx_hist_entry *
- tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
+int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk)
{
- return h->ring[h->rtt_sample_prev];
+ if (tfrc_rx_hist_alloc(h))
+ return -ENOBUFS;
+ /*
+ * Initialise first entry with GSR to start loss detection as early as
+ * possible. Code using this must not use any other fields. The entry
+ * will be overwritten once the CCID updates its received packets.
+ */
+ tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno = dccp_sk(sk)->dccps_gsr;
+ return 0;
}
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_init);
/**
* tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal
- * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
- * to compute a sample with given data - calling function should check this.
+ * Based on ideas presented in RFC 4342, 8.1. This function expects that no loss
+ * is pending and uses the following history entries (via rtt_sample_prev):
+ * - h->ring[0] contains the most recent history entry prior to @skb;
+ * - h->ring[1] is an unused `dummy' entry when the current difference is 0;
*/
-u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
+void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
{
- u32 sample = 0,
- delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
- tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
-
- if (delta_v < 1 || delta_v > 4) { /* unsuitable CCVal delta */
- if (h->rtt_sample_prev == 2) { /* previous candidate stored */
- sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
- tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
- if (sample)
- sample = 4 / sample *
- ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp,
- tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp);
- else /*
- * FIXME: This condition is in principle not
- * possible but occurs when CCID is used for
- * two-way data traffic. I have tried to trace
- * it, but the cause does not seem to be here.
- */
- DCCP_BUG("please report to dccp@vger.kernel.org"
- " => prev = %u, last = %u",
- tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
- tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
- } else if (delta_v < 1) {
- h->rtt_sample_prev = 1;
- goto keep_ref_for_next_time;
- }
+ struct tfrc_rx_hist_entry *last = h->ring[0];
+ u32 sample, delta_v;
- } else if (delta_v == 4) /* optimal match */
- sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp));
- else { /* suboptimal match */
- h->rtt_sample_prev = 2;
- goto keep_ref_for_next_time;
- }
+ /*
+ * When not to sample:
+ * - on non-data packets
+ * (RFC 4342, 8.1: CCVal only fully defined for data packets);
+ * - when no data packets have been received yet
+ * (FIXME: using sampled packet size as indicator here);
+ * - as long as there are gaps in the sequence space (pending loss).
+ */
+ if (!dccp_data_packet(skb) || h->packet_size == 0 ||
+ tfrc_rx_hist_loss_pending(h))
+ return;
- if (unlikely(sample > DCCP_SANE_RTT_MAX)) {
- DCCP_WARN("RTT sample %u too large, using max\n", sample);
- sample = DCCP_SANE_RTT_MAX;
+ h->rtt_sample_prev = 0; /* reset previous candidate */
+
+ delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, last->tfrchrx_ccval);
+ if (delta_v == 0) { /* less than RTT/4 difference */
+ h->rtt_sample_prev = 1;
+ return;
}
+ sample = dccp_sane_rtt(ktime_to_us(net_timedelta(last->tfrchrx_tstamp)));
- h->rtt_sample_prev = 0; /* use current entry as next reference */
-keep_ref_for_next_time:
+ if (delta_v <= 4) /* between RTT/4 and RTT */
+ sample *= 4 / delta_v;
+ else if (!(sample < h->rtt_estimate && sample > h->rtt_estimate/2))
+ /*
+ * Optimisation: CCVal difference is greater than 1 RTT, yet the
+ * sample is less than the local RTT estimate; which means that
+ * the RTT estimate is too high.
+ * To avoid noise, it is not done if the sample is below RTT/2.
+ */
+ return;
- return sample;
+ /* Use a lower weight than usual to increase responsiveness */
+ h->rtt_estimate = tfrc_ewma(h->rtt_estimate, sample, 5);
}
EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 461cc91cce8..555e65cd73a 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -40,12 +40,28 @@
#include <linux/slab.h>
#include "tfrc.h"
-struct tfrc_tx_hist_entry;
+/**
+ * tfrc_tx_hist_entry - Simple singly-linked TX history list
+ * @next: next oldest entry (LIFO order)
+ * @seqno: sequence number of this entry
+ * @stamp: send time of packet with sequence number @seqno
+ */
+struct tfrc_tx_hist_entry {
+ struct tfrc_tx_hist_entry *next;
+ u64 seqno;
+ ktime_t stamp;
+};
+
+static inline struct tfrc_tx_hist_entry *
+ tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
+{
+ while (head != NULL && head->seqno != seqno)
+ head = head->next;
+ return head;
+}
extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
-extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
- const u64 seqno, const ktime_t now);
/* Subtraction a-b modulo-16, respects circular wrap-around */
#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
@@ -75,12 +91,22 @@ struct tfrc_rx_hist_entry {
* @loss_count: Number of entries in circular history
* @loss_start: Movable index (for loss detection)
* @rtt_sample_prev: Used during RTT sampling, points to candidate entry
+ * @rtt_estimate: Receiver RTT estimate
+ * @packet_size: Packet size in bytes (as per RFC 3448, 3.1)
+ * @bytes_recvd: Number of bytes received since @bytes_start
+ * @bytes_start: Start time for counting @bytes_recvd
*/
struct tfrc_rx_hist {
struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1];
u8 loss_count:2,
loss_start:2;
+ /* Receiver RTT sampling */
#define rtt_sample_prev loss_start
+ u32 rtt_estimate;
+ /* Receiver sampling of application payload lengths */
+ u32 packet_size,
+ bytes_recvd;
+ ktime_t bytes_start;
};
/**
@@ -124,20 +150,50 @@ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
return h->loss_count > 0;
}
+/*
+ * Accessor functions to retrieve parameters sampled by the RX history
+ */
+static inline u32 tfrc_rx_hist_packet_size(const struct tfrc_rx_hist *h)
+{
+ if (h->packet_size == 0) {
+ DCCP_WARN("No sample for s, using fallback\n");
+ return TCP_MIN_RCVMSS;
+ }
+ return h->packet_size;
+
+}
+static inline u32 tfrc_rx_hist_rtt(const struct tfrc_rx_hist *h)
+{
+ if (h->rtt_estimate == 0) {
+ DCCP_WARN("No RTT estimate available, using fallback RTT\n");
+ return DCCP_FALLBACK_RTT;
+ }
+ return h->rtt_estimate;
+}
+
+static inline void tfrc_rx_hist_restart_byte_counter(struct tfrc_rx_hist *h)
+{
+ h->bytes_recvd = 0;
+ h->bytes_start = ktime_get_real();
+}
+
+extern u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv);
+
+
extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
const struct sk_buff *skb, const u64 ndp);
extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
struct tfrc_loss_hist;
-extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
- struct tfrc_loss_hist *lh,
- struct sk_buff *skb, const u64 ndp,
- u32 (*first_li)(struct sock *sk),
- struct sock *sk);
-extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
- const struct sk_buff *skb);
-extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
+extern bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
+ struct tfrc_loss_hist *lh,
+ struct sk_buff *skb, const u64 ndp,
+ u32 (*first_li)(struct sock *sk),
+ struct sock *sk);
+extern void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
+ const struct sk_buff *skb);
+extern int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk);
extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
#endif /* _DCCP_PKT_HIST_ */
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index ed9857527ac..ede12f53de5 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -48,6 +48,21 @@ static inline u32 scaled_div32(u64 a, u64 b)
}
/**
+ * tfrc_scaled_sqrt - Compute scaled integer sqrt(x) for 0 < x < 2^22-1
+ * Uses scaling to improve accuracy of the integer approximation of sqrt(). The
+ * scaling factor of 2^10 limits the maximum @sample to 4e6; this is okay for
+ * clamped RTT samples (dccp_sample_rtt).
+ * Should best be used for expressions of type sqrt(x)/sqrt(y), since then the
+ * scaling factor is neutralised. For this purpose, it avoids returning zero.
+ */
+static inline u16 tfrc_scaled_sqrt(const u32 sample)
+{
+ const unsigned long non_zero_sample = sample ? : 1;
+
+ return int_sqrt(non_zero_sample << 10);
+}
+
+/**
* tfrc_ewma - Exponentially weighted moving average
* @weight: Weight to be used as damping factor, in units of 1/10
*/
@@ -58,6 +73,7 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
+extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate);
extern int tfrc_tx_packet_history_init(void);
extern void tfrc_tx_packet_history_exit(void);
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 2f20a29cffe..38239c4d5e1 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -632,8 +632,16 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
if (p <= TFRC_CALC_X_SPLIT) { /* 0.0000 < p <= 0.05 */
if (p < TFRC_SMALLEST_P) { /* 0.0000 < p < 0.0001 */
- DCCP_WARN("Value of p (%d) below resolution. "
- "Substituting %d\n", p, TFRC_SMALLEST_P);
+ /*
+ * In the congestion-avoidance phase p decays towards 0
+ * when there are no further losses, so this case is
+ * natural. Truncating to p_min = 0.01% means that the
+ * maximum achievable throughput is limited to about
+ * X_calc_max = 122.4 * s/RTT (see RFC 3448, 3.1); e.g.
+ * with s=1500 bytes, RTT=0.01 s: X_calc_max = 147 Mbps.
+ */
+ tfrc_pr_debug("Value of p (%d) below resolution. "
+ "Substituting %d\n", p, TFRC_SMALLEST_P);
index = 0;
} else /* 0.0001 <= p <= 0.05 */
index = p/TFRC_SMALLEST_P - 1;
@@ -658,7 +666,6 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
result = scaled_div(s, R);
return scaled_div32(result, f);
}
-
EXPORT_SYMBOL_GPL(tfrc_calc_x);
/**
@@ -693,5 +700,19 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
index = tfrc_binsearch(fvalue, 0);
return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
}
-
EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
+
+/**
+ * tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100%
+ * When @loss_event_rate is large, there is a chance that p is truncated to 0.
+ * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0.
+ */
+u32 tfrc_invert_loss_event_rate(u32 loss_event_rate)
+{
+ if (loss_event_rate == UINT_MAX) /* see RFC 4342, 8.5 */
+ return 0;
+ if (unlikely(loss_event_rate == 0)) /* map 1/0 into 100% */
+ return 1000000;
+ return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P);
+}
+EXPORT_SYMBOL_GPL(tfrc_invert_loss_event_rate);