diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-11 19:40:14 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-11 19:40:14 -0700 |
commit | 038a5008b2f395c85e6e71d6ddf3c684e7c405b0 (patch) | |
tree | 4735eab577e97e5a22c3141e3f60071c8065585e /net/dccp | |
parent | dd6d1844af33acb4edd0a40b1770d091a22c94be (diff) | |
parent | 266918303226cceac7eca38ced30f15f277bd89c (diff) |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
* 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6: (867 commits)
[SKY2]: status polling loop (post merge)
[NET]: Fix NAPI completion handling in some drivers.
[TCP]: Limit processing lost_retrans loop to work-to-do cases
[TCP]: Fix lost_retrans loop vs fastpath problems
[TCP]: No need to re-count fackets_out/sacked_out at RTO
[TCP]: Extract tcp_match_queue_to_sack from sacktag code
[TCP]: Kill almost unused variable pcount from sacktag
[TCP]: Fix mark_head_lost to ignore R-bit when trying to mark L
[TCP]: Add bytes_acked (ABC) clearing to FRTO too
[IPv6]: Update setsockopt(IPV6_MULTICAST_IF) to support RFC 3493, try2
[NETFILTER]: x_tables: add missing ip6t_modulename aliases
[NETFILTER]: nf_conntrack_tcp: fix connection reopening
[QETH]: fix qeth_main.c
[NETLINK]: fib_frontend build fixes
[IPv6]: Export userland ND options through netlink (RDNSS support)
[9P]: build fix with !CONFIG_SYSCTL
[NET]: Fix dev_put() and dev_hold() comments
[NET]: make netlink user -> kernel interface synchronious
[NET]: unify netlink kernel socket recognition
[NET]: cleanup 3rd argument in netlink_sendskb
...
Fix up conflicts manually in Documentation/feature-removal-schedule.txt
and my new least favourite crap, the "mod_devicetable" support in the
files include/linux/mod_devicetable.h and scripts/mod/file2alias.c.
(The latter files seem to be explicitly _designed_ to get conflicts when
different subsystems work with them - they have an absolutely horrid
lack of subsystem separation!)
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'net/dccp')
-rw-r--r-- | net/dccp/ackvec.c | 16 | ||||
-rw-r--r-- | net/dccp/ackvec.h | 4 | ||||
-rw-r--r-- | net/dccp/ccids/ccid2.c | 62 | ||||
-rw-r--r-- | net/dccp/ccids/ccid2.h | 2 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.c | 109 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.h | 23 | ||||
-rw-r--r-- | net/dccp/ccids/lib/loss_interval.c | 11 | ||||
-rw-r--r-- | net/dccp/ccids/lib/loss_interval.h | 4 | ||||
-rw-r--r-- | net/dccp/ccids/lib/packet_history.h | 9 | ||||
-rw-r--r-- | net/dccp/dccp.h | 65 | ||||
-rw-r--r-- | net/dccp/input.c | 120 | ||||
-rw-r--r-- | net/dccp/ipv4.c | 51 | ||||
-rw-r--r-- | net/dccp/ipv6.c | 46 | ||||
-rw-r--r-- | net/dccp/minisocks.c | 11 | ||||
-rw-r--r-- | net/dccp/options.c | 47 | ||||
-rw-r--r-- | net/dccp/output.c | 151 | ||||
-rw-r--r-- | net/dccp/probe.c | 7 | ||||
-rw-r--r-- | net/dccp/proto.c | 12 | ||||
-rw-r--r-- | net/dccp/sysctl.c | 10 | ||||
-rw-r--r-- | net/dccp/timer.c | 21 |
20 files changed, 339 insertions, 442 deletions
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 7ac775f9a64..83378f379f7 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -69,21 +69,20 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) struct dccp_sock *dp = dccp_sk(sk); struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; /* Figure out how many options do we need to represent the ackvec */ - const u16 nr_opts = (av->dccpav_vec_len + - DCCP_MAX_ACKVEC_OPT_LEN - 1) / - DCCP_MAX_ACKVEC_OPT_LEN; + const u16 nr_opts = DIV_ROUND_UP(av->dccpav_vec_len, + DCCP_MAX_ACKVEC_OPT_LEN); u16 len = av->dccpav_vec_len + 2 * nr_opts, i; - struct timeval now; u32 elapsed_time; const unsigned char *tail, *from; unsigned char *to; struct dccp_ackvec_record *avr; + suseconds_t delta; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) return -1; - dccp_timestamp(sk, &now); - elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10; + delta = ktime_us_delta(ktime_get_real(), av->dccpav_time); + elapsed_time = delta / 10; if (elapsed_time != 0 && dccp_insert_option_elapsed_time(sk, skb, elapsed_time)) @@ -159,8 +158,7 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1; av->dccpav_buf_ackno = UINT48_MAX + 1; av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; - av->dccpav_time.tv_sec = 0; - av->dccpav_time.tv_usec = 0; + av->dccpav_time = ktime_set(0, 0); av->dccpav_vec_len = 0; INIT_LIST_HEAD(&av->dccpav_records); } @@ -321,7 +319,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, } av->dccpav_buf_ackno = ackno; - dccp_timestamp(sk, &av->dccpav_time); + av->dccpav_time = ktime_get_real(); out: return 0; diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 96504a3b16e..9ef0737043e 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -12,8 +12,8 @@ */ #include <linux/compiler.h> +#include <linux/ktime.h> #include <linux/list.h> -#include <linux/time.h> #include <linux/types.h> /* Read about the ECN nonce to 
see why it is 253 */ @@ -52,7 +52,7 @@ struct dccp_ackvec { u64 dccpav_buf_ackno; struct list_head dccpav_records; - struct timeval dccpav_time; + ktime_t dccpav_time; u16 dccpav_buf_head; u16 dccpav_vec_len; u8 dccpav_buf_nonce; diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index d29b88fe723..426008e3b7e 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -59,7 +59,8 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) pipe++; /* packets are sent sequentially */ - BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq); + BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq, + prev->ccid2s_seq ) >= 0); BUG_ON(time_before(seqp->ccid2s_sent, prev->ccid2s_sent)); @@ -83,8 +84,7 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) #define ccid2_hc_tx_check_sanity(hctx) #endif -static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, - gfp_t gfp) +static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx) { struct ccid2_seq *seqp; int i; @@ -95,16 +95,16 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, return -ENOMEM; /* allocate buffer and initialize linked list */ - seqp = kmalloc(sizeof(*seqp) * num, gfp); + seqp = kmalloc(CCID2_SEQBUF_LEN * sizeof(struct ccid2_seq), gfp_any()); if (seqp == NULL) return -ENOMEM; - for (i = 0; i < (num - 1); i++) { + for (i = 0; i < (CCID2_SEQBUF_LEN - 1); i++) { seqp[i].ccid2s_next = &seqp[i + 1]; seqp[i + 1].ccid2s_prev = &seqp[i]; } - seqp[num - 1].ccid2s_next = seqp; - seqp->ccid2s_prev = &seqp[num - 1]; + seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = seqp; + seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; /* This is the first allocation. Initiate the head and tail. 
*/ if (hctx->ccid2hctx_seqbufc == 0) @@ -114,8 +114,8 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, hctx->ccid2hctx_seqh->ccid2s_next = seqp; seqp->ccid2s_prev = hctx->ccid2hctx_seqh; - hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[num - 1]; - seqp[num - 1].ccid2s_next = hctx->ccid2hctx_seqt; + hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; + seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->ccid2hctx_seqt; } /* store the original pointer to the buffer so we can free it */ @@ -127,19 +127,7 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) { - struct ccid2_hc_tx_sock *hctx; - - switch (DCCP_SKB_CB(skb)->dccpd_type) { - case 0: /* XXX data packets from userland come through like this */ - case DCCP_PKT_DATA: - case DCCP_PKT_DATAACK: - break; - /* No congestion control on other packets */ - default: - return 0; - } - - hctx = ccid2_hc_tx_sk(sk); + struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); ccid2_pr_debug("pipe=%d cwnd=%d\n", hctx->ccid2hctx_pipe, hctx->ccid2hctx_cwnd); @@ -180,16 +168,11 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, int val) dp->dccps_l_ack_ratio = val; } -static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, int val) +static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, u32 val) { - if (val == 0) - val = 1; - /* XXX do we need to change ack ratio? */ - ccid2_pr_debug("change cwnd to %d\n", val); - - BUG_ON(val < 1); - hctx->ccid2hctx_cwnd = val; + hctx->ccid2hctx_cwnd = val? 
: 1; + ccid2_pr_debug("changed cwnd to %u\n", hctx->ccid2hctx_cwnd); } static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val) @@ -295,12 +278,11 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) next = hctx->ccid2hctx_seqh->ccid2s_next; /* check if we need to alloc more space */ if (next == hctx->ccid2hctx_seqt) { - int rc; - - ccid2_pr_debug("allocating more space in history\n"); - rc = ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, gfp_any()); - BUG_ON(rc); /* XXX what do we do? */ - + if (ccid2_hc_tx_alloc_seq(hctx)) { + DCCP_CRIT("packet history - out of memory!"); + /* FIXME: find a more graceful way to bail out */ + return; + } next = hctx->ccid2hctx_seqh->ccid2s_next; BUG_ON(next == hctx->ccid2hctx_seqt); } @@ -581,8 +563,8 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) hctx->ccid2hctx_rpseq = seqno; } else { /* check if packet is consecutive */ - if ((hctx->ccid2hctx_rpseq + 1) == seqno) - hctx->ccid2hctx_rpseq++; + if (dccp_delta_seqno(hctx->ccid2hctx_rpseq, seqno) == 1) + hctx->ccid2hctx_rpseq = seqno; /* it's a later packet */ else if (after48(seqno, hctx->ccid2hctx_rpseq)) { hctx->ccid2hctx_rpdupack++; @@ -771,7 +753,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hctx->ccid2hctx_seqbufc = 0; /* XXX init ~ to window size... 
*/ - if (ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_ATOMIC) != 0) + if (ccid2_hc_tx_alloc_seq(hctx)) return -ENOMEM; hctx->ccid2hctx_sent = 0; @@ -835,7 +817,7 @@ static struct ccid_operations ccid2 = { }; #ifdef CONFIG_IP_DCCP_CCID2_DEBUG -module_param(ccid2_debug, int, 0444); +module_param(ccid2_debug, bool, 0444); MODULE_PARM_DESC(ccid2_debug, "Enable debug messages"); #endif diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index ebd79499c85..d9daa534c9b 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -50,7 +50,7 @@ struct ccid2_seq { * @ccid2hctx_rpdupack - dupacks since rpseq */ struct ccid2_hc_tx_sock { - int ccid2hctx_cwnd; + u32 ccid2hctx_cwnd; int ccid2hctx_ssacks; int ccid2hctx_acks; unsigned int ccid2hctx_ssthresh; diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index e91c2b9dc27..25772c32617 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -113,27 +113,24 @@ static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx) hctx->ccid3hctx_s, (unsigned)(hctx->ccid3hctx_x >> 6)); } -/* - * Update X by - * If (p > 0) - * X_calc = calcX(s, R, p); - * X = max(min(X_calc, 2 * X_recv), s / t_mbi); - * Else - * If (now - tld >= R) - * X = max(min(2 * X, 2 * X_recv), s / R); - * tld = now; + +/** + * ccid3_hc_tx_update_x - Update allowed sending rate X + * @stamp: most recent time if available - can be left NULL. + * This function tracks draft rfc3448bis, check there for latest details. * * Note: X and X_recv are both stored in units of 64 * bytes/second, to support * fine-grained resolution of sending rates. This requires scaling by 2^6 * throughout the code. Only X_calc is unscaled (in bytes/second). 
* */ -static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now) +static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); __u64 min_rate = 2 * hctx->ccid3hctx_x_recv; const __u64 old_x = hctx->ccid3hctx_x; + ktime_t now = stamp? *stamp : ktime_get_real(); /* * Handle IDLE periods: do not reduce below RFC3390 initial sending rate @@ -153,14 +150,14 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now) (((__u64)hctx->ccid3hctx_s) << 6) / TFRC_T_MBI); - } else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) - - (suseconds_t)hctx->ccid3hctx_rtt >= 0) { + } else if (ktime_us_delta(now, hctx->ccid3hctx_t_ld) + - (s64)hctx->ccid3hctx_rtt >= 0) { hctx->ccid3hctx_x = max(min(2 * hctx->ccid3hctx_x, min_rate), scaled_div(((__u64)hctx->ccid3hctx_s) << 6, hctx->ccid3hctx_rtt)); - hctx->ccid3hctx_t_ld = *now; + hctx->ccid3hctx_t_ld = now; } if (hctx->ccid3hctx_x != old_x) { @@ -214,7 +211,6 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) { struct sock *sk = (struct sock *)data; struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - struct timeval now; unsigned long t_nfb = USEC_PER_SEC / 5; bh_lock_sock(sk); @@ -265,15 +261,12 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) max(hctx->ccid3hctx_x_recv / 2, (((__u64)hctx->ccid3hctx_s) << 6) / (2 * TFRC_T_MBI)); - - if (hctx->ccid3hctx_p == 0) - dccp_timestamp(sk, &now); } else { hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc; hctx->ccid3hctx_x_recv <<= 4; } /* Now recalculate X [RFC 3448, 4.3, step (4)] */ - ccid3_hc_tx_update_x(sk, &now); + ccid3_hc_tx_update_x(sk, NULL); /* * Schedule no feedback timer to expire in * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) @@ -309,8 +302,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) ktime_t now = ktime_get_real(); s64 delay; - BUG_ON(hctx == NULL); - /* * This function is called only for Data and DataAck packets. 
Sending * zero-sized Data(Ack)s is theoretically possible, but for congestion @@ -341,7 +332,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt); hctx->ccid3hctx_rtt = dp->dccps_syn_rtt; hctx->ccid3hctx_x = rfc3390_initial_rate(sk); - hctx->ccid3hctx_t_ld = ktime_to_timeval(now); + hctx->ccid3hctx_t_ld = now; } else { /* Sender does not have RTT sample: X = MSS/second */ hctx->ccid3hctx_x = dp->dccps_mss_cache; @@ -388,11 +379,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - struct timeval now; struct dccp_tx_hist_entry *packet; - BUG_ON(hctx == NULL); - ccid3_hc_tx_update_s(hctx, len); packet = dccp_tx_hist_entry_new(ccid3_tx_hist, GFP_ATOMIC); @@ -402,8 +390,7 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, } dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, packet); - dccp_timestamp(sk, &now); - packet->dccphtx_tstamp = now; + packet->dccphtx_tstamp = ktime_get_real(); packet->dccphtx_seqno = dccp_sk(sk)->dccps_gss; packet->dccphtx_rtt = hctx->ccid3hctx_rtt; packet->dccphtx_sent = 1; @@ -414,12 +401,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; struct dccp_tx_hist_entry *packet; - struct timeval now; + ktime_t now; unsigned long t_nfb; u32 pinv, r_sample; - BUG_ON(hctx == NULL); - /* we are only interested in ACKs */ if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) @@ -452,13 +437,12 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) else /* can not exceed 100% */ hctx->ccid3hctx_p = 1000000 / pinv; - dccp_timestamp(sk, &now); - + now = ktime_get_real(); /* * Calculate new round trip sample as per [RFC 3448, 4.3] by * R_sample = (now - t_recvdata) - t_elapsed */ - r_sample 
= dccp_sample_rtt(sk, &now, &packet->dccphtx_tstamp); + r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, packet->dccphtx_tstamp)); /* * Update RTT estimate by @@ -561,8 +545,6 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; - BUG_ON(hctx == NULL); - opt_recv = &hctx->ccid3hctx_options_received; if (opt_recv->ccid3or_seqno != dp->dccps_gsr) { @@ -636,8 +618,6 @@ static void ccid3_hc_tx_exit(struct sock *sk) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - BUG_ON(hctx == NULL); - ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); @@ -647,14 +627,13 @@ static void ccid3_hc_tx_exit(struct sock *sk) static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) { - const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + struct ccid3_hc_tx_sock *hctx; /* Listen socks doesn't have a private CCID block */ if (sk->sk_state == DCCP_LISTEN) return; - BUG_ON(hctx == NULL); - + hctx = ccid3_hc_tx_sk(sk); info->tcpi_rto = hctx->ccid3hctx_t_rto; info->tcpi_rtt = hctx->ccid3hctx_rtt; } @@ -662,13 +641,14 @@ static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, u32 __user *optval, int __user *optlen) { - const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + const struct ccid3_hc_tx_sock *hctx; const void *val; /* Listen socks doesn't have a private CCID block */ if (sk->sk_state == DCCP_LISTEN) return -EINVAL; + hctx = ccid3_hc_tx_sk(sk); switch (optname) { case DCCP_SOCKOPT_CCID_TX_INFO: if (len < sizeof(hctx->ccid3hctx_tfrc)) @@ -729,20 +709,20 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct dccp_rx_hist_entry *packet; - struct timeval now; + ktime_t now; suseconds_t delta; 
ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk); - dccp_timestamp(sk, &now); + now = ktime_get_real(); switch (hcrx->ccid3hcrx_state) { case TFRC_RSTATE_NO_DATA: hcrx->ccid3hcrx_x_recv = 0; break; case TFRC_RSTATE_DATA: - delta = timeval_delta(&now, - &hcrx->ccid3hcrx_tstamp_last_feedback); + delta = ktime_us_delta(now, + hcrx->ccid3hcrx_tstamp_last_feedback); DCCP_BUG_ON(delta < 0); hcrx->ccid3hcrx_x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); @@ -764,7 +744,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) hcrx->ccid3hcrx_bytes_recv = 0; /* Elapsed time information [RFC 4340, 13.2] in units of 10 * usecs */ - delta = timeval_delta(&now, &packet->dccphrx_tstamp); + delta = ktime_us_delta(now, packet->dccphrx_tstamp); DCCP_BUG_ON(delta < 0); hcrx->ccid3hcrx_elapsed_time = delta / 10; @@ -782,14 +762,13 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) { - const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + const struct ccid3_hc_rx_sock *hcrx; __be32 x_recv, pinv; - BUG_ON(hcrx == NULL); - if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) return 0; + hcrx = ccid3_hc_rx_sk(sk); DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter; if (dccp_packet_without_ack(skb)) @@ -839,7 +818,7 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk, dccp_li_update_li(sk, &hcrx->ccid3hcrx_li_hist, &hcrx->ccid3hcrx_hist, - &hcrx->ccid3hcrx_tstamp_last_feedback, + hcrx->ccid3hcrx_tstamp_last_feedback, hcrx->ccid3hcrx_s, hcrx->ccid3hcrx_bytes_recv, hcrx->ccid3hcrx_x_recv, @@ -876,11 +855,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); const struct dccp_options_received *opt_recv; struct dccp_rx_hist_entry *packet; - struct timeval now; u32 p_prev, r_sample, rtt_prev; int loss, payload_size; - - BUG_ON(hcrx == NULL); + ktime_t now; opt_recv = 
&dccp_sk(sk)->dccps_options_received; @@ -891,9 +868,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_DATAACK: if (opt_recv->dccpor_timestamp_echo == 0) break; + r_sample = dccp_timestamp() - opt_recv->dccpor_timestamp_echo; rtt_prev = hcrx->ccid3hcrx_rtt; - dccp_timestamp(sk, &now); - r_sample = dccp_sample_rtt(sk, &now, NULL); + r_sample = dccp_sample_rtt(sk, 10 * r_sample); if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) hcrx->ccid3hcrx_rtt = r_sample; @@ -912,7 +889,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) return; } - packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp, + packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp, skb, GFP_ATOMIC); if (unlikely(packet == NULL)) { DCCP_WARN("%s(%p), Not enough mem to add rx packet " @@ -941,9 +918,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) if (loss) break; - dccp_timestamp(sk, &now); - if ((timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) - - (suseconds_t)hcrx->ccid3hcrx_rtt) >= 0) { + now = ktime_get_real(); + if ((ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_ack) - + (s64)hcrx->ccid3hcrx_rtt) >= 0) { hcrx->ccid3hcrx_tstamp_last_ack = now; ccid3_hc_rx_send_feedback(sk); } @@ -984,8 +961,8 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); - dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack); - hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack; + hcrx->ccid3hcrx_tstamp_last_feedback = + hcrx->ccid3hcrx_tstamp_last_ack = ktime_get_real(); hcrx->ccid3hcrx_s = 0; hcrx->ccid3hcrx_rtt = 0; return 0; @@ -995,8 +972,6 @@ static void ccid3_hc_rx_exit(struct sock *sk) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - BUG_ON(hcrx == NULL); - ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); /* Empty 
packet history */ @@ -1008,14 +983,13 @@ static void ccid3_hc_rx_exit(struct sock *sk) static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) { - const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + const struct ccid3_hc_rx_sock *hcrx; /* Listen socks doesn't have a private CCID block */ if (sk->sk_state == DCCP_LISTEN) return; - BUG_ON(hcrx == NULL); - + hcrx = ccid3_hc_rx_sk(sk); info->tcpi_ca_state = hcrx->ccid3hcrx_state; info->tcpi_options |= TCPI_OPT_TIMESTAMPS; info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; @@ -1024,13 +998,14 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, u32 __user *optval, int __user *optlen) { - const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + const struct ccid3_hc_rx_sock *hcrx; const void *val; /* Listen socks doesn't have a private CCID block */ if (sk->sk_state == DCCP_LISTEN) return -EINVAL; + hcrx = ccid3_hc_rx_sk(sk); switch (optname) { case DCCP_SOCKOPT_CCID_RX_INFO: if (len < sizeof(hcrx->ccid3hcrx_tfrc)) @@ -1071,7 +1046,7 @@ static struct ccid_operations ccid3 = { }; #ifdef CONFIG_IP_DCCP_CCID3_DEBUG -module_param(ccid3_debug, int, 0444); +module_param(ccid3_debug, bool, 0444); MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); #endif diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 51d4b804e33..0cdc982cfe4 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -38,7 +38,6 @@ #include <linux/ktime.h> #include <linux/list.h> -#include <linux/time.h> #include <linux/types.h> #include <linux/tfrc.h> #include "../ccid.h" @@ -111,13 +110,20 @@ struct ccid3_hc_tx_sock { u8 ccid3hctx_idle; ktime_t ccid3hctx_t_last_win_count; struct timer_list ccid3hctx_no_feedback_timer; - struct timeval ccid3hctx_t_ld; + ktime_t ccid3hctx_t_ld; ktime_t ccid3hctx_t_nom; u32 ccid3hctx_delta; struct list_head ccid3hctx_hist; struct ccid3_options_received 
ccid3hctx_options_received; }; +static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) +{ + struct ccid3_hc_tx_sock *hctx = ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid); + BUG_ON(hctx == NULL); + return hctx; +} + /* TFRC receiver states */ enum ccid3_hc_rx_states { TFRC_RSTATE_NO_DATA = 1, @@ -153,8 +159,8 @@ struct ccid3_hc_rx_sock { ccid3hcrx_ccval_last_counter:4; enum ccid3_hc_rx_states ccid3hcrx_state:8; u32 ccid3hcrx_bytes_recv; - struct timeval ccid3hcrx_tstamp_last_feedback; - struct timeval ccid3hcrx_tstamp_last_ack; + ktime_t ccid3hcrx_tstamp_last_feedback; + ktime_t ccid3hcrx_tstamp_last_ack; struct list_head ccid3hcrx_hist; struct list_head ccid3hcrx_li_hist; u16 ccid3hcrx_s; @@ -162,14 +168,11 @@ struct ccid3_hc_rx_sock { u32 ccid3hcrx_elapsed_time; }; -static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) -{ - return ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid); -} - static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk) { - return ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid); + struct ccid3_hc_rx_sock *hcrx = ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid); + BUG_ON(hcrx == NULL); + return hcrx; } #endif /* _DCCP_CCID3_H_ */ diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 174d3f13d93..40ad428a27f 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -125,14 +125,14 @@ static int dccp_li_hist_interval_new(struct list_head *list, * returns estimated loss interval in usecs */ static u32 dccp_li_calc_first_li(struct sock *sk, struct list_head *hist_list, - struct timeval *last_feedback, + ktime_t last_feedback, u16 s, u32 bytes_recv, u32 previous_x_recv) { struct dccp_rx_hist_entry *entry, *next, *tail = NULL; u32 x_recv, p; suseconds_t rtt, delta; - struct timeval tstamp = { 0, 0 }; + ktime_t tstamp = ktime_set(0, 0); int interval = 0; int win_count = 0; int step = 0; @@ -176,7 +176,7 @@ found: return ~0; } - delta = 
timeval_delta(&tstamp, &tail->dccphrx_tstamp); + delta = ktime_us_delta(tstamp, tail->dccphrx_tstamp); DCCP_BUG_ON(delta < 0); rtt = delta * 4 / interval; @@ -196,8 +196,7 @@ found: return ~0; } - dccp_timestamp(sk, &tstamp); - delta = timeval_delta(&tstamp, last_feedback); + delta = ktime_us_delta(ktime_get_real(), last_feedback); DCCP_BUG_ON(delta <= 0); x_recv = scaled_div32(bytes_recv, delta); @@ -226,7 +225,7 @@ found: void dccp_li_update_li(struct sock *sk, struct list_head *li_hist_list, struct list_head *hist_list, - struct timeval *last_feedback, u16 s, u32 bytes_recv, + ktime_t last_feedback, u16 s, u32 bytes_recv, u32 previous_x_recv, u64 seq_loss, u8 win_loss) { struct dccp_li_hist_entry *head; diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 906c806d6d9..27bee92dae1 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -13,8 +13,8 @@ * any later version. */ +#include <linux/ktime.h> #include <linux/list.h> -#include <linux/time.h> extern void dccp_li_hist_purge(struct list_head *list); @@ -23,7 +23,7 @@ extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); extern void dccp_li_update_li(struct sock *sk, struct list_head *li_hist_list, struct list_head *hist_list, - struct timeval *last_feedback, u16 s, + ktime_t last_feedback, u16 s, u32 bytes_recv, u32 previous_x_recv, u64 seq_loss, u8 win_loss); #endif /* _DCCP_LI_HIST_ */ diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 60d00f01539..032bb61c6e3 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -37,9 +37,9 @@ #ifndef _DCCP_PKT_HIST_ #define _DCCP_PKT_HIST_ +#include <linux/ktime.h> #include <linux/list.h> #include <linux/slab.h> -#include <linux/time.h> #include "../../dccp.h" @@ -57,7 +57,7 @@ struct dccp_tx_hist_entry { u64 dccphtx_seqno:48, dccphtx_sent:1; u32 dccphtx_rtt; - struct timeval dccphtx_tstamp; + ktime_t 
dccphtx_tstamp; }; struct dccp_tx_hist { @@ -124,7 +124,7 @@ struct dccp_rx_hist_entry { dccphrx_ccval:4, dccphrx_type:4; u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ - struct timeval dccphrx_tstamp; + ktime_t dccphrx_tstamp; }; struct dccp_rx_hist { @@ -136,7 +136,6 @@ extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist); static inline struct dccp_rx_hist_entry * dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, - const struct sock *sk, const u32 ndp, const struct sk_buff *skb, const gfp_t prio) @@ -151,7 +150,7 @@ static inline struct dccp_rx_hist_entry * entry->dccphrx_ccval = dh->dccph_ccval; entry->dccphrx_type = dh->dccph_type; entry->dccphrx_ndp = ndp; - dccp_timestamp(sk, &entry->dccphrx_tstamp); + entry->dccphrx_tstamp = ktime_get_real(); } return entry; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index e2d74cd7eee..ee97950d77d 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -13,6 +13,7 @@ */ #include <linux/dccp.h> +#include <linux/ktime.h> #include <net/snmp.h> #include <net/sock.h> #include <net/tcp.h> @@ -91,6 +92,7 @@ extern int sysctl_dccp_feat_ack_ratio; extern int sysctl_dccp_feat_send_ack_vector; extern int sysctl_dccp_feat_send_ndp_count; extern int sysctl_dccp_tx_qlen; +extern int sysctl_dccp_sync_ratelimit; /* * 48-bit sequence number arithmetic (signed and unsigned) @@ -208,7 +210,6 @@ extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); extern void dccp_send_ack(struct sock *sk); -extern void dccp_send_delayed_ack(struct sock *sk); extern void dccp_reqsk_send_ack(struct sk_buff *sk, struct request_sock *rsk); extern void dccp_send_sync(struct sock *sk, const u64 seq, @@ -293,11 +294,12 @@ extern unsigned int dccp_poll(struct file *file, struct socket *sock, extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); +extern struct sk_buff *dccp_ctl_make_reset(struct socket *ctl, + struct sk_buff 
*skb); extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code); extern void dccp_send_close(struct sock *sk, const int active); extern int dccp_invalid_packet(struct sk_buff *skb); -extern u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv, - struct timeval *t_history); +extern u32 dccp_sample_rtt(struct sock *sk, long delta); static inline int dccp_bad_service_code(const struct sock *sk, const __be32 service) @@ -309,10 +311,22 @@ static inline int dccp_bad_service_code(const struct sock *sk, return !dccp_list_has_service(dp->dccps_service_list, service); } +/** + * dccp_skb_cb - DCCP per-packet control information + * @dccpd_type: one of %dccp_pkt_type (or unknown) + * @dccpd_ccval: CCVal field (5.1), see e.g. RFC 4342, 8.1 + * @dccpd_reset_code: one of %dccp_reset_codes + * @dccpd_reset_data: Data1..3 fields (depend on @dccpd_reset_code) + * @dccpd_opt_len: total length of all options (5.8) in the packet + * @dccpd_seq: sequence number + * @dccpd_ack_seq: acknowledgment number subheader field value + * This is used for transmission as well as for reception. 
+ */ struct dccp_skb_cb { __u8 dccpd_type:4; __u8 dccpd_ccval:4; - __u8 dccpd_reset_code; + __u8 dccpd_reset_code, + dccpd_reset_data[3]; __u16 dccpd_opt_len; __u64 dccpd_seq; __u64 dccpd_ack_seq; @@ -395,53 +409,14 @@ extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb); extern int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb, u32 elapsed_time); +extern u32 dccp_timestamp(void); +extern void dccp_timestamping_init(void); extern int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb); extern int dccp_insert_option(struct sock *sk, struct sk_buff *skb, unsigned char option, const void *value, unsigned char len); -extern void dccp_timestamp(const struct sock *sk, struct timeval *tv); - -static inline suseconds_t timeval_usecs(const struct timeval *tv) -{ - return tv->tv_sec * USEC_PER_SEC + tv->tv_usec; -} - -static inline suseconds_t timeval_delta(const struct timeval *large, - const struct timeval *small) -{ - time_t secs = large->tv_sec - small->tv_sec; - suseconds_t usecs = large->tv_usec - small->tv_usec; - - if (usecs < 0) { - secs--; - usecs += USEC_PER_SEC; - } - return secs * USEC_PER_SEC + usecs; -} - -static inline void timeval_add_usecs(struct timeval *tv, - const suseconds_t usecs) -{ - tv->tv_usec += usecs; - while (tv->tv_usec >= USEC_PER_SEC) { - tv->tv_sec++; - tv->tv_usec -= USEC_PER_SEC; - } -} - -static inline void timeval_sub_usecs(struct timeval *tv, - const suseconds_t usecs) -{ - tv->tv_usec -= usecs; - while (tv->tv_usec < 0) { - tv->tv_sec--; - tv->tv_usec += USEC_PER_SEC; - } - DCCP_BUG_ON(tv->tv_sec < 0); -} - #ifdef CONFIG_SYSCTL extern int dccp_sysctl_init(void); extern void dccp_sysctl_exit(void); diff --git a/net/dccp/input.c b/net/dccp/input.c index da6ec185ed5..19d7e1dbd87 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -68,7 +68,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); struct dccp_sock *dp = 
dccp_sk(sk); - u64 lswl, lawl; + u64 lswl, lawl, seqno = DCCP_SKB_CB(skb)->dccpd_seq, + ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; /* * Step 5: Prepare sequence numbers for Sync @@ -84,11 +85,9 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) */ if (dh->dccph_type == DCCP_PKT_SYNC || dh->dccph_type == DCCP_PKT_SYNCACK) { - if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, - dp->dccps_awl, dp->dccps_awh) && - dccp_delta_seqno(dp->dccps_swl, - DCCP_SKB_CB(skb)->dccpd_seq) >= 0) - dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + if (between48(ackno, dp->dccps_awl, dp->dccps_awh) && + dccp_delta_seqno(dp->dccps_swl, seqno) >= 0) + dccp_update_gsr(sk, seqno); else return -1; } @@ -103,9 +102,6 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) * Update S.GSR, S.SWL, S.SWH * If P.type != Sync, * Update S.GAR - * Otherwise, - * Send Sync packet acknowledging P.seqno - * Drop packet and return */ lswl = dp->dccps_swl; lawl = dp->dccps_awl; @@ -113,35 +109,52 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) if (dh->dccph_type == DCCP_PKT_CLOSEREQ || dh->dccph_type == DCCP_PKT_CLOSE || dh->dccph_type == DCCP_PKT_RESET) { - lswl = dp->dccps_gsr; - dccp_inc_seqno(&lswl); + lswl = ADD48(dp->dccps_gsr, 1); lawl = dp->dccps_gar; } - if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) && - (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ || - between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, - lawl, dp->dccps_awh))) { - dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + if (between48(seqno, lswl, dp->dccps_swh) && + (ackno == DCCP_PKT_WITHOUT_ACK_SEQ || + between48(ackno, lawl, dp->dccps_awh))) { + dccp_update_gsr(sk, seqno); if (dh->dccph_type != DCCP_PKT_SYNC && - (DCCP_SKB_CB(skb)->dccpd_ack_seq != - DCCP_PKT_WITHOUT_ACK_SEQ)) - dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; + (ackno != DCCP_PKT_WITHOUT_ACK_SEQ)) + dp->dccps_gar = ackno; } else { + unsigned long now = jiffies; + /* + * Step 6: Check 
sequence numbers + * Otherwise, + * If P.type == Reset, + * Send Sync packet acknowledging S.GSR + * Otherwise, + * Send Sync packet acknowledging P.seqno + * Drop packet and return + * + * These Syncs are rate-limited as per RFC 4340, 7.5.4: + * at most 1 / (dccp_sync_rate_limit * HZ) Syncs per second. + */ + if (time_before(now, (dp->dccps_rate_last + + sysctl_dccp_sync_ratelimit))) + return 0; + DCCP_WARN("DCCP: Step 6 failed for %s packet, " "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " "sending SYNC...\n", dccp_packet_name(dh->dccph_type), - (unsigned long long) lswl, - (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq, + (unsigned long long) lswl, (unsigned long long) seqno, (unsigned long long) dp->dccps_swh, - (DCCP_SKB_CB(skb)->dccpd_ack_seq == - DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists", - (unsigned long long) lawl, - (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq, + (ackno == DCCP_PKT_WITHOUT_ACK_SEQ) ? 
"doesn't exist" + : "exists", + (unsigned long long) lawl, (unsigned long long) ackno, (unsigned long long) dp->dccps_awh); - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); + + dp->dccps_rate_last = now; + + if (dh->dccph_type == DCCP_PKT_RESET) + seqno = dp->dccps_gsr; + dccp_send_sync(sk, seqno, DCCP_PKT_SYNC); return -1; } @@ -280,6 +293,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, if (dh->dccph_type == DCCP_PKT_RESPONSE) { const struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); + long tstamp = dccp_timestamp(); /* Stop the REQUEST timer */ inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); @@ -300,13 +314,10 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, if (dccp_parse_options(sk, skb)) goto out_invalid_packet; - /* Obtain RTT sample from SYN exchange (used by CCID 3) */ - if (dp->dccps_options_received.dccpor_timestamp_echo) { - struct timeval now; - - dccp_timestamp(sk, &now); - dp->dccps_syn_rtt = dccp_sample_rtt(sk, &now, NULL); - } + /* Obtain usec RTT sample from SYN exchange (used by CCID 3) */ + if (likely(dp->dccps_options_received.dccpor_timestamp_echo)) + dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp - + dp->dccps_options_received.dccpor_timestamp_echo)); if (dccp_msk(sk)->dccpms_send_ack_vector && dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, @@ -540,11 +551,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 0; } - if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { - dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK); - goto discard; - } - switch (sk->sk_state) { case DCCP_CLOSED: dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; @@ -575,6 +581,9 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, sk_wake_async(sk, 0, POLL_OUT); break; } + } else if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { + dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK); + goto discard; } if (!queued) { @@ -587,37 +596,22 @@ 
discard: EXPORT_SYMBOL_GPL(dccp_rcv_state_process); /** - * dccp_sample_rtt - Sample RTT from packet exchange - * - * @sk: connected dccp_sock - * @t_recv: receive timestamp of packet with timestamp echo - * @t_hist: packet history timestamp or NULL + * dccp_sample_rtt - Validate and finalise computation of RTT sample + * @delta: number of microseconds between packet and acknowledgment + * The routine is kept generic to work in different contexts. It should be + * called immediately when the ACK used for the RTT sample arrives. */ -u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv, - struct timeval *t_hist) +u32 dccp_sample_rtt(struct sock *sk, long delta) { - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_options_received *or = &dp->dccps_options_received; - suseconds_t delta; - - if (t_hist == NULL) { - if (!or->dccpor_timestamp_echo) { - DCCP_WARN("packet without timestamp echo\n"); - return DCCP_SANE_RTT_MAX; - } - timeval_sub_usecs(t_recv, or->dccpor_timestamp_echo * 10); - delta = timeval_usecs(t_recv); - } else - delta = timeval_delta(t_recv, t_hist); - - delta -= or->dccpor_elapsed_time * 10; /* either set or 0 */ + /* dccpor_elapsed_time is either zeroed out or set and > 0 */ + delta -= dccp_sk(sk)->dccps_options_received.dccpor_elapsed_time * 10; if (unlikely(delta <= 0)) { - DCCP_WARN("unusable RTT sample %ld, using min\n", (long)delta); + DCCP_WARN("unusable RTT sample %ld, using min\n", delta); return DCCP_SANE_RTT_MIN; } - if (unlikely(delta - (suseconds_t)DCCP_SANE_RTT_MAX > 0)) { - DCCP_WARN("RTT sample %ld too large, using max\n", (long)delta); + if (unlikely(delta > DCCP_SANE_RTT_MAX)) { + DCCP_WARN("RTT sample %ld too large, using max\n", delta); return DCCP_SANE_RTT_MAX; } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 718f2fa923a..44f6e17e105 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -381,7 +381,6 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, { struct inet_request_sock *ireq; 
struct inet_sock *newinet; - struct dccp_sock *newdp; struct sock *newsk; if (sk_acceptq_is_full(sk)) @@ -396,7 +395,6 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, sk_setup_caps(newsk, dst); - newdp = dccp_sk(newsk); newinet = inet_sk(newsk); ireq = inet_rsk(req); newinet->daddr = ireq->rmt_addr; @@ -512,17 +510,12 @@ out: static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { int err; - struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; const struct iphdr *rxiph; - const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_reset); struct sk_buff *skb; struct dst_entry *dst; - u64 seqno = 0; /* Never send a reset in response to a reset. */ - if (rxdh->dccph_type == DCCP_PKT_RESET) + if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET) return; if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) @@ -532,37 +525,14 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) if (dst == NULL) return; - skb = alloc_skb(dccp_v4_ctl_socket->sk->sk_prot->max_header, - GFP_ATOMIC); + skb = dccp_ctl_make_reset(dccp_v4_ctl_socket, rxskb); if (skb == NULL) goto out; - /* Reserve space for headers. */ - skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header); - skb->dst = dst_clone(dst); - - dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len); - - /* Build DCCP header and checksum it. */ - dh->dccph_type = DCCP_PKT_RESET; - dh->dccph_sport = rxdh->dccph_dport; - dh->dccph_dport = rxdh->dccph_sport; - dh->dccph_doff = dccp_hdr_reset_len / 4; - dh->dccph_x = 1; - dccp_hdr_reset(skb)->dccph_reset_code = - DCCP_SKB_CB(rxskb)->dccpd_reset_code; - - /* See "8.3.1. 
Abnormal Termination" in RFC 4340 */ - if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) - dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); - - dccp_hdr_set_seq(dh, seqno); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); - - dccp_csum_outgoing(skb); rxiph = ip_hdr(rxskb); - dh->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr, - rxiph->daddr); + dccp_hdr(skb)->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr, + rxiph->daddr); + skb->dst = dst_clone(dst); bh_lock_sock(dccp_v4_ctl_socket->sk); err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk, @@ -598,17 +568,14 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) struct dccp_request_sock *dreq; const __be32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ if (((struct rtable *)skb->dst)->rt_flags & - (RTCF_BROADCAST | RTCF_MULTICAST)) { - reset_code = DCCP_RESET_CODE_NO_CONNECTION; - goto drop; - } + (RTCF_BROADCAST | RTCF_MULTICAST)) + return 0; /* discard, don't send a reset here */ if (dccp_bad_service_code(sk, service)) { - reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; + dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; goto drop; } /* @@ -616,6 +583,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * limitations, they conserve resources and peer is * evidently real one. 
*/ + dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; if (inet_csk_reqsk_queue_is_full(sk)) goto drop; @@ -668,7 +636,6 @@ drop_and_free: reqsk_free(req); drop: DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); - dcb->dccpd_reset_code = reset_code; return -1; } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index b158c661867..006a3834fbc 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -301,50 +301,23 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req) static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { - struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; struct ipv6hdr *rxip6h; - const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_reset); struct sk_buff *skb; struct flowi fl; - u64 seqno = 0; - if (rxdh->dccph_type == DCCP_PKT_RESET) + if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET) return; if (!ipv6_unicast_destination(rxskb)) return; - skb = alloc_skb(dccp_v6_ctl_socket->sk->sk_prot->max_header, - GFP_ATOMIC); + skb = dccp_ctl_make_reset(dccp_v6_ctl_socket, rxskb); if (skb == NULL) return; - skb_reserve(skb, dccp_v6_ctl_socket->sk->sk_prot->max_header); - - dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len); - - /* Swap the send and the receive. */ - dh->dccph_type = DCCP_PKT_RESET; - dh->dccph_sport = rxdh->dccph_dport; - dh->dccph_dport = rxdh->dccph_sport; - dh->dccph_doff = dccp_hdr_reset_len / 4; - dh->dccph_x = 1; - dccp_hdr_reset(skb)->dccph_reset_code = - DCCP_SKB_CB(rxskb)->dccpd_reset_code; - - /* See "8.3.1. 
Abnormal Termination" in RFC 4340 */ - if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) - dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); - - dccp_hdr_set_seq(dh, seqno); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); - - dccp_csum_outgoing(skb); rxip6h = ipv6_hdr(rxskb); - dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr, - &rxip6h->daddr); + dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr, + &rxip6h->daddr); memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr); @@ -352,8 +325,8 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) fl.proto = IPPROTO_DCCP; fl.oif = inet6_iif(rxskb); - fl.fl_ip_dport = dh->dccph_dport; - fl.fl_ip_sport = dh->dccph_sport; + fl.fl_ip_dport = dccp_hdr(skb)->dccph_dport; + fl.fl_ip_sport = dccp_hdr(skb)->dccph_sport; security_skb_classify_flow(rxskb, &fl); /* sk = NULL, but it is safe for now. RST socket required. */ @@ -417,21 +390,21 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct ipv6_pinfo *np = inet6_sk(sk); const __be32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; if (skb->protocol == htons(ETH_P_IP)) return dccp_v4_conn_request(sk, skb); if (!ipv6_unicast_destination(skb)) - goto drop; + return 0; /* discard, don't send a reset here */ if (dccp_bad_service_code(sk, service)) { - reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; + dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; goto drop; } /* * There are no SYN attacks on IPv6, yet... 
*/ + dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; if (inet_csk_reqsk_queue_is_full(sk)) goto drop; @@ -491,7 +464,6 @@ drop_and_free: reqsk_free(req); drop: DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); - dcb->dccpd_reset_code = reset_code; return -1; } diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index e18e249ac49..831b76e08d0 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -42,6 +42,16 @@ struct inet_timewait_death_row dccp_death_row = { EXPORT_SYMBOL_GPL(dccp_death_row); +void dccp_minisock_init(struct dccp_minisock *dmsk) +{ + dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window; + dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid; + dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid; + dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio; + dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector; + dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count; +} + void dccp_time_wait(struct sock *sk, int state, int timeo) { struct inet_timewait_sock *tw = NULL; @@ -112,7 +122,6 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newdp->dccps_service_list = NULL; newdp->dccps_service = dreq->dreq_service; newicsk->icsk_rto = DCCP_TIMEOUT_INIT; - do_gettimeofday(&newdp->dccps_epoch); if (dccp_feat_clone(sk, newsk)) goto out_free; diff --git a/net/dccp/options.c b/net/dccp/options.c index 34d536d5f1a..d361b553330 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -29,16 +29,6 @@ int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO; int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT; -void dccp_minisock_init(struct dccp_minisock *dmsk) -{ - dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window; - dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid; - dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid; - dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio; - dmsk->dccpms_send_ack_vector = 
sysctl_dccp_feat_send_ack_vector; - dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count; -} - static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) { u32 value = 0; @@ -158,7 +148,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_timestamp = ntohl(*(__be32 *)value); dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; - dccp_timestamp(sk, &dp->dccps_timestamp_time); + dp->dccps_timestamp_time = ktime_get_real(); dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n", dccp_role(sk), opt_recv->dccpor_timestamp, @@ -189,7 +179,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) else elapsed_time = ntohl(*(__be32 *)(value + 4)); - dccp_pr_debug_cat(", ELAPSED_TIME=%d\n", elapsed_time); + dccp_pr_debug_cat(", ELAPSED_TIME=%u\n", elapsed_time); /* Give precedence to the biggest ELAPSED_TIME */ if (elapsed_time > opt_recv->dccpor_elapsed_time) @@ -370,29 +360,9 @@ int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb, EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); -void dccp_timestamp(const struct sock *sk, struct timeval *tv) -{ - const struct dccp_sock *dp = dccp_sk(sk); - - do_gettimeofday(tv); - tv->tv_sec -= dp->dccps_epoch.tv_sec; - tv->tv_usec -= dp->dccps_epoch.tv_usec; - - while (tv->tv_usec < 0) { - tv->tv_sec--; - tv->tv_usec += USEC_PER_SEC; - } -} - -EXPORT_SYMBOL_GPL(dccp_timestamp); - int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) { - struct timeval tv; - __be32 now; - - dccp_timestamp(sk, &tv); - now = htonl(timeval_usecs(&tv) / 10); + __be32 now = htonl(dccp_timestamp()); /* yes this will overflow but that is the point as we want a * 10 usec 32 bit timer which mean it wraps every 11.9 hours */ @@ -405,14 +375,12 @@ static int dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); - struct timeval now; __be32 tstamp_echo; - u32 elapsed_time; int len, elapsed_time_len; unsigned 
char *to; - - dccp_timestamp(sk, &now); - elapsed_time = timeval_delta(&now, &dp->dccps_timestamp_time) / 10; + const suseconds_t delta = ktime_us_delta(ktime_get_real(), + dp->dccps_timestamp_time); + u32 elapsed_time = delta / 10; elapsed_time_len = dccp_elapsed_time_len(elapsed_time); len = 6 + elapsed_time_len; @@ -438,8 +406,7 @@ static int dccp_insert_option_timestamp_echo(struct sock *sk, } dp->dccps_timestamp_echo = 0; - dp->dccps_timestamp_time.tv_sec = 0; - dp->dccps_timestamp_time.tv_usec = 0; + dp->dccps_timestamp_time = ktime_set(0, 0); return 0; } diff --git a/net/dccp/output.c b/net/dccp/output.c index c8d843e983f..f49544618f2 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -61,6 +61,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) set_ack = 0; /* fall through */ case DCCP_PKT_DATAACK: + case DCCP_PKT_RESET: break; case DCCP_PKT_REQUEST: @@ -69,12 +70,14 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_SYNC: case DCCP_PKT_SYNCACK: - ackno = dcb->dccpd_seq; + ackno = dcb->dccpd_ack_seq; /* fall through */ default: /* - * Only data packets should come through with skb->sk - * set. + * Set owner/destructor: some skbs are allocated via + * alloc_skb (e.g. when retransmission may happen). + * Only Data, DataAck, and Reset packets should come + * through here with skb->sk set. */ WARN_ON(skb->sk); skb_set_owner_w(skb, sk); @@ -174,34 +177,38 @@ void dccp_write_space(struct sock *sk) /** * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet - * @sk: socket to wait for + * @sk: socket to wait for + * @skb: current skb to pass on for waiting + * @delay: sleep timeout in milliseconds (> 0) + * This function is called by default when the socket is closed, and + * when a non-zero linger time is set on the socket. 
For consistency */ -static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb) +static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) { struct dccp_sock *dp = dccp_sk(sk); DEFINE_WAIT(wait); - unsigned long delay; + unsigned long jiffdelay; int rc; - while (1) { + do { + dccp_pr_debug("delayed send by %d msec\n", delay); + jiffdelay = msecs_to_jiffies(delay); + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + sk->sk_write_pending++; + release_sock(sk); + schedule_timeout(jiffdelay); + lock_sock(sk); + sk->sk_write_pending--; + if (sk->sk_err) goto do_error; if (signal_pending(current)) goto do_interrupted; rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); - if (rc <= 0) - break; - dccp_pr_debug("delayed send by %d msec\n", rc); - delay = msecs_to_jiffies(rc); - sk->sk_write_pending++; - release_sock(sk); - schedule_timeout(delay); - lock_sock(sk); - sk->sk_write_pending--; - } + } while ((delay = rc) > 0); out: finish_wait(sk->sk_sleep, &wait); return rc; @@ -228,7 +235,7 @@ void dccp_write_xmit(struct sock *sk, int block) msecs_to_jiffies(err)+jiffies); break; } else - err = dccp_wait_for_ccid(sk, skb); + err = dccp_wait_for_ccid(sk, skb, err); if (err && err != -EINTR) DCCP_BUG("err=%d after dccp_wait_for_ccid", err); } @@ -324,72 +331,81 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, EXPORT_SYMBOL_GPL(dccp_make_response); -static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, - const enum dccp_reset_codes code) +/* answer offending packet in @rcv_skb with Reset from control socket @ctl */ +struct sk_buff *dccp_ctl_make_reset(struct socket *ctl, struct sk_buff *rcv_skb) { - struct dccp_hdr *dh; - struct dccp_sock *dp = dccp_sk(sk); - const u32 dccp_header_size = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_reset); - struct sk_buff *skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1, - GFP_ATOMIC); - if (skb == NULL) - 
return NULL; - - /* Reserve space for headers. */ - skb_reserve(skb, sk->sk_prot->max_header); - - skb->dst = dst_clone(dst); - - dccp_inc_seqno(&dp->dccps_gss); - - DCCP_SKB_CB(skb)->dccpd_reset_code = code; - DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET; - DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss; + struct dccp_hdr *rxdh = dccp_hdr(rcv_skb), *dh; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(rcv_skb); + const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) + + sizeof(struct dccp_hdr_ext) + + sizeof(struct dccp_hdr_reset); + struct dccp_hdr_reset *dhr; + struct sk_buff *skb; - if (dccp_insert_options(sk, skb)) { - kfree_skb(skb); + skb = alloc_skb(ctl->sk->sk_prot->max_header, GFP_ATOMIC); + if (skb == NULL) return NULL; - } - dh = dccp_zeroed_hdr(skb, dccp_header_size); + skb_reserve(skb, ctl->sk->sk_prot->max_header); - dh->dccph_sport = inet_sk(sk)->sport; - dh->dccph_dport = inet_sk(sk)->dport; - dh->dccph_doff = (dccp_header_size + - DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; + /* Swap the send and the receive. */ + dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len); dh->dccph_type = DCCP_PKT_RESET; + dh->dccph_sport = rxdh->dccph_dport; + dh->dccph_dport = rxdh->dccph_sport; + dh->dccph_doff = dccp_hdr_reset_len / 4; dh->dccph_x = 1; - dccp_hdr_set_seq(dh, dp->dccps_gss); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr); - dccp_hdr_reset(skb)->dccph_reset_code = code; - inet_csk(sk)->icsk_af_ops->send_check(sk, 0, skb); + dhr = dccp_hdr_reset(skb); + dhr->dccph_reset_code = dcb->dccpd_reset_code; + + switch (dcb->dccpd_reset_code) { + case DCCP_RESET_CODE_PACKET_ERROR: + dhr->dccph_reset_data[0] = rxdh->dccph_type; + break; + case DCCP_RESET_CODE_OPTION_ERROR: /* fall through */ + case DCCP_RESET_CODE_MANDATORY_ERROR: + memcpy(dhr->dccph_reset_data, dcb->dccpd_reset_data, 3); + break; + } + /* + * From RFC 4340, 8.3.1: + * If P.ackno exists, set R.seqno := P.ackno + 1. + * Else set R.seqno := 0. 
+ */ + if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dccp_hdr_set_seq(dh, ADD48(dcb->dccpd_ack_seq, 1)); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dcb->dccpd_seq); - DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + dccp_csum_outgoing(skb); return skb; } +EXPORT_SYMBOL_GPL(dccp_ctl_make_reset); + +/* send Reset on established socket, to close or abort the connection */ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code) { + struct sk_buff *skb; /* * FIXME: what if rebuild_header fails? * Should we be doing a rebuild_header here? */ int err = inet_sk_rebuild_header(sk); - if (err == 0) { - struct sk_buff *skb = dccp_make_reset(sk, sk->sk_dst_cache, - code); - if (skb != NULL) { - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, 0); - return net_xmit_eval(err); - } - } + if (err != 0) + return err; + + skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1, GFP_ATOMIC); + if (skb == NULL) + return -ENOBUFS; - return err; + /* Reserve space for headers and prepare control bits. */ + skb_reserve(skb, sk->sk_prot->max_header); + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET; + DCCP_SKB_CB(skb)->dccpd_reset_code = code; + + return dccp_transmit_skb(sk, skb); } /* @@ -477,6 +493,7 @@ void dccp_send_ack(struct sock *sk) EXPORT_SYMBOL_GPL(dccp_send_ack); +/* FIXME: Is this still necessary (11.3) - currently nowhere used by DCCP. 
*/ void dccp_send_delayed_ack(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -507,7 +524,7 @@ void dccp_send_delayed_ack(struct sock *sk) sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); } -void dccp_send_sync(struct sock *sk, const u64 seq, +void dccp_send_sync(struct sock *sk, const u64 ackno, const enum dccp_pkt_type pkt_type) { /* @@ -517,14 +534,16 @@ void dccp_send_sync(struct sock *sk, const u64 seq, */ struct sk_buff *skb = alloc_skb(sk->sk_prot->max_header, GFP_ATOMIC); - if (skb == NULL) + if (skb == NULL) { /* FIXME: how to make sure the sync is sent? */ + DCCP_CRIT("could not send %s", dccp_packet_name(pkt_type)); return; + } /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, sk->sk_prot->max_header); DCCP_SKB_CB(skb)->dccpd_type = pkt_type; - DCCP_SKB_CB(skb)->dccpd_seq = seq; + DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; dccp_transmit_skb(sk, skb); } diff --git a/net/dccp/probe.c b/net/dccp/probe.c index bae10b0f2fc..7053bb827bc 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -30,6 +30,7 @@ #include <linux/module.h> #include <linux/kfifo.h> #include <linux/vmalloc.h> +#include <net/net_namespace.h> #include "dccp.h" #include "ccid.h" @@ -168,7 +169,7 @@ static __init int dccpprobe_init(void) if (IS_ERR(dccpw.fifo)) return PTR_ERR(dccpw.fifo); - if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops)) + if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; ret = register_jprobe(&dccp_send_probe); @@ -178,7 +179,7 @@ static __init int dccpprobe_init(void) pr_info("DCCP watch registered (port=%d)\n", port); return 0; err1: - proc_net_remove(procname); + proc_net_remove(&init_net, procname); err0: kfifo_free(dccpw.fifo); return ret; @@ -188,7 +189,7 @@ module_init(dccpprobe_init); static __exit void dccpprobe_exit(void) { kfifo_free(dccpw.fifo); - proc_net_remove(procname); + proc_net_remove(&init_net, procname); unregister_jprobe(&dccp_send_probe); } diff 
--git a/net/dccp/proto.c b/net/dccp/proto.c index 04b59ec4f51..cc9bf1cb264 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -172,7 +172,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) struct inet_connection_sock *icsk = inet_csk(sk); dccp_minisock_init(&dp->dccps_minisock); - do_gettimeofday(&dp->dccps_epoch); /* * FIXME: We're hardcoding the CCID, and doing this at this point makes @@ -220,6 +219,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) sk->sk_write_space = dccp_write_space; icsk->icsk_sync_mss = dccp_sync_mss; dp->dccps_mss_cache = 536; + dp->dccps_rate_last = jiffies; dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; @@ -587,6 +587,10 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_SERVICE: return dccp_getsockopt_service(sk, len, (__be32 __user *)optval, optlen); + case DCCP_SOCKOPT_GET_CUR_MPS: + val = dp->dccps_mss_cache; + len = sizeof(val); + break; case DCCP_SOCKOPT_SEND_CSCOV: val = dp->dccps_pcslen; len = sizeof(val); @@ -664,7 +668,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, * so that the trick in dccp_rcv_request_sent_state_process. */ /* Wait for a connection to finish. 
*/ - if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING)) + if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN)) if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0) goto out_release; @@ -988,7 +992,7 @@ MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); #ifdef CONFIG_IP_DCCP_DEBUG int dccp_debug; -module_param(dccp_debug, int, 0444); +module_param(dccp_debug, bool, 0444); MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); EXPORT_SYMBOL_GPL(dccp_debug); @@ -1077,6 +1081,8 @@ static int __init dccp_init(void) rc = dccp_sysctl_init(); if (rc) goto out_ackvec_exit; + + dccp_timestamping_init(); out: return rc; out_ackvec_exit: diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 1260aabac5e..9364b2fb4db 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -18,6 +18,9 @@ #error This file should not be compiled without CONFIG_SYSCTL defined #endif +/* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */ +int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8; + static struct ctl_table dccp_default_table[] = { { .procname = "seq_window", @@ -89,6 +92,13 @@ static struct ctl_table dccp_default_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "sync_ratelimit", + .data = &sysctl_dccp_sync_ratelimit, + .maxlen = sizeof(sysctl_dccp_sync_ratelimit), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + }, { .ctl_name = 0, } }; diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 0197a41c256..3af067354bd 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -291,3 +291,24 @@ void dccp_init_xmit_timers(struct sock *sk) inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, &dccp_keepalive_timer); } + +static ktime_t dccp_timestamp_seed; +/** + * dccp_timestamp - 10s of microseconds time source + * Returns the number of 10s of microseconds since loading DCCP. This is native + * DCCP time difference format (RFC 4340, sec. 13). 
+ * Please note: This will wrap around about circa every 11.9 hours. + */ +u32 dccp_timestamp(void) +{ + s64 delta = ktime_us_delta(ktime_get_real(), dccp_timestamp_seed); + + do_div(delta, 10); + return delta; +} +EXPORT_SYMBOL_GPL(dccp_timestamp); + +void __init dccp_timestamping_init(void) +{ + dccp_timestamp_seed = ktime_get_real(); +} |