From 7c657876b63cb1d8a2ec06f8fc6c37bb8412e66c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:14:34 -0700 Subject: [DCCP]: Initial implementation Development to this point was done on a subversion repository at: http://oops.ghostprotocols.net:81/cgi-bin/viewcvs.cgi/dccp-2.6/ This repository will be kept at this site for the foreseable future, so that interested parties can see the history of this code, attributions, etc. If I ever decide to take this offline I'll provide the full history at some other suitable place. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/Kconfig | 24 + net/dccp/Makefile | 5 + net/dccp/ccid.c | 139 +++ net/dccp/ccid.h | 156 ++++ net/dccp/ccids/Kconfig | 25 + net/dccp/ccids/Makefile | 3 + net/dccp/ccids/ccid3.c | 2164 +++++++++++++++++++++++++++++++++++++++++++++++ net/dccp/ccids/ccid3.h | 137 +++ net/dccp/dccp.h | 422 +++++++++ net/dccp/input.c | 510 +++++++++++ net/dccp/ipv4.c | 1289 ++++++++++++++++++++++++++++ net/dccp/minisocks.c | 199 +++++ net/dccp/options.c | 763 +++++++++++++++++ net/dccp/output.c | 406 +++++++++ net/dccp/proto.c | 818 ++++++++++++++++++ net/dccp/timer.c | 249 ++++++ 16 files changed, 7309 insertions(+) create mode 100644 net/dccp/Kconfig create mode 100644 net/dccp/Makefile create mode 100644 net/dccp/ccid.c create mode 100644 net/dccp/ccid.h create mode 100644 net/dccp/ccids/Kconfig create mode 100644 net/dccp/ccids/Makefile create mode 100644 net/dccp/ccids/ccid3.c create mode 100644 net/dccp/ccids/ccid3.h create mode 100644 net/dccp/dccp.h create mode 100644 net/dccp/input.c create mode 100644 net/dccp/ipv4.c create mode 100644 net/dccp/minisocks.c create mode 100644 net/dccp/options.c create mode 100644 net/dccp/output.c create mode 100644 net/dccp/proto.c create mode 100644 net/dccp/timer.c (limited to 'net/dccp') diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig new file mode 100644 index 00000000000..90460bc629b --- /dev/null +++ b/net/dccp/Kconfig @@ -0,0 +1,24 @@ +menu "DCCP Configuration (EXPERIMENTAL)" + depends on INET && EXPERIMENTAL + +config IP_DCCP + tristate "The DCCP Protocol (EXPERIMENTAL)" + ---help--- + Datagram Congestion Control Protocol + + From draft-ietf-dccp-spec-11 . + + The Datagram Congestion Control Protocol (DCCP) is a transport + protocol that implements bidirectional, unicast connections of + congestion-controlled, unreliable datagrams. It should be suitable + for use by applications such as streaming media, Internet telephony, + and on-line games + + To compile this protocol support as a module, choose M here: the + module will be called dccp. + + If in doubt, say N. + +source "net/dccp/ccids/Kconfig" + +endmenu diff --git a/net/dccp/Makefile b/net/dccp/Makefile new file mode 100644 index 00000000000..c6e6ba55c36 --- /dev/null +++ b/net/dccp/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_IP_DCCP) += dccp.o + +dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o timer.o + +obj-y += ccids/ diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c new file mode 100644 index 00000000000..9d8fc0e289e --- /dev/null +++ b/net/dccp/ccid.c @@ -0,0 +1,139 @@ +/* + * net/dccp/ccid.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * CCID infrastructure + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "ccid.h" + +static struct ccid *ccids[CCID_MAX]; +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) +static atomic_t ccids_lockct = ATOMIC_INIT(0); +static DEFINE_SPINLOCK(ccids_lock); + +/* + * The strategy is: modifications ccids vector are short, do not sleep and + * veeery rare, but read access should be free of any exclusive locks. + */ +static void ccids_write_lock(void) +{ + spin_lock(&ccids_lock); + while (atomic_read(&ccids_lockct) != 0) { + spin_unlock(&ccids_lock); + yield(); + spin_lock(&ccids_lock); + } +} + +static inline void ccids_write_unlock(void) +{ + spin_unlock(&ccids_lock); +} + +static inline void ccids_read_lock(void) +{ + atomic_inc(&ccids_lockct); + spin_unlock_wait(&ccids_lock); +} + +static inline void ccids_read_unlock(void) +{ + atomic_dec(&ccids_lockct); +} + +#else +#define ccids_write_lock() do { } while(0) +#define ccids_write_unlock() do { } while(0) +#define ccids_read_lock() do { } while(0) +#define ccids_read_unlock() do { } while(0) +#endif + +int ccid_register(struct ccid *ccid) +{ + int err; + + if (ccid->ccid_init == NULL) + return -1; + + ccids_write_lock(); + err = -EEXIST; + if (ccids[ccid->ccid_id] == NULL) { + ccids[ccid->ccid_id] = ccid; + err = 0; + } + ccids_write_unlock(); + if (err == 0) + pr_info("CCID: Registered CCID %d (%s)\n", + ccid->ccid_id, ccid->ccid_name); + return err; +} + +EXPORT_SYMBOL_GPL(ccid_register); + +int ccid_unregister(struct ccid *ccid) +{ + ccids_write_lock(); + ccids[ccid->ccid_id] = NULL; + ccids_write_unlock(); + pr_info("CCID: Unregistered CCID %d (%s)\n", + ccid->ccid_id, ccid->ccid_name); + return 0; +} + +EXPORT_SYMBOL_GPL(ccid_unregister); + +struct ccid *ccid_init(unsigned char id, struct sock *sk) +{ + struct ccid *ccid; + +#ifdef CONFIG_KMOD + if (ccids[id] == NULL) + request_module("net-dccp-ccid-%d", id); +#endif + ccids_read_lock(); + + ccid = ccids[id]; + if (ccid == NULL) + goto out; + + if (!try_module_get(ccid->ccid_owner)) + goto out_err; + + if (ccid->ccid_init(sk) != 0) + goto out_module_put; +out: + ccids_read_unlock(); + return ccid; +out_module_put: + module_put(ccid->ccid_owner); +out_err: + ccid = NULL; + goto out; +} + +EXPORT_SYMBOL_GPL(ccid_init); + +void ccid_exit(struct ccid *ccid, struct sock *sk) +{ + if (ccid == NULL) + return; + + ccids_read_lock(); + + if (ccids[ccid->ccid_id] != NULL) { + if (ccid->ccid_exit != NULL) + ccid->ccid_exit(sk); + module_put(ccid->ccid_owner); + } + + ccids_read_unlock(); +} + +EXPORT_SYMBOL_GPL(ccid_exit); diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h new file mode 100644 index 00000000000..06105b2a613 --- /dev/null +++ b/net/dccp/ccid.h @@ -0,0 +1,156 @@ +#ifndef _CCID_H +#define _CCID_H +/* + * net/dccp/ccid.h + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * CCID infrastructure + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#define CCID_MAX 255 + +struct ccid { + unsigned char ccid_id; + const char *ccid_name; + struct module *ccid_owner; + int (*ccid_init)(struct sock *sk); + void (*ccid_exit)(struct sock *sk); + int (*ccid_hc_rx_init)(struct sock *sk); + int (*ccid_hc_tx_init)(struct sock *sk); + void (*ccid_hc_rx_exit)(struct sock *sk); + void (*ccid_hc_tx_exit)(struct sock *sk); + void (*ccid_hc_rx_packet_recv)(struct sock *sk, struct sk_buff *skb); + int (*ccid_hc_rx_parse_options)(struct sock *sk, + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value); + void (*ccid_hc_rx_insert_options)(struct sock *sk, struct sk_buff *skb); + void (*ccid_hc_tx_insert_options)(struct sock *sk, struct sk_buff *skb); + void (*ccid_hc_tx_packet_recv)(struct sock *sk, struct sk_buff *skb); + int (*ccid_hc_tx_parse_options)(struct sock *sk, + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value); + int (*ccid_hc_tx_send_packet)(struct sock *sk, + struct sk_buff *skb, int len, + long *delay); + void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, int len); +}; + +extern int ccid_register(struct ccid *ccid); +extern int ccid_unregister(struct ccid *ccid); + +extern struct ccid *ccid_init(unsigned char id, struct sock *sk); +extern void ccid_exit(struct ccid *ccid, struct sock *sk); + +static inline void __ccid_get(struct ccid *ccid) +{ + __module_get(ccid->ccid_owner); +} + +static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb, int len, + long *delay) +{ + int rc = 0; + if (ccid->ccid_hc_tx_send_packet != NULL) + rc = ccid->ccid_hc_tx_send_packet(sk, skb, len, delay); + return rc; +} + +static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk, + int more, int len) +{ + if (ccid->ccid_hc_tx_packet_sent != NULL) + ccid->ccid_hc_tx_packet_sent(sk, more, len); +} + +static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk) +{ + int rc = 0; + if (ccid->ccid_hc_rx_init != NULL) + rc = ccid->ccid_hc_rx_init(sk); + return rc; +} + +static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk) +{ + int rc = 0; + if (ccid->ccid_hc_tx_init != NULL) + rc = ccid->ccid_hc_tx_init(sk); + return rc; +} + +static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk) +{ + if (ccid->ccid_hc_rx_exit != NULL) + ccid->ccid_hc_rx_exit(sk); +} + +static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk) +{ + if (ccid->ccid_hc_tx_exit != NULL) + ccid->ccid_hc_tx_exit(sk); +} + +static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb) +{ + if (ccid->ccid_hc_rx_packet_recv != NULL) + ccid->ccid_hc_rx_packet_recv(sk, skb); +} + +static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb) +{ + if (ccid->ccid_hc_tx_packet_recv != NULL) + ccid->ccid_hc_tx_packet_recv(sk, skb); +} + +static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value) +{ + int rc = 0; + if (ccid->ccid_hc_tx_parse_options != NULL) + rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx, value); + return rc; +} + +static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value) +{ + int rc = 0; + if (ccid->ccid_hc_rx_parse_options != NULL) + rc = ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value); + return rc; +} + +static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb) +{ + if (ccid->ccid_hc_tx_insert_options != NULL) + ccid->ccid_hc_tx_insert_options(sk, skb); +} + +static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb) +{ + if (ccid->ccid_hc_rx_insert_options != NULL) + ccid->ccid_hc_rx_insert_options(sk, skb); +} +#endif /* _CCID_H */ diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig new file mode 100644 index 00000000000..67f9c06bd17 --- /dev/null +++ b/net/dccp/ccids/Kconfig @@ -0,0 +1,25 @@ +menu "DCCP CCIDs Configuration (EXPERIMENTAL)" + depends on IP_DCCP && EXPERIMENTAL + +config IP_DCCP_CCID3 + tristate "CCID3 (TFRC) (EXPERIMENTAL)" + depends on IP_DCCP + ---help--- + CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based + rate-controlled congestion control mechanism. TFRC is designed to + be reasonably fair when competing for bandwidth with TCP-like flows, + where a flow is "reasonably fair" if its sending rate is generally + within a factor of two of the sending rate of a TCP flow under the + same conditions. However, TFRC has a much lower variation of + throughput over time compared with TCP, which makes CCID 3 more + suitable than CCID 2 for applications such streaming media where a + relatively smooth sending rate is of importance. + + CCID 3 is further described in [CCID 3 PROFILE]. The TFRC + congestion control algorithms were initially described in RFC 3448. + + This text was extracted from draft-ietf-dccp-spec-11.txt. + + If in doubt, say M. + +endmenu diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile new file mode 100644 index 00000000000..1c720131c5d --- /dev/null +++ b/net/dccp/ccids/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o + +dccp_ccid3-y := ccid3.o diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c new file mode 100644 index 00000000000..4f45902cb55 --- /dev/null +++ b/net/dccp/ccids/ccid3.c @@ -0,0 +1,2164 @@ +/* + * net/dccp/ccids/ccid3.c + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * + * An implementation of the DCCP protocol + * + * This code has been developed by the University of Waikato WAND + * research group. For further information please see http://www.wand.net.nz/ + * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * + * This code also uses code from Lulea University, rereleased as GPL by its + * authors: + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * Changes to meet Linux coding standards, to make it meet latest ccid3 draft + * and to make it work as a loadable module in the DCCP stack written by + * Arnaldo Carvalho de Melo . + * + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "../ccid.h" +#include "../dccp.h" +#include "ccid3.h" + +#ifdef CCID3_DEBUG +extern int ccid3_debug; + +#define ccid3_pr_debug(format, a...) \ + do { if (ccid3_debug) \ + printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ + } while (0) +#else +#define ccid3_pr_debug(format, a...) +#endif + +#define TFRC_MIN_PACKET_SIZE 16 +#define TFRC_STD_PACKET_SIZE 256 +#define TFRC_MAX_PACKET_SIZE 65535 + +#define USEC_IN_SEC 1000000 + +#define TFRC_INITIAL_TIMEOUT (2 * USEC_IN_SEC) +/* two seconds as per CCID3 spec 11 */ + +#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_IN_SEC / (2 * HZ)) +/* above is in usecs - half the scheduling granularity as per RFC3448 4.6 */ + +#define TFRC_WIN_COUNT_PER_RTT 4 +#define TFRC_WIN_COUNT_LIMIT 16 + +#define TFRC_MAX_BACK_OFF_TIME 64 +/* above is in seconds */ + +#define TFRC_SMALLEST_P 40 + +#define TFRC_RECV_IVAL_F_LENGTH 8 /* length(w[]) */ + +/* Number of later packets received before one is considered lost */ +#define TFRC_RECV_NUM_LATE_LOSS 3 + +enum ccid3_options { + TFRC_OPT_LOSS_EVENT_RATE = 192, + TFRC_OPT_LOSS_INTERVALS = 193, + TFRC_OPT_RECEIVE_RATE = 194, +}; + +static int ccid3_debug; + +static kmem_cache_t *ccid3_tx_hist_slab; +static kmem_cache_t *ccid3_rx_hist_slab; +static kmem_cache_t *ccid3_loss_interval_hist_slab; + +static inline struct ccid3_tx_hist_entry *ccid3_tx_hist_entry_new(int prio) +{ + struct ccid3_tx_hist_entry *entry = kmem_cache_alloc(ccid3_tx_hist_slab, prio); + + if (entry != NULL) + entry->ccid3htx_sent = 0; + + return entry; +} + +static inline void ccid3_tx_hist_entry_delete(struct ccid3_tx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(ccid3_tx_hist_slab, entry); +} + +static inline struct ccid3_rx_hist_entry *ccid3_rx_hist_entry_new(struct sock *sk, + struct sk_buff *skb, + int prio) +{ + struct ccid3_rx_hist_entry *entry = kmem_cache_alloc(ccid3_rx_hist_slab, prio); + + if (entry != NULL) { + const struct dccp_hdr *dh = dccp_hdr(skb); + + entry->ccid3hrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; + entry->ccid3hrx_win_count = dh->dccph_ccval; + entry->ccid3hrx_type = dh->dccph_type; + entry->ccid3hrx_ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; + do_gettimeofday(&(entry->ccid3hrx_tstamp)); + } + + return entry; +} + +static inline void ccid3_rx_hist_entry_delete(struct ccid3_rx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(ccid3_rx_hist_slab, entry); +} + +static void ccid3_rx_history_delete(struct list_head *hist) +{ + struct ccid3_rx_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, hist, ccid3hrx_node) { + list_del_init(&entry->ccid3hrx_node); + kmem_cache_free(ccid3_rx_hist_slab, entry); + } +} + +static inline struct ccid3_loss_interval_hist_entry *ccid3_loss_interval_hist_entry_new(int prio) +{ + return kmem_cache_alloc(ccid3_loss_interval_hist_slab, prio); +} + +static inline void ccid3_loss_interval_hist_entry_delete(struct ccid3_loss_interval_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(ccid3_loss_interval_hist_slab, entry); +} + +static void ccid3_loss_interval_history_delete(struct list_head *hist) +{ + struct ccid3_loss_interval_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, hist, ccid3lih_node) { + list_del_init(&entry->ccid3lih_node); + kmem_cache_free(ccid3_loss_interval_hist_slab, entry); + } +} + +static int ccid3_init(struct sock *sk) +{ + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + return 0; +} + +static void ccid3_exit(struct sock *sk) +{ + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); +} + +/* TFRC sender states */ +enum ccid3_hc_tx_states { + TFRC_SSTATE_NO_SENT = 1, + TFRC_SSTATE_NO_FBACK, + TFRC_SSTATE_FBACK, + TFRC_SSTATE_TERM, +}; + +#ifdef CCID3_DEBUG +static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) +{ + static char *ccid3_state_names[] = { + [TFRC_SSTATE_NO_SENT] = "NO_SENT", + [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", + [TFRC_SSTATE_FBACK] = "FBACK", + [TFRC_SSTATE_TERM] = "TERM", + }; + + return ccid3_state_names[state]; +} +#endif + +static inline void ccid3_hc_tx_set_state(struct sock *sk, enum ccid3_hc_tx_states state) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; + + ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", + dccp_role(sk), sk, ccid3_tx_state_name(oldstate), ccid3_tx_state_name(state)); + WARN_ON(state == oldstate); + hctx->ccid3hctx_state = state; +} + +static void timeval_sub(struct timeval large, struct timeval small, struct timeval *result) { + + result->tv_sec = large.tv_sec-small.tv_sec; + if (large.tv_usec < small.tv_usec) { + (result->tv_sec)--; + result->tv_usec = USEC_IN_SEC+large.tv_usec-small.tv_usec; + } else + result->tv_usec = large.tv_usec-small.tv_usec; +} + +static inline void timeval_fix(struct timeval *tv) { + if (tv->tv_usec >= USEC_IN_SEC) { + tv->tv_sec++; + tv->tv_usec -= USEC_IN_SEC; + } +} + +/* returns the difference in usecs between timeval passed in and current time */ +static inline u32 now_delta(struct timeval tv) { + struct timeval now; + + do_gettimeofday(&now); + return ((now.tv_sec-tv.tv_sec)*1000000+now.tv_usec-tv.tv_usec); +} + +#define CALCX_ARRSIZE 500 + +#define CALCX_SPLIT 50000 +/* equivalent to 0.05 */ + +static const u32 calcx_lookup[CALCX_ARRSIZE][2] = { + { 37172 , 8172 }, + { 53499 , 11567 }, + { 66664 , 14180 }, + { 78298 , 16388 }, + { 89021 , 18339 }, + { 99147 , 20108 }, + { 108858 , 21738 }, + { 118273 , 23260 }, + { 127474 , 24693 }, + { 136520 , 26052 }, + { 145456 , 27348 }, + { 154316 , 28589 }, + { 163130 , 29783 }, + { 171919 , 30935 }, + { 180704 , 32049 }, + { 189502 , 33130 }, + { 198328 , 34180 }, + { 207194 , 35202 }, + { 216114 , 36198 }, + { 225097 , 37172 }, + { 234153 , 38123 }, + { 243294 , 39055 }, + { 252527 , 39968 }, + { 261861 , 40864 }, + { 271305 , 41743 }, + { 280866 , 42607 }, + { 290553 , 43457 }, + { 300372 , 44293 }, + { 310333 , 45117 }, + { 320441 , 45929 }, + { 330705 , 46729 }, + { 341131 , 47518 }, + { 351728 , 48297 }, + { 362501 , 49066 }, + { 373460 , 49826 }, + { 384609 , 50577 }, + { 395958 , 51320 }, + { 407513 , 52054 }, + { 419281 , 52780 }, + { 431270 , 53499 }, + { 443487 , 54211 }, + { 455940 , 54916 }, + { 468635 , 55614 }, + { 481581 , 56306 }, + { 494785 , 56991 }, + { 508254 , 57671 }, + { 521996 , 58345 }, + { 536019 , 59014 }, + { 550331 , 59677 }, + { 564939 , 60335 }, + { 579851 , 60988 }, + { 595075 , 61636 }, + { 610619 , 62279 }, + { 626491 , 62918 }, + { 642700 , 63553 }, + { 659253 , 64183 }, + { 676158 , 64809 }, + { 693424 , 65431 }, + { 711060 , 66050 }, + { 729073 , 66664 }, + { 747472 , 67275 }, + { 766266 , 67882 }, + { 785464 , 68486 }, + { 805073 , 69087 }, + { 825103 , 69684 }, + { 845562 , 70278 }, + { 866460 , 70868 }, + { 887805 , 71456 }, + { 909606 , 72041 }, + { 931873 , 72623 }, + { 954614 , 73202 }, + { 977839 , 73778 }, + { 1001557 , 74352 }, + { 1025777 , 74923 }, + { 1050508 , 75492 }, + { 1075761 , 76058 }, + { 1101544 , 76621 }, + { 1127867 , 77183 }, + { 1154739 , 77741 }, + { 1182172 , 78298 }, + { 1210173 , 78852 }, + { 1238753 , 79405 }, + { 1267922 , 79955 }, + { 1297689 , 80503 }, + { 1328066 , 81049 }, + { 1359060 , 81593 }, + { 1390684 , 82135 }, + { 1422947 , 82675 }, + { 1455859 , 83213 }, + { 1489430 , 83750 }, + { 1523671 , 84284 }, + { 1558593 , 84817 }, + { 1594205 , 85348 }, + { 1630518 , 85878 }, + { 1667543 , 86406 }, + { 1705290 , 86932 }, + { 1743770 , 87457 }, + { 1782994 , 87980 }, + { 1822973 , 88501 }, + { 1863717 , 89021 }, + { 1905237 , 89540 }, + { 1947545 , 90057 }, + { 1990650 , 90573 }, + { 2034566 , 91087 }, + { 2079301 , 91600 }, + { 2124869 , 92111 }, + { 2171279 , 92622 }, + { 2218543 , 93131 }, + { 2266673 , 93639 }, + { 2315680 , 94145 }, + { 2365575 , 94650 }, + { 2416371 , 95154 }, + { 2468077 , 95657 }, + { 2520707 , 96159 }, + { 2574271 , 96660 }, + { 2628782 , 97159 }, + { 2684250 , 97658 }, + { 2740689 , 98155 }, + { 2798110 , 98651 }, + { 2856524 , 99147 }, + { 2915944 , 99641 }, + { 2976382 , 100134 }, + { 3037850 , 100626 }, + { 3100360 , 101117 }, + { 3163924 , 101608 }, + { 3228554 , 102097 }, + { 3294263 , 102586 }, + { 3361063 , 103073 }, + { 3428966 , 103560 }, + { 3497984 , 104045 }, + { 3568131 , 104530 }, + { 3639419 , 105014 }, + { 3711860 , 105498 }, + { 3785467 , 105980 }, + { 3860253 , 106462 }, + { 3936229 , 106942 }, + { 4013410 , 107422 }, + { 4091808 , 107902 }, + { 4171435 , 108380 }, + { 4252306 , 108858 }, + { 4334431 , 109335 }, + { 4417825 , 109811 }, + { 4502501 , 110287 }, + { 4588472 , 110762 }, + { 4675750 , 111236 }, + { 4764349 , 111709 }, + { 4854283 , 112182 }, + { 4945564 , 112654 }, + { 5038206 , 113126 }, + { 5132223 , 113597 }, + { 5227627 , 114067 }, + { 5324432 , 114537 }, + { 5422652 , 115006 }, + { 5522299 , 115474 }, + { 5623389 , 115942 }, + { 5725934 , 116409 }, + { 5829948 , 116876 }, + { 5935446 , 117342 }, + { 6042439 , 117808 }, + { 6150943 , 118273 }, + { 6260972 , 118738 }, + { 6372538 , 119202 }, + { 6485657 , 119665 }, + { 6600342 , 120128 }, + { 6716607 , 120591 }, + { 6834467 , 121053 }, + { 6953935 , 121514 }, + { 7075025 , 121976 }, + { 7197752 , 122436 }, + { 7322131 , 122896 }, + { 7448175 , 123356 }, + { 7575898 , 123815 }, + { 7705316 , 124274 }, + { 7836442 , 124733 }, + { 7969291 , 125191 }, + { 8103877 , 125648 }, + { 8240216 , 126105 }, + { 8378321 , 126562 }, + { 8518208 , 127018 }, + { 8659890 , 127474 }, + { 8803384 , 127930 }, + { 8948702 , 128385 }, + { 9095861 , 128840 }, + { 9244875 , 129294 }, + { 9395760 , 129748 }, + { 9548529 , 130202 }, + { 9703198 , 130655 }, + { 9859782 , 131108 }, + { 10018296 , 131561 }, + { 10178755 , 132014 }, + { 10341174 , 132466 }, + { 10505569 , 132917 }, + { 10671954 , 133369 }, + { 10840345 , 133820 }, + { 11010757 , 134271 }, + { 11183206 , 134721 }, + { 11357706 , 135171 }, + { 11534274 , 135621 }, + { 11712924 , 136071 }, + { 11893673 , 136520 }, + { 12076536 , 136969 }, + { 12261527 , 137418 }, + { 12448664 , 137867 }, + { 12637961 , 138315 }, + { 12829435 , 138763 }, + { 13023101 , 139211 }, + { 13218974 , 139658 }, + { 13417071 , 140106 }, + { 13617407 , 140553 }, + { 13819999 , 140999 }, + { 14024862 , 141446 }, + { 14232012 , 141892 }, + { 14441465 , 142339 }, + { 14653238 , 142785 }, + { 14867346 , 143230 }, + { 15083805 , 143676 }, + { 15302632 , 144121 }, + { 15523842 , 144566 }, + { 15747453 , 145011 }, + { 15973479 , 145456 }, + { 16201939 , 145900 }, + { 16432847 , 146345 }, + { 16666221 , 146789 }, + { 16902076 , 147233 }, + { 17140429 , 147677 }, + { 17381297 , 148121 }, + { 17624696 , 148564 }, + { 17870643 , 149007 }, + { 18119154 , 149451 }, + { 18370247 , 149894 }, + { 18623936 , 150336 }, + { 18880241 , 150779 }, + { 19139176 , 151222 }, + { 19400759 , 151664 }, + { 19665007 , 152107 }, + { 19931936 , 152549 }, + { 20201564 , 152991 }, + { 20473907 , 153433 }, + { 20748982 , 153875 }, + { 21026807 , 154316 }, + { 21307399 , 154758 }, + { 21590773 , 155199 }, + { 21876949 , 155641 }, + { 22165941 , 156082 }, + { 22457769 , 156523 }, + { 22752449 , 156964 }, + { 23049999 , 157405 }, + { 23350435 , 157846 }, + { 23653774 , 158287 }, + { 23960036 , 158727 }, + { 24269236 , 159168 }, + { 24581392 , 159608 }, + { 24896521 , 160049 }, + { 25214642 , 160489 }, + { 25535772 , 160929 }, + { 25859927 , 161370 }, + { 26187127 , 161810 }, + { 26517388 , 162250 }, + { 26850728 , 162690 }, + { 27187165 , 163130 }, + { 27526716 , 163569 }, + { 27869400 , 164009 }, + { 28215234 , 164449 }, + { 28564236 , 164889 }, + { 28916423 , 165328 }, + { 29271815 , 165768 }, + { 29630428 , 166208 }, + { 29992281 , 166647 }, + { 30357392 , 167087 }, + { 30725779 , 167526 }, + { 31097459 , 167965 }, + { 31472452 , 168405 }, + { 31850774 , 168844 }, + { 32232445 , 169283 }, + { 32617482 , 169723 }, + { 33005904 , 170162 }, + { 33397730 , 170601 }, + { 33792976 , 171041 }, + { 34191663 , 171480 }, + { 34593807 , 171919 }, + { 34999428 , 172358 }, + { 35408544 , 172797 }, + { 35821174 , 173237 }, + { 36237335 , 173676 }, + { 36657047 , 174115 }, + { 37080329 , 174554 }, + { 37507197 , 174993 }, + { 37937673 , 175433 }, + { 38371773 , 175872 }, + { 38809517 , 176311 }, + { 39250924 , 176750 }, + { 39696012 , 177190 }, + { 40144800 , 177629 }, + { 40597308 , 178068 }, + { 41053553 , 178507 }, + { 41513554 , 178947 }, + { 41977332 , 179386 }, + { 42444904 , 179825 }, + { 42916290 , 180265 }, + { 43391509 , 180704 }, + { 43870579 , 181144 }, + { 44353520 , 181583 }, + { 44840352 , 182023 }, + { 45331092 , 182462 }, + { 45825761 , 182902 }, + { 46324378 , 183342 }, + { 46826961 , 183781 }, + { 47333531 , 184221 }, + { 47844106 , 184661 }, + { 48358706 , 185101 }, + { 48877350 , 185541 }, + { 49400058 , 185981 }, + { 49926849 , 186421 }, + { 50457743 , 186861 }, + { 50992759 , 187301 }, + { 51531916 , 187741 }, + { 52075235 , 188181 }, + { 52622735 , 188622 }, + { 53174435 , 189062 }, + { 53730355 , 189502 }, + { 54290515 , 189943 }, + { 54854935 , 190383 }, + { 55423634 , 190824 }, + { 55996633 , 191265 }, + { 56573950 , 191706 }, + { 57155606 , 192146 }, + { 57741621 , 192587 }, + { 58332014 , 193028 }, + { 58926806 , 193470 }, + { 59526017 , 193911 }, + { 60129666 , 194352 }, + { 60737774 , 194793 }, + { 61350361 , 195235 }, + { 61967446 , 195677 }, + { 62589050 , 196118 }, + { 63215194 , 196560 }, + { 63845897 , 197002 }, + { 64481179 , 197444 }, + { 65121061 , 197886 }, + { 65765563 , 198328 }, + { 66414705 , 198770 }, + { 67068508 , 199213 }, + { 67726992 , 199655 }, + { 68390177 , 200098 }, + { 69058085 , 200540 }, + { 69730735 , 200983 }, + { 70408147 , 201426 }, + { 71090343 , 201869 }, + { 71777343 , 202312 }, + { 72469168 , 202755 }, + { 73165837 , 203199 }, + { 73867373 , 203642 }, + { 74573795 , 204086 }, + { 75285124 , 204529 }, + { 76001380 , 204973 }, + { 76722586 , 205417 }, + { 77448761 , 205861 }, + { 78179926 , 206306 }, + { 78916102 , 206750 }, + { 79657310 , 207194 }, + { 80403571 , 207639 }, + { 81154906 , 208084 }, + { 81911335 , 208529 }, + { 82672880 , 208974 }, + { 83439562 , 209419 }, + { 84211402 , 209864 }, + { 84988421 , 210309 }, + { 85770640 , 210755 }, + { 86558080 , 211201 }, + { 87350762 , 211647 }, + { 88148708 , 212093 }, + { 88951938 , 212539 }, + { 89760475 , 212985 }, + { 90574339 , 213432 }, + { 91393551 , 213878 }, + { 92218133 , 214325 }, + { 93048107 , 214772 }, + { 93883493 , 215219 }, + { 94724314 , 215666 }, + { 95570590 , 216114 }, + { 96422343 , 216561 }, + { 97279594 , 217009 }, + { 98142366 , 217457 }, + { 99010679 , 217905 }, + { 99884556 , 218353 }, + { 100764018 , 218801 }, + { 101649086 , 219250 }, + { 102539782 , 219698 }, + { 103436128 , 220147 }, + { 104338146 , 220596 }, + { 105245857 , 221046 }, + { 106159284 , 221495 }, + { 107078448 , 221945 }, + { 108003370 , 222394 }, + { 108934074 , 222844 }, + { 109870580 , 223294 }, + { 110812910 , 223745 }, + { 111761087 , 224195 }, + { 112715133 , 224646 }, + { 113675069 , 225097 }, + { 114640918 , 225548 }, + { 115612702 , 225999 }, + { 116590442 , 226450 }, + { 117574162 , 226902 }, + { 118563882 , 227353 }, + { 119559626 , 227805 }, + { 120561415 , 228258 }, + { 121569272 , 228710 }, + { 122583219 , 229162 }, + { 123603278 , 229615 }, + { 124629471 , 230068 }, + { 125661822 , 230521 }, + { 126700352 , 230974 }, + { 127745083 , 231428 }, + { 128796039 , 231882 }, + { 129853241 , 232336 }, + { 130916713 , 232790 }, + { 131986475 , 233244 }, + { 133062553 , 233699 }, + { 134144966 , 234153 }, + { 135233739 , 234608 }, + { 136328894 , 235064 }, + { 137430453 , 235519 }, + { 138538440 , 235975 }, + { 139652876 , 236430 }, + { 140773786 , 236886 }, + { 141901190 , 237343 }, + { 143035113 , 237799 }, + { 144175576 , 238256 }, + { 145322604 , 238713 }, + { 146476218 , 239170 }, + { 147636442 , 239627 }, + { 148803298 , 240085 }, + { 149976809 , 240542 }, + { 151156999 , 241000 }, + { 152343890 , 241459 }, + { 153537506 , 241917 }, + { 154737869 , 242376 }, + { 155945002 , 242835 }, + { 157158929 , 243294 }, + { 158379673 , 243753 }, + { 159607257 , 244213 }, + { 160841704 , 244673 }, + { 162083037 , 245133 }, + { 163331279 , 245593 }, + { 164586455 , 246054 }, + { 165848586 , 246514 }, + { 167117696 , 246975 }, + { 168393810 , 247437 }, + { 169676949 , 247898 }, + { 170967138 , 248360 }, + { 172264399 , 248822 }, + { 173568757 , 249284 }, + { 174880235 , 249747 }, + { 176198856 , 250209 }, + { 177524643 , 250672 }, + { 178857621 , 251136 }, + { 180197813 , 251599 }, + { 181545242 , 252063 }, + { 182899933 , 252527 }, + { 184261908 , 252991 }, + { 185631191 , 253456 }, + { 187007807 , 253920 }, + { 188391778 , 254385 }, + { 189783129 , 254851 }, + { 191181884 , 255316 }, + { 192588065 , 255782 }, + { 194001698 , 256248 }, + { 195422805 , 256714 }, + { 196851411 , 257181 }, + { 198287540 , 257648 }, + { 199731215 , 258115 }, + { 201182461 , 258582 }, + { 202641302 , 259050 }, + { 204107760 , 259518 }, + { 205581862 , 259986 }, + { 207063630 , 260454 }, + { 208553088 , 260923 }, + { 210050262 , 261392 }, + { 211555174 , 261861 }, + { 213067849 , 262331 }, + { 214588312 , 262800 }, + { 216116586 , 263270 }, + { 217652696 , 263741 }, + { 219196666 , 264211 }, + { 220748520 , 264682 }, + { 222308282 , 265153 }, + { 223875978 , 265625 }, + { 225451630 , 266097 }, + { 227035265 , 266569 }, + { 228626905 , 267041 }, + { 230226576 , 267514 }, + { 231834302 , 267986 }, + { 233450107 , 268460 }, + { 235074016 , 268933 }, + { 236706054 , 269407 }, + { 238346244 , 269881 }, + { 239994613 , 270355 }, + { 241651183 , 270830 }, + { 243315981 , 271305 } +}; + +/* Calculate the send rate as per section 3.1 of RFC3448 + +Returns send rate in bytes per second + +Integer maths and lookups are used as not allowed floating point in kernel + +The function for Xcalc as per section 3.1 of RFC3448 is: + +X = s + ------------------------------------------------------------- + R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2))) + +where +X is the trasmit rate in bytes/second +s is the packet size in bytes +R is the round trip time in seconds +p is the loss event rate, between 0 and 1.0, of the number of loss events + as a fraction of the number of packets transmitted +t_RTO is the TCP retransmission timeout value in seconds +b is the number of packets acknowledged by a single TCP acknowledgement + +we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes: + +X = s + ----------------------------------------------------------------------- + R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2))) + + +which we can break down into: + +X = s + -------- + R * f(p) + +where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p)) + +Function parameters: +s - bytes +R - RTT in usecs +p - loss rate (decimal fraction multiplied by 1,000,000) + +Returns Xcalc in bytes per second + +DON'T alter this code unless you run test cases against it as the code +has been manipulated to stop underflow/overlow. + +*/ +static u32 ccid3_calc_x(u16 s, u32 R, u32 p) +{ + int index; + u32 f; + u64 tmp1, tmp2; + + if (p < CALCX_SPLIT) + index = (p / (CALCX_SPLIT / CALCX_ARRSIZE)) - 1; + else + index = (p / (1000000 / CALCX_ARRSIZE)) - 1; + + if (index < 0) + /* p should be 0 unless there is a bug in my code */ + index = 0; + + if (R == 0) + R = 1; /* RTT can't be zero or else divide by zero */ + + BUG_ON(index >= CALCX_ARRSIZE); + + if (p >= CALCX_SPLIT) + f = calcx_lookup[index][0]; + else + f = calcx_lookup[index][1]; + + tmp1 = ((u64)s * 100000000); + tmp2 = ((u64)R * (u64)f); + do_div(tmp2,10000); + do_div(tmp1,tmp2); + /* don't alter above math unless you test due to overflow on 32 bit */ + + return (u32)tmp1; +} + +/* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ +static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx) +{ + if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) + return; + /* if no feedback spec says t_ipi is 1 second (set elsewhere and then + * doubles after every no feedback timer (separate function) */ + + if (hctx->ccid3hctx_x < 10) { + ccid3_pr_debug("ccid3_calc_new_t_ipi - ccid3hctx_x < 10\n"); + hctx->ccid3hctx_x = 10; + } + hctx->ccid3hctx_t_ipi = (hctx->ccid3hctx_s * 100000) + / (hctx->ccid3hctx_x / 10); + /* reason for above maths with 10 in there is to avoid 32 bit + * overflow for jumbo packets */ + +} + +/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ +static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) +{ + hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); + +} + +/* + * Update X by + * If (p > 0) + * x_calc = calcX(s, R, p); + * X = max(min(X_calc, 2 * X_recv), s / t_mbi); + * Else + * If (now - tld >= R) + * X = max(min(2 * X, 2 * X_recv), s / R); + * tld = now; + */ +static void ccid3_hc_tx_update_x(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + + if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { /* to avoid large error in calcX */ + hctx->ccid3hctx_x_calc = ccid3_calc_x(hctx->ccid3hctx_s, + hctx->ccid3hctx_rtt, + hctx->ccid3hctx_p); + hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, 2 * hctx->ccid3hctx_x_recv), + hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME); + } else if (now_delta(hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) { + u32 rtt = hctx->ccid3hctx_rtt; + if (rtt < 10) { + rtt = 10; + } /* avoid divide by zero below */ + + hctx->ccid3hctx_x = max_t(u32, min_t(u32, 2 * hctx->ccid3hctx_x_recv, 2 * hctx->ccid3hctx_x), + (hctx->ccid3hctx_s * 100000) / (rtt / 10)); + /* Using 100000 and 10 to avoid 32 bit overflow for jumbo frames */ + do_gettimeofday(&hctx->ccid3hctx_t_ld); + } + + if (hctx->ccid3hctx_x == 0) { + ccid3_pr_debug("ccid3hctx_x = 0!\n"); + hctx->ccid3hctx_x = 1; + } +} + +static void ccid3_hc_tx_no_feedback_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + struct dccp_sock *dp = dccp_sk(sk); + unsigned long next_tmout = 0; + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + u32 rtt; + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later. */ + /* XXX: set some sensible MIB */ + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + HZ / 5); + goto out; + } + + ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk, + ccid3_tx_state_name(hctx->ccid3hctx_state)); + + if (hctx->ccid3hctx_x < 10) { + ccid3_pr_debug("TFRC_SSTATE_NO_FBACK ccid3hctx_x < 10\n"); + hctx->ccid3hctx_x = 10; + } + + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_TERM: + goto out; + case TFRC_SSTATE_NO_FBACK: + /* Halve send rate */ + hctx->ccid3hctx_x /= 2; + if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME)) + hctx->ccid3hctx_x = hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME; + + ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d bytes/s\n", + dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state), + hctx->ccid3hctx_x); + next_tmout = max_t(u32, 2 * (hctx->ccid3hctx_s * 100000) + / (hctx->ccid3hctx_x / 10), TFRC_INITIAL_TIMEOUT); + /* do above maths with 100000 and 10 to prevent overflow on 32 bit */ + /* FIXME - not sure above calculation is correct. See section 5 of CCID3 11 + * should adjust tx_t_ipi and double that to achieve it really */ + break; + case TFRC_SSTATE_FBACK: + /* Check if IDLE since last timeout and recv rate is less than 4 packets per RTT */ + rtt = hctx->ccid3hctx_rtt; + if (rtt < 10) + rtt = 10; + /* stop divide by zero below */ + if (!hctx->ccid3hctx_idle || (hctx->ccid3hctx_x_recv >= + 4 * (hctx->ccid3hctx_s * 100000) / (rtt / 10))) { + ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", dccp_role(sk), sk, + ccid3_tx_state_name(hctx->ccid3hctx_state)); + /* Halve sending rate */ + + /* If (X_calc > 2 * X_recv) + * X_recv = max(X_recv / 2, s / (2 * t_mbi)); + * Else + * X_recv = X_calc / 4; + */ + BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P && hctx->ccid3hctx_x_calc == 0); + + /* check also if p is zero -> x_calc is infinity? */ + if (hctx->ccid3hctx_p < TFRC_SMALLEST_P || + hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv) + hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2, + hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME)); + else + hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4; + + /* Update sending rate */ + ccid3_hc_tx_update_x(sk); + } + if (hctx->ccid3hctx_x == 0) { + ccid3_pr_debug("TFRC_SSTATE_FBACK ccid3hctx_x = 0!\n"); + hctx->ccid3hctx_x = 10; + } + /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ + next_tmout = max_t(u32, inet_csk(sk)->icsk_rto, + 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10)); + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); + dump_stack(); + goto out; + } + + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); + hctx->ccid3hctx_idle = 1; +out: + bh_unlock_sock(sk); + sock_put(sk); +} + +static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb, + int len, long *delay) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_tx_hist_entry *new_packet = NULL; + struct timeval now; + int rc = -ENOTCONN; + +// ccid3_pr_debug("%s, sk=%p, skb=%p, len=%d\n", dccp_role(sk), sk, skb, len); + /* + * check if pure ACK or Terminating */ + /* XXX: We only call this function for DATA and DATAACK, on, these packets can have + * zero length, but why the comment about "pure ACK"? + */ + if (hctx == NULL || len == 0 || hctx->ccid3hctx_state == TFRC_SSTATE_TERM) + goto out; + + /* See if last packet allocated was not sent */ + if (!list_empty(&hctx->ccid3hctx_hist)) + new_packet = list_entry(hctx->ccid3hctx_hist.next, + struct ccid3_tx_hist_entry, ccid3htx_node); + + if (new_packet == NULL || new_packet->ccid3htx_sent) { + new_packet = ccid3_tx_hist_entry_new(SLAB_ATOMIC); + + rc = -ENOBUFS; + if (new_packet == NULL) { + ccid3_pr_debug("%s, sk=%p, not enough mem to add " + "to history, send refused\n", dccp_role(sk), sk); + goto out; + } + + list_add(&new_packet->ccid3htx_node, &hctx->ccid3hctx_hist); + } + + do_gettimeofday(&now); + + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_NO_SENT: + ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n", dccp_role(sk), sk, + dp->dccps_gss); + + hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer; + hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk; + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)); + hctx->ccid3hctx_last_win_count = 0; + hctx->ccid3hctx_t_last_win_count = now; + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); + hctx->ccid3hctx_t_ipi = TFRC_INITIAL_TIMEOUT; + + /* Set nominal send time for initial packet */ + hctx->ccid3hctx_t_nom = now; + (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; + timeval_fix(&(hctx->ccid3hctx_t_nom)); + ccid3_calc_new_delta(hctx); + rc = 0; + break; + case TFRC_SSTATE_NO_FBACK: + case TFRC_SSTATE_FBACK: + *delay = (now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta); + ccid3_pr_debug("send_packet delay=%ld\n",*delay); + *delay /= -1000; + /* divide by -1000 is to convert to ms and get sign right */ + rc = *delay > 0 ? -EAGAIN : 0; + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); + dump_stack(); + rc = -EINVAL; + break; + } + + /* Can we send? if so add options and add to packet history */ + if (rc == 0) + new_packet->ccid3htx_win_count = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; +out: + return rc; +} + +static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_tx_hist_entry *packet = NULL; + struct timeval now; + +// ccid3_pr_debug("%s, sk=%p, more=%d, len=%d\n", dccp_role(sk), sk, more, len); + BUG_ON(hctx == NULL); + + if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { + ccid3_pr_debug("%s, sk=%p, while state is TFRC_SSTATE_TERM!\n", + dccp_role(sk), sk); + return; + } + + do_gettimeofday(&now); + + /* check if we have sent a data packet */ + if (len > 0) { + unsigned long quarter_rtt; + + if (list_empty(&hctx->ccid3hctx_hist)) { + printk(KERN_CRIT "%s: packet doesn't exists in history!\n", __FUNCTION__); + return; + } + packet = list_entry(hctx->ccid3hctx_hist.next, struct ccid3_tx_hist_entry, ccid3htx_node); + if (packet->ccid3htx_sent) { + printk(KERN_CRIT "%s: no unsent packet in history!\n", __FUNCTION__); + return; + } + packet->ccid3htx_tstamp = now; + packet->ccid3htx_seqno = dp->dccps_gss; + // ccid3_pr_debug("%s, sk=%p, seqno=%llu inserted!\n", dccp_role(sk), sk, packet->ccid3htx_seqno); + + /* + * Check if win_count have changed */ + /* COMPLIANCE_BEGIN + * Algorithm in "8.1. Window Counter Valuer" in draft-ietf-dccp-ccid3-11.txt + */ + quarter_rtt = now_delta(hctx->ccid3hctx_t_last_win_count) / (hctx->ccid3hctx_rtt / 4); + if (quarter_rtt > 0) { + hctx->ccid3hctx_t_last_win_count = now; + hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count + + min_t(unsigned long, quarter_rtt, 5)) % 16; + ccid3_pr_debug("%s, sk=%p, window changed from %u to %u!\n", + dccp_role(sk), sk, + packet->ccid3htx_win_count, + hctx->ccid3hctx_last_win_count); + } + /* COMPLIANCE_END */ +#if 0 + ccid3_pr_debug("%s, sk=%p, packet sent (%llu,%u)\n", + dccp_role(sk), sk, + packet->ccid3htx_seqno, + packet->ccid3htx_win_count); +#endif + hctx->ccid3hctx_idle = 0; + packet->ccid3htx_sent = 1; + } else + ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n", + dccp_role(sk), sk, dp->dccps_gss); + + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_NO_SENT: + /* if first wasn't pure ack */ + if (len != 0) + printk(KERN_CRIT "%s: %s, First packet sent is noted as a data packet\n", + __FUNCTION__, dccp_role(sk)); + return; + case TFRC_SSTATE_NO_FBACK: + case TFRC_SSTATE_FBACK: + if (len > 0) { + hctx->ccid3hctx_t_nom = now; + ccid3_calc_new_t_ipi(hctx); + ccid3_calc_new_delta(hctx); + (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; + timeval_fix(&(hctx->ccid3hctx_t_nom)); + } + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); + dump_stack(); + break; + } +} + +static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_options_received *opt_recv; + struct ccid3_tx_hist_entry *entry, *next, *packet; + unsigned long next_tmout; + u16 t_elapsed; + u32 pinv; + u32 x_recv; + u32 r_sample; +#if 0 + ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n", + dccp_role(sk), sk, dccp_state_name(sk->sk_state), + skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); +#endif + if (hctx == NULL) + return; + + if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { + ccid3_pr_debug("%s, sk=%p, received a packet when terminating!\n", dccp_role(sk), sk); + return; + } + + /* we are only interested in ACKs */ + if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || + DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) + return; + + opt_recv = &hctx->ccid3hctx_options_received; + + t_elapsed = dp->dccps_options_received.dccpor_elapsed_time; + x_recv = opt_recv->ccid3or_receive_rate; + pinv = opt_recv->ccid3or_loss_event_rate; + + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_NO_SENT: + /* FIXME: what to do here? */ + return; + case TFRC_SSTATE_NO_FBACK: + case TFRC_SSTATE_FBACK: + /* Calculate new round trip sample by + * R_sample = (now - t_recvdata) - t_delay */ + /* get t_recvdata from history */ + packet = NULL; + list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) + if (entry->ccid3htx_seqno == DCCP_SKB_CB(skb)->dccpd_ack_seq) { + packet = entry; + break; + } + + if (packet == NULL) { + ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't exist in history!\n", + dccp_role(sk), sk, DCCP_SKB_CB(skb)->dccpd_ack_seq, + dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); + return; + } + + /* Update RTT */ + r_sample = now_delta(packet->ccid3htx_tstamp); + /* FIXME: */ + // r_sample -= usecs_to_jiffies(t_elapsed * 10); + + /* Update RTT estimate by + * If (No feedback recv) + * R = R_sample; + * Else + * R = q * R + (1 - q) * R_sample; + * + * q is a constant, RFC 3448 recomments 0.9 + */ + if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); + hctx->ccid3hctx_rtt = r_sample; + } else + hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + r_sample / 10; + + /* + * XXX: this is to avoid a division by zero in ccid3_hc_tx_packet_sent + * implemention of the new window count. + */ + if (hctx->ccid3hctx_rtt < 4) + hctx->ccid3hctx_rtt = 4; + + ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, r_sample=%us\n", + dccp_role(sk), sk, + hctx->ccid3hctx_rtt, + r_sample); + + /* Update timeout interval */ + inet_csk(sk)->icsk_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, USEC_IN_SEC); + + /* Update receive rate */ + hctx->ccid3hctx_x_recv = x_recv; /* x_recv in bytes per second */ + + /* Update loss event rate */ + if (pinv == ~0 || pinv == 0) + hctx->ccid3hctx_p = 0; + else { + hctx->ccid3hctx_p = 1000000 / pinv; + + if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) { + hctx->ccid3hctx_p = TFRC_SMALLEST_P; + ccid3_pr_debug("%s, sk=%p, Smallest p used!\n", dccp_role(sk), sk); + } + } + + /* unschedule no feedback timer */ + sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); + + /* Update sending rate */ + ccid3_hc_tx_update_x(sk); + + /* Update next send time */ + if (hctx->ccid3hctx_t_ipi > (hctx->ccid3hctx_t_nom).tv_usec) { + (hctx->ccid3hctx_t_nom).tv_usec += USEC_IN_SEC; + (hctx->ccid3hctx_t_nom).tv_sec--; + } + /* FIXME - if no feedback then t_ipi can go > 1 second */ + (hctx->ccid3hctx_t_nom).tv_usec -= hctx->ccid3hctx_t_ipi; + ccid3_calc_new_t_ipi(hctx); + (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; + timeval_fix(&(hctx->ccid3hctx_t_nom)); + ccid3_calc_new_delta(hctx); + + /* remove all packets older than the one acked from history */ +#if 0 + FIXME! + list_for_each_entry_safe_continue(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) { + list_del_init(&entry->ccid3htx_node); + ccid3_tx_hist_entry_delete(entry); + } +#endif + if (hctx->ccid3hctx_x < 10) { + ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx->ccid3hctx_x < 10\n"); + hctx->ccid3hctx_x = 10; + } + /* to prevent divide by zero below */ + + /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ + next_tmout = max(inet_csk(sk)->icsk_rto, + 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x/10)); + /* maths with 100000 and 10 is to prevent overflow with 32 bit */ + + ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to expire in %lu jiffies (%luus)\n", + dccp_role(sk), sk, usecs_to_jiffies(next_tmout), next_tmout); + + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + jiffies + max_t(u32,1,usecs_to_jiffies(next_tmout))); + + /* set idle flag */ + hctx->ccid3hctx_idle = 1; + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); + dump_stack(); + break; + } +} + +static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) +{ + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + + if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) + return; + + DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; +} + +static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, + unsigned char len, u16 idx, unsigned char *value) +{ + int rc = 0; + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_options_received *opt_recv; + + if (hctx == NULL) + return 0; + + opt_recv = &hctx->ccid3hctx_options_received; + + if (opt_recv->ccid3or_seqno != dp->dccps_gsr) { + opt_recv->ccid3or_seqno = dp->dccps_gsr; + opt_recv->ccid3or_loss_event_rate = ~0; + opt_recv->ccid3or_loss_intervals_idx = 0; + opt_recv->ccid3or_loss_intervals_len = 0; + opt_recv->ccid3or_receive_rate = 0; + } + + switch (option) { + case TFRC_OPT_LOSS_EVENT_RATE: + if (len != 4) { + ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_LOSS_EVENT_RATE\n", + dccp_role(sk), sk); + rc = -EINVAL; + } else { + opt_recv->ccid3or_loss_event_rate = ntohl(*(u32 *)value); + ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n", + dccp_role(sk), sk, + opt_recv->ccid3or_loss_event_rate); + } + break; + case TFRC_OPT_LOSS_INTERVALS: + opt_recv->ccid3or_loss_intervals_idx = idx; + opt_recv->ccid3or_loss_intervals_len = len; + ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n", + dccp_role(sk), sk, + opt_recv->ccid3or_loss_intervals_idx, + opt_recv->ccid3or_loss_intervals_len); + break; + case TFRC_OPT_RECEIVE_RATE: + if (len != 4) { + ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_RECEIVE_RATE\n", + dccp_role(sk), sk); + rc = -EINVAL; + } else { + opt_recv->ccid3or_receive_rate = ntohl(*(u32 *)value); + ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n", + dccp_role(sk), sk, + opt_recv->ccid3or_receive_rate); + } + break; + } + + return rc; +} + +static int ccid3_hc_tx_init(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx; + + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + + hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any()); + if (hctx == NULL) + return -ENOMEM; + + memset(hctx, 0, sizeof(*hctx)); + + if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE && + dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE) + hctx->ccid3hctx_s = (u16)dp->dccps_avg_packet_size; + else + hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE; + + hctx->ccid3hctx_x = hctx->ccid3hctx_s; /* set transmission rate to 1 packet per second */ + hctx->ccid3hctx_rtt = 4; /* See ccid3_hc_tx_packet_sent win_count calculatation */ + inet_csk(sk)->icsk_rto = USEC_IN_SEC; + hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; + INIT_LIST_HEAD(&hctx->ccid3hctx_hist); + init_timer(&hctx->ccid3hctx_no_feedback_timer); + + return 0; +} + +static void ccid3_hc_tx_exit(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_tx_hist_entry *entry, *next; + + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + BUG_ON(hctx == NULL); + + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); + sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); + + /* Empty packet history */ + list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) { + list_del_init(&entry->ccid3htx_node); + ccid3_tx_hist_entry_delete(entry); + } + + kfree(dp->dccps_hc_tx_ccid_private); + dp->dccps_hc_tx_ccid_private = NULL; +} + +/* + * RX Half Connection methods + */ + +/* TFRC receiver states */ +enum ccid3_hc_rx_states { + TFRC_RSTATE_NO_DATA = 1, + TFRC_RSTATE_DATA, + TFRC_RSTATE_TERM = 127, +}; + +#ifdef CCID3_DEBUG +static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) +{ + static char *ccid3_rx_state_names[] = { + [TFRC_RSTATE_NO_DATA] = "NO_DATA", + [TFRC_RSTATE_DATA] = "DATA", + [TFRC_RSTATE_TERM] = "TERM", + }; + + return ccid3_rx_state_names[state]; +} +#endif + +static inline void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_states state) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state; + + ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", + dccp_role(sk), sk, ccid3_rx_state_name(oldstate), ccid3_rx_state_name(state)); + WARN_ON(state == oldstate); + hcrx->ccid3hcrx_state = state; +} + +static int ccid3_hc_rx_add_hist(struct sock *sk, struct ccid3_rx_hist_entry *packet) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_rx_hist_entry *entry, *next; + u8 num_later = 0; + + if (list_empty(&hcrx->ccid3hcrx_hist)) + list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); + else { + u64 seqno = packet->ccid3hrx_seqno; + struct ccid3_rx_hist_entry *iter = list_entry(hcrx->ccid3hcrx_hist.next, + struct ccid3_rx_hist_entry, + ccid3hrx_node); + if (after48(seqno, iter->ccid3hrx_seqno)) + list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); + else { + if (iter->ccid3hrx_type == DCCP_PKT_DATA || + iter->ccid3hrx_type == DCCP_PKT_DATAACK) + num_later = 1; + + list_for_each_entry_continue(iter, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + if (after48(seqno, iter->ccid3hrx_seqno)) { + list_add(&packet->ccid3hrx_node, &iter->ccid3hrx_node); + goto trim_history; + } + + if (iter->ccid3hrx_type == DCCP_PKT_DATA || + iter->ccid3hrx_type == DCCP_PKT_DATAACK) + num_later++; + + if (num_later == TFRC_RECV_NUM_LATE_LOSS) { + ccid3_rx_hist_entry_delete(packet); + ccid3_pr_debug("%s, sk=%p, packet(%llu) already lost!\n", + dccp_role(sk), sk, seqno); + return 1; + } + } + + if (num_later < TFRC_RECV_NUM_LATE_LOSS) + list_add_tail(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); + /* FIXME: else what? should we destroy the packet like above? */ + } + } + +trim_history: + /* Trim history (remove all packets after the NUM_LATE_LOSS + 1 data packets) */ + num_later = TFRC_RECV_NUM_LATE_LOSS + 1; + + if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + if (num_later == 0) { + list_del_init(&entry->ccid3hrx_node); + ccid3_rx_hist_entry_delete(entry); + } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || + entry->ccid3hrx_type == DCCP_PKT_DATAACK) + --num_later; + } + } else { + int step = 0; + u8 win_count = 0; /* Not needed, but lets shut up gcc */ + int tmp; + /* + * We have no loss interval history so we need at least one + * rtt:s of data packets to approximate rtt. + */ + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + if (num_later == 0) { + switch (step) { + case 0: + step = 1; + /* OK, find next data packet */ + num_later = 1; + break; + case 1: + step = 2; + /* OK, find next data packet */ + num_later = 1; + win_count = entry->ccid3hrx_win_count; + break; + case 2: + tmp = win_count - entry->ccid3hrx_win_count; + if (tmp < 0) + tmp += TFRC_WIN_COUNT_LIMIT; + if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { + /* we have found a packet older than one rtt + * remove the rest */ + step = 3; + } else /* OK, find next data packet */ + num_later = 1; + break; + case 3: + list_del_init(&entry->ccid3hrx_node); + ccid3_rx_hist_entry_delete(entry); + break; + } + } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || + entry->ccid3hrx_type == DCCP_PKT_DATAACK) + --num_later; + } + } + + return 0; +} + +static void ccid3_hc_rx_send_feedback(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_rx_hist_entry *entry, *packet; + + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + + switch (hcrx->ccid3hcrx_state) { + case TFRC_RSTATE_NO_DATA: + hcrx->ccid3hcrx_x_recv = 0; + break; + case TFRC_RSTATE_DATA: { + u32 delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback); + + if (delta == 0) + delta = 1; /* to prevent divide by zero */ + hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta; + } + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); + dump_stack(); + return; + } + + packet = NULL; + list_for_each_entry(entry, &hcrx->ccid3hcrx_hist, ccid3hrx_node) + if (entry->ccid3hrx_type == DCCP_PKT_DATA || + entry->ccid3hrx_type == DCCP_PKT_DATAACK) { + packet = entry; + break; + } + + if (packet == NULL) { + printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n", + __FUNCTION__, dccp_role(sk), sk); + dump_stack(); + return; + } + + do_gettimeofday(&(hcrx->ccid3hcrx_tstamp_last_feedback)); + hcrx->ccid3hcrx_last_counter = packet->ccid3hrx_win_count; + hcrx->ccid3hcrx_seqno_last_counter = packet->ccid3hrx_seqno; + hcrx->ccid3hcrx_bytes_recv = 0; + + /* Convert to multiples of 10us */ + hcrx->ccid3hcrx_elapsed_time = now_delta(packet->ccid3hrx_tstamp) / 10; + if (hcrx->ccid3hcrx_p == 0) + hcrx->ccid3hcrx_pinv = ~0; + else + hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p; + dccp_send_ack(sk); +} + +static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) +{ + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + + if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) + return; + + if (hcrx->ccid3hcrx_elapsed_time != 0 && !dccp_packet_without_ack(skb)) + dccp_insert_option_elapsed_time(sk, skb, hcrx->ccid3hcrx_elapsed_time); + + if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) { + const u32 x_recv = htonl(hcrx->ccid3hcrx_x_recv); + const u32 pinv = htonl(hcrx->ccid3hcrx_pinv); + + dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, &pinv, sizeof(pinv)); + dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, &x_recv, sizeof(x_recv)); + } + + DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; +} + +/* Weights used to calculate loss event rate */ +/* + * These are integers as per section 8 of RFC3448. We can then divide by 4 * + * when we use it. + */ +const int ccid3_hc_rx_w[TFRC_RECV_IVAL_F_LENGTH] = { 4, 4, 4, 4, 3, 2, 1, 1, }; + +/* + * args: fvalue - function value to match + * returns: p closest to that value + * + * both fvalue and p are multiplied by 1,000,000 to use ints + */ +u32 calcx_reverse_lookup(u32 fvalue) { + int ctr = 0; + int small; + + if (fvalue < calcx_lookup[0][1]) + return 0; + if (fvalue <= calcx_lookup[CALCX_ARRSIZE-1][1]) + small = 1; + else if (fvalue > calcx_lookup[CALCX_ARRSIZE-1][0]) + return 1000000; + else + small = 0; + while (fvalue > calcx_lookup[ctr][small]) + ctr++; + if (small) + return (CALCX_SPLIT * ctr / CALCX_ARRSIZE); + else + return (1000000 * ctr / CALCX_ARRSIZE) ; +} + +/* calculate first loss interval + * + * returns estimated loss interval in usecs */ + +static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_rx_hist_entry *entry, *next, *tail = NULL; + u32 rtt, delta, x_recv, fval, p, tmp2; + struct timeval tstamp, tmp_tv; + int interval = 0; + int win_count = 0; + int step = 0; + u64 tmp1; + + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + if (entry->ccid3hrx_type == DCCP_PKT_DATA || + entry->ccid3hrx_type == DCCP_PKT_DATAACK) { + tail = entry; + + switch (step) { + case 0: + tstamp = entry->ccid3hrx_tstamp; + win_count = entry->ccid3hrx_win_count; + step = 1; + break; + case 1: + interval = win_count - entry->ccid3hrx_win_count; + if (interval < 0) + interval += TFRC_WIN_COUNT_LIMIT; + if (interval > 4) + goto found; + break; + } + } + } + + if (step == 0) { + printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no data packets!\n", + __FUNCTION__, dccp_role(sk), sk); + return ~0; + } + + if (interval == 0) { + ccid3_pr_debug("%s, sk=%p, Could not find a win_count interval > 0. Defaulting to 1\n", + dccp_role(sk), sk); + interval = 1; + } +found: + timeval_sub(tstamp,tail->ccid3hrx_tstamp,&tmp_tv); + rtt = (tmp_tv.tv_sec * USEC_IN_SEC + tmp_tv.tv_usec) * 4 / interval; + ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", + dccp_role(sk), sk, rtt); + if (rtt == 0) + rtt = 1; + + delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback); + if (delta == 0) + delta = 1; + + x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta; + + tmp1 = (u64)x_recv * (u64)rtt; + do_div(tmp1,10000000); + tmp2 = (u32)tmp1; + fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; + /* do not alter order above or you will get overflow on 32 bit */ + p = calcx_reverse_lookup(fval); + ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied loss rate=%u\n",\ + dccp_role(sk), sk, x_recv, p); + + if (p == 0) + return ~0; + else + return 1000000 / p; +} + +static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_loss_interval_hist_entry *li_entry; + + if (seq_loss != DCCP_MAX_SEQNO + 1) { + ccid3_pr_debug("%s, sk=%p, seq_loss=%llu, win_loss=%u, packet loss detected\n", + dccp_role(sk), sk, seq_loss, win_loss); + + if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { + struct ccid3_loss_interval_hist_entry *li_tail = NULL; + int i; + + ccid3_pr_debug("%s, sk=%p, first loss event detected, creating history\n", dccp_role(sk), sk); + for (i = 0; i <= TFRC_RECV_IVAL_F_LENGTH; ++i) { + li_entry = ccid3_loss_interval_hist_entry_new(SLAB_ATOMIC); + if (li_entry == NULL) { + ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist); + ccid3_pr_debug("%s, sk=%p, not enough mem for creating history\n", + dccp_role(sk), sk); + return; + } + if (li_tail == NULL) + li_tail = li_entry; + list_add(&li_entry->ccid3lih_node, &hcrx->ccid3hcrx_loss_interval_hist); + } + + li_entry->ccid3lih_seqno = seq_loss; + li_entry->ccid3lih_win_count = win_loss; + + li_tail->ccid3lih_interval = ccid3_hc_rx_calc_first_li(sk); + } + } + /* FIXME: find end of interval */ +} + +static void ccid3_hc_rx_detect_loss(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_rx_hist_entry *entry, *a_next, *b_next, *packet; + struct ccid3_rx_hist_entry *a_loss = NULL; + struct ccid3_rx_hist_entry *b_loss = NULL; + u64 seq_loss = DCCP_MAX_SEQNO + 1; + u8 win_loss = 0; + u8 num_later = TFRC_RECV_NUM_LATE_LOSS; + + list_for_each_entry_safe(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + if (num_later == 0) { + b_loss = entry; + break; + } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || + entry->ccid3hrx_type == DCCP_PKT_DATAACK) + --num_later; + } + + if (b_loss == NULL) + goto out_update_li; + + a_next = b_next; + num_later = 1; +#if 0 + FIXME MERGE GIT! + list_for_each_entry_safe_continue(entry, a_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + if (num_later == 0) { + a_loss = entry; + break; + } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || + entry->ccid3hrx_type == DCCP_PKT_DATAACK) + --num_later; + } +#endif + + if (a_loss == NULL) { + if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { + /* no loss event have occured yet */ + ccid3_pr_debug("%s, sk=%p, TODO: find a lost data " + "packet by comparing to initial seqno\n", + dccp_role(sk), sk); + goto out_update_li; + } else { + pr_info("%s: %s, sk=%p, ERROR! Less than 4 data packets in history", + __FUNCTION__, dccp_role(sk), sk); + return; + } + } + + /* Locate a lost data packet */ + entry = packet = b_loss; +#if 0 + FIXME MERGE GIT! + list_for_each_entry_safe_continue(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + u64 delta = dccp_delta_seqno(entry->ccid3hrx_seqno, packet->ccid3hrx_seqno); + + if (delta != 0) { + if (packet->ccid3hrx_type == DCCP_PKT_DATA || + packet->ccid3hrx_type == DCCP_PKT_DATAACK) + --delta; + /* + * FIXME: check this, probably this % usage is because + * in earlier drafts the ndp count was just 8 bits + * long, but now it cam be up to 24 bits long. + */ +#if 0 + if (delta % DCCP_NDP_LIMIT != + (packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) % DCCP_NDP_LIMIT) +#endif + if (delta != packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) { + seq_loss = entry->ccid3hrx_seqno; + dccp_inc_seqno(&seq_loss); + } + } + packet = entry; + if (packet == a_loss) + break; + } +#endif + + if (seq_loss != DCCP_MAX_SEQNO + 1) + win_loss = a_loss->ccid3hrx_win_count; + +out_update_li: + ccid3_hc_rx_update_li(sk, seq_loss, win_loss); +} + +static u32 ccid3_hc_rx_calc_i_mean(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_loss_interval_hist_entry *li_entry, *li_next; + int i = 0; + u32 i_tot; + u32 i_tot0 = 0; + u32 i_tot1 = 0; + u32 w_tot = 0; + + list_for_each_entry_safe(li_entry, li_next, &hcrx->ccid3hcrx_loss_interval_hist, ccid3lih_node) { + if (i < TFRC_RECV_IVAL_F_LENGTH) { + i_tot0 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i]; + w_tot += ccid3_hc_rx_w[i]; + } + + if (i != 0) + i_tot1 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i - 1]; + + if (++i > TFRC_RECV_IVAL_F_LENGTH) + break; + } + + if (i != TFRC_RECV_IVAL_F_LENGTH) { + pr_info("%s: %s, sk=%p, ERROR! Missing entry in interval history!\n", + __FUNCTION__, dccp_role(sk), sk); + return 0; + } + + i_tot = max(i_tot0, i_tot1); + + /* FIXME: Why do we do this? -Ian McDonald */ + if (i_tot * 4 < w_tot) + i_tot = w_tot * 4; + + return i_tot * 4 / w_tot; +} + +static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_rx_hist_entry *packet; + struct timeval now; + u8 win_count; + u32 p_prev; + int ins; +#if 0 + ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n", + dccp_role(sk), sk, dccp_state_name(sk->sk_state), + skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); +#endif + if (hcrx == NULL) + return; + + BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || + hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA)); + + switch (DCCP_SKB_CB(skb)->dccpd_type) { + case DCCP_PKT_ACK: + if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) + return; + case DCCP_PKT_DATAACK: + if (dp->dccps_options_received.dccpor_timestamp_echo == 0) + break; + p_prev = hcrx->ccid3hcrx_rtt; + do_gettimeofday(&now); + /* hcrx->ccid3hcrx_rtt = now - dp->dccps_options_received.dccpor_timestamp_echo - + usecs_to_jiffies(dp->dccps_options_received.dccpor_elapsed_time * 10); + FIXME - I think above code is broken - have to look at options more, will also need + to fix pr_debug below */ + if (p_prev != hcrx->ccid3hcrx_rtt) + ccid3_pr_debug("%s, sk=%p, New RTT estimate=%lu jiffies, tstamp_echo=%u, elapsed time=%u\n", + dccp_role(sk), sk, hcrx->ccid3hcrx_rtt, + dp->dccps_options_received.dccpor_timestamp_echo, + dp->dccps_options_received.dccpor_elapsed_time); + break; + case DCCP_PKT_DATA: + break; + default: + ccid3_pr_debug("%s, sk=%p, not DATA/DATAACK/ACK packet(%s)\n", + dccp_role(sk), sk, + dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); + return; + } + + packet = ccid3_rx_hist_entry_new(sk, skb, SLAB_ATOMIC); + if (packet == NULL) { + ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet to history (consider it lost)!", + dccp_role(sk), sk); + return; + } + + win_count = packet->ccid3hrx_win_count; + + ins = ccid3_hc_rx_add_hist(sk, packet); + + if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) + return; + + switch (hcrx->ccid3hcrx_state) { + case TFRC_RSTATE_NO_DATA: + ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial feedback\n", + dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb); + ccid3_hc_rx_send_feedback(sk); + ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); + return; + case TFRC_RSTATE_DATA: + hcrx->ccid3hcrx_bytes_recv += skb->len - dccp_hdr(skb)->dccph_doff * 4; + if (ins == 0) { + do_gettimeofday(&now); + if ((now_delta(hcrx->ccid3hcrx_tstamp_last_ack)) >= hcrx->ccid3hcrx_rtt) { + hcrx->ccid3hcrx_tstamp_last_ack = now; + ccid3_hc_rx_send_feedback(sk); + } + return; + } + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); + dump_stack(); + return; + } + + /* Dealing with packet loss */ + ccid3_pr_debug("%s, sk=%p(%s), skb=%p, data loss! Reacting...\n", + dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb); + + ccid3_hc_rx_detect_loss(sk); + p_prev = hcrx->ccid3hcrx_p; + + /* Calculate loss event rate */ + if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) + /* Scaling up by 1000000 as fixed decimal */ + hcrx->ccid3hcrx_p = 1000000 / ccid3_hc_rx_calc_i_mean(sk); + + if (hcrx->ccid3hcrx_p > p_prev) { + ccid3_hc_rx_send_feedback(sk); + return; + } +} + +static int ccid3_hc_rx_init(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx; + + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + + hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any()); + if (hcrx == NULL) + return -ENOMEM; + + memset(hcrx, 0, sizeof(*hcrx)); + + if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE && + dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE) + hcrx->ccid3hcrx_s = (u16)dp->dccps_avg_packet_size; + else + hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE; + + hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; + INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); + INIT_LIST_HEAD(&hcrx->ccid3hcrx_loss_interval_hist); + + return 0; +} + +static void ccid3_hc_rx_exit(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + + if (hcrx == NULL) + return; + + ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); + + /* Empty packet history */ + ccid3_rx_history_delete(&hcrx->ccid3hcrx_hist); + + /* Empty loss interval history */ + ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist); + + kfree(dp->dccps_hc_rx_ccid_private); + dp->dccps_hc_rx_ccid_private = NULL; +} + +static struct ccid ccid3 = { + .ccid_id = 3, + .ccid_name = "ccid3", + .ccid_owner = THIS_MODULE, + .ccid_init = ccid3_init, + .ccid_exit = ccid3_exit, + .ccid_hc_tx_init = ccid3_hc_tx_init, + .ccid_hc_tx_exit = ccid3_hc_tx_exit, + .ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet, + .ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent, + .ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv, + .ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options, + .ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options, + .ccid_hc_rx_init = ccid3_hc_rx_init, + .ccid_hc_rx_exit = ccid3_hc_rx_exit, + .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options, + .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, +}; + +module_param(ccid3_debug, int, 0444); +MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); + +static __init int ccid3_module_init(void) +{ + int rc = -ENOMEM; + + ccid3_tx_hist_slab = kmem_cache_create("dccp_ccid3_tx_history", + sizeof(struct ccid3_tx_hist_entry), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (ccid3_tx_hist_slab == NULL) + goto out; + + ccid3_rx_hist_slab = kmem_cache_create("dccp_ccid3_rx_history", + sizeof(struct ccid3_rx_hist_entry), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (ccid3_rx_hist_slab == NULL) + goto out_free_tx_history; + + ccid3_loss_interval_hist_slab = kmem_cache_create("dccp_ccid3_loss_interval_history", + sizeof(struct ccid3_loss_interval_hist_entry), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (ccid3_loss_interval_hist_slab == NULL) + goto out_free_rx_history; + + rc = ccid_register(&ccid3); + if (rc != 0) + goto out_free_loss_interval_history; + +out: + return rc; +out_free_loss_interval_history: + kmem_cache_destroy(ccid3_loss_interval_hist_slab); + ccid3_loss_interval_hist_slab = NULL; +out_free_rx_history: + kmem_cache_destroy(ccid3_rx_hist_slab); + ccid3_rx_hist_slab = NULL; +out_free_tx_history: + kmem_cache_destroy(ccid3_tx_hist_slab); + ccid3_tx_hist_slab = NULL; + goto out; +} +module_init(ccid3_module_init); + +static __exit void ccid3_module_exit(void) +{ + ccid_unregister(&ccid3); + + if (ccid3_tx_hist_slab != NULL) { + kmem_cache_destroy(ccid3_tx_hist_slab); + ccid3_tx_hist_slab = NULL; + } + if (ccid3_rx_hist_slab != NULL) { + kmem_cache_destroy(ccid3_rx_hist_slab); + ccid3_rx_hist_slab = NULL; + } + if (ccid3_loss_interval_hist_slab != NULL) { + kmem_cache_destroy(ccid3_loss_interval_hist_slab); + ccid3_loss_interval_hist_slab = NULL; + } +} +module_exit(ccid3_module_exit); + +MODULE_AUTHOR("Ian McDonald & Arnaldo Carvalho de Melo "); +MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("net-dccp-ccid-3"); diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h new file mode 100644 index 00000000000..5d6b623e64d --- /dev/null +++ b/net/dccp/ccids/ccid3.h @@ -0,0 +1,137 @@ +/* + * net/dccp/ccids/ccid3.h + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * + * An implementation of the DCCP protocol + * + * This code has been developed by the University of Waikato WAND + * research group. For further information please see http://www.wand.net.nz/ + * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * + * This code also uses code from Lulea University, rereleased as GPL by its + * authors: + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * Changes to meet Linux coding standards, to make it meet latest ccid3 draft + * and to make it work as a loadable module in the DCCP stack written by + * Arnaldo Carvalho de Melo . + * + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef _DCCP_CCID3_H_ +#define _DCCP_CCID3_H_ + +#include +#include +#include + +struct ccid3_tx_hist_entry { + struct list_head ccid3htx_node; + u64 ccid3htx_seqno:48, + ccid3htx_win_count:8, + ccid3htx_sent:1; + struct timeval ccid3htx_tstamp; +}; + +struct ccid3_options_received { + u64 ccid3or_seqno:48, + ccid3or_loss_intervals_idx:16; + u16 ccid3or_loss_intervals_len; + u32 ccid3or_loss_event_rate; + u32 ccid3or_receive_rate; +}; + +/** struct ccid3_hc_tx_sock - CCID3 sender half connection congestion control block + * + * @ccid3hctx_state - Sender state + * @ccid3hctx_x - Current sending rate + * @ccid3hctx_x_recv - Receive rate + * @ccid3hctx_x_calc - Calculated send (?) rate + * @ccid3hctx_s - Packet size + * @ccid3hctx_rtt - Estimate of current round trip time in usecs + * @@ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 + * @ccid3hctx_last_win_count - Last window counter sent + * @ccid3hctx_t_last_win_count - Timestamp of earliest packet with last_win_count value sent + * @ccid3hctx_no_feedback_timer - Handle to no feedback timer + * @ccid3hctx_idle - FIXME + * @ccid3hctx_t_ld - Time last doubled during slow start + * @ccid3hctx_t_nom - Nominal send time of next packet + * @ccid3hctx_t_ipi - Interpacket (send) interval + * @ccid3hctx_delta - Send timer delta + * @ccid3hctx_hist - Packet history + */ +struct ccid3_hc_tx_sock { + u32 ccid3hctx_x; + u32 ccid3hctx_x_recv; + u32 ccid3hctx_x_calc; + u16 ccid3hctx_s; + u32 ccid3hctx_rtt; + u32 ccid3hctx_p; + u8 ccid3hctx_state; + u8 ccid3hctx_last_win_count; + u8 ccid3hctx_idle; + struct timeval ccid3hctx_t_last_win_count; + struct timer_list ccid3hctx_no_feedback_timer; + struct timeval ccid3hctx_t_ld; + struct timeval ccid3hctx_t_nom; + u32 ccid3hctx_t_ipi; + u32 ccid3hctx_delta; + struct list_head ccid3hctx_hist; + struct ccid3_options_received ccid3hctx_options_received; +}; + +struct ccid3_loss_interval_hist_entry { + struct list_head ccid3lih_node; + u64 ccid3lih_seqno:48, + ccid3lih_win_count:4; + u32 ccid3lih_interval; +}; + +struct ccid3_rx_hist_entry { + struct list_head ccid3hrx_node; + u64 ccid3hrx_seqno:48, + ccid3hrx_win_count:4, + ccid3hrx_type:4; + u32 ccid3hrx_ndp; /* In fact it is from 8 to 24 bits */ + struct timeval ccid3hrx_tstamp; +}; + +struct ccid3_hc_rx_sock { + u64 ccid3hcrx_seqno_last_counter:48, + ccid3hcrx_state:8, + ccid3hcrx_last_counter:4; + unsigned long ccid3hcrx_rtt; + u32 ccid3hcrx_p; + u32 ccid3hcrx_bytes_recv; + struct timeval ccid3hcrx_tstamp_last_feedback; + struct timeval ccid3hcrx_tstamp_last_ack; + struct list_head ccid3hcrx_hist; + struct list_head ccid3hcrx_loss_interval_hist; + u16 ccid3hcrx_s; + u32 ccid3hcrx_pinv; + u32 ccid3hcrx_elapsed_time; + u32 ccid3hcrx_x_recv; +}; + +#define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \ + ((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field) + +#define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \ + ((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field) + +#endif /* _DCCP_CCID3_H_ */ diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h new file mode 100644 index 00000000000..fb83454102c --- /dev/null +++ b/net/dccp/dccp.h @@ -0,0 +1,422 @@ +#ifndef _DCCP_H +#define _DCCP_H +/* + * net/dccp/dccp.h + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#define DCCP_DEBUG + +#ifdef DCCP_DEBUG +extern int dccp_debug; + +#define dccp_pr_debug(format, a...) \ + do { if (dccp_debug) \ + printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \ + } while (0) +#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) printk(format, ##a); } while (0) +#else +#define dccp_pr_debug(format, a...) +#define dccp_pr_debug_cat(format, a...) +#endif + +extern struct inet_hashinfo dccp_hashinfo; + +extern atomic_t dccp_orphan_count; +extern int dccp_tw_count; +extern void dccp_tw_deschedule(struct inet_timewait_sock *tw); + +extern void dccp_time_wait(struct sock *sk, int state, int timeo); + +/* FIXME: Right size this */ +#define DCCP_MAX_OPT_LEN 128 + +#define DCCP_MAX_PACKET_HDR 32 + +#define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER) + +#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT + * state, about 60 seconds */ + +/* draft-ietf-dccp-spec-11.txt initial RTO value */ +#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ)) + +/* Maximal interval between probes for local resources. */ +#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U)) + +#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */ + +extern struct proto dccp_v4_prot; + +/* is seq1 < seq2 ? */ +static inline const int before48(const u64 seq1, const u64 seq2) +{ + return (const s64)((seq1 << 16) - (seq2 << 16)) < 0; +} + +/* is seq1 > seq2 ? */ +static inline const int after48(const u64 seq1, const u64 seq2) +{ + return (const s64)((seq2 << 16) - (seq1 << 16)) < 0; +} + +/* is seq2 <= seq1 <= seq3 ? */ +static inline const int between48(const u64 seq1, const u64 seq2, const u64 seq3) +{ + return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16); +} + +static inline u64 max48(const u64 seq1, const u64 seq2) +{ + return after48(seq1, seq2) ? seq1 : seq2; +} + +enum { + DCCP_MIB_NUM = 0, + DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */ + DCCP_MIB_ESTABRESETS, /* EstabResets */ + DCCP_MIB_CURRESTAB, /* CurrEstab */ + DCCP_MIB_OUTSEGS, /* OutSegs */ + DCCP_MIB_OUTRSTS, + DCCP_MIB_ABORTONTIMEOUT, + DCCP_MIB_TIMEOUTS, + DCCP_MIB_ABORTFAILED, + DCCP_MIB_PASSIVEOPENS, + DCCP_MIB_ATTEMPTFAILS, + DCCP_MIB_OUTDATAGRAMS, + DCCP_MIB_INERRS, + DCCP_MIB_OPTMANDATORYERROR, + DCCP_MIB_INVALIDOPT, + __DCCP_MIB_MAX +}; + +#define DCCP_MIB_MAX __DCCP_MIB_MAX +struct dccp_mib { + unsigned long mibs[DCCP_MIB_MAX]; +} __SNMP_MIB_ALIGN__; + +DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); +#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) +#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field) +#define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field) +#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) +#define DCCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(dccp_statistics, field, val) +#define DCCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(dccp_statistics, field, val) + +extern int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb); +extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); + +extern int dccp_send_response(struct sock *sk); +extern void dccp_send_ack(struct sock *sk); +extern void dccp_send_delayed_ack(struct sock *sk); +extern void dccp_send_sync(struct sock *sk, u64 seq); + +extern void dccp_init_xmit_timers(struct sock *sk); +static inline void dccp_clear_xmit_timers(struct sock *sk) +{ + inet_csk_clear_xmit_timers(sk); +} + +extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); + +extern const char *dccp_packet_name(const int type); +extern const char *dccp_state_name(const int state); + +static inline void dccp_set_state(struct sock *sk, const int state) +{ + const int oldstate = sk->sk_state; + + dccp_pr_debug("%s(%p) %-10.10s -> %s\n", + dccp_role(sk), sk, + dccp_state_name(oldstate), dccp_state_name(state)); + WARN_ON(state == oldstate); + + switch (state) { + case DCCP_OPEN: + if (oldstate != DCCP_OPEN) + DCCP_INC_STATS(DCCP_MIB_CURRESTAB); + break; + + case DCCP_CLOSED: + if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN) + DCCP_INC_STATS(DCCP_MIB_ESTABRESETS); + + sk->sk_prot->unhash(sk); + if (inet_csk(sk)->icsk_bind_hash != NULL && + !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) + inet_put_port(&dccp_hashinfo, sk); + /* fall through */ + default: + if (oldstate == DCCP_OPEN) + DCCP_DEC_STATS(DCCP_MIB_CURRESTAB); + } + + /* Change state AFTER socket is unhashed to avoid closed + * socket sitting in hash tables. + */ + sk->sk_state = state; +} + +static inline void dccp_done(struct sock *sk) +{ + dccp_set_state(sk, DCCP_CLOSED); + dccp_clear_xmit_timers(sk); + + sk->sk_shutdown = SHUTDOWN_MASK; + + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_state_change(sk); + else + inet_csk_destroy_sock(sk); +} + +static inline void dccp_openreq_init(struct request_sock *req, + struct dccp_sock *dp, + struct sk_buff *skb) +{ + /* + * FIXME: fill in the other req fields from the DCCP options + * received + */ + inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; + inet_rsk(req)->acked = 0; + req->rcv_wnd = 0; +} + +extern void dccp_v4_send_check(struct sock *sk, struct dccp_hdr *dh, int len, + struct sk_buff *skb); +extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); + +extern struct sock *dccp_create_openreq_child(struct sock *sk, + const struct request_sock *req, + const struct sk_buff *skb); + +extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); + +extern void dccp_v4_err(struct sk_buff *skb, u32); + +extern int dccp_v4_rcv(struct sk_buff *skb); + +extern struct sock *dccp_v4_request_recv_sock(struct sock *sk, + struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst); +extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct request_sock **prev); + +extern int dccp_child_process(struct sock *parent, struct sock *child, + struct sk_buff *skb); +extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, + struct dccp_hdr *dh, unsigned len); +extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, + const struct dccp_hdr *dh, const unsigned len); + +extern void dccp_close(struct sock *sk, long timeout); +extern struct sk_buff *dccp_make_response(struct sock *sk, + struct dst_entry *dst, + struct request_sock *req); + +extern int dccp_connect(struct sock *sk); +extern int dccp_disconnect(struct sock *sk, int flags); +extern int dccp_getsockopt(struct sock *sk, int level, int optname, + char *optval, int *optlen); +extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); +extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t size); +extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len); +extern int dccp_setsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen); +extern void dccp_shutdown(struct sock *sk, int how); + +extern int dccp_v4_checksum(struct sk_buff *skb); + +extern int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code); +extern void dccp_send_close(struct sock *sk); + +struct dccp_skb_cb { + __u8 dccpd_type; + __u8 dccpd_reset_code; + __u8 dccpd_service; + __u8 dccpd_ccval; + __u64 dccpd_seq; + __u64 dccpd_ack_seq; + int dccpd_opt_len; +}; + +#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) + +static inline int dccp_non_data_packet(const struct sk_buff *skb) +{ + const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; + + return type == DCCP_PKT_ACK || + type == DCCP_PKT_CLOSE || + type == DCCP_PKT_CLOSEREQ || + type == DCCP_PKT_RESET || + type == DCCP_PKT_SYNC || + type == DCCP_PKT_SYNCACK; +} + +static inline int dccp_packet_without_ack(const struct sk_buff *skb) +{ + const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; + + return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST; +} + +#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1) +#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2) + +static inline void dccp_set_seqno(u64 *seqno, u64 value) +{ + if (value > DCCP_MAX_SEQNO) + value -= DCCP_MAX_SEQNO + 1; + *seqno = value; +} + +static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2) +{ + return ((seqno2 << 16) - (seqno1 << 16)) >> 16; +} + +static inline void dccp_inc_seqno(u64 *seqno) +{ + if (++*seqno > DCCP_MAX_SEQNO) + *seqno = 0; +} + +static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss) +{ + struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh + sizeof(*dh)); + +#if defined(__LITTLE_ENDIAN_BITFIELD) + dh->dccph_seq = htonl((gss >> 32)) >> 8; +#elif defined(__BIG_ENDIAN_BITFIELD) + dh->dccph_seq = htonl((gss >> 32)); +#else +#error "Adjust your defines" +#endif + dhx->dccph_seq_low = htonl(gss & 0xffffffff); +} + +static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, const u64 gsr) +{ +#if defined(__LITTLE_ENDIAN_BITFIELD) + dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8; +#elif defined(__BIG_ENDIAN_BITFIELD) + dhack->dccph_ack_nr_high = htonl((gsr >> 32)); +#else +#error "Adjust your defines" +#endif + dhack->dccph_ack_nr_low = htonl(gsr & 0xffffffff); +} + +static inline void dccp_update_gsr(struct sock *sk, u64 seq) +{ + struct dccp_sock *dp = dccp_sk(sk); + u64 tmp_gsr; + + dccp_set_seqno(&tmp_gsr, dp->dccps_gsr + 1 - (dp->dccps_options.dccpo_sequence_window / 4)); + dp->dccps_gsr = seq; + dccp_set_seqno(&dp->dccps_swl, max48(tmp_gsr, dp->dccps_isr)); + dccp_set_seqno(&dp->dccps_swh, + dp->dccps_gsr + (3 * dp->dccps_options.dccpo_sequence_window) / 4); +} + +static inline void dccp_update_gss(struct sock *sk, u64 seq) +{ + struct dccp_sock *dp = dccp_sk(sk); + u64 tmp_gss; + + dccp_set_seqno(&tmp_gss, dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1); + dp->dccps_awl = max48(tmp_gss, dp->dccps_iss); + dp->dccps_awh = dp->dccps_gss = seq; +} + +extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); +extern void dccp_insert_option_elapsed_time(struct sock *sk, + struct sk_buff *skb, + u32 elapsed_time); +extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb, + unsigned char option, + const void *value, unsigned char len); + +extern struct socket *dccp_ctl_socket; + +#define DCCP_ACKPKTS_STATE_RECEIVED 0 +#define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6) +#define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6) + +#define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */ +#define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */ + +/** struct dccp_ackpkts - acknowledgeable packets + * + * This data structure is the one defined in the DCCP draft + * Appendix A. + * + * @dccpap_buf_head - circular buffer head + * @dccpap_buf_tail - circular buffer tail + * @dccpap_buf_ackno - ack # of the most recent packet acknoldgeable in the buffer (i.e. %dccpap_buf_head) + * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked by the buffer with State 0 + * + * Additionally, the HC-Receiver must keep some information about the + * Ack Vectors it has recently sent. For each packet sent carrying an + * Ack Vector, it remembers four variables: + * + * @dccpap_ack_seqno - the Sequence Number used for the packet (HC-Receiver seqno) + * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement. + * @dccpap_ack_ackno - the Acknowledgement Number used for the packet (HC-Sender seqno) + * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. + * + * @dccpap_buf_len - circular buffer length + * @dccpap_buf - circular buffer of acknowledgeable packets + */ +struct dccp_ackpkts { + unsigned int dccpap_buf_head; + unsigned int dccpap_buf_tail; + u64 dccpap_buf_ackno; + u64 dccpap_ack_seqno; + u64 dccpap_ack_ackno; + unsigned int dccpap_ack_ptr; + unsigned int dccpap_buf_vector_len; + unsigned int dccpap_ack_vector_len; + unsigned int dccpap_buf_len; + unsigned long dccpap_time; + u8 dccpap_buf_nonce; + u8 dccpap_ack_nonce; + u8 dccpap_buf[0]; +}; + +extern struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority); +extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); +extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); +extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, + struct sock *sk, u64 ackno); + +#ifdef DCCP_DEBUG +extern void dccp_ackvector_print(const u64 ackno, + const unsigned char *vector, int len); +extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap); +#else +static inline void dccp_ackvector_print(const u64 ackno, + const unsigned char *vector, + int len) { } +static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { } +#endif + +#endif /* _DCCP_H */ diff --git a/net/dccp/input.c b/net/dccp/input.c new file mode 100644 index 00000000000..622e976a51f --- /dev/null +++ b/net/dccp/input.c @@ -0,0 +1,510 @@ +/* + * net/dccp/input.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include + +#include "ccid.h" +#include "dccp.h" + +static void dccp_fin(struct sock *sk, struct sk_buff *skb) +{ + sk->sk_shutdown |= RCV_SHUTDOWN; + sock_set_flag(sk, SOCK_DONE); + __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4); + __skb_queue_tail(&sk->sk_receive_queue, skb); + skb_set_owner_r(skb, sk); + sk->sk_data_ready(sk, 0); +} + +static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb) +{ + switch (sk->sk_state) { + case DCCP_PARTOPEN: + case DCCP_OPEN: + dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED); + dccp_fin(sk, skb); + dccp_set_state(sk, DCCP_CLOSED); + break; + } +} + +static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) +{ + /* + * Step 7: Check for unexpected packet types + * If (S.is_server and P.type == CloseReq) + * Send Sync packet acknowledging P.seqno + * Drop packet and return + */ + if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) { + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + return; + } + + switch (sk->sk_state) { + case DCCP_PARTOPEN: + case DCCP_OPEN: + dccp_set_state(sk, DCCP_CLOSING); + dccp_send_close(sk); + break; + } +} + +static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + + if (dp->dccps_options.dccpo_send_ack_vector) + dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq); +} + +static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); + struct dccp_sock *dp = dccp_sk(sk); + u64 lswl = dp->dccps_swl; + u64 lawl = dp->dccps_awl; + + /* + * Step 5: Prepare sequence numbers for Sync + * If P.type == Sync or P.type == SyncAck, + * If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL, + * / * P is valid, so update sequence number variables + * accordingly. After this update, P will pass the tests + * in Step 6. A SyncAck is generated if necessary in + * Step 15 * / + * Update S.GSR, S.SWL, S.SWH + * Otherwise, + * Drop packet and return + */ + if (dh->dccph_type == DCCP_PKT_SYNC || + dh->dccph_type == DCCP_PKT_SYNCACK) { + if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh) && + !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl)) + dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + else + return -1; + /* + * Step 6: Check sequence numbers + * Let LSWL = S.SWL and LAWL = S.AWL + * If P.type == CloseReq or P.type == Close or P.type == Reset, + * LSWL := S.GSR + 1, LAWL := S.GAR + * If LSWL <= P.seqno <= S.SWH + * and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH), + * Update S.GSR, S.SWL, S.SWH + * If P.type != Sync, + * Update S.GAR + * Otherwise, + * Send Sync packet acknowledging P.seqno + * Drop packet and return + */ + } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ || + dh->dccph_type == DCCP_PKT_CLOSE || + dh->dccph_type == DCCP_PKT_RESET) { + lswl = dp->dccps_gsr; + dccp_inc_seqno(&lswl); + lawl = dp->dccps_gar; + } + + if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) && + (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ || + between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, lawl, dp->dccps_awh))) { + dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + + if (dh->dccph_type != DCCP_PKT_SYNC && + DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; + } else { + dccp_pr_debug("Step 6 failed, sending SYNC...\n"); + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + return -1; + } + + return 0; +} + +int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, + const struct dccp_hdr *dh, const unsigned len) +{ + struct dccp_sock *dp = dccp_sk(sk); + + if (dccp_check_seqno(sk, skb)) + goto discard; + + if (dccp_parse_options(sk, skb)) + goto discard; + + if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dccp_event_ack_recv(sk, skb); + + /* + * FIXME: check ECN to see if we should use + * DCCP_ACKPKTS_STATE_ECN_MARKED + */ + if (dp->dccps_options.dccpo_send_ack_vector) { + struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; + + if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKPKTS_STATE_RECEIVED)) { + LIMIT_NETDEBUG(pr_info("DCCP: acknowledgeable packets buffer full!\n")); + ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX); + goto discard; + } + + /* + * FIXME: this activation is probably wrong, have to study more + * TCP delack machinery and how it fits into DCCP draft, but + * for now it kinda "works" 8) + */ + if (!inet_csk_ack_scheduled(sk)) { + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, TCP_RTO_MAX); + } + } + + ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); + ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); + + switch (dccp_hdr(skb)->dccph_type) { + case DCCP_PKT_DATAACK: + case DCCP_PKT_DATA: + /* + * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED option + * if it is. + */ + __skb_pull(skb, dh->dccph_doff * 4); + __skb_queue_tail(&sk->sk_receive_queue, skb); + skb_set_owner_r(skb, sk); + sk->sk_data_ready(sk, 0); + return 0; + case DCCP_PKT_ACK: + goto discard; + case DCCP_PKT_RESET: + /* + * Step 9: Process Reset + * If P.type == Reset, + * Tear down connection + * S.state := TIMEWAIT + * Set TIMEWAIT timer + * Drop packet and return + */ + dccp_fin(sk, skb); + dccp_time_wait(sk, DCCP_TIME_WAIT, 0); + return 0; + case DCCP_PKT_CLOSEREQ: + dccp_rcv_closereq(sk, skb); + goto discard; + case DCCP_PKT_CLOSE: + dccp_rcv_close(sk, skb); + return 0; + case DCCP_PKT_REQUEST: + /* Step 7 + * or (S.is_server and P.type == Response) + * or (S.is_client and P.type == Request) + * or (S.state >= OPEN and P.type == Request + * and P.seqno >= S.OSR) + * or (S.state >= OPEN and P.type == Response + * and P.seqno >= S.OSR) + * or (S.state == RESPOND and P.type == Data), + * Send Sync packet acknowledging P.seqno + * Drop packet and return + */ + if (dp->dccps_role != DCCP_ROLE_LISTEN) + goto send_sync; + goto check_seq; + case DCCP_PKT_RESPONSE: + if (dp->dccps_role != DCCP_ROLE_CLIENT) + goto send_sync; +check_seq: + if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) { +send_sync: + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + } + break; + } + + DCCP_INC_STATS_BH(DCCP_MIB_INERRS); +discard: + __kfree_skb(skb); + return 0; +} + +static int dccp_rcv_request_sent_state_process(struct sock *sk, + struct sk_buff *skb, + const struct dccp_hdr *dh, + const unsigned len) +{ + /* + * Step 4: Prepare sequence numbers in REQUEST + * If S.state == REQUEST, + * If (P.type == Response or P.type == Reset) + * and S.AWL <= P.ackno <= S.AWH, + * / * Set sequence number variables corresponding to the + * other endpoint, so P will pass the tests in Step 6 * / + * Set S.GSR, S.ISR, S.SWL, S.SWH + * / * Response processing continues in Step 10; Reset + * processing continues in Step 9 * / + */ + if (dh->dccph_type == DCCP_PKT_RESPONSE) { + const struct inet_connection_sock *icsk = inet_csk(sk); + struct dccp_sock *dp = dccp_sk(sk); + + /* Stop the REQUEST timer */ + inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); + BUG_TRAP(sk->sk_send_head != NULL); + __kfree_skb(sk->sk_send_head); + sk->sk_send_head = NULL; + + if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh)) { + dccp_pr_debug("invalid ackno: S.AWL=%llu, P.ackno=%llu, S.AWH=%llu \n", + dp->dccps_awl, DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awh); + goto out_invalid_packet; + } + + dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; + dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + + if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 || + ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) { + ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); + /* FIXME: send appropriate RESET code */ + goto out_invalid_packet; + } + + dccp_sync_mss(sk, dp->dccps_pmtu_cookie); + + /* + * Step 10: Process REQUEST state (second part) + * If S.state == REQUEST, + * / * If we get here, P is a valid Response from the server (see + * Step 4), and we should move to PARTOPEN state. PARTOPEN + * means send an Ack, don't send Data packets, retransmit + * Acks periodically, and always include any Init Cookie from + * the Response * / + * S.state := PARTOPEN + * Set PARTOPEN timer + * Continue with S.state == PARTOPEN + * / * Step 12 will send the Ack completing the three-way + * handshake * / + */ + dccp_set_state(sk, DCCP_PARTOPEN); + + /* Make sure socket is routed, for correct metrics. */ + inet_sk_rebuild_header(sk); + + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_state_change(sk); + sk_wake_async(sk, 0, POLL_OUT); + } + + if (sk->sk_write_pending || icsk->icsk_ack.pingpong || + icsk->icsk_accept_queue.rskq_defer_accept) { + /* Save one ACK. Data will be ready after + * several ticks, if write_pending is set. + * + * It may be deleted, but with this feature tcpdumps + * look so _wonderfully_ clever, that I was not able + * to stand against the temptation 8) --ANK + */ + /* + * OK, in DCCP we can as well do a similar trick, its + * even in the draft, but there is no need for us to + * schedule an ack here, as dccp_sendmsg does this for + * us, also stated in the draft. -acme + */ + __kfree_skb(skb); + return 0; + } + dccp_send_ack(sk); + return -1; + } + +out_invalid_packet: + return 1; /* dccp_v4_do_rcv will send a reset, but... + FIXME: the reset code should be DCCP_RESET_CODE_PACKET_ERROR */ +} + +static int dccp_rcv_respond_partopen_state_process(struct sock *sk, + struct sk_buff *skb, + const struct dccp_hdr *dh, + const unsigned len) +{ + int queued = 0; + + switch (dh->dccph_type) { + case DCCP_PKT_RESET: + inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); + break; + case DCCP_PKT_DATAACK: + case DCCP_PKT_ACK: + /* + * FIXME: we should be reseting the PARTOPEN (DELACK) timer here, + * but only if we haven't used the DELACK timer for something else, + * like sending a delayed ack for a TIMESTAMP echo, etc, for now + * were not clearing it, sending an extra ACK when there is nothing + * else to do in DELACK is not a big deal after all. + */ + + /* Stop the PARTOPEN timer */ + if (sk->sk_state == DCCP_PARTOPEN) + inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); + + dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq; + dccp_set_state(sk, DCCP_OPEN); + + if (dh->dccph_type == DCCP_PKT_DATAACK) { + dccp_rcv_established(sk, skb, dh, len); + queued = 1; /* packet was queued (by dccp_rcv_established) */ + } + break; + } + + return queued; +} + +int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, + struct dccp_hdr *dh, unsigned len) +{ + struct dccp_sock *dp = dccp_sk(sk); + const int old_state = sk->sk_state; + int queued = 0; + + if (sk->sk_state != DCCP_LISTEN && sk->sk_state != DCCP_REQUESTING) { + if (dccp_check_seqno(sk, skb)) + goto discard; + + /* + * Step 8: Process options and mark acknowledgeable + */ + if (dccp_parse_options(sk, skb)) + goto discard; + + if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dccp_event_ack_recv(sk, skb); + + ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); + ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); + + /* + * FIXME: check ECN to see if we should use + * DCCP_ACKPKTS_STATE_ECN_MARKED + */ + if (dp->dccps_options.dccpo_send_ack_vector) { + if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKPKTS_STATE_RECEIVED)) + goto discard; + /* + * FIXME: this activation is probably wrong, have to study more + * TCP delack machinery and how it fits into DCCP draft, but + * for now it kinda "works" 8) + */ + if (dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1 && + !inet_csk_ack_scheduled(sk)) { + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX); + } + } + } + + /* + * Step 9: Process Reset + * If P.type == Reset, + * Tear down connection + * S.state := TIMEWAIT + * Set TIMEWAIT timer + * Drop packet and return + */ + if (dh->dccph_type == DCCP_PKT_RESET) { + /* Queue the equivalent of TCP fin so that dccp_recvmsg exits the loop */ + dccp_fin(sk, skb); + dccp_time_wait(sk, DCCP_TIME_WAIT, 0); + return 0; + /* + * Step 7: Check for unexpected packet types + * If (S.is_server and P.type == CloseReq) + * or (S.is_server and P.type == Response) + * or (S.is_client and P.type == Request) + * or (S.state == RESPOND and P.type == Data), + * Send Sync packet acknowledging P.seqno + * Drop packet and return + */ + } else if ((dp->dccps_role != DCCP_ROLE_CLIENT && + (dh->dccph_type == DCCP_PKT_RESPONSE || dh->dccph_type == DCCP_PKT_CLOSEREQ)) || + (dp->dccps_role == DCCP_ROLE_CLIENT && + dh->dccph_type == DCCP_PKT_REQUEST) || + (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + goto discard; + } + + switch (sk->sk_state) { + case DCCP_CLOSED: + return 1; + + case DCCP_LISTEN: + if (dh->dccph_type == DCCP_PKT_ACK || + dh->dccph_type == DCCP_PKT_DATAACK) + return 1; + + if (dh->dccph_type == DCCP_PKT_RESET) + goto discard; + + if (dh->dccph_type == DCCP_PKT_REQUEST) { + if (dccp_v4_conn_request(sk, skb) < 0) + return 1; + + /* FIXME: do congestion control initialization */ + goto discard; + } + goto discard; + + case DCCP_REQUESTING: + /* FIXME: do congestion control initialization */ + + queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); + if (queued >= 0) + return queued; + + __kfree_skb(skb); + return 0; + + case DCCP_RESPOND: + case DCCP_PARTOPEN: + queued = dccp_rcv_respond_partopen_state_process(sk, skb, dh, len); + break; + } + + if (dh->dccph_type == DCCP_PKT_ACK || dh->dccph_type == DCCP_PKT_DATAACK) { + switch (old_state) { + case DCCP_PARTOPEN: + sk->sk_state_change(sk); + sk_wake_async(sk, 0, POLL_OUT); + break; + } + } + + if (!queued) { +discard: + __kfree_skb(skb); + } + return 0; +} diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c new file mode 100644 index 00000000000..083bacaecb3 --- /dev/null +++ b/net/dccp/ipv4.c @@ -0,0 +1,1289 @@ +/* + * net/dccp/ipv4.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "ccid.h" +#include "dccp.h" + +struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { + .lhash_lock = RW_LOCK_UNLOCKED, + .lhash_users = ATOMIC_INIT(0), + .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), + .portalloc_lock = SPIN_LOCK_UNLOCKED, + .port_rover = 1024 - 1, +}; + +static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) +{ + return inet_csk_get_port(&dccp_hashinfo, sk, snum); +} + +static void dccp_v4_hash(struct sock *sk) +{ + inet_hash(&dccp_hashinfo, sk); +} + +static void dccp_v4_unhash(struct sock *sk) +{ + inet_unhash(&dccp_hashinfo, sk); +} + +/* called with local bh disabled */ +static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, + struct inet_timewait_sock **twp) +{ + struct inet_sock *inet = inet_sk(sk); + const u32 daddr = inet->rcv_saddr; + const u32 saddr = inet->daddr; + const int dif = sk->sk_bound_dev_if; + INET_ADDR_COOKIE(acookie, saddr, daddr) + const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, dccp_hashinfo.ehash_size); + struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash]; + const struct sock *sk2; + const struct hlist_node *node; + struct inet_timewait_sock *tw; + + write_lock(&head->lock); + + /* Check TIME-WAIT sockets first. */ + sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) { + tw = inet_twsk(sk2); + + if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + goto not_unique; + } + tw = NULL; + + /* And established part... */ + sk_for_each(sk2, node, &head->chain) { + if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + goto not_unique; + } + + /* Must record num and sport now. Otherwise we will see + * in hash table socket with a funny identity. */ + inet->num = lport; + inet->sport = htons(lport); + sk->sk_hashent = hash; + BUG_TRAP(sk_unhashed(sk)); + __sk_add_node(sk, &head->chain); + sock_prot_inc_use(sk->sk_prot); + write_unlock(&head->lock); + + if (twp != NULL) { + *twp = tw; + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + } else if (tw != NULL) { + /* Silly. Should hash-dance instead... */ + dccp_tw_deschedule(tw); + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + + inet_twsk_put(tw); + } + + return 0; + +not_unique: + write_unlock(&head->lock); + return -EADDRNOTAVAIL; +} + +/* + * Bind a port for a connect operation and hash it. + */ +static int dccp_v4_hash_connect(struct sock *sk) +{ + const unsigned short snum = inet_sk(sk)->num; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; + int ret; + + if (snum == 0) { + int rover; + int low = sysctl_local_port_range[0]; + int high = sysctl_local_port_range[1]; + int remaining = (high - low) + 1; + struct hlist_node *node; + struct inet_timewait_sock *tw = NULL; + + local_bh_disable(); + + /* TODO. Actually it is not so bad idea to remove + * dccp_hashinfo.portalloc_lock before next submission to Linus. + * As soon as we touch this place at all it is time to think. + * + * Now it protects single _advisory_ variable dccp_hashinfo.port_rover, + * hence it is mostly useless. + * Code will work nicely if we just delete it, but + * I am afraid in contented case it will work not better or + * even worse: another cpu just will hit the same bucket + * and spin there. + * So some cpu salt could remove both contention and + * memory pingpong. Any ideas how to do this in a nice way? + */ + spin_lock(&dccp_hashinfo.portalloc_lock); + rover = dccp_hashinfo.port_rover; + + do { + rover++; + if ((rover < low) || (rover > high)) + rover = low; + head = &dccp_hashinfo.bhash[inet_bhashfn(rover, dccp_hashinfo.bhash_size)]; + spin_lock(&head->lock); + + /* Does not bother with rcv_saddr checks, + * because the established check is already + * unique enough. + */ + inet_bind_bucket_for_each(tb, node, &head->chain) { + if (tb->port == rover) { + BUG_TRAP(!hlist_empty(&tb->owners)); + if (tb->fastreuse >= 0) + goto next_port; + if (!__dccp_v4_check_established(sk, + rover, + &tw)) + goto ok; + goto next_port; + } + } + + tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, head, rover); + if (tb == NULL) { + spin_unlock(&head->lock); + break; + } + tb->fastreuse = -1; + goto ok; + + next_port: + spin_unlock(&head->lock); + } while (--remaining > 0); + dccp_hashinfo.port_rover = rover; + spin_unlock(&dccp_hashinfo.portalloc_lock); + + local_bh_enable(); + + return -EADDRNOTAVAIL; + +ok: + /* All locks still held and bhs disabled */ + dccp_hashinfo.port_rover = rover; + spin_unlock(&dccp_hashinfo.portalloc_lock); + + inet_bind_hash(sk, tb, rover); + if (sk_unhashed(sk)) { + inet_sk(sk)->sport = htons(rover); + __inet_hash(&dccp_hashinfo, sk, 0); + } + spin_unlock(&head->lock); + + if (tw != NULL) { + dccp_tw_deschedule(tw); + inet_twsk_put(tw); + } + + ret = 0; + goto out; + } + + head = &dccp_hashinfo.bhash[inet_bhashfn(snum, dccp_hashinfo.bhash_size)]; + tb = inet_csk(sk)->icsk_bind_hash; + spin_lock_bh(&head->lock); + if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { + __inet_hash(&dccp_hashinfo, sk, 0); + spin_unlock_bh(&head->lock); + return 0; + } else { + spin_unlock(&head->lock); + /* No definite answer... Walk to established hash table */ + ret = __dccp_v4_check_established(sk, snum, NULL); +out: + local_bh_enable(); + return ret; + } +} + +static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + struct inet_sock *inet = inet_sk(sk); + struct dccp_sock *dp = dccp_sk(sk); + const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; + struct rtable *rt; + u32 daddr, nexthop; + int tmp; + int err; + + dp->dccps_role = DCCP_ROLE_CLIENT; + + if (addr_len < sizeof(struct sockaddr_in)) + return -EINVAL; + + if (usin->sin_family != AF_INET) + return -EAFNOSUPPORT; + + nexthop = daddr = usin->sin_addr.s_addr; + if (inet->opt != NULL && inet->opt->srr) { + if (daddr == 0) + return -EINVAL; + nexthop = inet->opt->faddr; + } + + tmp = ip_route_connect(&rt, nexthop, inet->saddr, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, + IPPROTO_DCCP, + inet->sport, usin->sin_port, sk); + if (tmp < 0) + return tmp; + + if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { + ip_rt_put(rt); + return -ENETUNREACH; + } + + if (inet->opt == NULL || !inet->opt->srr) + daddr = rt->rt_dst; + + if (inet->saddr == 0) + inet->saddr = rt->rt_src; + inet->rcv_saddr = inet->saddr; + + inet->dport = usin->sin_port; + inet->daddr = daddr; + + dp->dccps_ext_header_len = 0; + if (inet->opt != NULL) + dp->dccps_ext_header_len = inet->opt->optlen; + /* + * Socket identity is still unknown (sport may be zero). + * However we set state to DCCP_REQUESTING and not releasing socket + * lock select source port, enter ourselves into the hash tables and + * complete initialization after this. + */ + dccp_set_state(sk, DCCP_REQUESTING); + err = dccp_v4_hash_connect(sk); + if (err != 0) + goto failure; + + err = ip_route_newports(&rt, inet->sport, inet->dport, sk); + if (err != 0) + goto failure; + + /* OK, now commit destination to socket. */ + sk_setup_caps(sk, &rt->u.dst); + + dp->dccps_gar = + dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, + inet->daddr, + inet->sport, + usin->sin_port); + dccp_update_gss(sk, dp->dccps_iss); + + inet->id = dp->dccps_iss ^ jiffies; + + err = dccp_connect(sk); + rt = NULL; + if (err != 0) + goto failure; +out: + return err; +failure: + /* This unhashes the socket and releases the local port, if necessary. */ + dccp_set_state(sk, DCCP_CLOSED); + ip_rt_put(rt); + sk->sk_route_caps = 0; + inet->dport = 0; + goto out; +} + +/* + * This routine does path mtu discovery as defined in RFC1191. + */ +static inline void dccp_do_pmtu_discovery(struct sock *sk, + const struct iphdr *iph, + u32 mtu) +{ + struct dst_entry *dst; + const struct inet_sock *inet = inet_sk(sk); + const struct dccp_sock *dp = dccp_sk(sk); + + /* We are not interested in DCCP_LISTEN and request_socks (RESPONSEs + * send out by Linux are always < 576bytes so they should go through + * unfragmented). + */ + if (sk->sk_state == DCCP_LISTEN) + return; + + /* We don't check in the destentry if pmtu discovery is forbidden + * on this route. We just assume that no packet_to_big packets + * are send back when pmtu discovery is not active. + * There is a small race when the user changes this flag in the + * route, but I think that's acceptable. + */ + if ((dst = __sk_dst_check(sk, 0)) == NULL) + return; + + dst->ops->update_pmtu(dst, mtu); + + /* Something is about to be wrong... Remember soft error + * for the case, if this connection will not able to recover. + */ + if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) + sk->sk_err_soft = EMSGSIZE; + + mtu = dst_mtu(dst); + + if (inet->pmtudisc != IP_PMTUDISC_DONT && + dp->dccps_pmtu_cookie > mtu) { + dccp_sync_mss(sk, mtu); + + /* + * From: draft-ietf-dccp-spec-11.txt + * + * DCCP-Sync packets are the best choice for upward probing, + * since DCCP-Sync probes do not risk application data loss. + */ + dccp_send_sync(sk, dp->dccps_gsr); + } /* else let the usual retransmit timer handle it */ +} + +static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb) +{ + int err; + struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; + const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) + + sizeof(struct dccp_hdr_ext) + + sizeof(struct dccp_hdr_ack_bits); + struct sk_buff *skb; + + if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) + return; + + skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC); + if (skb == NULL) + return; + + /* Reserve space for headers. */ + skb_reserve(skb, MAX_DCCP_HEADER); + + skb->dst = dst_clone(rxskb->dst); + + skb->h.raw = skb_push(skb, dccp_hdr_ack_len); + dh = dccp_hdr(skb); + memset(dh, 0, dccp_hdr_ack_len); + + /* Build DCCP header and checksum it. */ + dh->dccph_type = DCCP_PKT_ACK; + dh->dccph_sport = rxdh->dccph_dport; + dh->dccph_dport = rxdh->dccph_sport; + dh->dccph_doff = dccp_hdr_ack_len / 4; + dh->dccph_x = 1; + + dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); + + bh_lock_sock(dccp_ctl_socket->sk); + err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, + rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL); + bh_unlock_sock(dccp_ctl_socket->sk); + + if (err == NET_XMIT_CN || err == 0) { + DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); + DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); + } +} + +static void dccp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) +{ + dccp_v4_ctl_send_ack(skb); +} + +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, + struct dst_entry *dst) +{ + int err = -1; + struct sk_buff *skb; + + /* First, grab a route. */ + + if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) + goto out; + + skb = dccp_make_response(sk, dst, req); + if (skb != NULL) { + const struct inet_request_sock *ireq = inet_rsk(req); + + err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, + ireq->rmt_addr, + ireq->opt); + if (err == NET_XMIT_CN) + err = 0; + } + +out: + dst_release(dst); + return err; +} + +/* + * This routine is called by the ICMP module when it gets some sort of error + * condition. If err < 0 then the socket should be closed and the error + * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code. + * After adjustment header points to the first 8 bytes of the tcp header. We + * need to find the appropriate port. + * + * The locking strategy used here is very "optimistic". When someone else + * accesses the socket the ICMP is just dropped and for some paths there is no + * check at all. A more general error queue to queue errors for later handling + * is probably better. + */ +void dccp_v4_err(struct sk_buff *skb, u32 info) +{ + const struct iphdr *iph = (struct iphdr *)skb->data; + const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + (iph->ihl << 2)); + struct dccp_sock *dp; + struct inet_sock *inet; + const int type = skb->h.icmph->type; + const int code = skb->h.icmph->code; + struct sock *sk; + __u64 seq; + int err; + + if (skb->len < (iph->ihl << 2) + 8) { + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + return; + } + + sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport, + iph->saddr, dh->dccph_sport, inet_iif(skb)); + if (sk == NULL) { + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + return; + } + + if (sk->sk_state == DCCP_TIME_WAIT) { + inet_twsk_put((struct inet_timewait_sock *)sk); + return; + } + + bh_lock_sock(sk); + /* If too many ICMPs get dropped on busy + * servers this needs to be solved differently. + */ + if (sock_owned_by_user(sk)) + NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); + + if (sk->sk_state == DCCP_CLOSED) + goto out; + + dp = dccp_sk(sk); + seq = dccp_hdr_seq(skb); + if (sk->sk_state != DCCP_LISTEN && + !between48(seq, dp->dccps_swl, dp->dccps_swh)) { + NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS); + goto out; + } + + switch (type) { + case ICMP_SOURCE_QUENCH: + /* Just silently ignore these. */ + goto out; + case ICMP_PARAMETERPROB: + err = EPROTO; + break; + case ICMP_DEST_UNREACH: + if (code > NR_ICMP_UNREACH) + goto out; + + if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ + if (!sock_owned_by_user(sk)) + dccp_do_pmtu_discovery(sk, iph, info); + goto out; + } + + err = icmp_err_convert[code].errno; + break; + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + default: + goto out; + } + + switch (sk->sk_state) { + struct request_sock *req , **prev; + case DCCP_LISTEN: + if (sock_owned_by_user(sk)) + goto out; + req = inet_csk_search_req(sk, &prev, dh->dccph_dport, + iph->daddr, iph->saddr); + if (!req) + goto out; + + /* + * ICMPs are not backlogged, hence we cannot get an established + * socket here. + */ + BUG_TRAP(!req->sk); + + if (seq != dccp_rsk(req)->dreq_iss) { + NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + goto out; + } + /* + * Still in RESPOND, just remove it silently. + * There is no good way to pass the error to the newly + * created socket, and POSIX does not want network + * errors returned from accept(). + */ + inet_csk_reqsk_queue_drop(sk, req, prev); + goto out; + + case DCCP_REQUESTING: + case DCCP_RESPOND: + if (!sock_owned_by_user(sk)) { + DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + sk->sk_err = err; + + sk->sk_error_report(sk); + + dccp_done(sk); + } else + sk->sk_err_soft = err; + goto out; + } + + /* If we've already connected we will keep trying + * until we time out, or the user gives up. + * + * rfc1122 4.2.3.9 allows to consider as hard errors + * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, + * but it is obsoleted by pmtu discovery). + * + * Note, that in modern internet, where routing is unreliable + * and in each dark corner broken firewalls sit, sending random + * errors ordered by their masters even this two messages finally lose + * their original sense (even Linux sends invalid PORT_UNREACHs) + * + * Now we are in compliance with RFCs. + * --ANK (980905) + */ + + inet = inet_sk(sk); + if (!sock_owned_by_user(sk) && inet->recverr) { + sk->sk_err = err; + sk->sk_error_report(sk); + } else /* Only an error on timeout */ + sk->sk_err_soft = err; +out: + bh_unlock_sock(sk); + sock_put(sk); +} + +extern struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, enum dccp_reset_codes code); + +int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) +{ + struct sk_buff *skb; + /* + * FIXME: what if rebuild_header fails? + * Should we be doing a rebuild_header here? + */ + int err = inet_sk_rebuild_header(sk); + + if (err != 0) + return err; + + skb = dccp_make_reset(sk, sk->sk_dst_cache, code); + if (skb != NULL) { + const struct dccp_sock *dp = dccp_sk(sk); + const struct inet_sock *inet = inet_sk(sk); + + err = ip_build_and_send_pkt(skb, sk, + inet->saddr, inet->daddr, NULL); + if (err == NET_XMIT_CN) + err = 0; + + ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); + } + + return err; +} + +static inline u64 dccp_v4_init_sequence(const struct sock *sk, + const struct sk_buff *skb) +{ + return secure_dccp_sequence_number(skb->nh.iph->daddr, + skb->nh.iph->saddr, + dccp_hdr(skb)->dccph_dport, + dccp_hdr(skb)->dccph_sport); +} + +int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) +{ + struct inet_request_sock *ireq; + struct dccp_sock dp; + struct request_sock *req; + struct dccp_request_sock *dreq; + const __u32 saddr = skb->nh.iph->saddr; + const __u32 daddr = skb->nh.iph->daddr; + struct dst_entry *dst = NULL; + + /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ + if (((struct rtable *)skb->dst)->rt_flags & + (RTCF_BROADCAST | RTCF_MULTICAST)) + goto drop; + + /* + * TW buckets are converted to open requests without + * limitations, they conserve resources and peer is + * evidently real one. + */ + if (inet_csk_reqsk_queue_is_full(sk)) + goto drop; + + /* + * Accept backlog is full. If we have already queued enough + * of warm entries in syn queue, drop request. It is better than + * clogging syn queue with openreqs with exponentially increasing + * timeout. + */ + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + goto drop; + + req = reqsk_alloc(sk->sk_prot->rsk_prot); + if (req == NULL) + goto drop; + + /* FIXME: process options */ + + dccp_openreq_init(req, &dp, skb); + + ireq = inet_rsk(req); + ireq->loc_addr = daddr; + ireq->rmt_addr = saddr; + /* FIXME: Merge Aristeu's option parsing code when ready */ + req->rcv_wnd = 100; /* Fake, option parsing will get the right value */ + ireq->opt = NULL; + + /* + * Step 3: Process LISTEN state + * + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + * + * In fact we defer setting S.GSR, S.SWL, S.SWH to + * dccp_create_openreq_child. + */ + dreq = dccp_rsk(req); + dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq; + dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); + dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service; + + if (dccp_v4_send_response(sk, req, dst)) + goto drop_and_free; + + inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); + return 0; + +drop_and_free: + /* + * FIXME: should be reqsk_free after implementing req->rsk_ops + */ + __reqsk_free(req); +drop: + DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + return -1; +} + +/* + * The three way handshake has completed - we got a valid ACK or DATAACK - + * now create the new socket. + * + * This is the equivalent of TCP's tcp_v4_syn_recv_sock + */ +struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst) +{ + struct inet_request_sock *ireq; + struct inet_sock *newinet; + struct dccp_sock *newdp; + struct sock *newsk; + + if (sk_acceptq_is_full(sk)) + goto exit_overflow; + + if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) + goto exit; + + newsk = dccp_create_openreq_child(sk, req, skb); + if (newsk == NULL) + goto exit; + + sk_setup_caps(newsk, dst); + + newdp = dccp_sk(newsk); + newinet = inet_sk(newsk); + ireq = inet_rsk(req); + newinet->daddr = ireq->rmt_addr; + newinet->rcv_saddr = ireq->loc_addr; + newinet->saddr = ireq->loc_addr; + newinet->opt = ireq->opt; + ireq->opt = NULL; + newinet->mc_index = inet_iif(skb); + newinet->mc_ttl = skb->nh.iph->ttl; + newinet->id = jiffies; + + dccp_sync_mss(newsk, dst_mtu(dst)); + + __inet_hash(&dccp_hashinfo, newsk, 0); + __inet_inherit_port(&dccp_hashinfo, sk, newsk); + + return newsk; + +exit_overflow: + NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); +exit: + NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); + dst_release(dst); + return NULL; +} + +static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); + const struct iphdr *iph = skb->nh.iph; + struct sock *nsk; + struct request_sock **prev; + /* Find possible connection requests. */ + struct request_sock *req = inet_csk_search_req(sk, &prev, + dh->dccph_sport, + iph->saddr, iph->daddr); + if (req != NULL) + return dccp_check_req(sk, skb, req, prev); + + nsk = __inet_lookup_established(&dccp_hashinfo, + iph->saddr, dh->dccph_sport, + iph->daddr, ntohs(dh->dccph_dport), + inet_iif(skb)); + if (nsk != NULL) { + if (nsk->sk_state != DCCP_TIME_WAIT) { + bh_lock_sock(nsk); + return nsk; + } + inet_twsk_put((struct inet_timewait_sock *)nsk); + return NULL; + } + + return sk; +} + +int dccp_v4_checksum(struct sk_buff *skb) +{ + struct dccp_hdr* dh = dccp_hdr(skb); + int checksum_len; + u32 tmp; + + if (dh->dccph_cscov == 0) + checksum_len = skb->len; + else { + checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); + checksum_len = checksum_len < skb->len ? checksum_len : skb->len; + } + + tmp = csum_partial((unsigned char *)dh, checksum_len, 0); + return csum_fold(tmp); +} + +static int dccp_v4_verify_checksum(struct sk_buff *skb) +{ + struct dccp_hdr *th = dccp_hdr(skb); + const u16 remote_checksum = th->dccph_checksum; + u16 local_checksum; + + /* FIXME: don't mess with skb payload */ + th->dccph_checksum = 0; /* zero it for computation */ + + local_checksum = dccp_v4_checksum(skb); + + /* FIXME: don't mess with skb payload */ + th->dccph_checksum = remote_checksum; /* put it back */ + + return remote_checksum == local_checksum ? 0 : -1; +} + +static struct dst_entry* dccp_v4_route_skb(struct sock *sk, + struct sk_buff *skb) +{ + struct rtable *rt; + struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif, + .nl_u = { .ip4_u = + { .daddr = skb->nh.iph->saddr, + .saddr = skb->nh.iph->daddr, + .tos = RT_CONN_FLAGS(sk) } }, + .proto = sk->sk_protocol, + .uli_u = { .ports = + { .sport = dccp_hdr(skb)->dccph_dport, + .dport = dccp_hdr(skb)->dccph_sport } } }; + + if (ip_route_output_flow(&rt, &fl, sk, 0)) { + IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + return NULL; + } + + return &rt->u.dst; +} + +void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) +{ + int err; + struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; + const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) + + sizeof(struct dccp_hdr_ext) + + sizeof(struct dccp_hdr_reset); + struct sk_buff *skb; + struct dst_entry *dst; + + /* Never send a reset in response to a reset. */ + if (rxdh->dccph_type == DCCP_PKT_RESET) + return; + + if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) + return; + + dst = dccp_v4_route_skb(dccp_ctl_socket->sk, rxskb); + if (dst == NULL) + return; + + skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC); + if (skb == NULL) + goto out; + + /* Reserve space for headers. */ + skb_reserve(skb, MAX_DCCP_HEADER); + skb->dst = dst_clone(dst); + + skb->h.raw = skb_push(skb, dccp_hdr_reset_len); + dh = dccp_hdr(skb); + memset(dh, 0, dccp_hdr_reset_len); + + /* Build DCCP header and checksum it. */ + dh->dccph_type = DCCP_PKT_RESET; + dh->dccph_sport = rxdh->dccph_dport; + dh->dccph_dport = rxdh->dccph_sport; + dh->dccph_doff = dccp_hdr_reset_len / 4; + dh->dccph_x = 1; + dccp_hdr_reset(skb)->dccph_reset_code = DCCP_SKB_CB(rxskb)->dccpd_reset_code; + + dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); + + dh->dccph_checksum = dccp_v4_checksum(skb); + + bh_lock_sock(dccp_ctl_socket->sk); + err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, + rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL); + bh_unlock_sock(dccp_ctl_socket->sk); + + if (err == NET_XMIT_CN || err == 0) { + DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); + DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); + } +out: + dst_release(dst); +} + +int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_hdr *dh = dccp_hdr(skb); + + if (sk->sk_state == DCCP_OPEN) { /* Fast path */ + if (dccp_rcv_established(sk, skb, dh, skb->len)) + goto reset; + return 0; + } + + /* + * Step 3: Process LISTEN state + * If S.state == LISTEN, + * If P.type == Request or P contains a valid Init Cookie option, + * * Must scan the packet's options to check for an Init + * Cookie. Only the Init Cookie is processed here, + * however; other options are processed in Step 8. This + * scan need only be performed if the endpoint uses Init + * Cookies * + * * Generate a new socket and switch to that socket * + * Set S := new socket for this port pair + * S.state = RESPOND + * Choose S.ISS (initial seqno) or set from Init Cookie + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + * Continue with S.state == RESPOND + * * A Response packet will be generated in Step 11 * + * Otherwise, + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + * + * NOTE: the check for the packet types is done in dccp_rcv_state_process + */ + if (sk->sk_state == DCCP_LISTEN) { + struct sock *nsk = dccp_v4_hnd_req(sk, skb); + + if (nsk == NULL) + goto discard; + + if (nsk != sk) { + if (dccp_child_process(sk, nsk, skb)) + goto reset; + return 0; + } + } + + if (dccp_rcv_state_process(sk, skb, dh, skb->len)) + goto reset; + return 0; + +reset: + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; + dccp_v4_ctl_send_reset(skb); +discard: + kfree_skb(skb); + return 0; +} + +static inline int dccp_invalid_packet(struct sk_buff *skb) +{ + const struct dccp_hdr *dh; + + if (skb->pkt_type != PACKET_HOST) + return 1; + + if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { + dccp_pr_debug("pskb_may_pull failed\n"); + return 1; + } + + dh = dccp_hdr(skb); + + /* If the packet type is not understood, drop packet and return */ + if (dh->dccph_type >= DCCP_PKT_INVALID) { + dccp_pr_debug("invalid packet type\n"); + return 1; + } + + /* + * If P.Data Offset is too small for packet type, or too large for + * packet, drop packet and return + */ + if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { + dccp_pr_debug("Offset(%u) too small 1\n", dh->dccph_doff); + return 1; + } + + if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { + dccp_pr_debug("P.Data Offset(%u) too small 2\n", dh->dccph_doff); + return 1; + } + + dh = dccp_hdr(skb); + + /* + * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet + * has short sequence numbers), drop packet and return + */ + if (dh->dccph_x == 0 && + dh->dccph_type != DCCP_PKT_DATA && + dh->dccph_type != DCCP_PKT_ACK && + dh->dccph_type != DCCP_PKT_DATAACK) { + dccp_pr_debug("P.type (%s) not Data, Ack nor DataAck and P.X == 0\n", + dccp_packet_name(dh->dccph_type)); + return 1; + } + + /* If the header checksum is incorrect, drop packet and return */ + if (dccp_v4_verify_checksum(skb) < 0) { + dccp_pr_debug("header checksum is incorrect\n"); + return 1; + } + + return 0; +} + +/* this is called when real data arrives */ +int dccp_v4_rcv(struct sk_buff *skb) +{ + const struct dccp_hdr *dh; + struct sock *sk; + int rc; + + /* Step 1: Check header basics: */ + + if (dccp_invalid_packet(skb)) + goto discard_it; + + dh = dccp_hdr(skb); +#if 0 + /* + * Use something like this to simulate some DATA/DATAACK loss to test + * dccp_ackpkts_add, you'll get something like this on a session that + * sends 10 DATA/DATAACK packets: + * + * dccp_ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| + * + * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet + * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets with the same state + * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet + * + * So... + * + * 281473596467422 was received + * 281473596467421 was not received + * 281473596467420 was received + * 281473596467419 was not received + * 281473596467418 was received + * 281473596467417 was not received + * 281473596467416 was received + * 281473596467415 was not received + * 281473596467414 was received + * 281473596467413 was received (this one was the 3way handshake RESPONSE) + * + */ + if (dh->dccph_type == DCCP_PKT_DATA || dh->dccph_type == DCCP_PKT_DATAACK) { + static int discard = 0; + + if (discard) { + discard = 0; + goto discard_it; + } + discard = 1; + } +#endif + DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); + DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; + + dccp_pr_debug("%8.8s " + "src=%u.%u.%u.%u@%-5d " + "dst=%u.%u.%u.%u@%-5d seq=%llu", + dccp_packet_name(dh->dccph_type), + NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport), + NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport), + DCCP_SKB_CB(skb)->dccpd_seq); + + if (dccp_packet_without_ack(skb)) { + DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; + dccp_pr_debug_cat("\n"); + } else { + DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); + dccp_pr_debug_cat(", ack=%llu\n", DCCP_SKB_CB(skb)->dccpd_ack_seq); + } + + /* Step 2: + * Look up flow ID in table and get corresponding socket */ + sk = __inet_lookup(&dccp_hashinfo, + skb->nh.iph->saddr, dh->dccph_sport, + skb->nh.iph->daddr, ntohs(dh->dccph_dport), + inet_iif(skb)); + + /* + * Step 2: + * If no socket ... + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + */ + if (sk == NULL) { + dccp_pr_debug("failed to look up flow ID in table and " + "get corresponding socket\n"); + goto no_dccp_socket; + } + + /* + * Step 2: + * ... or S.state == TIMEWAIT, + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + */ + + if (sk->sk_state == DCCP_TIME_WAIT) { + dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: discard_and_relse\n"); + goto discard_and_relse; + } + + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { + dccp_pr_debug("xfrm4_policy_check failed\n"); + goto discard_and_relse; + } + + if (sk_filter(sk, skb, 0)) { + dccp_pr_debug("sk_filter failed\n"); + goto discard_and_relse; + } + + skb->dev = NULL; + + bh_lock_sock(sk); + rc = 0; + if (!sock_owned_by_user(sk)) + rc = dccp_v4_do_rcv(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); + + sock_put(sk); + return rc; + +no_dccp_socket: + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto discard_it; + /* + * Step 2: + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + */ + if (dh->dccph_type != DCCP_PKT_RESET) { + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; + dccp_v4_ctl_send_reset(skb); + } + +discard_it: + /* Discard frame. */ + kfree_skb(skb); + return 0; + +discard_and_relse: + sock_put(sk); + goto discard_it; +} + +static int dccp_v4_init_sock(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + static int dccp_ctl_socket_init = 1; + + dccp_options_init(&dp->dccps_options); + + if (dp->dccps_options.dccpo_send_ack_vector) { + dp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, + GFP_KERNEL); + + if (dp->dccps_hc_rx_ackpkts == NULL) + return -ENOMEM; + } + + /* + * FIXME: We're hardcoding the CCID, and doing this at this point makes + * the listening (master) sock get CCID control blocks, which is not + * necessary, but for now, to not mess with the test userspace apps, + * lets leave it here, later the real solution is to do this in a + * setsockopt(CCIDs-I-want/accept). -acme + */ + if (likely(!dccp_ctl_socket_init)) { + dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk); + dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk); + if (dp->dccps_hc_rx_ccid == NULL || + dp->dccps_hc_tx_ccid == NULL) { + ccid_exit(dp->dccps_hc_rx_ccid, sk); + ccid_exit(dp->dccps_hc_tx_ccid, sk); + dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); + dp->dccps_hc_rx_ackpkts = NULL; + dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; + return -ENOMEM; + } + } else + dccp_ctl_socket_init = 0; + + dccp_init_xmit_timers(sk); + sk->sk_state = DCCP_CLOSED; + dp->dccps_mss_cache = 536; + dp->dccps_role = DCCP_ROLE_UNDEFINED; + + return 0; +} + +int dccp_v4_destroy_sock(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + + /* + * DCCP doesn't use sk_qrite_queue, just sk_send_head + * for retransmissions + */ + if (sk->sk_send_head != NULL) { + kfree_skb(sk->sk_send_head); + sk->sk_send_head = NULL; + } + + /* Clean up a referenced DCCP bind bucket. */ + if (inet_csk(sk)->icsk_bind_hash != NULL) + inet_put_port(&dccp_hashinfo, sk); + + dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); + dp->dccps_hc_rx_ackpkts = NULL; + ccid_exit(dp->dccps_hc_rx_ccid, sk); + ccid_exit(dp->dccps_hc_tx_ccid, sk); + dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; + + return 0; +} + +static void dccp_v4_reqsk_destructor(struct request_sock *req) +{ + kfree(inet_rsk(req)->opt); +} + +static struct request_sock_ops dccp_request_sock_ops = { + .family = PF_INET, + .obj_size = sizeof(struct dccp_request_sock), + .rtx_syn_ack = dccp_v4_send_response, + .send_ack = dccp_v4_reqsk_send_ack, + .destructor = dccp_v4_reqsk_destructor, + .send_reset = dccp_v4_ctl_send_reset, +}; + +struct proto dccp_v4_prot = { + .name = "DCCP", + .owner = THIS_MODULE, + .close = dccp_close, + .connect = dccp_v4_connect, + .disconnect = dccp_disconnect, + .ioctl = dccp_ioctl, + .init = dccp_v4_init_sock, + .setsockopt = dccp_setsockopt, + .getsockopt = dccp_getsockopt, + .sendmsg = dccp_sendmsg, + .recvmsg = dccp_recvmsg, + .backlog_rcv = dccp_v4_do_rcv, + .hash = dccp_v4_hash, + .unhash = dccp_v4_unhash, + .accept = inet_csk_accept, + .get_port = dccp_v4_get_port, + .shutdown = dccp_shutdown, + .destroy = dccp_v4_destroy_sock, + .orphan_count = &dccp_orphan_count, + .max_header = MAX_DCCP_HEADER, + .obj_size = sizeof(struct dccp_sock), + .rsk_prot = &dccp_request_sock_ops, + .twsk_obj_size = sizeof(struct inet_timewait_sock), /* FIXME! create dccp_timewait_sock */ +}; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c new file mode 100644 index 00000000000..810f0c293b8 --- /dev/null +++ b/net/dccp/minisocks.c @@ -0,0 +1,199 @@ +/* + * net/dccp/minisocks.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "ccid.h" +#include "dccp.h" + +void dccp_time_wait(struct sock *sk, int state, int timeo) +{ + /* FIXME: Implement */ + dccp_pr_debug("Want to help? Start here\n"); + dccp_set_state(sk, state); +} + +/* This is for handling early-kills of TIME_WAIT sockets. */ +void dccp_tw_deschedule(struct inet_timewait_sock *tw) +{ + dccp_pr_debug("Want to help? Start here\n"); + __inet_twsk_kill(tw, &dccp_hashinfo); +} + +struct sock *dccp_create_openreq_child(struct sock *sk, + const struct request_sock *req, + const struct sk_buff *skb) +{ + /* + * Step 3: Process LISTEN state + * + * // Generate a new socket and switch to that socket + * Set S := new socket for this port pair + */ + struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); + + if (newsk != NULL) { + const struct dccp_request_sock *dreq = dccp_rsk(req); + struct inet_connection_sock *newicsk = inet_csk(sk); + struct dccp_sock *newdp = dccp_sk(newsk); + + newdp->dccps_hc_rx_ackpkts = NULL; + newdp->dccps_role = DCCP_ROLE_SERVER; + newicsk->icsk_rto = TCP_TIMEOUT_INIT; + + if (newdp->dccps_options.dccpo_send_ack_vector) { + newdp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, + GFP_ATOMIC); + /* + * XXX: We're using the same CCIDs set on the parent, i.e. sk_clone + * copied the master sock and left the CCID pointers for this child, + * that is why we do the __ccid_get calls. + */ + if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL)) + goto out_free; + } + + if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid, newsk) != 0 || + ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, newsk) != 0)) { + dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts); + ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); + ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); +out_free: + /* It is still raw copy of parent, so invalidate + * destructor and make plain sk_free() */ + newsk->sk_destruct = NULL; + sk_free(newsk); + return NULL; + } + + __ccid_get(newdp->dccps_hc_rx_ccid); + __ccid_get(newdp->dccps_hc_tx_ccid); + + /* + * Step 3: Process LISTEN state + * + * Choose S.ISS (initial seqno) or set from Init Cookie + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + */ + + /* See dccp_v4_conn_request */ + newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd; + + newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr; + dccp_update_gsr(newsk, dreq->dreq_isr); + + newdp->dccps_iss = dreq->dreq_iss; + dccp_update_gss(newsk, dreq->dreq_iss); + + dccp_init_xmit_timers(newsk); + + DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS); + } + return newsk; +} + +/* + * Process an incoming packet for RESPOND sockets represented + * as an request_sock. + */ +struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct request_sock **prev) +{ + struct sock *child = NULL; + + /* Check for retransmitted REQUEST */ + if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { + if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dccp_rsk(req)->dreq_isr)) { + struct dccp_request_sock *dreq = dccp_rsk(req); + + dccp_pr_debug("Retransmitted REQUEST\n"); + /* Send another RESPONSE packet */ + dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1); + dccp_set_seqno(&dreq->dreq_isr, DCCP_SKB_CB(skb)->dccpd_seq); + req->rsk_ops->rtx_syn_ack(sk, req, NULL); + } + /* Network Duplicate, discard packet */ + return NULL; + } + + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; + + if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK && + dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK) + goto drop; + + /* Invalid ACK */ + if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) { + dccp_pr_debug("Invalid ACK number: ack_seq=%llu, dreq_iss=%llu\n", + DCCP_SKB_CB(skb)->dccpd_ack_seq, dccp_rsk(req)->dreq_iss); + goto drop; + } + + child = dccp_v4_request_recv_sock(sk, skb, req, NULL); + if (child == NULL) + goto listen_overflow; + + /* FIXME: deal with options */ + + inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_removed(sk, req); + inet_csk_reqsk_queue_add(sk, req, child); +out: + return child; +listen_overflow: + dccp_pr_debug("listen_overflow!\n"); + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; +drop: + if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) + req->rsk_ops->send_reset(skb); + + inet_csk_reqsk_queue_drop(sk, req, prev); + goto out; +} + +/* + * Queue segment on the new socket if the new socket is active, + * otherwise we just shortcircuit this and continue with + * the new socket. + */ +int dccp_child_process(struct sock *parent, struct sock *child, + struct sk_buff *skb) +{ + int ret = 0; + const int state = child->sk_state; + + if (!sock_owned_by_user(child)) { + ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb), skb->len); + + /* Wakeup parent, send SIGIO */ + if (state == DCCP_RESPOND && child->sk_state != state) + parent->sk_data_ready(parent, 0); + } else { + /* Alas, it is possible again, because we do lookup + * in main socket hash table and lock on listening + * socket does not protect us more. + */ + sk_add_backlog(child, skb); + } + + bh_unlock_sock(child); + sock_put(child); + return ret; +} diff --git a/net/dccp/options.c b/net/dccp/options.c new file mode 100644 index 00000000000..e1867767946 --- /dev/null +++ b/net/dccp/options.c @@ -0,0 +1,763 @@ +/* + * net/dccp/options.c + * + * An implementation of the DCCP protocol + * Aristeu Sergio Rozanski Filho + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include + +#include "ccid.h" +#include "dccp.h" + +static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, + struct sock *sk, + const u64 ackno, + const unsigned char len, + const unsigned char *vector); + +/* stores the default values for new connection. may be changed with sysctl */ +static const struct dccp_options dccpo_default_values = { + .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, + .dccpo_ccid = DCCPF_INITIAL_CCID, + .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR, + .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT, +}; + +void dccp_options_init(struct dccp_options *dccpo) +{ + memcpy(dccpo, &dccpo_default_values, sizeof(*dccpo)); +} + +static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) +{ + u32 value = 0; + + if (len > 3) + value += *bf++ << 24; + if (len > 2) + value += *bf++ << 16; + if (len > 1) + value += *bf++ << 8; + if (len > 0) + value += *bf; + + return value; +} + +int dccp_parse_options(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); +#ifdef DCCP_DEBUG + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx opt: " : + "server rx opt: "; +#endif + const struct dccp_hdr *dh = dccp_hdr(skb); + const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; + unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); + unsigned char *opt_ptr = options; + const unsigned char *opt_end = (unsigned char *)dh + (dh->dccph_doff * 4); + struct dccp_options_received *opt_recv = &dp->dccps_options_received; + unsigned char opt, len; + unsigned char *value; + + memset(opt_recv, 0, sizeof(*opt_recv)); + + while (opt_ptr != opt_end) { + opt = *opt_ptr++; + len = 0; + value = NULL; + + /* Check if this isn't a single byte option */ + if (opt > DCCPO_MAX_RESERVED) { + if (opt_ptr == opt_end) + goto out_invalid_option; + + len = *opt_ptr++; + if (len < 3) + goto out_invalid_option; + /* + * Remove the type and len fields, leaving + * just the value size + */ + len -= 2; + value = opt_ptr; + opt_ptr += len; + + if (opt_ptr > opt_end) + goto out_invalid_option; + } + + switch (opt) { + case DCCPO_PADDING: + break; + case DCCPO_NDP_COUNT: + if (len > 3) + goto out_invalid_option; + + opt_recv->dccpor_ndp = dccp_decode_value_var(value, len); + dccp_pr_debug("%sNDP count=%d\n", debug_prefix, opt_recv->dccpor_ndp); + break; + case DCCPO_ACK_VECTOR_0: + if (len > DCCP_MAX_ACK_VECTOR_LEN) + goto out_invalid_option; + + if (pkt_type == DCCP_PKT_DATA) + continue; + + opt_recv->dccpor_ack_vector_len = len; + opt_recv->dccpor_ack_vector_idx = value - options; + + dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n", + debug_prefix, len, DCCP_SKB_CB(skb)->dccpd_ack_seq); + dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, + value, len); + dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq, + len, value); + break; + case DCCPO_TIMESTAMP: + if (len != 4) + goto out_invalid_option; + + opt_recv->dccpor_timestamp = ntohl(*(u32 *)value); + + dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; + dp->dccps_timestamp_time = jiffies; + + dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", + debug_prefix, opt_recv->dccpor_timestamp, + DCCP_SKB_CB(skb)->dccpd_ack_seq); + break; + case DCCPO_TIMESTAMP_ECHO: + if (len < 4 || len > 8) + goto out_invalid_option; + + opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value); + + dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, diff=%u\n", + debug_prefix, opt_recv->dccpor_timestamp_echo, + len + 2, DCCP_SKB_CB(skb)->dccpd_ack_seq, + tcp_time_stamp - opt_recv->dccpor_timestamp_echo); + + opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value + 4, len - 4); + dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", debug_prefix, + opt_recv->dccpor_elapsed_time); + break; + case DCCPO_ELAPSED_TIME: + if (len > 4) + goto out_invalid_option; + + if (pkt_type == DCCP_PKT_DATA) + continue; + opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value, len); + dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, + opt_recv->dccpor_elapsed_time); + break; + /* + * From draft-ietf-dccp-spec-11.txt: + * + * Option numbers 128 through 191 are for options sent from the HC- + * Sender to the HC-Receiver; option numbers 192 through 255 are for + * options sent from the HC-Receiver to the HC-Sender. + */ + case 128 ... 191: { + const u16 idx = value - options; + + if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, opt, len, idx, value) != 0) + goto out_invalid_option; + } + break; + case 192 ... 255: { + const u16 idx = value - options; + + if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, opt, len, idx, value) != 0) + goto out_invalid_option; + } + break; + default: + pr_info("DCCP(%p): option %d(len=%d) not implemented, ignoring\n", + sk, opt, len); + break; + } + } + + return 0; + +out_invalid_option: + DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR; + pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len); + return -1; +} + +static void dccp_encode_value_var(const u32 value, unsigned char *to, + const unsigned int len) +{ + if (len > 3) + *to++ = (value & 0xFF000000) >> 24; + if (len > 2) + *to++ = (value & 0xFF0000) >> 16; + if (len > 1) + *to++ = (value & 0xFF00) >> 8; + if (len > 0) + *to++ = (value & 0xFF); +} + +static inline int dccp_ndp_len(const int ndp) +{ + return likely(ndp <= 0xFF) ? 1 : ndp <= 0xFFFF ? 2 : 3; +} + +void dccp_insert_option(struct sock *sk, struct sk_buff *skb, + const unsigned char option, + const void *value, const unsigned char len) +{ + unsigned char *to; + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) { + LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert %d option!\n", option)); + return; + } + + DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2; + + to = skb_push(skb, len + 2); + *to++ = option; + *to++ = len + 2; + + memcpy(to, value, len); +} + +EXPORT_SYMBOL_GPL(dccp_insert_option); + +static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + int ndp = dp->dccps_ndp_count; + + if (dccp_non_data_packet(skb)) + ++dp->dccps_ndp_count; + else + dp->dccps_ndp_count = 0; + + if (ndp > 0) { + unsigned char *ptr; + const int ndp_len = dccp_ndp_len(ndp); + const int len = ndp_len + 2; + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) + return; + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + ptr = skb_push(skb, len); + *ptr++ = DCCPO_NDP_COUNT; + *ptr++ = len; + dccp_encode_value_var(ndp, ptr, ndp_len); + } +} + +static inline int dccp_elapsed_time_len(const u32 elapsed_time) +{ + return elapsed_time == 0 ? 0 : + elapsed_time <= 0xFF ? 1 : + elapsed_time <= 0xFFFF ? 2 : + elapsed_time <= 0xFFFFFF ? 3 : 4; +} + +void dccp_insert_option_elapsed_time(struct sock *sk, + struct sk_buff *skb, + u32 elapsed_time) +{ +#ifdef DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : + "server TX opt: "; +#endif + const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); + const int len = 2 + elapsed_time_len; + unsigned char *to; + + /* If elapsed_time == 0... */ + if (elapsed_time_len == 2) + return; + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { + LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert elapsed time!\n")); + return; + } + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + to = skb_push(skb, len); + *to++ = DCCPO_ELAPSED_TIME; + *to++ = len; + + dccp_encode_value_var(elapsed_time, to, elapsed_time_len); + + dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n", + debug_prefix, elapsed_time, + len, DCCP_SKB_CB(skb)->dccpd_seq); +} + +EXPORT_SYMBOL(dccp_insert_option_elapsed_time); + +static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); +#ifdef DCCP_DEBUG + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : + "server TX opt: "; +#endif + struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; + int len = ap->dccpap_buf_vector_len + 2; + const u32 elapsed_time = jiffies_to_usecs(jiffies - ap->dccpap_time) / 10; + unsigned char *to, *from; + + if (elapsed_time != 0) + dccp_insert_option_elapsed_time(sk, skb, elapsed_time); + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { + LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert ACK Vector!\n")); + return; + } + + /* + * XXX: now we have just one ack vector sent record, so + * we have to wait for it to be cleared. + * + * Of course this is not acceptable, but this is just for + * basic testing now. + */ + if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1) + return; + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + to = skb_push(skb, len); + *to++ = DCCPO_ACK_VECTOR_0; + *to++ = len; + + len = ap->dccpap_buf_vector_len; + from = ap->dccpap_buf + ap->dccpap_buf_head; + + /* Check if buf_head wraps */ + if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) { + const unsigned int tailsize = ap->dccpap_buf_len - ap->dccpap_buf_head; + + memcpy(to, from, tailsize); + to += tailsize; + len -= tailsize; + from = ap->dccpap_buf; + } + + memcpy(to, from, len); + /* + * From draft-ietf-dccp-spec-11.txt: + * + * For each acknowledgement it sends, the HC-Receiver will add an + * acknowledgement record. ack_seqno will equal the HC-Receiver + * sequence number it used for the ack packet; ack_ptr will equal + * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will equal + * buf_nonce. + * + * This implemention uses just one ack record for now. + */ + ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; + ap->dccpap_ack_ptr = ap->dccpap_buf_head; + ap->dccpap_ack_ackno = ap->dccpap_buf_ackno; + ap->dccpap_ack_nonce = ap->dccpap_buf_nonce; + ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len; + + dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu\n", + debug_prefix, ap->dccpap_ack_vector_len, + ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); +} + +static inline void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) +{ + const u32 now = htonl(tcp_time_stamp); + dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); +} + +static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); +#ifdef DCCP_DEBUG + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : + "server TX opt: "; +#endif + u32 tstamp_echo; + const u32 elapsed_time = jiffies_to_usecs(jiffies - dp->dccps_timestamp_time) / 10; + const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); + const int len = 6 + elapsed_time_len; + unsigned char *to; + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { + LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert timestamp echo!\n")); + return; + } + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + to = skb_push(skb, len); + *to++ = DCCPO_TIMESTAMP_ECHO; + *to++ = len; + + tstamp_echo = htonl(dp->dccps_timestamp_echo); + memcpy(to, &tstamp_echo, 4); + to += 4; + dccp_encode_value_var(elapsed_time, to, elapsed_time_len); + + dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n", + debug_prefix, dp->dccps_timestamp_echo, + len, DCCP_SKB_CB(skb)->dccpd_seq); + + dp->dccps_timestamp_echo = 0; + dp->dccps_timestamp_time = 0; +} + +void dccp_insert_options(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + + DCCP_SKB_CB(skb)->dccpd_opt_len = 0; + + if (dp->dccps_options.dccpo_send_ndp_count) + dccp_insert_option_ndp(sk, skb); + + if (!dccp_packet_without_ack(skb)) { + if (dp->dccps_options.dccpo_send_ack_vector && + dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1) + dccp_insert_option_ack_vector(sk, skb); + + dccp_insert_option_timestamp(sk, skb); + if (dp->dccps_timestamp_echo != 0) + dccp_insert_option_timestamp_echo(sk, skb); + } + + ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); + ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); + + /* XXX: insert other options when appropriate */ + + if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) { + /* The length of all options has to be a multiple of 4 */ + int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4; + + if (padding != 0) { + padding = 4 - padding; + memset(skb_push(skb, padding), 0, padding); + DCCP_SKB_CB(skb)->dccpd_opt_len += padding; + } + } +} + +struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority) +{ + struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); + + if (ap != NULL) { +#ifdef DCCP_DEBUG + memset(ap->dccpap_buf, 0xFF, len); +#endif + ap->dccpap_buf_len = len; + ap->dccpap_buf_head = ap->dccpap_buf_tail = ap->dccpap_buf_len - 1; + ap->dccpap_buf_ackno = ap->dccpap_ack_ackno = ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; + ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; + ap->dccpap_ack_ptr = 0; + ap->dccpap_time = 0; + ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; + } + + return ap; +} + +void dccp_ackpkts_free(struct dccp_ackpkts *ap) +{ + if (ap != NULL) { +#ifdef DCCP_DEBUG + memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len); +#endif + kfree(ap); + } +} + +static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap, + const unsigned int index) +{ + return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK; +} + +static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap, + const unsigned int index) +{ + return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK; +} + +/* + * If several packets are missing, the HC-Receiver may prefer to enter multiple + * bytes with run length 0, rather than a single byte with a larger run length; + * this simplifies table updates if one of the missing packets arrives. + */ +static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap, + const unsigned int packets, + const unsigned char state) +{ + unsigned int gap; + signed long new_head; + + if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len) + return -ENOBUFS; + + gap = packets - 1; + new_head = ap->dccpap_buf_head - packets; + + if (new_head < 0) { + if (gap > 0) { + memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED, + gap + new_head + 1); + gap = -new_head; + } + new_head += ap->dccpap_buf_len; + } + + ap->dccpap_buf_head = new_head; + + if (gap > 0) + memset(ap->dccpap_buf + ap->dccpap_buf_head + 1, + DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap); + + ap->dccpap_buf[ap->dccpap_buf_head] = state; + ap->dccpap_buf_vector_len += packets; + return 0; +} + +/* + * Implements the draft-ietf-dccp-spec-11.txt Appendix A + */ +int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) +{ + /* + * Check at the right places if the buffer is full, if it is, tell the + * caller to start dropping packets till the HC-Sender acks our ACK + * vectors, when we will free up space in dccpap_buf. + * + * We may well decide to do buffer compression, etc, but for now lets + * just drop. + * + * From Appendix A: + * + * Of course, the circular buffer may overflow, either when the HC- + * Sender is sending data at a very high rate, when the HC-Receiver's + * acknowledgements are not reaching the HC-Sender, or when the HC- + * Sender is forgetting to acknowledge those acks (so the HC-Receiver + * is unable to clean up old state). In this case, the HC-Receiver + * should either compress the buffer (by increasing run lengths when + * possible), transfer its state to a larger buffer, or, as a last + * resort, drop all received packets, without processing them + * whatsoever, until its buffer shrinks again. + */ + + /* See if this is the first ackno being inserted */ + if (ap->dccpap_buf_vector_len == 0) { + ap->dccpap_buf[ap->dccpap_buf_head] = state; + ap->dccpap_buf_vector_len = 1; + } else if (after48(ackno, ap->dccpap_buf_ackno)) { + const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, ackno); + + /* + * Look if the state of this packet is the same as the previous ackno + * and if so if we can bump the head len. + */ + if (delta == 1 && + dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state && + dccp_ackpkts_len(ap, ap->dccpap_buf_head) < DCCP_ACKPKTS_LEN_MASK) + ap->dccpap_buf[ap->dccpap_buf_head]++; + else if (dccp_ackpkts_set_buf_head_state(ap, delta, state)) + return -ENOBUFS; + } else { + /* + * A.1.2. Old Packets + * + * When a packet with Sequence Number S arrives, and S <= buf_ackno, + * the HC-Receiver will scan the table for the byte corresponding to S. + * (Indexing structures could reduce the complexity of this scan.) + */ + u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno); + unsigned int index = ap->dccpap_buf_head; + + while (1) { + const u8 len = dccp_ackpkts_len(ap, index); + const u8 state = dccp_ackpkts_state(ap, index); + /* + * valid packets not yet in dccpap_buf have a reserved entry, with + * a len equal to 0 + */ + if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && + len == 0 && delta == 0) { /* Found our reserved seat! */ + dccp_pr_debug("Found %llu reserved seat!\n", ackno); + ap->dccpap_buf[index] = state; + goto out; + } + /* len == 0 means one packet */ + if (delta < len + 1) + goto out_duplicate; + + delta -= len + 1; + if (++index == ap->dccpap_buf_len) + index = 0; + } + } + + ap->dccpap_buf_ackno = ackno; + ap->dccpap_time = jiffies; +out: + dccp_pr_debug(""); + dccp_ackpkts_print(ap); + return 0; + +out_duplicate: + /* Duplicate packet */ + dccp_pr_debug("Received a dup or already considered lost packet: %llu\n", ackno); + return -EILSEQ; +} + +#ifdef DCCP_DEBUG +void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) +{ + if (!dccp_debug) + return; + + printk("ACK vector len=%d, ackno=%llu |", len, ackno); + + while (len--) { + const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; + const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); + + printk("%d,%d|", state, rl); + ++vector; + } + + printk("\n"); +} + +void dccp_ackpkts_print(const struct dccp_ackpkts *ap) +{ + dccp_ackvector_print(ap->dccpap_buf_ackno, + ap->dccpap_buf + ap->dccpap_buf_head, + ap->dccpap_buf_vector_len); +} +#endif + +static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap) +{ + /* + * As we're keeping track of the ack vector size + * (dccpap_buf_vector_len) and the sent ack vector size + * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but + * keep this code here as in the future we'll implement a vector of ack + * records, as suggested in draft-ietf-dccp-spec-11.txt Appendix A. -acme + */ +#if 0 + ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1; + if (ap->dccpap_buf_tail >= ap->dccpap_buf_len) + ap->dccpap_buf_tail -= ap->dccpap_buf_len; +#endif + ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len; +} + +void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, + u64 ackno) +{ + /* Check if we actually sent an ACK vector */ + if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) + return; + + if (ackno == ap->dccpap_ack_seqno) { +#ifdef DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " : + "server rx ack: "; +#endif + dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", + debug_prefix, 1, + ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); + dccp_ackpkts_trow_away_ack_record(ap); + ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; + } +} + +static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, + struct sock *sk, u64 ackno, + const unsigned char len, + const unsigned char *vector) +{ + unsigned char i; + + /* Check if we actually sent an ACK vector */ + if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) + return; + /* + * We're in the receiver half connection, so if the received an ACK vector + * ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're not interested. + * + * Extra explanation with example: + * + * if we received an ACK vector with ackno 50, it can only be acking + * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). + */ + // dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); + if (before48(ackno, ap->dccpap_ack_seqno)) { + // dccp_pr_debug_cat("yes\n"); + return; + } + // dccp_pr_debug_cat("no\n"); + + i = len; + while (i--) { + const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); + u64 ackno_end_rl; + + dccp_set_seqno(&ackno_end_rl, ackno - rl); + + // dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, ap->dccpap_ack_seqno, ackno); + if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) { + const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; + // dccp_pr_debug_cat("yes\n"); + + if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) { +#ifdef DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " : + "server rx ack: "; +#endif + dccp_pr_debug("%sACK vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", + debug_prefix, len, + ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); + dccp_ackpkts_trow_away_ack_record(ap); + } + /* + * If dccpap_ack_seqno was not received, no problem we'll + * send another ACK vector. + */ + ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; + break; + } + // dccp_pr_debug_cat("no\n"); + + dccp_set_seqno(&ackno, ackno_end_rl - 1); + ++vector; + } +} diff --git a/net/dccp/output.c b/net/dccp/output.c new file mode 100644 index 00000000000..22ca2910d4f --- /dev/null +++ b/net/dccp/output.c @@ -0,0 +1,406 @@ +/* + * net/dccp/output.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include + +#include "ccid.h" +#include "dccp.h" + +static inline void dccp_event_ack_sent(struct sock *sk) +{ + inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); +} + +/* + * All SKB's seen here are completely headerless. It is our + * job to build the DCCP header, and pass the packet down to + * IP so it can do the same plus pass the packet off to the + * device. + */ +int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) +{ + if (likely(skb != NULL)) { + const struct inet_sock *inet = inet_sk(sk); + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + struct dccp_hdr *dh; + /* XXX For now we're using only 48 bits sequence numbers */ + const int dccp_header_size = sizeof(*dh) + + sizeof(struct dccp_hdr_ext) + + dccp_packet_hdr_len(dcb->dccpd_type); + int err, set_ack = 1; + u64 ackno = dp->dccps_gsr; + + /* + * FIXME: study DCCP_PKT_SYNC[ACK] to see what is the right thing + * to do here... + */ + dccp_inc_seqno(&dp->dccps_gss); + + dcb->dccpd_seq = dp->dccps_gss; + dccp_insert_options(sk, skb); + + switch (dcb->dccpd_type) { + case DCCP_PKT_DATA: + set_ack = 0; + break; + case DCCP_PKT_SYNC: + case DCCP_PKT_SYNCACK: + ackno = dcb->dccpd_seq; + break; + } + + skb->h.raw = skb_push(skb, dccp_header_size); + dh = dccp_hdr(skb); + /* Data packets are not cloned as they are never retransmitted */ + if (skb_cloned(skb)) + skb_set_owner_w(skb, sk); + + /* Build DCCP header and checksum it. */ + memset(dh, 0, dccp_header_size); + dh->dccph_type = dcb->dccpd_type; + dh->dccph_sport = inet->sport; + dh->dccph_dport = inet->dport; + dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4; + dh->dccph_ccval = dcb->dccpd_ccval; + /* XXX For now we're using only 48 bits sequence numbers */ + dh->dccph_x = 1; + + dp->dccps_awh = dp->dccps_gss; + dccp_hdr_set_seq(dh, dp->dccps_gss); + if (set_ack) + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno); + + switch (dcb->dccpd_type) { + case DCCP_PKT_REQUEST: + dccp_hdr_request(skb)->dccph_req_service = dcb->dccpd_service; + break; + case DCCP_PKT_RESET: + dccp_hdr_reset(skb)->dccph_reset_code = dcb->dccpd_reset_code; + break; + } + + dh->dccph_checksum = dccp_v4_checksum(skb); + + if (dcb->dccpd_type == DCCP_PKT_ACK || + dcb->dccpd_type == DCCP_PKT_DATAACK) + dccp_event_ack_sent(sk); + + DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + + err = ip_queue_xmit(skb, 0); + if (err <= 0) + return err; + + /* NET_XMIT_CN is special. It does not guarantee, + * that this packet is lost. It tells that device + * is about to start to drop packets or already + * drops some packets of the same priority and + * invokes us to send less aggressively. + */ + return err == NET_XMIT_CN ? 0 : err; + } + return -ENOBUFS; +} + +unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) +{ + struct dccp_sock *dp = dccp_sk(sk); + int mss_now; + + /* + * FIXME: we really should be using the af_specific thing to support IPv6. + * mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); + */ + mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); + + /* Now subtract optional transport overhead */ + mss_now -= dp->dccps_ext_header_len; + + /* + * FIXME: this should come from the CCID infrastructure, where, say, + * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets + * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED + * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to + * make it a multiple of 4 + */ + + mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; + + /* And store cached results */ + dp->dccps_pmtu_cookie = pmtu; + dp->dccps_mss_cache = mss_now; + + return mss_now; +} + +int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) +{ + if (inet_sk_rebuild_header(sk) != 0) + return -EHOSTUNREACH; /* Routing failure or similar. */ + + return dccp_transmit_skb(sk, (skb_cloned(skb) ? + pskb_copy(skb, GFP_ATOMIC): + skb_clone(skb, GFP_ATOMIC))); +} + +struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, + struct request_sock *req) +{ + struct dccp_hdr *dh; + const int dccp_header_size = sizeof(struct dccp_hdr) + + sizeof(struct dccp_hdr_ext) + + sizeof(struct dccp_hdr_response); + struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN + + dccp_header_size, 1, + GFP_ATOMIC); + if (skb == NULL) + return NULL; + + /* Reserve space for headers. */ + skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size); + + skb->dst = dst_clone(dst); + skb->csum = 0; + + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; + DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss; + dccp_insert_options(sk, skb); + + skb->h.raw = skb_push(skb, dccp_header_size); + + dh = dccp_hdr(skb); + memset(dh, 0, dccp_header_size); + + dh->dccph_sport = inet_sk(sk)->sport; + dh->dccph_dport = inet_rsk(req)->rmt_port; + dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; + dh->dccph_type = DCCP_PKT_RESPONSE; + dh->dccph_x = 1; + dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); + + dh->dccph_checksum = dccp_v4_checksum(skb); + + DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + return skb; +} + +struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, + const enum dccp_reset_codes code) + +{ + struct dccp_hdr *dh; + struct dccp_sock *dp = dccp_sk(sk); + const int dccp_header_size = sizeof(struct dccp_hdr) + + sizeof(struct dccp_hdr_ext) + + sizeof(struct dccp_hdr_reset); + struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN + + dccp_header_size, 1, + GFP_ATOMIC); + if (skb == NULL) + return NULL; + + /* Reserve space for headers. */ + skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size); + + skb->dst = dst_clone(dst); + skb->csum = 0; + + dccp_inc_seqno(&dp->dccps_gss); + + DCCP_SKB_CB(skb)->dccpd_reset_code = code; + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET; + DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss; + dccp_insert_options(sk, skb); + + skb->h.raw = skb_push(skb, dccp_header_size); + + dh = dccp_hdr(skb); + memset(dh, 0, dccp_header_size); + + dh->dccph_sport = inet_sk(sk)->sport; + dh->dccph_dport = inet_sk(sk)->dport; + dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; + dh->dccph_type = DCCP_PKT_RESET; + dh->dccph_x = 1; + dccp_hdr_set_seq(dh, dp->dccps_gss); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr); + + dccp_hdr_reset(skb)->dccph_reset_code = code; + + dh->dccph_checksum = dccp_v4_checksum(skb); + + DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + return skb; +} + +/* + * Do all connect socket setups that can be done AF independent. + */ +static inline void dccp_connect_init(struct sock *sk) +{ + struct dst_entry *dst = __sk_dst_get(sk); + struct inet_connection_sock *icsk = inet_csk(sk); + + sk->sk_err = 0; + sock_reset_flag(sk, SOCK_DONE); + + dccp_sync_mss(sk, dst_mtu(dst)); + + /* + * FIXME: set dp->{dccps_swh,dccps_swl}, with + * something like dccp_inc_seq + */ + + icsk->icsk_retransmits = 0; +} + +int dccp_connect(struct sock *sk) +{ + struct sk_buff *skb; + struct inet_connection_sock *icsk = inet_csk(sk); + + dccp_connect_init(sk); + + skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation); + if (unlikely(skb == NULL)) + return -ENOBUFS; + + /* Reserve space for headers. */ + skb_reserve(skb, MAX_DCCP_HEADER); + + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; + /* FIXME: set service to something meaningful, coming + * from userspace*/ + DCCP_SKB_CB(skb)->dccpd_service = 0; + skb->csum = 0; + skb_set_owner_w(skb, sk); + + BUG_TRAP(sk->sk_send_head == NULL); + sk->sk_send_head = skb; + dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)); + DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); + + /* Timer for repeating the REQUEST until an answer. */ + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); + return 0; +} + +void dccp_send_ack(struct sock *sk) +{ + /* If we have been reset, we may not send again. */ + if (sk->sk_state != DCCP_CLOSED) { + struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC); + + if (skb == NULL) { + inet_csk_schedule_ack(sk); + inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX); + return; + } + + /* Reserve space for headers */ + skb_reserve(skb, MAX_DCCP_HEADER); + skb->csum = 0; + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK; + skb_set_owner_w(skb, sk); + dccp_transmit_skb(sk, skb); + } +} + +EXPORT_SYMBOL_GPL(dccp_send_ack); + +void dccp_send_delayed_ack(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + /* + * FIXME: tune this timer. elapsed time fixes the skew, so no problem + * with using 2s, and active senders also piggyback the ACK into a + * DATAACK packet, so this is really for quiescent senders. + */ + unsigned long timeout = jiffies + 2 * HZ; + + /* Use new timeout only if there wasn't a older one earlier. */ + if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) { + /* If delack timer was blocked or is about to expire, + * send ACK now. + * + * FIXME: check the "about to expire" part + */ + if (icsk->icsk_ack.blocked) { + dccp_send_ack(sk); + return; + } + + if (!time_before(timeout, icsk->icsk_ack.timeout)) + timeout = icsk->icsk_ack.timeout; + } + icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; + icsk->icsk_ack.timeout = timeout; + sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); +} + +void dccp_send_sync(struct sock *sk, u64 seq) +{ + /* + * We are not putting this on the write queue, so + * dccp_transmit_skb() will set the ownership to this + * sock. + */ + struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC); + + if (skb == NULL) + /* FIXME: how to make sure the sync is sent? */ + return; + + /* Reserve space for headers and prepare control bits. */ + skb_reserve(skb, MAX_DCCP_HEADER); + skb->csum = 0; + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_SYNC; + DCCP_SKB_CB(skb)->dccpd_seq = seq; + + skb_set_owner_w(skb, sk); + dccp_transmit_skb(sk, skb); +} + +/* Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This cannot be + * allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under any circumstances. + */ +void dccp_send_close(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct sk_buff *skb; + + /* Socket is locked, keep trying until memory is available. */ + for (;;) { + skb = alloc_skb(sk->sk_prot->max_header, GFP_KERNEL); + if (skb != NULL) + break; + yield(); + } + + /* Reserve space for headers and prepare control bits. */ + skb_reserve(skb, sk->sk_prot->max_header); + skb->csum = 0; + DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; + + skb_set_owner_w(skb, sk); + dccp_transmit_skb(sk, skb); + + ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); +} diff --git a/net/dccp/proto.c b/net/dccp/proto.c new file mode 100644 index 00000000000..70284e6afe0 --- /dev/null +++ b/net/dccp/proto.c @@ -0,0 +1,818 @@ +/* + * net/dccp/proto.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "ccid.h" +#include "dccp.h" + +DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics); + +atomic_t dccp_orphan_count = ATOMIC_INIT(0); + +static struct net_protocol dccp_protocol = { + .handler = dccp_v4_rcv, + .err_handler = dccp_v4_err, +}; + +const char *dccp_packet_name(const int type) +{ + static const char *dccp_packet_names[] = { + [DCCP_PKT_REQUEST] = "REQUEST", + [DCCP_PKT_RESPONSE] = "RESPONSE", + [DCCP_PKT_DATA] = "DATA", + [DCCP_PKT_ACK] = "ACK", + [DCCP_PKT_DATAACK] = "DATAACK", + [DCCP_PKT_CLOSEREQ] = "CLOSEREQ", + [DCCP_PKT_CLOSE] = "CLOSE", + [DCCP_PKT_RESET] = "RESET", + [DCCP_PKT_SYNC] = "SYNC", + [DCCP_PKT_SYNCACK] = "SYNCACK", + }; + + if (type >= DCCP_NR_PKT_TYPES) + return "INVALID"; + else + return dccp_packet_names[type]; +} + +EXPORT_SYMBOL_GPL(dccp_packet_name); + +const char *dccp_state_name(const int state) +{ + static char *dccp_state_names[] = { + [DCCP_OPEN] = "OPEN", + [DCCP_REQUESTING] = "REQUESTING", + [DCCP_PARTOPEN] = "PARTOPEN", + [DCCP_LISTEN] = "LISTEN", + [DCCP_RESPOND] = "RESPOND", + [DCCP_CLOSING] = "CLOSING", + [DCCP_TIME_WAIT] = "TIME_WAIT", + [DCCP_CLOSED] = "CLOSED", + }; + + if (state >= DCCP_MAX_STATES) + return "INVALID STATE!"; + else + return dccp_state_names[state]; +} + +EXPORT_SYMBOL_GPL(dccp_state_name); + +static inline int dccp_listen_start(struct sock *sk) +{ + dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; + return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); +} + +int dccp_disconnect(struct sock *sk, int flags) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_sock *inet = inet_sk(sk); + int err = 0; + const int old_state = sk->sk_state; + + if (old_state != DCCP_CLOSED) + dccp_set_state(sk, DCCP_CLOSED); + + /* ABORT function of RFC793 */ + if (old_state == DCCP_LISTEN) { + inet_csk_listen_stop(sk); + /* FIXME: do the active reset thing */ + } else if (old_state == DCCP_REQUESTING) + sk->sk_err = ECONNRESET; + + dccp_clear_xmit_timers(sk); + __skb_queue_purge(&sk->sk_receive_queue); + if (sk->sk_send_head != NULL) { + __kfree_skb(sk->sk_send_head); + sk->sk_send_head = NULL; + } + + inet->dport = 0; + + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + inet_reset_saddr(sk); + + sk->sk_shutdown = 0; + sock_reset_flag(sk, SOCK_DONE); + + icsk->icsk_backoff = 0; + inet_csk_delack_init(sk); + __sk_dst_reset(sk); + + BUG_TRAP(!inet->num || icsk->icsk_bind_hash); + + sk->sk_error_report(sk); + return err; +} + +int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + dccp_pr_debug("entry\n"); + return -ENOIOCTLCMD; +} + +int dccp_setsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) +{ + dccp_pr_debug("entry\n"); + + if (level != SOL_DCCP) + return ip_setsockopt(sk, level, optname, optval, optlen); + + return -EOPNOTSUPP; +} + +int dccp_getsockopt(struct sock *sk, int level, int optname, + char *optval, int *optlen) +{ + dccp_pr_debug("entry\n"); + + if (level != SOL_DCCP) + return ip_getsockopt(sk, level, optname, optval, optlen); + + return -EOPNOTSUPP; +} + +int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + const struct dccp_sock *dp = dccp_sk(sk); + const int flags = msg->msg_flags; + const int noblock = flags & MSG_DONTWAIT; + struct sk_buff *skb; + int rc, size; + long timeo; + + if (len > dp->dccps_mss_cache) + return -EMSGSIZE; + + lock_sock(sk); + + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + + /* + * We have to use sk_stream_wait_connect here to set sk_write_pending, + * so that the trick in dccp_rcv_request_sent_state_process. + */ + /* Wait for a connection to finish. */ + if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING)) + if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0) + goto out_err; + + size = sk->sk_prot->max_header + len; + release_sock(sk); + skb = sock_alloc_send_skb(sk, size, noblock, &rc); + lock_sock(sk); + + if (skb == NULL) + goto out_release; + + skb_reserve(skb, sk->sk_prot->max_header); + rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); + if (rc == 0) { + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; + long delay; + + /* + * XXX: This is just to match the Waikato tree CA interaction + * points, after the CCID3 code is stable and I have a better + * understanding of behaviour I'll change this to look more like + * TCP. + */ + while (1) { + rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, + skb, len, &delay); + if (rc == 0) + break; + if (rc != -EAGAIN) + goto out_discard; + if (delay > timeo) + goto out_discard; + release_sock(sk); + delay = schedule_timeout(delay); + lock_sock(sk); + timeo -= delay; + if (signal_pending(current)) + goto out_interrupted; + rc = -EPIPE; + if (!(sk->sk_state == DCCP_PARTOPEN || sk->sk_state == DCCP_OPEN)) + goto out_discard; + } + + if (sk->sk_state == DCCP_PARTOPEN) { + /* See 8.1.5. Handshake Completion */ + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); + dcb->dccpd_type = DCCP_PKT_DATAACK; + /* FIXME: we really should have a dccps_ack_pending or use icsk */ + } else if (inet_csk_ack_scheduled(sk) || + (dp->dccps_options.dccpo_send_ack_vector && + ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && + ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) + dcb->dccpd_type = DCCP_PKT_DATAACK; + else + dcb->dccpd_type = DCCP_PKT_DATA; + dccp_transmit_skb(sk, skb); + ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); + } else { +out_discard: + kfree_skb(skb); + } +out_release: + release_sock(sk); + return rc ? : len; +out_err: + rc = sk_stream_error(sk, flags, rc); + goto out_release; +out_interrupted: + rc = sock_intr_errno(timeo); + goto out_discard; +} + +EXPORT_SYMBOL(dccp_sendmsg); + +int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int nonblock, int flags, int *addr_len) +{ + const struct dccp_hdr *dh; + int copied = 0; + unsigned long used; + int err; + int target; /* Read at least this many bytes */ + long timeo; + + lock_sock(sk); + + err = -ENOTCONN; + if (sk->sk_state == DCCP_LISTEN) + goto out; + + timeo = sock_rcvtimeo(sk, nonblock); + + /* Urgent data needs to be handled specially. */ + if (flags & MSG_OOB) + goto recv_urg; + + /* FIXME */ +#if 0 + seq = &tp->copied_seq; + if (flags & MSG_PEEK) { + peek_seq = tp->copied_seq; + seq = &peek_seq; + } +#endif + + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + + do { + struct sk_buff *skb; + u32 offset; + + /* FIXME */ +#if 0 + /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ + if (tp->urg_data && tp->urg_seq == *seq) { + if (copied) + break; + if (signal_pending(current)) { + copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; + break; + } + } +#endif + + /* Next get a buffer. */ + + skb = skb_peek(&sk->sk_receive_queue); + do { + if (!skb) + break; + + offset = 0; + dh = dccp_hdr(skb); + + if (dh->dccph_type == DCCP_PKT_DATA || + dh->dccph_type == DCCP_PKT_DATAACK) + goto found_ok_skb; + + if (dh->dccph_type == DCCP_PKT_RESET || + dh->dccph_type == DCCP_PKT_CLOSE) { + dccp_pr_debug("found fin ok!\n"); + goto found_fin_ok; + } + dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type)); + BUG_TRAP(flags & MSG_PEEK); + skb = skb->next; + } while (skb != (struct sk_buff *)&sk->sk_receive_queue); + + /* Well, if we have backlog, try to process it now yet. */ + if (copied >= target && !sk->sk_backlog.tail) + break; + + if (copied) { + if (sk->sk_err || + sk->sk_state == DCCP_CLOSED || + (sk->sk_shutdown & RCV_SHUTDOWN) || + !timeo || + signal_pending(current) || + (flags & MSG_PEEK)) + break; + } else { + if (sock_flag(sk, SOCK_DONE)) + break; + + if (sk->sk_err) { + copied = sock_error(sk); + break; + } + + if (sk->sk_shutdown & RCV_SHUTDOWN) + break; + + if (sk->sk_state == DCCP_CLOSED) { + if (!sock_flag(sk, SOCK_DONE)) { + /* This occurs when user tries to read + * from never connected socket. + */ + copied = -ENOTCONN; + break; + } + break; + } + + if (!timeo) { + copied = -EAGAIN; + break; + } + + if (signal_pending(current)) { + copied = sock_intr_errno(timeo); + break; + } + } + + /* FIXME: cleanup_rbuf(sk, copied); */ + + if (copied >= target) { + /* Do not sleep, just process backlog. */ + release_sock(sk); + lock_sock(sk); + } else + sk_wait_data(sk, &timeo); + + continue; + + found_ok_skb: + /* Ok so how much can we use? */ + used = skb->len - offset; + if (len < used) + used = len; + + if (!(flags & MSG_TRUNC)) { + err = skb_copy_datagram_iovec(skb, offset, + msg->msg_iov, used); + if (err) { + /* Exception. Bailout! */ + if (!copied) + copied = -EFAULT; + break; + } + } + + copied += used; + len -= used; + + /* FIXME: tcp_rcv_space_adjust(sk); */ + +//skip_copy: + if (used + offset < skb->len) + continue; + + if (!(flags & MSG_PEEK)) + sk_eat_skb(sk, skb); + continue; + found_fin_ok: + if (!(flags & MSG_PEEK)) + sk_eat_skb(sk, skb); + break; + + } while (len > 0); + + /* According to UNIX98, msg_name/msg_namelen are ignored + * on connected socket. I was just happy when found this 8) --ANK + */ + + /* Clean up data we have read: This will do ACK frames. */ + /* FIXME: cleanup_rbuf(sk, copied); */ + + release_sock(sk); + return copied; + +out: + release_sock(sk); + return err; + +recv_urg: + /* FIXME: err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); */ + goto out; +} + +static int inet_dccp_listen(struct socket *sock, int backlog) +{ + struct sock *sk = sock->sk; + unsigned char old_state; + int err; + + lock_sock(sk); + + err = -EINVAL; + if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP) + goto out; + + old_state = sk->sk_state; + if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) + goto out; + + /* Really, if the socket is already in listen state + * we can only allow the backlog to be adjusted. + */ + if (old_state != DCCP_LISTEN) { + /* + * FIXME: here it probably should be sk->sk_prot->listen_start + * see tcp_listen_start + */ + err = dccp_listen_start(sk); + if (err) + goto out; + } + sk->sk_max_ack_backlog = backlog; + err = 0; + +out: + release_sock(sk); + return err; +} + +static const unsigned char dccp_new_state[] = { + /* current state: new state: action: */ + [0] = DCCP_CLOSED, + [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, + [DCCP_REQUESTING] = DCCP_CLOSED, + [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, + [DCCP_LISTEN] = DCCP_CLOSED, + [DCCP_RESPOND] = DCCP_CLOSED, + [DCCP_CLOSING] = DCCP_CLOSED, + [DCCP_TIME_WAIT] = DCCP_CLOSED, + [DCCP_CLOSED] = DCCP_CLOSED, +}; + +static int dccp_close_state(struct sock *sk) +{ + const int next = dccp_new_state[sk->sk_state]; + const int ns = next & DCCP_STATE_MASK; + + if (ns != sk->sk_state) + dccp_set_state(sk, ns); + + return next & DCCP_ACTION_FIN; +} + +void dccp_close(struct sock *sk, long timeout) +{ + struct sk_buff *skb; + + lock_sock(sk); + + sk->sk_shutdown = SHUTDOWN_MASK; + + if (sk->sk_state == DCCP_LISTEN) { + dccp_set_state(sk, DCCP_CLOSED); + + /* Special case. */ + inet_csk_listen_stop(sk); + + goto adjudge_to_death; + } + + /* + * We need to flush the recv. buffs. We do this only on the + * descriptor close, not protocol-sourced closes, because the + *reader process may not have drained the data yet! + */ + /* FIXME: check for unread data */ + while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { + __kfree_skb(skb); + } + + if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { + /* Check zero linger _after_ checking for unread data. */ + sk->sk_prot->disconnect(sk, 0); + } else if (dccp_close_state(sk)) { + dccp_send_close(sk); + } + + sk_stream_wait_close(sk, timeout); + +adjudge_to_death: + release_sock(sk); + /* + * Now socket is owned by kernel and we acquire BH lock + * to finish close. No need to check for user refs. + */ + local_bh_disable(); + bh_lock_sock(sk); + BUG_TRAP(!sock_owned_by_user(sk)); + + sock_hold(sk); + sock_orphan(sk); + + if (sk->sk_state != DCCP_CLOSED) + dccp_set_state(sk, DCCP_CLOSED); + + atomic_inc(&dccp_orphan_count); + if (sk->sk_state == DCCP_CLOSED) + inet_csk_destroy_sock(sk); + + /* Otherwise, socket is reprieved until protocol close. */ + + bh_unlock_sock(sk); + local_bh_enable(); + sock_put(sk); +} + +void dccp_shutdown(struct sock *sk, int how) +{ + dccp_pr_debug("entry\n"); +} + +struct proto_ops inet_dccp_ops = { + .family = PF_INET, + .owner = THIS_MODULE, + .release = inet_release, + .bind = inet_bind, + .connect = inet_stream_connect, + .socketpair = sock_no_socketpair, + .accept = inet_accept, + .getname = inet_getname, + .poll = sock_no_poll, + .ioctl = inet_ioctl, + .listen = inet_dccp_listen, /* FIXME: work on inet_listen to rename it to sock_common_listen */ + .shutdown = inet_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = inet_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +extern struct net_proto_family inet_family_ops; + +static struct inet_protosw dccp_v4_protosw = { + .type = SOCK_DCCP, + .protocol = IPPROTO_DCCP, + .prot = &dccp_v4_prot, + .ops = &inet_dccp_ops, + .capability = -1, + .no_check = 0, + .flags = 0, +}; + +/* + * This is the global socket data structure used for responding to + * the Out-of-the-blue (OOTB) packets. A control sock will be created + * for this socket at the initialization time. + */ +struct socket *dccp_ctl_socket; + +static char dccp_ctl_socket_err_msg[] __initdata = + KERN_ERR "DCCP: Failed to create the control socket.\n"; + +static int __init dccp_ctl_sock_init(void) +{ + int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP, + &dccp_ctl_socket); + if (rc < 0) + printk(dccp_ctl_socket_err_msg); + else { + dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC; + inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1; + + /* Unhash it so that IP input processing does not even + * see it, we do not wish this socket to see incoming + * packets. + */ + dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk); + } + + return rc; +} + +static void __exit dccp_ctl_sock_exit(void) +{ + if (dccp_ctl_socket != NULL) + sock_release(dccp_ctl_socket); +} + +static int __init init_dccp_v4_mibs(void) +{ + int rc = -ENOMEM; + + dccp_statistics[0] = alloc_percpu(struct dccp_mib); + if (dccp_statistics[0] == NULL) + goto out; + + dccp_statistics[1] = alloc_percpu(struct dccp_mib); + if (dccp_statistics[1] == NULL) + goto out_free_one; + + rc = 0; +out: + return rc; +out_free_one: + free_percpu(dccp_statistics[0]); + dccp_statistics[0] = NULL; + goto out; + +} + +static int thash_entries; +module_param(thash_entries, int, 0444); +MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); + +int dccp_debug; +module_param(dccp_debug, int, 0444); +MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); + +static int __init dccp_init(void) +{ + unsigned long goal; + int ehash_order, bhash_order, i; + int rc = proto_register(&dccp_v4_prot, 1); + + if (rc) + goto out; + + dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", + sizeof(struct inet_bind_bucket), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!dccp_hashinfo.bind_bucket_cachep) + goto out_proto_unregister; + + /* + * Size and allocate the main established and bind bucket + * hash tables. + * + * The methodology is similar to that of the buffer cache. + */ + if (num_physpages >= (128 * 1024)) + goal = num_physpages >> (21 - PAGE_SHIFT); + else + goal = num_physpages >> (23 - PAGE_SHIFT); + + if (thash_entries) + goal = (thash_entries * sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT; + for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) + ; + do { + dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE / + sizeof(struct inet_ehash_bucket); + dccp_hashinfo.ehash_size >>= 1; + while (dccp_hashinfo.ehash_size & (dccp_hashinfo.ehash_size - 1)) + dccp_hashinfo.ehash_size--; + dccp_hashinfo.ehash = (struct inet_ehash_bucket *) + __get_free_pages(GFP_ATOMIC, ehash_order); + } while (!dccp_hashinfo.ehash && --ehash_order > 0); + + if (!dccp_hashinfo.ehash) { + printk(KERN_CRIT "Failed to allocate DCCP " + "established hash table\n"); + goto out_free_bind_bucket_cachep; + } + + for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) { + rwlock_init(&dccp_hashinfo.ehash[i].lock); + INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); + } + + bhash_order = ehash_order; + + do { + dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE / + sizeof(struct inet_bind_hashbucket); + if ((dccp_hashinfo.bhash_size > (64 * 1024)) && bhash_order > 0) + continue; + dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) + __get_free_pages(GFP_ATOMIC, bhash_order); + } while (!dccp_hashinfo.bhash && --bhash_order >= 0); + + if (!dccp_hashinfo.bhash) { + printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n"); + goto out_free_dccp_ehash; + } + + for (i = 0; i < dccp_hashinfo.bhash_size; i++) { + spin_lock_init(&dccp_hashinfo.bhash[i].lock); + INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain); + } + + if (init_dccp_v4_mibs()) + goto out_free_dccp_bhash; + + rc = -EAGAIN; + if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP)) + goto out_free_dccp_v4_mibs; + + inet_register_protosw(&dccp_v4_protosw); + + rc = dccp_ctl_sock_init(); + if (rc) + goto out_unregister_protosw; +out: + return rc; +out_unregister_protosw: + inet_unregister_protosw(&dccp_v4_protosw); + inet_del_protocol(&dccp_protocol, IPPROTO_DCCP); +out_free_dccp_v4_mibs: + free_percpu(dccp_statistics[0]); + free_percpu(dccp_statistics[1]); + dccp_statistics[0] = dccp_statistics[1] = NULL; +out_free_dccp_bhash: + free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); + dccp_hashinfo.bhash = NULL; +out_free_dccp_ehash: + free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); + dccp_hashinfo.ehash = NULL; +out_free_bind_bucket_cachep: + kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); + dccp_hashinfo.bind_bucket_cachep = NULL; +out_proto_unregister: + proto_unregister(&dccp_v4_prot); + goto out; +} + +static const char dccp_del_proto_err_msg[] __exitdata = + KERN_ERR "can't remove dccp net_protocol\n"; + +static void __exit dccp_fini(void) +{ + dccp_ctl_sock_exit(); + + inet_unregister_protosw(&dccp_v4_protosw); + + if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0) + printk(dccp_del_proto_err_msg); + + /* Free the control endpoint. */ + sock_release(dccp_ctl_socket); + + proto_unregister(&dccp_v4_prot); + + kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); +} + +module_init(dccp_init); +module_exit(dccp_fini); + +/* __stringify doesn't likes enums, so use SOCK_DCCP (6) value directly */ +MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-6"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arnaldo Carvalho de Melo "); +MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); diff --git a/net/dccp/timer.c b/net/dccp/timer.c new file mode 100644 index 00000000000..8c396ee01aa --- /dev/null +++ b/net/dccp/timer.c @@ -0,0 +1,249 @@ +/* + * net/dccp/timer.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include "dccp.h" + +static void dccp_write_timer(unsigned long data); +static void dccp_keepalive_timer(unsigned long data); +static void dccp_delack_timer(unsigned long data); + +void dccp_init_xmit_timers(struct sock *sk) +{ + inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, + &dccp_keepalive_timer); +} + +static void dccp_write_err(struct sock *sk) +{ + sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; + sk->sk_error_report(sk); + + dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED); + dccp_done(sk); + DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT); +} + +/* A write timeout has occurred. Process the after effects. */ +static int dccp_write_timeout(struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + int retry_until; + + if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) { + if (icsk->icsk_retransmits != 0) + dst_negative_advice(&sk->sk_dst_cache); + retry_until = icsk->icsk_syn_retries ? : /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */; + } else { + if (icsk->icsk_retransmits >= /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) { + /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black + hole detection. :-( + + It is place to make it. It is not made. I do not want + to make it. It is disguisting. It does not work in any + case. Let me to cite the same draft, which requires for + us to implement this: + + "The one security concern raised by this memo is that ICMP black holes + are often caused by over-zealous security administrators who block + all ICMP messages. It is vitally important that those who design and + deploy security systems understand the impact of strict filtering on + upper-layer protocols. The safest web site in the world is worthless + if most TCP implementations cannot transfer data from it. It would + be far nicer to have all of the black holes fixed rather than fixing + all of the TCP implementations." + + Golden words :-). + */ + + dst_negative_advice(&sk->sk_dst_cache); + } + + retry_until = /* FIXME! */ 15 /* FIXME! sysctl_tcp_retries2 */; + /* + * FIXME: see tcp_write_timout and tcp_out_of_resources + */ + } + + if (icsk->icsk_retransmits >= retry_until) { + /* Has it gone just too far? */ + dccp_write_err(sk); + return 1; + } + return 0; +} + +/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */ +static void dccp_delack_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + struct inet_connection_sock *icsk = inet_csk(sk); + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later. */ + icsk->icsk_ack.blocked = 1; + NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); + sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); + goto out; + } + + if (sk->sk_state == DCCP_CLOSED || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) + goto out; + if (time_after(icsk->icsk_ack.timeout, jiffies)) { + sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); + goto out; + } + + icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; + + if (inet_csk_ack_scheduled(sk)) { + if (!icsk->icsk_ack.pingpong) { + /* Delayed ACK missed: inflate ATO. */ + icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); + } else { + /* Delayed ACK missed: leave pingpong mode and + * deflate ATO. + */ + icsk->icsk_ack.pingpong = 0; + icsk->icsk_ack.ato = TCP_ATO_MIN; + } + dccp_send_ack(sk); + NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); + } +out: + bh_unlock_sock(sk); + sock_put(sk); +} + +/* + * The DCCP retransmit timer. + */ +static void dccp_retransmit_timer(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + /* + * sk->sk_send_head has to have one skb with + * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP + * packet types (REQUEST, RESPONSE, the ACK in the 3way hanshake + * (PARTOPEN timer), etc). + */ + BUG_TRAP(sk->sk_send_head != NULL); + + /* + * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was + * sent, no need to retransmit, this sock is dead. + */ + if (dccp_write_timeout(sk)) + goto out; + + /* + * We want to know the number of packets retransmitted, not the + * total number of retransmissions of clones of original packets. + */ + if (icsk->icsk_retransmits == 0) + DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS); + + if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) { + /* + * Retransmission failed because of local congestion, + * do not backoff. + */ + if (icsk->icsk_retransmits == 0) + icsk->icsk_retransmits = 1; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + min(icsk->icsk_rto, + TCP_RESOURCE_PROBE_INTERVAL), + TCP_RTO_MAX); + goto out; + } + + icsk->icsk_backoff++; + icsk->icsk_retransmits++; + + icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); + if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */) + __sk_dst_reset(sk); +out:; +} + +static void dccp_write_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + struct inet_connection_sock *icsk = inet_csk(sk); + int event = 0; + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later */ + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20)); + goto out; + } + + if (sk->sk_state == DCCP_CLOSED || !icsk->icsk_pending) + goto out; + + if (time_after(icsk->icsk_timeout, jiffies)) { + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); + goto out; + } + + event = icsk->icsk_pending; + icsk->icsk_pending = 0; + + switch (event) { + case ICSK_TIME_RETRANS: + dccp_retransmit_timer(sk); + break; + } +out: + bh_unlock_sock(sk); + sock_put(sk); +} + +/* + * Timer for listening sockets + */ +static void dccp_response_timer(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + const int max_retries = icsk->icsk_syn_retries ? : TCP_SYNACK_RETRIES /* FIXME sysctl_tcp_synack_retries */; + + reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL, + DCCP_TIMEOUT_INIT, DCCP_RTO_MAX, max_retries); +} + +static void dccp_keepalive_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + + /* Only process if socket is not in use. */ + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later. */ + inet_csk_reset_keepalive_timer(sk, HZ / 20); + goto out; + } + + if (sk->sk_state == DCCP_LISTEN) { + dccp_response_timer(sk); + goto out; + } +out: + bh_unlock_sock(sk); + sock_put(sk); +} -- cgit v1.2.3 From a019d6fe2b9da68ea4ba6cf3c4e86fc1dbf554c3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:15:09 -0700 Subject: [ICSK]: Move generalised functions from tcp to inet_connection_sock This also improves reqsk_queue_prune and renames it to inet_csk_reqsk_queue_prune, as it deals with both inet_connection_sock and inet_request_sock objects, not just with request_sock ones thus belonging to inet_request_sock. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/timer.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 8c396ee01aa..9f1f1ab9e2b 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -220,11 +220,7 @@ out: */ static void dccp_response_timer(struct sock *sk) { - struct inet_connection_sock *icsk = inet_csk(sk); - const int max_retries = icsk->icsk_syn_retries ? : TCP_SYNACK_RETRIES /* FIXME sysctl_tcp_synack_retries */; - - reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL, - DCCP_TIMEOUT_INIT, DCCP_RTO_MAX, max_retries); + inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT, DCCP_RTO_MAX); } static void dccp_keepalive_timer(unsigned long data) -- cgit v1.2.3 From 95b81ef794278c835b321f6376b0522cd5df59b7 Mon Sep 17 00:00:00 2001 From: Yoshifumi Nishida Date: Tue, 9 Aug 2005 20:15:35 -0700 Subject: [DCCP]: Fix checksum routines Signed-off-by: Yoshifumi Nishida Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/dccp.h | 3 ++- net/dccp/ipv4.c | 38 ++++++++++++++++++++------------------ net/dccp/output.c | 9 ++++++--- 3 files changed, 28 insertions(+), 22 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index fb83454102c..55b690ab61a 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -242,7 +242,8 @@ extern int dccp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen); extern void dccp_shutdown(struct sock *sk, int how); -extern int dccp_v4_checksum(struct sk_buff *skb); +extern int dccp_v4_checksum(const struct sk_buff *skb, + const u32 saddr, const u32 daddr); extern int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code); extern void dccp_send_close(struct sock *sk); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 083bacaecb3..7b90606ec10 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -802,9 +802,9 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) return sk; } -int dccp_v4_checksum(struct sk_buff *skb) +int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr, const u32 daddr) { - struct dccp_hdr* dh = dccp_hdr(skb); + const struct dccp_hdr* dh = dccp_hdr(skb); int checksum_len; u32 tmp; @@ -816,24 +816,24 @@ int dccp_v4_checksum(struct sk_buff *skb) } tmp = csum_partial((unsigned char *)dh, checksum_len, 0); - return csum_fold(tmp); + return csum_tcpudp_magic(saddr, daddr, checksum_len, IPPROTO_DCCP, tmp); } -static int dccp_v4_verify_checksum(struct sk_buff *skb) +static int dccp_v4_verify_checksum(struct sk_buff *skb, + const u32 saddr, const u32 daddr) { - struct dccp_hdr *th = dccp_hdr(skb); - const u16 remote_checksum = th->dccph_checksum; - u16 local_checksum; - - /* FIXME: don't mess with skb payload */ - th->dccph_checksum = 0; /* zero it for computation */ - - local_checksum = dccp_v4_checksum(skb); - - /* FIXME: don't mess with skb payload */ - th->dccph_checksum = remote_checksum; /* put it back */ + struct dccp_hdr *dh = dccp_hdr(skb); + int checksum_len; + u32 tmp; - return remote_checksum == local_checksum ? 0 : -1; + if (dh->dccph_cscov == 0) + checksum_len = skb->len; + else { + checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); + checksum_len = checksum_len < skb->len ? checksum_len : skb->len; + } + tmp = csum_partial((unsigned char *)dh, checksum_len, 0); + return csum_tcpudp_magic(saddr, daddr, checksum_len, IPPROTO_DCCP, tmp) == 0 ? 0 : -1; } static struct dst_entry* dccp_v4_route_skb(struct sock *sk, @@ -902,7 +902,8 @@ void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); - dh->dccph_checksum = dccp_v4_checksum(skb); + dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr, + rxskb->nh.iph->daddr); bh_lock_sock(dccp_ctl_socket->sk); err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, @@ -1024,7 +1025,8 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) } /* If the header checksum is incorrect, drop packet and return */ - if (dccp_v4_verify_checksum(skb) < 0) { + if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr, + skb->nh.iph->daddr) < 0) { dccp_pr_debug("header checksum is incorrect\n"); return 1; } diff --git a/net/dccp/output.c b/net/dccp/output.c index 22ca2910d4f..4945eaa9d1a 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -93,7 +93,8 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) break; } - dh->dccph_checksum = dccp_v4_checksum(skb); + dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, + inet->daddr); if (dcb->dccpd_type == DCCP_PKT_ACK || dcb->dccpd_type == DCCP_PKT_DATAACK) @@ -193,7 +194,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); - dh->dccph_checksum = dccp_v4_checksum(skb); + dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr, + inet_rsk(req)->rmt_addr); DCCP_INC_STATS(DCCP_MIB_OUTSEGS); return skb; @@ -242,7 +244,8 @@ struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, dccp_hdr_reset(skb)->dccph_reset_code = code; - dh->dccph_checksum = dccp_v4_checksum(skb); + dh->dccph_checksum = dccp_v4_checksum(skb, inet_sk(sk)->saddr, + inet_sk(sk)->daddr); DCCP_INC_STATS(DCCP_MIB_OUTSEGS); return skb; -- cgit v1.2.3 From 757f612e091e7d13707eedc3ff71f1a9b53f5537 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:16:04 -0700 Subject: [CCID3]: Reenable list_for_each_entry_safe_continue usage Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 4f45902cb55..04299c7565f 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -1272,13 +1272,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid3_calc_new_delta(hctx); /* remove all packets older than the one acked from history */ -#if 0 - FIXME! list_for_each_entry_safe_continue(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) { list_del_init(&entry->ccid3htx_node); ccid3_tx_hist_entry_delete(entry); } -#endif if (hctx->ccid3hctx_x < 10) { ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx->ccid3hctx_x < 10\n"); hctx->ccid3hctx_x = 10; @@ -1820,8 +1817,7 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) a_next = b_next; num_later = 1; -#if 0 - FIXME MERGE GIT! + list_for_each_entry_safe_continue(entry, a_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { if (num_later == 0) { a_loss = entry; @@ -1830,7 +1826,6 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) entry->ccid3hrx_type == DCCP_PKT_DATAACK) --num_later; } -#endif if (a_loss == NULL) { if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { @@ -1848,8 +1843,6 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) /* Locate a lost data packet */ entry = packet = b_loss; -#if 0 - FIXME MERGE GIT! list_for_each_entry_safe_continue(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { u64 delta = dccp_delta_seqno(entry->ccid3hrx_seqno, packet->ccid3hrx_seqno); @@ -1875,7 +1868,6 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) if (packet == a_loss) break; } -#endif if (seq_loss != DCCP_MAX_SEQNO + 1) win_loss = a_loss->ccid3hrx_win_count; -- cgit v1.2.3 From bb97d31f5130d677644d9931ef38613d1164ec94 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:19:14 -0700 Subject: [INET]: Make inet_create try to load protocol modules Syntax is net-pf-PROTOCOL_FAMILY-PROTOCOL-SOCK_TYPE and if this fails net-pf-PROTOCOL_FAMILY-PROTOCOL. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/proto.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 70284e6afe0..66c43fce17a 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -811,8 +811,13 @@ static void __exit dccp_fini(void) module_init(dccp_init); module_exit(dccp_fini); -/* __stringify doesn't likes enums, so use SOCK_DCCP (6) value directly */ -MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-6"); +/* + * __stringify doesn't likes enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33) + * values directly, Also cover the case where the protocol is not specified, + * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP + */ +MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6"); +MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arnaldo Carvalho de Melo "); MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); -- cgit v1.2.3 From f6ccf55419c4f0021e7382f000f2fd14a29f3d3c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Aug 2005 20:27:14 -0700 Subject: [DCCP]: Fix u64 printf format warnings. Signed-off-by: David S. Miller --- net/dccp/input.c | 4 +++- net/dccp/ipv4.c | 6 ++++-- net/dccp/minisocks.c | 5 ++++- net/dccp/options.c | 35 +++++++++++++++++++++++++---------- 4 files changed, 36 insertions(+), 14 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/input.c b/net/dccp/input.c index 622e976a51f..76c3401e93a 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -274,7 +274,9 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh)) { dccp_pr_debug("invalid ackno: S.AWL=%llu, P.ackno=%llu, S.AWH=%llu \n", - dp->dccps_awl, DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awh); + (unsigned long long) dp->dccps_awl, + (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq, + (unsigned long long) dp->dccps_awh); goto out_invalid_packet; } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 7b90606ec10..4fa56dbcbea 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1092,14 +1092,16 @@ int dccp_v4_rcv(struct sk_buff *skb) dccp_packet_name(dh->dccph_type), NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport), NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport), - DCCP_SKB_CB(skb)->dccpd_seq); + (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); if (dccp_packet_without_ack(skb)) { DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; dccp_pr_debug_cat("\n"); } else { DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); - dccp_pr_debug_cat(", ack=%llu\n", DCCP_SKB_CB(skb)->dccpd_ack_seq); + dccp_pr_debug_cat(", ack=%llu\n", + (unsigned long long) + DCCP_SKB_CB(skb)->dccpd_ack_seq); } /* Step 2: diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 810f0c293b8..e498e389fcc 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -142,7 +142,10 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, /* Invalid ACK */ if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) { dccp_pr_debug("Invalid ACK number: ack_seq=%llu, dreq_iss=%llu\n", - DCCP_SKB_CB(skb)->dccpd_ack_seq, dccp_rsk(req)->dreq_iss); + (unsigned long long) + DCCP_SKB_CB(skb)->dccpd_ack_seq, + (unsigned long long) + dccp_rsk(req)->dreq_iss); goto drop; } diff --git a/net/dccp/options.c b/net/dccp/options.c index e1867767946..9ca32cba83a 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -119,7 +119,9 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_ack_vector_idx = value - options; dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n", - debug_prefix, len, DCCP_SKB_CB(skb)->dccpd_ack_seq); + debug_prefix, len, + (unsigned long long) + DCCP_SKB_CB(skb)->dccpd_ack_seq); dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, sk, @@ -137,6 +139,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", debug_prefix, opt_recv->dccpor_timestamp, + (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq); break; case DCCPO_TIMESTAMP_ECHO: @@ -147,7 +150,9 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, diff=%u\n", debug_prefix, opt_recv->dccpor_timestamp_echo, - len + 2, DCCP_SKB_CB(skb)->dccpd_ack_seq, + len + 2, + (unsigned long long) + DCCP_SKB_CB(skb)->dccpd_ack_seq, tcp_time_stamp - opt_recv->dccpor_timestamp_echo); opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value + 4, len - 4); @@ -308,7 +313,8 @@ void dccp_insert_option_elapsed_time(struct sock *sk, dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n", debug_prefix, elapsed_time, - len, DCCP_SKB_CB(skb)->dccpd_seq); + len, + (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); } EXPORT_SYMBOL(dccp_insert_option_elapsed_time); @@ -382,7 +388,8 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu\n", debug_prefix, ap->dccpap_ack_vector_len, - ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); + (unsigned long long) ap->dccpap_ack_seqno, + (unsigned long long) ap->dccpap_ack_ackno); } static inline void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) @@ -422,7 +429,8 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *s dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n", debug_prefix, dp->dccps_timestamp_echo, - len, DCCP_SKB_CB(skb)->dccpd_seq); + len, + (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); dp->dccps_timestamp_echo = 0; dp->dccps_timestamp_time = 0; @@ -607,7 +615,8 @@ int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) */ if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && len == 0 && delta == 0) { /* Found our reserved seat! */ - dccp_pr_debug("Found %llu reserved seat!\n", ackno); + dccp_pr_debug("Found %llu reserved seat!\n", + (unsigned long long) ackno); ap->dccpap_buf[index] = state; goto out; } @@ -630,7 +639,8 @@ out: out_duplicate: /* Duplicate packet */ - dccp_pr_debug("Received a dup or already considered lost packet: %llu\n", ackno); + dccp_pr_debug("Received a dup or already considered lost packet: %llu\n", + (unsigned long long) ackno); return -EILSEQ; } @@ -640,7 +650,8 @@ void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) if (!dccp_debug) return; - printk("ACK vector len=%d, ackno=%llu |", len, ackno); + printk("ACK vector len=%d, ackno=%llu |", len, + (unsigned long long) ackno); while (len--) { const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; @@ -693,7 +704,8 @@ void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, #endif dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", debug_prefix, 1, - ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); + (unsigned long long) ap->dccpap_ack_seqno, + (unsigned long long) ap->dccpap_ack_ackno); dccp_ackpkts_trow_away_ack_record(ap); ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; } @@ -745,7 +757,10 @@ static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, #endif dccp_pr_debug("%sACK vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", debug_prefix, len, - ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); + (unsigned long long) + ap->dccpap_ack_seqno, + (unsigned long long) + ap->dccpap_ack_ackno); dccp_ackpkts_trow_away_ack_record(ap); } /* -- cgit v1.2.3 From 27258ee54f8cd4a43d09319aa5448145afc2cb8d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:30:56 -0700 Subject: [DCCP]: Introduce dccp_write_xmit from code in dccp_sendmsg This way it gets closer to the TCP flow, where congestion window checks are done, it seems we can map ccid_hc_tx_send_packet in dccp_write_xmit to tcp_snd_wnd_test in tcp_write_xmit, a CCID2 decision should just fit in here as well... Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccid.h | 8 +++---- net/dccp/ccids/ccid3.c | 13 +++++----- net/dccp/dccp.h | 5 ++-- net/dccp/output.c | 38 ++++++++++++++++++++++++++++- net/dccp/proto.c | 65 ++++++-------------------------------------------- 5 files changed, 57 insertions(+), 72 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 06105b2a613..469f9a14b46 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -43,8 +43,7 @@ struct ccid { unsigned char len, u16 idx, unsigned char* value); int (*ccid_hc_tx_send_packet)(struct sock *sk, - struct sk_buff *skb, int len, - long *delay); + struct sk_buff *skb, int len); void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, int len); }; @@ -60,12 +59,11 @@ static inline void __ccid_get(struct ccid *ccid) } static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk, - struct sk_buff *skb, int len, - long *delay) + struct sk_buff *skb, int len) { int rc = 0; if (ccid->ccid_hc_tx_send_packet != NULL) - rc = ccid->ccid_hc_tx_send_packet(sk, skb, len, delay); + rc = ccid->ccid_hc_tx_send_packet(sk, skb, len); return rc; } diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 04299c7565f..df4adfeaafa 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -977,13 +977,14 @@ out: sock_put(sk); } -static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb, - int len, long *delay) +static int ccid3_hc_tx_send_packet(struct sock *sk, + struct sk_buff *skb, int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; struct ccid3_tx_hist_entry *new_packet = NULL; struct timeval now; + long delay; int rc = -ENOTCONN; // ccid3_pr_debug("%s, sk=%p, skb=%p, len=%d\n", dccp_role(sk), sk, skb, len); @@ -1037,11 +1038,11 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb, break; case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: - *delay = (now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta); - ccid3_pr_debug("send_packet delay=%ld\n",*delay); - *delay /= -1000; + delay = (now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta); + ccid3_pr_debug("send_packet delay=%ld\n", delay); + delay /= -1000; /* divide by -1000 is to convert to ms and get sign right */ - rc = *delay > 0 ? -EAGAIN : 0; + rc = delay > 0 ? -EAGAIN : 0; break; default: printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 55b690ab61a..8a0d7af649e 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -122,6 +122,9 @@ extern void dccp_send_ack(struct sock *sk); extern void dccp_send_delayed_ack(struct sock *sk); extern void dccp_send_sync(struct sock *sk, u64 seq); +extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, + const int len); + extern void dccp_init_xmit_timers(struct sock *sk); static inline void dccp_clear_xmit_timers(struct sock *sk) { @@ -194,8 +197,6 @@ static inline void dccp_openreq_init(struct request_sock *req, req->rcv_wnd = 0; } -extern void dccp_v4_send_check(struct sock *sk, struct dccp_hdr *dh, int len, - struct sk_buff *skb); extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); extern struct sock *dccp_create_openreq_child(struct sock *sk, diff --git a/net/dccp/output.c b/net/dccp/output.c index 4945eaa9d1a..50292c0605f 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -148,6 +148,41 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) return mss_now; } +int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, const int len) +{ + const struct dccp_sock *dp = dccp_sk(sk); + int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, len); + + if (err == 0) { + const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + + if (sk->sk_state == DCCP_PARTOPEN) { + /* See 8.1.5. Handshake Completion */ + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + inet_csk(sk)->icsk_rto, + DCCP_RTO_MAX); + dcb->dccpd_type = DCCP_PKT_DATAACK; + /* + * FIXME: we really should have a + * dccps_ack_pending or use icsk. + */ + } else if (inet_csk_ack_scheduled(sk) || + (dp->dccps_options.dccpo_send_ack_vector && + ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && + ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) + dcb->dccpd_type = DCCP_PKT_DATAACK; + else + dcb->dccpd_type = DCCP_PKT_DATA; + + err = dccp_transmit_skb(sk, skb); + ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); + } + + return err; +} + int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) { if (inet_sk_rebuild_header(sk) != 0) @@ -299,7 +334,8 @@ int dccp_connect(struct sock *sk) DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); /* Timer for repeating the REQUEST until an answer. */ - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + icsk->icsk_rto, DCCP_RTO_MAX); return 0; } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 66c43fce17a..877c1e0e3c4 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -182,8 +182,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return -EMSGSIZE; lock_sock(sk); - - timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + timeo = sock_sndtimeo(sk, noblock); /* * We have to use sk_stream_wait_connect here to set sk_write_pending, @@ -192,77 +191,27 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* Wait for a connection to finish. */ if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING)) if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0) - goto out_err; + goto out_release; size = sk->sk_prot->max_header + len; release_sock(sk); skb = sock_alloc_send_skb(sk, size, noblock, &rc); lock_sock(sk); - if (skb == NULL) goto out_release; skb_reserve(skb, sk->sk_prot->max_header); rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); - if (rc == 0) { - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; - long delay; - - /* - * XXX: This is just to match the Waikato tree CA interaction - * points, after the CCID3 code is stable and I have a better - * understanding of behaviour I'll change this to look more like - * TCP. - */ - while (1) { - rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, - skb, len, &delay); - if (rc == 0) - break; - if (rc != -EAGAIN) - goto out_discard; - if (delay > timeo) - goto out_discard; - release_sock(sk); - delay = schedule_timeout(delay); - lock_sock(sk); - timeo -= delay; - if (signal_pending(current)) - goto out_interrupted; - rc = -EPIPE; - if (!(sk->sk_state == DCCP_PARTOPEN || sk->sk_state == DCCP_OPEN)) - goto out_discard; - } + if (rc != 0) + goto out_discard; - if (sk->sk_state == DCCP_PARTOPEN) { - /* See 8.1.5. Handshake Completion */ - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); - dcb->dccpd_type = DCCP_PKT_DATAACK; - /* FIXME: we really should have a dccps_ack_pending or use icsk */ - } else if (inet_csk_ack_scheduled(sk) || - (dp->dccps_options.dccpo_send_ack_vector && - ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && - ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) - dcb->dccpd_type = DCCP_PKT_DATAACK; - else - dcb->dccpd_type = DCCP_PKT_DATA; - dccp_transmit_skb(sk, skb); - ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); - } else { -out_discard: - kfree_skb(skb); - } + rc = dccp_write_xmit(sk, skb, len); out_release: release_sock(sk); return rc ? : len; -out_err: - rc = sk_stream_error(sk, flags, rc); +out_discard: + kfree_skb(skb); goto out_release; -out_interrupted: - rc = sock_intr_errno(timeo); - goto out_discard; } EXPORT_SYMBOL(dccp_sendmsg); -- cgit v1.2.3 From 0b4e03bf0bc43ad6250a1e2fa25fc3eb2b028977 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:31:11 -0700 Subject: [DCCP]: Initialize icsk_rto in dccp_v4_init_sock Fixes nasty bug related to the retransmit timer (yeah, DCCP does retransmits) firing too early. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 4fa56dbcbea..6bccf4dd1e7 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1219,6 +1219,7 @@ static int dccp_v4_init_sock(struct sock *sk) dccp_ctl_socket_init = 0; dccp_init_xmit_timers(sk); + inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT; sk->sk_state = DCCP_CLOSED; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; -- cgit v1.2.3 From 64cf1e5d8b5f88d56509260e08fa0d8314277350 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:45:21 -0700 Subject: [DCCP]: Finish the TIMEWAIT minisock support Using most of the infrastructure TCP uses, with a dccp_death_row, etc. As per my current interpretation of the draft what we have with this changeset seems to be all we need (or very close to it 8)). Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 15 ++++++++----- net/dccp/minisocks.c | 60 +++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 60 insertions(+), 15 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 6bccf4dd1e7..f6da9328221 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -97,7 +97,7 @@ static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); } else if (tw != NULL) { /* Silly. Should hash-dance instead... */ - dccp_tw_deschedule(tw); + inet_twsk_deschedule(tw, &dccp_death_row); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); inet_twsk_put(tw); @@ -201,7 +201,7 @@ ok: spin_unlock(&head->lock); if (tw != NULL) { - dccp_tw_deschedule(tw); + inet_twsk_deschedule(tw, &dccp_death_row); inet_twsk_put(tw); } @@ -1131,8 +1131,9 @@ int dccp_v4_rcv(struct sk_buff *skb) */ if (sk->sk_state == DCCP_TIME_WAIT) { - dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: discard_and_relse\n"); - goto discard_and_relse; + dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: " + "do_time_wait\n"); + goto do_time_wait; } if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { @@ -1179,6 +1180,10 @@ discard_it: discard_and_relse: sock_put(sk); goto discard_it; + +do_time_wait: + inet_twsk_put((struct inet_timewait_sock *)sk); + goto no_dccp_socket; } static int dccp_v4_init_sock(struct sock *sk) @@ -1290,5 +1295,5 @@ struct proto dccp_v4_prot = { .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp_sock), .rsk_prot = &dccp_request_sock_ops, - .twsk_obj_size = sizeof(struct inet_timewait_sock), /* FIXME! create dccp_timewait_sock */ + .twsk_obj_size = sizeof(struct inet_timewait_sock), }; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index e498e389fcc..a6a0b270fb6 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -22,18 +22,58 @@ #include "ccid.h" #include "dccp.h" +struct inet_timewait_death_row dccp_death_row = { + .sysctl_max_tw_buckets = NR_FILE * 2, + .period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, + .death_lock = SPIN_LOCK_UNLOCKED, + .hashinfo = &dccp_hashinfo, + .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, + (unsigned long)&dccp_death_row), + .twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work, + inet_twdr_twkill_work, + &dccp_death_row), +/* Short-time timewait calendar */ + + .twcal_hand = -1, + .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, + (unsigned long)&dccp_death_row), +}; + void dccp_time_wait(struct sock *sk, int state, int timeo) { - /* FIXME: Implement */ - dccp_pr_debug("Want to help? Start here\n"); - dccp_set_state(sk, state); -} + struct inet_timewait_sock *tw = NULL; -/* This is for handling early-kills of TIME_WAIT sockets. */ -void dccp_tw_deschedule(struct inet_timewait_sock *tw) -{ - dccp_pr_debug("Want to help? Start here\n"); - __inet_twsk_kill(tw, &dccp_hashinfo); + if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets) + tw = inet_twsk_alloc(sk, state); + + if (tw != NULL) { + const struct inet_connection_sock *icsk = inet_csk(sk); + const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); + + /* Linkage updates. */ + __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); + + /* Get the TIME_WAIT timeout firing. */ + if (timeo < rto) + timeo = rto; + + tw->tw_timeout = DCCP_TIMEWAIT_LEN; + if (state == DCCP_TIME_WAIT) + timeo = DCCP_TIMEWAIT_LEN; + + inet_twsk_schedule(tw, &dccp_death_row, timeo, + DCCP_TIMEWAIT_LEN); + inet_twsk_put(tw); + } else { + /* Sorry, if we're out of memory, just CLOSE this + * socket up. We've got bigger problems than + * non-graceful socket closings. + */ + if (net_ratelimit()) + printk(KERN_INFO "DCCP: time wait bucket table overflow\n"); + } + + dccp_done(sk); } struct sock *dccp_create_openreq_child(struct sock *sk, @@ -55,7 +95,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newdp->dccps_hc_rx_ackpkts = NULL; newdp->dccps_role = DCCP_ROLE_SERVER; - newicsk->icsk_rto = TCP_TIMEOUT_INIT; + newicsk->icsk_rto = DCCP_TIMEOUT_INIT; if (newdp->dccps_options.dccpo_send_ack_vector) { newdp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, -- cgit v1.2.3 From 64ce207306debd7157f47282be94770407bec01c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 9 Aug 2005 20:50:53 -0700 Subject: [NET]: Make NETDEBUG pure printk wrappers Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/dccp/input.c | 2 +- net/dccp/options.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/input.c b/net/dccp/input.c index 76c3401e93a..bdaecde0bde 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -161,7 +161,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKPKTS_STATE_RECEIVED)) { - LIMIT_NETDEBUG(pr_info("DCCP: acknowledgeable packets buffer full!\n")); + LIMIT_NETDEBUG(KERN_INFO "DCCP: acknowledgeable packets buffer full!\n"); ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; inet_csk_schedule_ack(sk); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX); diff --git a/net/dccp/options.c b/net/dccp/options.c index 9ca32cba83a..5bf997683a1 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -231,7 +231,7 @@ void dccp_insert_option(struct sock *sk, struct sk_buff *skb, unsigned char *to; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert %d option!\n", option)); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert %d option!\n", option); return; } @@ -299,7 +299,7 @@ void dccp_insert_option_elapsed_time(struct sock *sk, return; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert elapsed time!\n")); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert elapsed time!\n"); return; } @@ -335,7 +335,7 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) dccp_insert_option_elapsed_time(sk, skb, elapsed_time); if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert ACK Vector!\n")); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert ACK Vector!\n"); return; } @@ -412,7 +412,7 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *s unsigned char *to; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert timestamp echo!\n")); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert timestamp echo!\n"); return; } -- cgit v1.2.3 From 540722ffc3a0d7e11d97a13e1ce6f3bc23b061c1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 10 Aug 2005 05:54:28 -0300 Subject: [TCPDIAG]: Implement cheapest way of supporting DCCPDIAG_GETSOCK With ugly ifdefs, etc, but this actually: 1. keeps the existing ABI, i.e. no need to recompile the iproute2 utilities if not interested in DCCP. 2. Provides all the tcp_diag functionality in DCCP, with just a small patch that makes iproute2 support DCCP. Of course I'll get this cleaned-up in time, but for now I think its OK to be this way to quickly get this functionality. iproute2-ss050808 patch at: http://vger.kernel.org/~acme/iproute2-ss050808.dccp.patch Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index f6da9328221..d3770aed3b1 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -34,6 +34,8 @@ struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { .port_rover = 1024 - 1, }; +EXPORT_SYMBOL_GPL(dccp_hashinfo); + static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) { return inet_csk_get_port(&dccp_hashinfo, sk, snum); -- cgit v1.2.3 From 8c60f3fab55712f23f2bd557ceedfbb00c649f37 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 10 Aug 2005 12:59:38 -0300 Subject: [CCID3]: Separate most of the packet history code This also changes the list_for_each_entry_safe_continue behaviour to match its kerneldoc comment, that is, to start after the pos passed. Also adds several helper functions from previously open coded fragments, making the code more clear. Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/Makefile | 3 +- net/dccp/ccids/ccid3.c | 327 +++++++++++++++++++--------------------------- net/dccp/ccids/ccid3.h | 18 --- net/dccp/packet_history.c | 198 ++++++++++++++++++++++++++++ net/dccp/packet_history.h | 182 ++++++++++++++++++++++++++ 5 files changed, 516 insertions(+), 212 deletions(-) create mode 100644 net/dccp/packet_history.c create mode 100644 net/dccp/packet_history.h (limited to 'net/dccp') diff --git a/net/dccp/Makefile b/net/dccp/Makefile index c6e6ba55c36..25a50bdbf1b 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_IP_DCCP) += dccp.o -dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o timer.o +dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ + timer.o packet_history.o obj-y += ccids/ diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index df4adfeaafa..15c25f62200 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -34,8 +34,10 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include #include "../ccid.h" #include "../dccp.h" +#include "../packet_history.h" #include "ccid3.h" #ifdef CCID3_DEBUG @@ -82,60 +84,10 @@ enum ccid3_options { static int ccid3_debug; -static kmem_cache_t *ccid3_tx_hist_slab; -static kmem_cache_t *ccid3_rx_hist_slab; -static kmem_cache_t *ccid3_loss_interval_hist_slab; - -static inline struct ccid3_tx_hist_entry *ccid3_tx_hist_entry_new(int prio) -{ - struct ccid3_tx_hist_entry *entry = kmem_cache_alloc(ccid3_tx_hist_slab, prio); - - if (entry != NULL) - entry->ccid3htx_sent = 0; - - return entry; -} - -static inline void ccid3_tx_hist_entry_delete(struct ccid3_tx_hist_entry *entry) -{ - if (entry != NULL) - kmem_cache_free(ccid3_tx_hist_slab, entry); -} - -static inline struct ccid3_rx_hist_entry *ccid3_rx_hist_entry_new(struct sock *sk, - struct sk_buff *skb, - int prio) -{ - struct ccid3_rx_hist_entry *entry = kmem_cache_alloc(ccid3_rx_hist_slab, prio); - - if (entry != NULL) { - const struct dccp_hdr *dh = dccp_hdr(skb); - - entry->ccid3hrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; - entry->ccid3hrx_win_count = dh->dccph_ccval; - entry->ccid3hrx_type = dh->dccph_type; - entry->ccid3hrx_ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; - do_gettimeofday(&(entry->ccid3hrx_tstamp)); - } - - return entry; -} - -static inline void ccid3_rx_hist_entry_delete(struct ccid3_rx_hist_entry *entry) -{ - if (entry != NULL) - kmem_cache_free(ccid3_rx_hist_slab, entry); -} +struct dccp_tx_hist *ccid3_tx_hist; +struct dccp_rx_hist *ccid3_rx_hist; -static void ccid3_rx_history_delete(struct list_head *hist) -{ - struct ccid3_rx_hist_entry *entry, *next; - - list_for_each_entry_safe(entry, next, hist, ccid3hrx_node) { - list_del_init(&entry->ccid3hrx_node); - kmem_cache_free(ccid3_rx_hist_slab, entry); - } -} +static kmem_cache_t *ccid3_loss_interval_hist_slab; static inline struct ccid3_loss_interval_hist_entry *ccid3_loss_interval_hist_entry_new(int prio) { @@ -982,7 +934,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; - struct ccid3_tx_hist_entry *new_packet = NULL; + struct dccp_tx_hist_entry *new_packet; struct timeval now; long delay; int rc = -ENOTCONN; @@ -997,12 +949,9 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, goto out; /* See if last packet allocated was not sent */ - if (!list_empty(&hctx->ccid3hctx_hist)) - new_packet = list_entry(hctx->ccid3hctx_hist.next, - struct ccid3_tx_hist_entry, ccid3htx_node); - - if (new_packet == NULL || new_packet->ccid3htx_sent) { - new_packet = ccid3_tx_hist_entry_new(SLAB_ATOMIC); + new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); + if (new_packet == NULL || new_packet->dccphtx_sent) { + new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist, SLAB_ATOMIC); rc = -ENOBUFS; if (new_packet == NULL) { @@ -1011,7 +960,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, goto out; } - list_add(&new_packet->ccid3htx_node, &hctx->ccid3hctx_hist); + dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet); } do_gettimeofday(&now); @@ -1054,7 +1003,9 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, /* Can we send? if so add options and add to packet history */ if (rc == 0) - new_packet->ccid3htx_win_count = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + new_packet->dccphtx_win_count = + DCCP_SKB_CB(skb)->dccpd_ccval = + hctx->ccid3hctx_last_win_count; out: return rc; } @@ -1063,7 +1014,6 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; - struct ccid3_tx_hist_entry *packet = NULL; struct timeval now; // ccid3_pr_debug("%s, sk=%p, more=%d, len=%d\n", dccp_role(sk), sk, more, len); @@ -1080,20 +1030,23 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) /* check if we have sent a data packet */ if (len > 0) { unsigned long quarter_rtt; + struct dccp_tx_hist_entry *packet; - if (list_empty(&hctx->ccid3hctx_hist)) { + packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); + if (packet == NULL) { printk(KERN_CRIT "%s: packet doesn't exists in history!\n", __FUNCTION__); return; } - packet = list_entry(hctx->ccid3hctx_hist.next, struct ccid3_tx_hist_entry, ccid3htx_node); - if (packet->ccid3htx_sent) { + if (packet->dccphtx_sent) { printk(KERN_CRIT "%s: no unsent packet in history!\n", __FUNCTION__); return; } - packet->ccid3htx_tstamp = now; - packet->ccid3htx_seqno = dp->dccps_gss; - // ccid3_pr_debug("%s, sk=%p, seqno=%llu inserted!\n", dccp_role(sk), sk, packet->ccid3htx_seqno); - + packet->dccphtx_tstamp = now; + packet->dccphtx_seqno = dp->dccps_gss; +#if 0 + ccid3_pr_debug("%s, sk=%p, seqno=%llu inserted!\n", + dccp_role(sk), sk, packet->dccphtx_seqno); +#endif /* * Check if win_count have changed */ /* COMPLIANCE_BEGIN @@ -1106,18 +1059,18 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) min_t(unsigned long, quarter_rtt, 5)) % 16; ccid3_pr_debug("%s, sk=%p, window changed from %u to %u!\n", dccp_role(sk), sk, - packet->ccid3htx_win_count, + packet->dccphtx_win_count, hctx->ccid3hctx_last_win_count); } /* COMPLIANCE_END */ #if 0 ccid3_pr_debug("%s, sk=%p, packet sent (%llu,%u)\n", dccp_role(sk), sk, - packet->ccid3htx_seqno, - packet->ccid3htx_win_count); + packet->dccphtx_seqno, + packet->dccphtx_win_count); #endif hctx->ccid3hctx_idle = 0; - packet->ccid3htx_sent = 1; + packet->dccphtx_sent = 1; } else ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n", dccp_role(sk), sk, dp->dccps_gss); @@ -1152,7 +1105,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; struct ccid3_options_received *opt_recv; - struct ccid3_tx_hist_entry *entry, *next, *packet; + struct dccp_tx_hist_entry *packet; unsigned long next_tmout; u16 t_elapsed; u32 pinv; @@ -1191,13 +1144,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* Calculate new round trip sample by * R_sample = (now - t_recvdata) - t_delay */ /* get t_recvdata from history */ - packet = NULL; - list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) - if (entry->ccid3htx_seqno == DCCP_SKB_CB(skb)->dccpd_ack_seq) { - packet = entry; - break; - } - + packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist, + DCCP_SKB_CB(skb)->dccpd_ack_seq); if (packet == NULL) { ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't exist in history!\n", dccp_role(sk), sk, DCCP_SKB_CB(skb)->dccpd_ack_seq, @@ -1206,7 +1154,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* Update RTT */ - r_sample = now_delta(packet->ccid3htx_tstamp); + r_sample = now_delta(packet->dccphtx_tstamp); /* FIXME: */ // r_sample -= usecs_to_jiffies(t_elapsed * 10); @@ -1273,10 +1221,9 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid3_calc_new_delta(hctx); /* remove all packets older than the one acked from history */ - list_for_each_entry_safe_continue(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) { - list_del_init(&entry->ccid3htx_node); - ccid3_tx_hist_entry_delete(entry); - } + dccp_tx_hist_purge_older(ccid3_tx_hist, + &hctx->ccid3hctx_hist, packet); + if (hctx->ccid3hctx_x < 10) { ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx->ccid3hctx_x < 10\n"); hctx->ccid3hctx_x = 10; @@ -1285,7 +1232,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ next_tmout = max(inet_csk(sk)->icsk_rto, - 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x/10)); + (2 * (hctx->ccid3hctx_s * 100000) / + (hctx->ccid3hctx_x / 10))); /* maths with 100000 and 10 is to prevent overflow with 32 bit */ ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to expire in %lu jiffies (%luus)\n", @@ -1408,7 +1356,6 @@ static void ccid3_hc_tx_exit(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; - struct ccid3_tx_hist_entry *entry, *next; ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); BUG_ON(hctx == NULL); @@ -1417,10 +1364,7 @@ static void ccid3_hc_tx_exit(struct sock *sk) sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); /* Empty packet history */ - list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) { - list_del_init(&entry->ccid3htx_node); - ccid3_tx_hist_entry_delete(entry); - } + dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist); kfree(dp->dccps_hc_tx_ccid_private); dp->dccps_hc_tx_ccid_private = NULL; @@ -1462,39 +1406,40 @@ static inline void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_state hcrx->ccid3hcrx_state = state; } -static int ccid3_hc_rx_add_hist(struct sock *sk, struct ccid3_rx_hist_entry *packet) +static int ccid3_hc_rx_add_hist(struct sock *sk, + struct dccp_rx_hist_entry *packet) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct ccid3_rx_hist_entry *entry, *next; + struct dccp_rx_hist_entry *entry, *next, *iter; u8 num_later = 0; - if (list_empty(&hcrx->ccid3hcrx_hist)) - list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); + iter = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); + if (iter == NULL) + dccp_rx_hist_add_entry(&hcrx->ccid3hcrx_hist, packet); else { - u64 seqno = packet->ccid3hrx_seqno; - struct ccid3_rx_hist_entry *iter = list_entry(hcrx->ccid3hcrx_hist.next, - struct ccid3_rx_hist_entry, - ccid3hrx_node); - if (after48(seqno, iter->ccid3hrx_seqno)) - list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); + const u64 seqno = packet->dccphrx_seqno; + + if (after48(seqno, iter->dccphrx_seqno)) + dccp_rx_hist_add_entry(&hcrx->ccid3hcrx_hist, packet); else { - if (iter->ccid3hrx_type == DCCP_PKT_DATA || - iter->ccid3hrx_type == DCCP_PKT_DATAACK) + if (dccp_rx_hist_entry_data_packet(iter)) num_later = 1; - list_for_each_entry_continue(iter, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { - if (after48(seqno, iter->ccid3hrx_seqno)) { - list_add(&packet->ccid3hrx_node, &iter->ccid3hrx_node); + list_for_each_entry_continue(iter, + &hcrx->ccid3hcrx_hist, + dccphrx_node) { + if (after48(seqno, iter->dccphrx_seqno)) { + dccp_rx_hist_add_entry(&iter->dccphrx_node, + packet); goto trim_history; } - if (iter->ccid3hrx_type == DCCP_PKT_DATA || - iter->ccid3hrx_type == DCCP_PKT_DATAACK) + if (dccp_rx_hist_entry_data_packet(iter)) num_later++; if (num_later == TFRC_RECV_NUM_LATE_LOSS) { - ccid3_rx_hist_entry_delete(packet); + dccp_rx_hist_entry_delete(ccid3_rx_hist, packet); ccid3_pr_debug("%s, sk=%p, packet(%llu) already lost!\n", dccp_role(sk), sk, seqno); return 1; @@ -1502,7 +1447,8 @@ static int ccid3_hc_rx_add_hist(struct sock *sk, struct ccid3_rx_hist_entry *pac } if (num_later < TFRC_RECV_NUM_LATE_LOSS) - list_add_tail(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); + dccp_rx_hist_add_entry(&hcrx->ccid3hcrx_hist, + packet); /* FIXME: else what? should we destroy the packet like above? */ } } @@ -1512,12 +1458,12 @@ trim_history: num_later = TFRC_RECV_NUM_LATE_LOSS + 1; if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { - list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, + dccphrx_node) { if (num_later == 0) { - list_del_init(&entry->ccid3hrx_node); - ccid3_rx_hist_entry_delete(entry); - } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || - entry->ccid3hrx_type == DCCP_PKT_DATAACK) + list_del_init(&entry->dccphrx_node); + dccp_rx_hist_entry_delete(ccid3_rx_hist, entry); + } else if (dccp_rx_hist_entry_data_packet(entry)) --num_later; } } else { @@ -1528,7 +1474,8 @@ trim_history: * We have no loss interval history so we need at least one * rtt:s of data packets to approximate rtt. */ - list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, + dccphrx_node) { if (num_later == 0) { switch (step) { case 0: @@ -1540,10 +1487,11 @@ trim_history: step = 2; /* OK, find next data packet */ num_later = 1; - win_count = entry->ccid3hrx_win_count; + win_count = entry->dccphrx_win_count; break; case 2: - tmp = win_count - entry->ccid3hrx_win_count; + tmp = (win_count - + entry->dccphrx_win_count); if (tmp < 0) tmp += TFRC_WIN_COUNT_LIMIT; if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { @@ -1554,12 +1502,11 @@ trim_history: num_later = 1; break; case 3: - list_del_init(&entry->ccid3hrx_node); - ccid3_rx_hist_entry_delete(entry); + list_del_init(&entry->dccphrx_node); + dccp_rx_hist_entry_delete(ccid3_rx_hist, entry); break; } - } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || - entry->ccid3hrx_type == DCCP_PKT_DATAACK) + } else if (dccp_rx_hist_entry_data_packet(entry)) --num_later; } } @@ -1571,7 +1518,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct ccid3_rx_hist_entry *entry, *packet; + struct dccp_rx_hist_entry *packet; ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); @@ -1594,14 +1541,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) return; } - packet = NULL; - list_for_each_entry(entry, &hcrx->ccid3hcrx_hist, ccid3hrx_node) - if (entry->ccid3hrx_type == DCCP_PKT_DATA || - entry->ccid3hrx_type == DCCP_PKT_DATAACK) { - packet = entry; - break; - } - + packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist); if (packet == NULL) { printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n", __FUNCTION__, dccp_role(sk), sk); @@ -1610,12 +1550,12 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) } do_gettimeofday(&(hcrx->ccid3hcrx_tstamp_last_feedback)); - hcrx->ccid3hcrx_last_counter = packet->ccid3hrx_win_count; - hcrx->ccid3hcrx_seqno_last_counter = packet->ccid3hrx_seqno; + hcrx->ccid3hcrx_last_counter = packet->dccphrx_win_count; + hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno; hcrx->ccid3hcrx_bytes_recv = 0; /* Convert to multiples of 10us */ - hcrx->ccid3hcrx_elapsed_time = now_delta(packet->ccid3hrx_tstamp) / 10; + hcrx->ccid3hcrx_elapsed_time = now_delta(packet->dccphrx_tstamp) / 10; if (hcrx->ccid3hcrx_p == 0) hcrx->ccid3hcrx_pinv = ~0; else @@ -1686,7 +1626,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct ccid3_rx_hist_entry *entry, *next, *tail = NULL; + struct dccp_rx_hist_entry *entry, *next, *tail = NULL; u32 rtt, delta, x_recv, fval, p, tmp2; struct timeval tstamp, tmp_tv; int interval = 0; @@ -1694,19 +1634,19 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) int step = 0; u64 tmp1; - list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { - if (entry->ccid3hrx_type == DCCP_PKT_DATA || - entry->ccid3hrx_type == DCCP_PKT_DATAACK) { + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, + dccphrx_node) { + if (dccp_rx_hist_entry_data_packet(entry)) { tail = entry; switch (step) { case 0: - tstamp = entry->ccid3hrx_tstamp; - win_count = entry->ccid3hrx_win_count; + tstamp = entry->dccphrx_tstamp; + win_count = entry->dccphrx_win_count; step = 1; break; case 1: - interval = win_count - entry->ccid3hrx_win_count; + interval = win_count - entry->dccphrx_win_count; if (interval < 0) interval += TFRC_WIN_COUNT_LIMIT; if (interval > 4) @@ -1728,7 +1668,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) interval = 1; } found: - timeval_sub(tstamp,tail->ccid3hrx_tstamp,&tmp_tv); + timeval_sub(tstamp,tail->dccphrx_tstamp,&tmp_tv); rtt = (tmp_tv.tv_sec * USEC_IN_SEC + tmp_tv.tv_usec) * 4 / interval; ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", dccp_role(sk), sk, rtt); @@ -1797,34 +1737,33 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct ccid3_rx_hist_entry *entry, *a_next, *b_next, *packet; - struct ccid3_rx_hist_entry *a_loss = NULL; - struct ccid3_rx_hist_entry *b_loss = NULL; + struct dccp_rx_hist_entry *entry, *next, *packet; + struct dccp_rx_hist_entry *a_loss = NULL; + struct dccp_rx_hist_entry *b_loss = NULL; u64 seq_loss = DCCP_MAX_SEQNO + 1; u8 win_loss = 0; u8 num_later = TFRC_RECV_NUM_LATE_LOSS; - list_for_each_entry_safe(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, + dccphrx_node) { if (num_later == 0) { b_loss = entry; break; - } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || - entry->ccid3hrx_type == DCCP_PKT_DATAACK) + } else if (dccp_rx_hist_entry_data_packet(entry)) --num_later; } if (b_loss == NULL) goto out_update_li; - a_next = b_next; num_later = 1; - list_for_each_entry_safe_continue(entry, a_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + list_for_each_entry_safe_continue(entry, next, &hcrx->ccid3hcrx_hist, + dccphrx_node) { if (num_later == 0) { a_loss = entry; break; - } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || - entry->ccid3hrx_type == DCCP_PKT_DATAACK) + } else if (dccp_rx_hist_entry_data_packet(entry)) --num_later; } @@ -1844,12 +1783,13 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) /* Locate a lost data packet */ entry = packet = b_loss; - list_for_each_entry_safe_continue(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { - u64 delta = dccp_delta_seqno(entry->ccid3hrx_seqno, packet->ccid3hrx_seqno); + list_for_each_entry_safe_continue(entry, next, &hcrx->ccid3hcrx_hist, + dccphrx_node) { + u64 delta = dccp_delta_seqno(entry->dccphrx_seqno, + packet->dccphrx_seqno); if (delta != 0) { - if (packet->ccid3hrx_type == DCCP_PKT_DATA || - packet->ccid3hrx_type == DCCP_PKT_DATAACK) + if (dccp_rx_hist_entry_data_packet(packet)) --delta; /* * FIXME: check this, probably this % usage is because @@ -1858,10 +1798,12 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) */ #if 0 if (delta % DCCP_NDP_LIMIT != - (packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) % DCCP_NDP_LIMIT) + (packet->dccphrx_ndp - + entry->dccphrx_ndp) % DCCP_NDP_LIMIT) #endif - if (delta != packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) { - seq_loss = entry->ccid3hrx_seqno; + if (delta != + packet->dccphrx_ndp - entry->dccphrx_ndp) { + seq_loss = entry->dccphrx_seqno; dccp_inc_seqno(&seq_loss); } } @@ -1871,7 +1813,7 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) } if (seq_loss != DCCP_MAX_SEQNO + 1) - win_loss = a_loss->ccid3hrx_win_count; + win_loss = a_loss->dccphrx_win_count; out_update_li: ccid3_hc_rx_update_li(sk, seq_loss, win_loss); @@ -1920,7 +1862,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct ccid3_rx_hist_entry *packet; + struct dccp_rx_hist_entry *packet; struct timeval now; u8 win_count; u32 p_prev; @@ -1964,14 +1906,16 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) return; } - packet = ccid3_rx_hist_entry_new(sk, skb, SLAB_ATOMIC); + packet = dccp_rx_hist_entry_new(ccid3_rx_hist, + dp->dccps_options_received.dccpor_ndp, + skb, SLAB_ATOMIC); if (packet == NULL) { ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet to history (consider it lost)!", dccp_role(sk), sk); return; } - win_count = packet->ccid3hrx_win_count; + win_count = packet->dccphrx_win_count; ins = ccid3_hc_rx_add_hist(sk, packet); @@ -2060,7 +2004,7 @@ static void ccid3_hc_rx_exit(struct sock *sk) ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); /* Empty packet history */ - ccid3_rx_history_delete(&hcrx->ccid3hcrx_hist); + dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist); /* Empty loss interval history */ ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist); @@ -2093,41 +2037,38 @@ MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); static __init int ccid3_module_init(void) { - int rc = -ENOMEM; + int rc = -ENOBUFS; - ccid3_tx_hist_slab = kmem_cache_create("dccp_ccid3_tx_history", - sizeof(struct ccid3_tx_hist_entry), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (ccid3_tx_hist_slab == NULL) + ccid3_rx_hist = dccp_rx_hist_new("ccid3"); + if (ccid3_rx_hist == NULL) goto out; - ccid3_rx_hist_slab = kmem_cache_create("dccp_ccid3_rx_history", - sizeof(struct ccid3_rx_hist_entry), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (ccid3_rx_hist_slab == NULL) - goto out_free_tx_history; + ccid3_tx_hist = dccp_tx_hist_new("ccid3"); + if (ccid3_tx_hist == NULL) + goto out_free_rx; - ccid3_loss_interval_hist_slab = kmem_cache_create("dccp_ccid3_loss_interval_history", - sizeof(struct ccid3_loss_interval_hist_entry), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + ccid3_loss_interval_hist_slab = kmem_cache_create("li_hist_ccid3", + sizeof(struct ccid3_loss_interval_hist_entry), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); if (ccid3_loss_interval_hist_slab == NULL) - goto out_free_rx_history; + goto out_free_tx; rc = ccid_register(&ccid3); if (rc != 0) goto out_free_loss_interval_history; - out: return rc; + out_free_loss_interval_history: kmem_cache_destroy(ccid3_loss_interval_hist_slab); ccid3_loss_interval_hist_slab = NULL; -out_free_rx_history: - kmem_cache_destroy(ccid3_rx_hist_slab); - ccid3_rx_hist_slab = NULL; -out_free_tx_history: - kmem_cache_destroy(ccid3_tx_hist_slab); - ccid3_tx_hist_slab = NULL; +out_free_tx: + dccp_tx_hist_delete(ccid3_tx_hist); + ccid3_tx_hist = NULL; +out_free_rx: + dccp_rx_hist_delete(ccid3_rx_hist); + ccid3_rx_hist = NULL; goto out; } module_init(ccid3_module_init); @@ -2136,13 +2077,13 @@ static __exit void ccid3_module_exit(void) { ccid_unregister(&ccid3); - if (ccid3_tx_hist_slab != NULL) { - kmem_cache_destroy(ccid3_tx_hist_slab); - ccid3_tx_hist_slab = NULL; + if (ccid3_tx_hist != NULL) { + dccp_tx_hist_delete(ccid3_tx_hist); + ccid3_tx_hist = NULL; } - if (ccid3_rx_hist_slab != NULL) { - kmem_cache_destroy(ccid3_rx_hist_slab); - ccid3_rx_hist_slab = NULL; + if (ccid3_rx_hist != NULL) { + dccp_rx_hist_delete(ccid3_rx_hist); + ccid3_rx_hist = NULL; } if (ccid3_loss_interval_hist_slab != NULL) { kmem_cache_destroy(ccid3_loss_interval_hist_slab); diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 5d6b623e64d..d2705fb7419 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -38,15 +38,6 @@ #include #include -#include - -struct ccid3_tx_hist_entry { - struct list_head ccid3htx_node; - u64 ccid3htx_seqno:48, - ccid3htx_win_count:8, - ccid3htx_sent:1; - struct timeval ccid3htx_tstamp; -}; struct ccid3_options_received { u64 ccid3or_seqno:48, @@ -102,15 +93,6 @@ struct ccid3_loss_interval_hist_entry { u32 ccid3lih_interval; }; -struct ccid3_rx_hist_entry { - struct list_head ccid3hrx_node; - u64 ccid3hrx_seqno:48, - ccid3hrx_win_count:4, - ccid3hrx_type:4; - u32 ccid3hrx_ndp; /* In fact it is from 8 to 24 bits */ - struct timeval ccid3hrx_tstamp; -}; - struct ccid3_hc_rx_sock { u64 ccid3hcrx_seqno_last_counter:48, ccid3hcrx_state:8, diff --git a/net/dccp/packet_history.c b/net/dccp/packet_history.c new file mode 100644 index 00000000000..6b414898f0c --- /dev/null +++ b/net/dccp/packet_history.c @@ -0,0 +1,198 @@ +/* + * net/dccp/packet_history.h + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * + * An implementation of the DCCP protocol + * + * This code has been developed by the University of Waikato WAND + * research group. For further information please see http://www.wand.net.nz/ + * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * + * This code also uses code from Lulea University, rereleased as GPL by its + * authors: + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * Changes to meet Linux coding standards, to make it meet latest ccid3 draft + * and to make it work as a loadable module in the DCCP stack written by + * Arnaldo Carvalho de Melo . + * + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include + +#include "packet_history.h" + +struct dccp_rx_hist *dccp_rx_hist_new(const char *name) +{ + struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); + static const char dccp_rx_hist_mask[] = "rx_hist_%s"; + char *slab_name; + + if (hist == NULL) + goto out; + + slab_name = kmalloc(strlen(name) + sizeof(dccp_rx_hist_mask) - 1, + GFP_ATOMIC); + if (slab_name == NULL) + goto out_free_hist; + + sprintf(slab_name, dccp_rx_hist_mask, name); + hist->dccprxh_slab = kmem_cache_create(slab_name, + sizeof(struct dccp_rx_hist_entry), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (hist->dccprxh_slab == NULL) + goto out_free_slab_name; +out: + return hist; +out_free_slab_name: + kfree(slab_name); +out_free_hist: + kfree(hist); + hist = NULL; + goto out; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_new); + +void dccp_rx_hist_delete(struct dccp_rx_hist *hist) +{ + const char* name = kmem_cache_name(hist->dccprxh_slab); + + kmem_cache_destroy(hist->dccprxh_slab); + kfree(name); + kfree(hist); +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_delete); + +void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list) +{ + struct dccp_rx_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, list, dccphrx_node) { + list_del_init(&entry->dccphrx_node); + kmem_cache_free(hist->dccprxh_slab, entry); + } +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_purge); + +struct dccp_rx_hist_entry * + dccp_rx_hist_find_data_packet(const struct list_head *list) +{ + struct dccp_rx_hist_entry *entry, *packet = NULL; + + list_for_each_entry(entry, list, dccphrx_node) + if (entry->dccphrx_type == DCCP_PKT_DATA || + entry->dccphrx_type == DCCP_PKT_DATAACK) { + packet = entry; + break; + } + + return packet; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet); + +struct dccp_tx_hist *dccp_tx_hist_new(const char *name) +{ + struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); + static const char dccp_tx_hist_mask[] = "tx_hist_%s"; + char *slab_name; + + if (hist == NULL) + goto out; + + slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1, + GFP_ATOMIC); + if (slab_name == NULL) + goto out_free_hist; + + sprintf(slab_name, dccp_tx_hist_mask, name); + hist->dccptxh_slab = kmem_cache_create(slab_name, + sizeof(struct dccp_tx_hist_entry), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (hist->dccptxh_slab == NULL) + goto out_free_slab_name; +out: + return hist; +out_free_slab_name: + kfree(slab_name); +out_free_hist: + kfree(hist); + hist = NULL; + goto out; +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_new); + +void dccp_tx_hist_delete(struct dccp_tx_hist *hist) +{ + const char* name = kmem_cache_name(hist->dccptxh_slab); + + kmem_cache_destroy(hist->dccptxh_slab); + kfree(name); + kfree(hist); +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_delete); + +struct dccp_tx_hist_entry *dccp_tx_hist_find_entry(const struct list_head *list, + const u64 seq) +{ + struct dccp_tx_hist_entry *packet = NULL, *entry; + + list_for_each_entry(entry, list, dccphtx_node) + if (entry->dccphtx_seqno == seq) { + packet = entry; + break; + } + + return packet; +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); + +void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, struct list_head *list, + struct dccp_tx_hist_entry *packet) +{ + struct dccp_tx_hist_entry *next; + + list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) { + list_del_init(&packet->dccphtx_node); + dccp_tx_hist_entry_delete(hist, packet); + } +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older); + +void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) +{ + struct dccp_tx_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, list, dccphtx_node) { + list_del_init(&entry->dccphtx_node); + dccp_tx_hist_entry_delete(hist, entry); + } +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); diff --git a/net/dccp/packet_history.h b/net/dccp/packet_history.h new file mode 100644 index 00000000000..565dc96506e --- /dev/null +++ b/net/dccp/packet_history.h @@ -0,0 +1,182 @@ +/* + * net/dccp/packet_history.h + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * + * An implementation of the DCCP protocol + * + * This code has been developed by the University of Waikato WAND + * research group. For further information please see http://www.wand.net.nz/ + * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * + * This code also uses code from Lulea University, rereleased as GPL by its + * authors: + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * Changes to meet Linux coding standards, to make it meet latest ccid3 draft + * and to make it work as a loadable module in the DCCP stack written by + * Arnaldo Carvalho de Melo . + * + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _DCCP_PKT_HIST_ +#define _DCCP_PKT_HIST_ + +#include +#include +#include +#include + +#include "dccp.h" + +struct dccp_tx_hist_entry { + struct list_head dccphtx_node; + u64 dccphtx_seqno:48, + dccphtx_win_count:8, + dccphtx_sent:1; + struct timeval dccphtx_tstamp; +}; + +struct dccp_rx_hist_entry { + struct list_head dccphrx_node; + u64 dccphrx_seqno:48, + dccphrx_win_count:4, + dccphrx_type:4; + u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ + struct timeval dccphrx_tstamp; +}; + +struct dccp_tx_hist { + kmem_cache_t *dccptxh_slab; +}; + +extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name); +extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist); + +struct dccp_rx_hist { + kmem_cache_t *dccprxh_slab; +}; + +extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name); +extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist); +extern struct dccp_rx_hist_entry * + dccp_rx_hist_find_data_packet(const struct list_head *list); + +static inline struct dccp_tx_hist_entry * + dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, + const int prio) +{ + struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab, + prio); + + if (entry != NULL) + entry->dccphtx_sent = 0; + + return entry; +} + +static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, + struct dccp_tx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(hist->dccptxh_slab, entry); +} + +extern struct dccp_tx_hist_entry * + dccp_tx_hist_find_entry(const struct list_head *list, + const u64 seq); + +static inline void dccp_tx_hist_add_entry(struct list_head *list, + struct dccp_tx_hist_entry *entry) +{ + list_add(&entry->dccphtx_node, list); +} + +extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, + struct list_head *list, + struct dccp_tx_hist_entry *next); + +extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist, + struct list_head *list); + +static inline struct dccp_tx_hist_entry *dccp_tx_hist_head(struct list_head *list) +{ + struct dccp_tx_hist_entry *head = NULL; + + if (!list_empty(list)) + head = list_entry(list->next, struct dccp_tx_hist_entry, + dccphtx_node); + return head; +} + +static inline struct dccp_rx_hist_entry * + dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, + const u32 ndp, + const struct sk_buff *skb, + const int prio) +{ + struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab, + prio); + + if (entry != NULL) { + const struct dccp_hdr *dh = dccp_hdr(skb); + + entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; + entry->dccphrx_win_count = dh->dccph_ccval; + entry->dccphrx_type = dh->dccph_type; + entry->dccphrx_ndp = ndp; + do_gettimeofday(&(entry->dccphrx_tstamp)); + } + + return entry; +} + +static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, + struct dccp_rx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(hist->dccprxh_slab, entry); +} + +extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, + struct list_head *list); + +static inline void dccp_rx_hist_add_entry(struct list_head *list, + struct dccp_rx_hist_entry *entry) +{ + list_add(&entry->dccphrx_node, list); +} + +static inline struct dccp_rx_hist_entry *dccp_rx_hist_head(struct list_head *list) +{ + struct dccp_rx_hist_entry *head = NULL; + + if (!list_empty(list)) + head = list_entry(list->next, struct dccp_rx_hist_entry, + dccphrx_node); + return head; +} + +static inline int + dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry) +{ + return entry->dccphrx_type == DCCP_PKT_DATA || + entry->dccphrx_type == DCCP_PKT_DATAACK; +} + +#endif /* _DCCP_PKT_HIST_ */ -- cgit v1.2.3 From cef07fd6029c20f95571d09cefce45ee3276a920 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 10 Aug 2005 13:29:27 -0300 Subject: [CCID3]: Ditch USEC_IN_SEC as time.h has USEC_PER_SEC That is equivalent, no need to have a private one. Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 15c25f62200..80f12c990c0 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -55,12 +55,10 @@ extern int ccid3_debug; #define TFRC_STD_PACKET_SIZE 256 #define TFRC_MAX_PACKET_SIZE 65535 -#define USEC_IN_SEC 1000000 - -#define TFRC_INITIAL_TIMEOUT (2 * USEC_IN_SEC) +#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) /* two seconds as per CCID3 spec 11 */ -#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_IN_SEC / (2 * HZ)) +#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) /* above is in usecs - half the scheduling granularity as per RFC3448 4.6 */ #define TFRC_WIN_COUNT_PER_RTT 4 @@ -155,20 +153,23 @@ static inline void ccid3_hc_tx_set_state(struct sock *sk, enum ccid3_hc_tx_state hctx->ccid3hctx_state = state; } -static void timeval_sub(struct timeval large, struct timeval small, struct timeval *result) { - +static void timeval_sub(struct timeval large, struct timeval small, + struct timeval *result) +{ result->tv_sec = large.tv_sec-small.tv_sec; if (large.tv_usec < small.tv_usec) { (result->tv_sec)--; - result->tv_usec = USEC_IN_SEC+large.tv_usec-small.tv_usec; + result->tv_usec = USEC_PER_SEC + + large.tv_usec - small.tv_usec; } else result->tv_usec = large.tv_usec-small.tv_usec; } -static inline void timeval_fix(struct timeval *tv) { - if (tv->tv_usec >= USEC_IN_SEC) { +static inline void timeval_fix(struct timeval *tv) +{ + if (tv->tv_usec >= USEC_PER_SEC) { tv->tv_sec++; - tv->tv_usec -= USEC_IN_SEC; + tv->tv_usec -= USEC_PER_SEC; } } @@ -1185,7 +1186,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) r_sample); /* Update timeout interval */ - inet_csk(sk)->icsk_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, USEC_IN_SEC); + inet_csk(sk)->icsk_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, + USEC_PER_SEC); /* Update receive rate */ hctx->ccid3hctx_x_recv = x_recv; /* x_recv in bytes per second */ @@ -1210,7 +1212,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* Update next send time */ if (hctx->ccid3hctx_t_ipi > (hctx->ccid3hctx_t_nom).tv_usec) { - (hctx->ccid3hctx_t_nom).tv_usec += USEC_IN_SEC; + hctx->ccid3hctx_t_nom.tv_usec += USEC_PER_SEC; (hctx->ccid3hctx_t_nom).tv_sec--; } /* FIXME - if no feedback then t_ipi can go > 1 second */ @@ -1344,7 +1346,7 @@ static int ccid3_hc_tx_init(struct sock *sk) hctx->ccid3hctx_x = hctx->ccid3hctx_s; /* set transmission rate to 1 packet per second */ hctx->ccid3hctx_rtt = 4; /* See ccid3_hc_tx_packet_sent win_count calculatation */ - inet_csk(sk)->icsk_rto = USEC_IN_SEC; + inet_csk(sk)->icsk_rto = USEC_PER_SEC; hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; INIT_LIST_HEAD(&hctx->ccid3hctx_hist); init_timer(&hctx->ccid3hctx_no_feedback_timer); @@ -1531,7 +1533,8 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) if (delta == 0) delta = 1; /* to prevent divide by zero */ - hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta; + hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * + USEC_PER_SEC) / delta; } break; default: @@ -1669,7 +1672,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) } found: timeval_sub(tstamp,tail->dccphrx_tstamp,&tmp_tv); - rtt = (tmp_tv.tv_sec * USEC_IN_SEC + tmp_tv.tv_usec) * 4 / interval; + rtt = (tmp_tv.tv_sec * USEC_PER_SEC + tmp_tv.tv_usec) * 4 / interval; ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", dccp_role(sk), sk, rtt); if (rtt == 0) @@ -1679,7 +1682,7 @@ found: if (delta == 0) delta = 1; - x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta; + x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC) / delta; tmp1 = (u64)x_recv * (u64)rtt; do_div(tmp1,10000000); -- cgit v1.2.3 From 4f5736c4c7cf6f9bd8db82b712cfdd51c87e06b9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 09:27:49 -0300 Subject: [TCPDIAG]: Introduce inet_diag_{register,unregister} Next changeset will rename tcp_diag to inet_diag and move the tcp_diag code out of it and into a new tcp_diag.c, similar to the net/dccp/diag.c introduced in this changeset, completing the transition to a generic inet_diag infrastructure. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/Kconfig | 5 +++++ net/dccp/Makefile | 4 ++++ net/dccp/diag.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+) create mode 100644 net/dccp/diag.c (limited to 'net/dccp') diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 90460bc629b..ff5b5459b97 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -19,6 +19,11 @@ config IP_DCCP If in doubt, say N. +config IP_DCCP_DIAG + depends on IP_DCCP && IP_TCPDIAG + def_tristate y if (IP_DCCP = y && IP_TCPDIAG = y) + def_tristate m + source "net/dccp/ccids/Kconfig" endmenu diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 25a50bdbf1b..5741fffc436 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -3,4 +3,8 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ timer.o packet_history.o +obj-$(CONFIG_IP_DCCP_DIAG) += dccp_diag.o + obj-y += ccids/ + +dccp_diag-y := diag.o diff --git a/net/dccp/diag.c b/net/dccp/diag.c new file mode 100644 index 00000000000..4d9037c56dd --- /dev/null +++ b/net/dccp/diag.c @@ -0,0 +1,47 @@ +/* + * net/dccp/diag.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +#include +#include + +#include "dccp.h" + +static void dccp_diag_get_info(struct sock *sk, struct tcpdiagmsg *r, + void *_info) +{ + r->tcpdiag_rqueue = r->tcpdiag_wqueue = 0; +} + +static struct inet_diag_handler dccp_diag_handler = { + .idiag_hashinfo = &dccp_hashinfo, + .idiag_get_info = dccp_diag_get_info, + .idiag_type = DCCPDIAG_GETSOCK, + .idiag_info_size = 0, +}; + +static int __init dccp_diag_init(void) +{ + return inet_diag_register(&dccp_diag_handler); +} + +static void __exit dccp_diag_fini(void) +{ + inet_diag_unregister(&dccp_diag_handler); +} + +module_init(dccp_diag_init); +module_exit(dccp_diag_fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arnaldo Carvalho de Melo "); +MODULE_DESCRIPTION("DCCP inet_diag handler"); -- cgit v1.2.3 From 73c1f4a033675f168df7e98bbeeafca3c644b8a6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 12:51:49 -0300 Subject: [TCPDIAG]: Just rename everything to inet_diag Next changeset will rename tcp_diag.[ch] to inet_diag.[ch]. I'm taking this longer route so as to easy review, making clear the changes made all along the way. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/Kconfig | 4 ++-- net/dccp/diag.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index ff5b5459b97..efce4f346fd 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -20,8 +20,8 @@ config IP_DCCP If in doubt, say N. config IP_DCCP_DIAG - depends on IP_DCCP && IP_TCPDIAG - def_tristate y if (IP_DCCP = y && IP_TCPDIAG = y) + depends on IP_DCCP && IP_INET_DIAG + def_tristate y if (IP_DCCP = y && IP_INET_DIAG = y) def_tristate m source "net/dccp/ccids/Kconfig" diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 4d9037c56dd..9f07eff2e3b 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -16,10 +16,10 @@ #include "dccp.h" -static void dccp_diag_get_info(struct sock *sk, struct tcpdiagmsg *r, +static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *_info) { - r->tcpdiag_rqueue = r->tcpdiag_wqueue = 0; + r->idiag_rqueue = r->idiag_wqueue = 0; } static struct inet_diag_handler dccp_diag_handler = { -- cgit v1.2.3 From a8c2190ee7da1a1dc68ff1a6b5f03feb61e523a5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 12:56:38 -0300 Subject: [INET_DIAG]: Rename tcp_diag.[ch] to inet_diag.[ch] Next changeset will introduce net/ipv4/tcp_diag.c, moving the code that was put transitioanlly in inet_diag.c. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/dccp') diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 9f07eff2e3b..0b10c176c35 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include "dccp.h" -- cgit v1.2.3 From 17b085eacef81a6286bd478f2ec75e04abb091cb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 12:59:17 -0300 Subject: [INET_DIAG]: Move the tcp_diag interface to the proper place With this the previous setup is back, i.e. tcp_diag can be built as a module, as dccp_diag and both share the infrastructure available in inet_diag. If one selects CONFIG_INET_DIAG as module CONFIG_INET_TCP_DIAG will also be built as a module, as will CONFIG_INET_DCCP_DIAG, if CONFIG_IP_DCCP was selected static or as a module, if CONFIG_INET_DIAG is y, being statically linked CONFIG_INET_TCP_DIAG will follow suit and CONFIG_INET_DCCP_DIAG will be built in the same manner as CONFIG_IP_DCCP. Now to aim at UDP, converting it to use inet_hashinfo, so that we can use iproute2 for UDP sockets as well. Ah, just to show an example of this new infrastructure working for DCCP :-) [root@qemu ~]# ./ss -dane State Recv-Q Send-Q Local Address:Port Peer Address:Port LISTEN 0 0 *:5001 *:* ino:942 sk:cfd503a0 ESTAB 0 0 127.0.0.1:5001 127.0.0.1:32770 ino:943 sk:cfd50a60 ESTAB 0 0 127.0.0.1:32770 127.0.0.1:5001 ino:947 sk:cfd50700 TIME-WAIT 0 0 127.0.0.1:32769 127.0.0.1:5001 timer:(timewait,3.430ms,0) ino:0 sk:cf209620 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/Kconfig | 6 +++--- net/dccp/Makefile | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index efce4f346fd..6760830c490 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -19,9 +19,9 @@ config IP_DCCP If in doubt, say N. -config IP_DCCP_DIAG - depends on IP_DCCP && IP_INET_DIAG - def_tristate y if (IP_DCCP = y && IP_INET_DIAG = y) +config INET_DCCP_DIAG + depends on IP_DCCP && INET_DIAG + def_tristate y if (IP_DCCP = y && INET_DIAG = y) def_tristate m source "net/dccp/ccids/Kconfig" diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 5741fffc436..44a867f2918 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -3,8 +3,8 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ timer.o packet_history.o -obj-$(CONFIG_IP_DCCP_DIAG) += dccp_diag.o - -obj-y += ccids/ +obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o dccp_diag-y := diag.o + +obj-y += ccids/ -- cgit v1.2.3 From c173437669967301facff151bfeb7bae67354e4c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 13 Aug 2005 20:34:23 -0300 Subject: [PACKET_HISTORY]: Add dccphtx_rtt and rename the win_count fields As requested by Ian. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 22 +++++++++++----------- net/dccp/packet_history.h | 13 +++++++------ 2 files changed, 18 insertions(+), 17 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 80f12c990c0..edf9740d8d8 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -1004,7 +1004,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, /* Can we send? if so add options and add to packet history */ if (rc == 0) - new_packet->dccphtx_win_count = + new_packet->dccphtx_ccval = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; out: @@ -1060,7 +1060,7 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) min_t(unsigned long, quarter_rtt, 5)) % 16; ccid3_pr_debug("%s, sk=%p, window changed from %u to %u!\n", dccp_role(sk), sk, - packet->dccphtx_win_count, + packet->dccphtx_ccval, hctx->ccid3hctx_last_win_count); } /* COMPLIANCE_END */ @@ -1068,9 +1068,10 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) ccid3_pr_debug("%s, sk=%p, packet sent (%llu,%u)\n", dccp_role(sk), sk, packet->dccphtx_seqno, - packet->dccphtx_win_count); + packet->dccphtx_ccval); #endif hctx->ccid3hctx_idle = 0; + packet->dccphtx_rtt = hctx->ccid3hctx_rtt; packet->dccphtx_sent = 1; } else ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n", @@ -1489,11 +1490,10 @@ trim_history: step = 2; /* OK, find next data packet */ num_later = 1; - win_count = entry->dccphrx_win_count; + win_count = entry->dccphrx_ccval; break; case 2: - tmp = (win_count - - entry->dccphrx_win_count); + tmp = win_count - entry->dccphrx_ccval; if (tmp < 0) tmp += TFRC_WIN_COUNT_LIMIT; if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { @@ -1553,7 +1553,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) } do_gettimeofday(&(hcrx->ccid3hcrx_tstamp_last_feedback)); - hcrx->ccid3hcrx_last_counter = packet->dccphrx_win_count; + hcrx->ccid3hcrx_last_counter = packet->dccphrx_ccval; hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno; hcrx->ccid3hcrx_bytes_recv = 0; @@ -1645,11 +1645,11 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) switch (step) { case 0: tstamp = entry->dccphrx_tstamp; - win_count = entry->dccphrx_win_count; + win_count = entry->dccphrx_ccval; step = 1; break; case 1: - interval = win_count - entry->dccphrx_win_count; + interval = win_count - entry->dccphrx_ccval; if (interval < 0) interval += TFRC_WIN_COUNT_LIMIT; if (interval > 4) @@ -1816,7 +1816,7 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) } if (seq_loss != DCCP_MAX_SEQNO + 1) - win_loss = a_loss->dccphrx_win_count; + win_loss = a_loss->dccphrx_ccval; out_update_li: ccid3_hc_rx_update_li(sk, seq_loss, win_loss); @@ -1918,7 +1918,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) return; } - win_count = packet->dccphrx_win_count; + win_count = packet->dccphrx_ccval; ins = ccid3_hc_rx_add_hist(sk, packet); diff --git a/net/dccp/packet_history.h b/net/dccp/packet_history.h index 565dc96506e..0056525a656 100644 --- a/net/dccp/packet_history.h +++ b/net/dccp/packet_history.h @@ -47,15 +47,16 @@ struct dccp_tx_hist_entry { struct list_head dccphtx_node; u64 dccphtx_seqno:48, - dccphtx_win_count:8, + dccphtx_ccval:4, dccphtx_sent:1; + u32 dccphtx_rtt; struct timeval dccphtx_tstamp; }; struct dccp_rx_hist_entry { struct list_head dccphrx_node; u64 dccphrx_seqno:48, - dccphrx_win_count:4, + dccphrx_ccval:4, dccphrx_type:4; u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ struct timeval dccphrx_tstamp; @@ -136,10 +137,10 @@ static inline struct dccp_rx_hist_entry * if (entry != NULL) { const struct dccp_hdr *dh = dccp_hdr(skb); - entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; - entry->dccphrx_win_count = dh->dccph_ccval; - entry->dccphrx_type = dh->dccph_type; - entry->dccphrx_ndp = ndp; + entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; + entry->dccphrx_ccval = dh->dccph_ccval; + entry->dccphrx_type = dh->dccph_type; + entry->dccphrx_ndp = ndp; do_gettimeofday(&(entry->dccphrx_tstamp)); } -- cgit v1.2.3 From 7690af3fff7633e40b1b9950eb8489129251d074 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 13 Aug 2005 20:34:54 -0300 Subject: [DCCP]: Just reflow the source code to fit in 80 columns Andrew Morton should be happy now 8) Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccid.h | 18 ++-- net/dccp/dccp.h | 56 +++++++----- net/dccp/input.c | 100 +++++++++++++-------- net/dccp/ipv4.c | 114 +++++++++++++++--------- net/dccp/minisocks.c | 37 +++++--- net/dccp/options.c | 215 ++++++++++++++++++++++++++++------------------ net/dccp/output.c | 44 ++++++---- net/dccp/packet_history.c | 11 +-- net/dccp/packet_history.h | 6 +- net/dccp/proto.c | 51 ++++++----- net/dccp/timer.c | 36 +++++--- 11 files changed, 430 insertions(+), 258 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 469f9a14b46..95eb47d8551 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -30,21 +30,26 @@ struct ccid { int (*ccid_hc_tx_init)(struct sock *sk); void (*ccid_hc_rx_exit)(struct sock *sk); void (*ccid_hc_tx_exit)(struct sock *sk); - void (*ccid_hc_rx_packet_recv)(struct sock *sk, struct sk_buff *skb); + void (*ccid_hc_rx_packet_recv)(struct sock *sk, + struct sk_buff *skb); int (*ccid_hc_rx_parse_options)(struct sock *sk, unsigned char option, unsigned char len, u16 idx, unsigned char* value); - void (*ccid_hc_rx_insert_options)(struct sock *sk, struct sk_buff *skb); - void (*ccid_hc_tx_insert_options)(struct sock *sk, struct sk_buff *skb); - void (*ccid_hc_tx_packet_recv)(struct sock *sk, struct sk_buff *skb); + void (*ccid_hc_rx_insert_options)(struct sock *sk, + struct sk_buff *skb); + void (*ccid_hc_tx_insert_options)(struct sock *sk, + struct sk_buff *skb); + void (*ccid_hc_tx_packet_recv)(struct sock *sk, + struct sk_buff *skb); int (*ccid_hc_tx_parse_options)(struct sock *sk, unsigned char option, unsigned char len, u16 idx, unsigned char* value); int (*ccid_hc_tx_send_packet)(struct sock *sk, struct sk_buff *skb, int len); - void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, int len); + void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, + int len); }; extern int ccid_register(struct ccid *ccid); @@ -123,7 +128,8 @@ static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, { int rc = 0; if (ccid->ccid_hc_tx_parse_options != NULL) - rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx, value); + rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx, + value); return rc; } diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 8a0d7af649e..62e735f1807 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -25,7 +25,8 @@ extern int dccp_debug; do { if (dccp_debug) \ printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \ } while (0) -#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) printk(format, ##a); } while (0) +#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) \ + printk(format, ##a); } while (0) #else #define dccp_pr_debug(format, a...) #define dccp_pr_debug_cat(format, a...) @@ -72,7 +73,8 @@ static inline const int after48(const u64 seq1, const u64 seq2) } /* is seq2 <= seq1 <= seq3 ? */ -static inline const int between48(const u64 seq1, const u64 seq2, const u64 seq3) +static inline const int between48(const u64 seq1, const u64 seq2, + const u64 seq3) { return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16); } @@ -107,12 +109,14 @@ struct dccp_mib { } __SNMP_MIB_ALIGN__; DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); -#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) -#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field) -#define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field) -#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) -#define DCCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(dccp_statistics, field, val) -#define DCCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(dccp_statistics, field, val) +#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) +#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field) +#define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field) +#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) +#define DCCP_ADD_STATS_BH(field, val) \ + SNMP_ADD_STATS_BH(dccp_statistics, field, val) +#define DCCP_ADD_STATS_USER(field, val) \ + SNMP_ADD_STATS_USER(dccp_statistics, field, val) extern int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb); extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); @@ -234,8 +238,8 @@ extern int dccp_disconnect(struct sock *sk, int flags); extern int dccp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen); extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); -extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t size); +extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size); extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); @@ -246,7 +250,8 @@ extern void dccp_shutdown(struct sock *sk, int how); extern int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr, const u32 daddr); -extern int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code); +extern int dccp_v4_send_reset(struct sock *sk, + enum dccp_reset_codes code); extern void dccp_send_close(struct sock *sk); struct dccp_skb_cb { @@ -303,7 +308,8 @@ static inline void dccp_inc_seqno(u64 *seqno) static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss) { - struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh + sizeof(*dh)); + struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh + + sizeof(*dh)); #if defined(__LITTLE_ENDIAN_BITFIELD) dh->dccph_seq = htonl((gss >> 32)) >> 8; @@ -315,7 +321,8 @@ static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss) dhx->dccph_seq_low = htonl(gss & 0xffffffff); } -static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, const u64 gsr) +static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, + const u64 gsr) { #if defined(__LITTLE_ENDIAN_BITFIELD) dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8; @@ -332,11 +339,14 @@ static inline void dccp_update_gsr(struct sock *sk, u64 seq) struct dccp_sock *dp = dccp_sk(sk); u64 tmp_gsr; - dccp_set_seqno(&tmp_gsr, dp->dccps_gsr + 1 - (dp->dccps_options.dccpo_sequence_window / 4)); + dccp_set_seqno(&tmp_gsr, + (dp->dccps_gsr + 1 - + (dp->dccps_options.dccpo_sequence_window / 4))); dp->dccps_gsr = seq; dccp_set_seqno(&dp->dccps_swl, max48(tmp_gsr, dp->dccps_isr)); dccp_set_seqno(&dp->dccps_swh, - dp->dccps_gsr + (3 * dp->dccps_options.dccpo_sequence_window) / 4); + (dp->dccps_gsr + + (3 * dp->dccps_options.dccpo_sequence_window) / 4)); } static inline void dccp_update_gss(struct sock *sk, u64 seq) @@ -344,7 +354,9 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq) struct dccp_sock *dp = dccp_sk(sk); u64 tmp_gss; - dccp_set_seqno(&tmp_gss, dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1); + dccp_set_seqno(&tmp_gss, + (dp->dccps_gss - + dp->dccps_options.dccpo_sequence_window + 1)); dp->dccps_awl = max48(tmp_gss, dp->dccps_iss); dp->dccps_awh = dp->dccps_gss = seq; } @@ -373,16 +385,20 @@ extern struct socket *dccp_ctl_socket; * * @dccpap_buf_head - circular buffer head * @dccpap_buf_tail - circular buffer tail - * @dccpap_buf_ackno - ack # of the most recent packet acknoldgeable in the buffer (i.e. %dccpap_buf_head) - * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked by the buffer with State 0 + * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the + * buffer (i.e. %dccpap_buf_head) + * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked + * by the buffer with State 0 * * Additionally, the HC-Receiver must keep some information about the * Ack Vectors it has recently sent. For each packet sent carrying an * Ack Vector, it remembers four variables: * - * @dccpap_ack_seqno - the Sequence Number used for the packet (HC-Receiver seqno) + * @dccpap_ack_seqno - the Sequence Number used for the packet + * (HC-Receiver seqno) * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement. - * @dccpap_ack_ackno - the Acknowledgement Number used for the packet (HC-Sender seqno) + * @dccpap_ack_ackno - the Acknowledgement Number used for the packet + * (HC-Sender seqno) * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. * * @dccpap_buf_len - circular buffer length diff --git a/net/dccp/input.c b/net/dccp/input.c index bdaecde0bde..4b8638f153a 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -93,7 +93,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) */ if (dh->dccph_type == DCCP_PKT_SYNC || dh->dccph_type == DCCP_PKT_SYNCACK) { - if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh) && + if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, + dp->dccps_awl, dp->dccps_awh) && !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl)) dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); else @@ -122,11 +123,13 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) && (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ || - between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, lawl, dp->dccps_awh))) { + between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, + lawl, dp->dccps_awh))) { dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); if (dh->dccph_type != DCCP_PKT_SYNC && - DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + (DCCP_SKB_CB(skb)->dccpd_ack_seq != + DCCP_PKT_WITHOUT_ACK_SEQ)) dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; } else { dccp_pr_debug("Step 6 failed, sending SYNC...\n"); @@ -161,10 +164,13 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKPKTS_STATE_RECEIVED)) { - LIMIT_NETDEBUG(KERN_INFO "DCCP: acknowledgeable packets buffer full!\n"); + LIMIT_NETDEBUG(KERN_INFO "DCCP: acknowledgeable " + "packets buffer full!\n"); ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + TCP_DELACK_MIN, + DCCP_RTO_MAX); goto discard; } @@ -175,7 +181,8 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, */ if (!inet_csk_ack_scheduled(sk)) { inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, TCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, + DCCP_RTO_MAX); } } @@ -186,8 +193,8 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, case DCCP_PKT_DATAACK: case DCCP_PKT_DATA: /* - * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED option - * if it is. + * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED + * option if it is. */ __skb_pull(skb, dh->dccph_doff * 4); __skb_queue_tail(&sk->sk_receive_queue, skb); @@ -272,11 +279,13 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, __kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; - if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh)) { - dccp_pr_debug("invalid ackno: S.AWL=%llu, P.ackno=%llu, S.AWH=%llu \n", - (unsigned long long) dp->dccps_awl, - (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq, - (unsigned long long) dp->dccps_awh); + if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, + dp->dccps_awl, dp->dccps_awh)) { + dccp_pr_debug("invalid ackno: S.AWL=%llu, " + "P.ackno=%llu, S.AWH=%llu \n", + (unsigned long long)dp->dccps_awl, + (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq, + (unsigned long long)dp->dccps_awh); goto out_invalid_packet; } @@ -296,16 +305,17 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, /* * Step 10: Process REQUEST state (second part) * If S.state == REQUEST, - * / * If we get here, P is a valid Response from the server (see - * Step 4), and we should move to PARTOPEN state. PARTOPEN - * means send an Ack, don't send Data packets, retransmit - * Acks periodically, and always include any Init Cookie from - * the Response * / + * / * If we get here, P is a valid Response from the + * server (see Step 4), and we should move to + * PARTOPEN state. PARTOPEN means send an Ack, + * don't send Data packets, retransmit Acks + * periodically, and always include any Init Cookie + * from the Response * / * S.state := PARTOPEN * Set PARTOPEN timer * Continue with S.state == PARTOPEN - * / * Step 12 will send the Ack completing the three-way - * handshake * / + * / * Step 12 will send the Ack completing the + * three-way handshake * / */ dccp_set_state(sk, DCCP_PARTOPEN); @@ -341,7 +351,8 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, out_invalid_packet: return 1; /* dccp_v4_do_rcv will send a reset, but... - FIXME: the reset code should be DCCP_RESET_CODE_PACKET_ERROR */ + FIXME: the reset code should be + DCCP_RESET_CODE_PACKET_ERROR */ } static int dccp_rcv_respond_partopen_state_process(struct sock *sk, @@ -358,11 +369,12 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk, case DCCP_PKT_DATAACK: case DCCP_PKT_ACK: /* - * FIXME: we should be reseting the PARTOPEN (DELACK) timer here, - * but only if we haven't used the DELACK timer for something else, - * like sending a delayed ack for a TIMESTAMP echo, etc, for now - * were not clearing it, sending an extra ACK when there is nothing - * else to do in DELACK is not a big deal after all. + * FIXME: we should be reseting the PARTOPEN (DELACK) timer + * here but only if we haven't used the DELACK timer for + * something else, like sending a delayed ack for a TIMESTAMP + * echo, etc, for now were not clearing it, sending an extra + * ACK when there is nothing else to do in DELACK is not a big + * deal after all. */ /* Stop the PARTOPEN timer */ @@ -374,7 +386,8 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk, if (dh->dccph_type == DCCP_PKT_DATAACK) { dccp_rcv_established(sk, skb, dh, len); - queued = 1; /* packet was queued (by dccp_rcv_established) */ + queued = 1; /* packet was queued + (by dccp_rcv_established) */ } break; } @@ -399,7 +412,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dccp_parse_options(sk, skb)) goto discard; - if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + if (DCCP_SKB_CB(skb)->dccpd_ack_seq != + DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); @@ -415,14 +429,17 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, DCCP_ACKPKTS_STATE_RECEIVED)) goto discard; /* - * FIXME: this activation is probably wrong, have to study more - * TCP delack machinery and how it fits into DCCP draft, but - * for now it kinda "works" 8) + * FIXME: this activation is probably wrong, have to + * study more TCP delack machinery and how it fits into + * DCCP draft, but for now it kinda "works" 8) */ - if (dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1 && + if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == + DCCP_MAX_SEQNO + 1) && !inet_csk_ack_scheduled(sk)) { inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + TCP_DELACK_MIN, + DCCP_RTO_MAX); } } } @@ -436,7 +453,10 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * Drop packet and return */ if (dh->dccph_type == DCCP_PKT_RESET) { - /* Queue the equivalent of TCP fin so that dccp_recvmsg exits the loop */ + /* + * Queue the equivalent of TCP fin so that dccp_recvmsg + * exits the loop + */ dccp_fin(sk, skb); dccp_time_wait(sk, DCCP_TIME_WAIT, 0); return 0; @@ -450,10 +470,12 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * Drop packet and return */ } else if ((dp->dccps_role != DCCP_ROLE_CLIENT && - (dh->dccph_type == DCCP_PKT_RESPONSE || dh->dccph_type == DCCP_PKT_CLOSEREQ)) || + (dh->dccph_type == DCCP_PKT_RESPONSE || + dh->dccph_type == DCCP_PKT_CLOSEREQ)) || (dp->dccps_role == DCCP_ROLE_CLIENT && dh->dccph_type == DCCP_PKT_REQUEST) || - (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { + (sk->sk_state == DCCP_RESPOND && + dh->dccph_type == DCCP_PKT_DATA)) { dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); goto discard; } @@ -491,11 +513,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, case DCCP_RESPOND: case DCCP_PARTOPEN: - queued = dccp_rcv_respond_partopen_state_process(sk, skb, dh, len); + queued = dccp_rcv_respond_partopen_state_process(sk, skb, + dh, len); break; } - if (dh->dccph_type == DCCP_PKT_ACK || dh->dccph_type == DCCP_PKT_DATAACK) { + if (dh->dccph_type == DCCP_PKT_ACK || + dh->dccph_type == DCCP_PKT_DATAACK) { switch (old_state) { case DCCP_PARTOPEN: sk->sk_state_change(sk); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d3770aed3b1..42d9c878d4c 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -29,7 +29,7 @@ struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { .lhash_lock = RW_LOCK_UNLOCKED, .lhash_users = ATOMIC_INIT(0), - .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), + .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), .portalloc_lock = SPIN_LOCK_UNLOCKED, .port_rover = 1024 - 1, }; @@ -61,7 +61,8 @@ static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, const int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, dccp_hashinfo.ehash_size); + const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, + dccp_hashinfo.ehash_size); struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash]; const struct sock *sk2; const struct hlist_node *node; @@ -133,11 +134,12 @@ static int dccp_v4_hash_connect(struct sock *sk) local_bh_disable(); /* TODO. Actually it is not so bad idea to remove - * dccp_hashinfo.portalloc_lock before next submission to Linus. + * dccp_hashinfo.portalloc_lock before next submission to + * Linus. * As soon as we touch this place at all it is time to think. * - * Now it protects single _advisory_ variable dccp_hashinfo.port_rover, - * hence it is mostly useless. + * Now it protects single _advisory_ variable + * dccp_hashinfo.port_rover, hence it is mostly useless. * Code will work nicely if we just delete it, but * I am afraid in contented case it will work not better or * even worse: another cpu just will hit the same bucket @@ -152,7 +154,8 @@ static int dccp_v4_hash_connect(struct sock *sk) rover++; if ((rover < low) || (rover > high)) rover = low; - head = &dccp_hashinfo.bhash[inet_bhashfn(rover, dccp_hashinfo.bhash_size)]; + head = &dccp_hashinfo.bhash[inet_bhashfn(rover, + dccp_hashinfo.bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, @@ -172,7 +175,8 @@ static int dccp_v4_hash_connect(struct sock *sk) } } - tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, head, rover); + tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, + head, rover); if (tb == NULL) { spin_unlock(&head->lock); break; @@ -211,7 +215,8 @@ ok: goto out; } - head = &dccp_hashinfo.bhash[inet_bhashfn(snum, dccp_hashinfo.bhash_size)]; + head = &dccp_hashinfo.bhash[inet_bhashfn(snum, + dccp_hashinfo.bhash_size)]; tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { @@ -313,7 +318,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, out: return err; failure: - /* This unhashes the socket and releases the local port, if necessary. */ + /* + * This unhashes the socket and releases the local port, if necessary. + */ dccp_set_state(sk, DCCP_CLOSED); ip_rt_put(rt); sk->sk_route_caps = 0; @@ -365,8 +372,9 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, /* * From: draft-ietf-dccp-spec-11.txt * - * DCCP-Sync packets are the best choice for upward probing, - * since DCCP-Sync probes do not risk application data loss. + * DCCP-Sync packets are the best choice for upward + * probing, since DCCP-Sync probes do not risk application + * data loss. */ dccp_send_sync(sk, dp->dccps_gsr); } /* else let the usual retransmit timer handle it */ @@ -405,11 +413,13 @@ static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb) dh->dccph_x = 1; dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), + DCCP_SKB_CB(rxskb)->dccpd_seq); bh_lock_sock(dccp_ctl_socket->sk); err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, - rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL); + rxskb->nh.iph->daddr, + rxskb->nh.iph->saddr, NULL); bh_unlock_sock(dccp_ctl_socket->sk); if (err == NET_XMIT_CN || err == 0) { @@ -418,7 +428,8 @@ static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb) } } -static void dccp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) +static void dccp_v4_reqsk_send_ack(struct sk_buff *skb, + struct request_sock *req) { dccp_v4_ctl_send_ack(skb); } @@ -465,7 +476,8 @@ out: void dccp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (struct iphdr *)skb->data; - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + (iph->ihl << 2)); + const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + + (iph->ihl << 2)); struct dccp_sock *dp; struct inet_sock *inet; const int type = skb->h.icmph->type; @@ -605,7 +617,8 @@ out: sock_put(sk); } -extern struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, enum dccp_reset_codes code); +extern struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, + enum dccp_reset_codes code); int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) { @@ -689,7 +702,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->loc_addr = daddr; ireq->rmt_addr = saddr; /* FIXME: Merge Aristeu's option parsing code when ready */ - req->rcv_wnd = 100; /* Fake, option parsing will get the right value */ + req->rcv_wnd = 100; /* Fake, option parsing will get the + right value */ ireq->opt = NULL; /* @@ -804,7 +818,8 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) return sk; } -int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr, const u32 daddr) +int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr, + const u32 daddr) { const struct dccp_hdr* dh = dccp_hdr(skb); int checksum_len; @@ -814,11 +829,13 @@ int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr, const u32 daddr checksum_len = skb->len; else { checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); - checksum_len = checksum_len < skb->len ? checksum_len : skb->len; + checksum_len = checksum_len < skb->len ? checksum_len : + skb->len; } tmp = csum_partial((unsigned char *)dh, checksum_len, 0); - return csum_tcpudp_magic(saddr, daddr, checksum_len, IPPROTO_DCCP, tmp); + return csum_tcpudp_magic(saddr, daddr, checksum_len, + IPPROTO_DCCP, tmp); } static int dccp_v4_verify_checksum(struct sk_buff *skb, @@ -832,10 +849,12 @@ static int dccp_v4_verify_checksum(struct sk_buff *skb, checksum_len = skb->len; else { checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); - checksum_len = checksum_len < skb->len ? checksum_len : skb->len; + checksum_len = checksum_len < skb->len ? checksum_len : + skb->len; } tmp = csum_partial((unsigned char *)dh, checksum_len, 0); - return csum_tcpudp_magic(saddr, daddr, checksum_len, IPPROTO_DCCP, tmp) == 0 ? 0 : -1; + return csum_tcpudp_magic(saddr, daddr, checksum_len, + IPPROTO_DCCP, tmp) == 0 ? 0 : -1; } static struct dst_entry* dccp_v4_route_skb(struct sock *sk, @@ -850,7 +869,9 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, .proto = sk->sk_protocol, .uli_u = { .ports = { .sport = dccp_hdr(skb)->dccph_dport, - .dport = dccp_hdr(skb)->dccph_sport } } }; + .dport = dccp_hdr(skb)->dccph_sport } + } + }; if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); @@ -899,17 +920,20 @@ void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) dh->dccph_dport = rxdh->dccph_sport; dh->dccph_doff = dccp_hdr_reset_len / 4; dh->dccph_x = 1; - dccp_hdr_reset(skb)->dccph_reset_code = DCCP_SKB_CB(rxskb)->dccpd_reset_code; + dccp_hdr_reset(skb)->dccph_reset_code = + DCCP_SKB_CB(rxskb)->dccpd_reset_code; dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), + DCCP_SKB_CB(rxskb)->dccpd_seq); dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr, rxskb->nh.iph->daddr); bh_lock_sock(dccp_ctl_socket->sk); err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, - rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL); + rxskb->nh.iph->daddr, + rxskb->nh.iph->saddr, NULL); bh_unlock_sock(dccp_ctl_socket->sk); if (err == NET_XMIT_CN || err == 0) { @@ -933,7 +957,8 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) /* * Step 3: Process LISTEN state * If S.state == LISTEN, - * If P.type == Request or P contains a valid Init Cookie option, + * If P.type == Request or P contains a valid Init Cookie + * option, * * Must scan the packet's options to check for an Init * Cookie. Only the Init Cookie is processed here, * however; other options are processed in Step 8. This @@ -950,7 +975,8 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return * - * NOTE: the check for the packet types is done in dccp_rcv_state_process + * NOTE: the check for the packet types is done in + * dccp_rcv_state_process */ if (sk->sk_state == DCCP_LISTEN) { struct sock *nsk = dccp_v4_hnd_req(sk, skb); @@ -1007,7 +1033,8 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) } if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { - dccp_pr_debug("P.Data Offset(%u) too small 2\n", dh->dccph_doff); + dccp_pr_debug("P.Data Offset(%u) too small 2\n", + dh->dccph_doff); return 1; } @@ -1021,8 +1048,8 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) dh->dccph_type != DCCP_PKT_DATA && dh->dccph_type != DCCP_PKT_ACK && dh->dccph_type != DCCP_PKT_DATAACK) { - dccp_pr_debug("P.type (%s) not Data, Ack nor DataAck and P.X == 0\n", - dccp_packet_name(dh->dccph_type)); + dccp_pr_debug("P.type (%s) not Data, Ack nor DataAck and " + "P.X == 0\n", dccp_packet_name(dh->dccph_type)); return 1; } @@ -1055,10 +1082,11 @@ int dccp_v4_rcv(struct sk_buff *skb) * dccp_ackpkts_add, you'll get something like this on a session that * sends 10 DATA/DATAACK packets: * - * dccp_ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| + * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| * * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet - * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets with the same state + * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets + * with the same state * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet * * So... @@ -1072,10 +1100,12 @@ int dccp_v4_rcv(struct sk_buff *skb) * 281473596467416 was received * 281473596467415 was not received * 281473596467414 was received - * 281473596467413 was received (this one was the 3way handshake RESPONSE) + * 281473596467413 was received (this one was the 3way handshake + * RESPONSE) * */ - if (dh->dccph_type == DCCP_PKT_DATA || dh->dccph_type == DCCP_PKT_DATAACK) { + if (dh->dccph_type == DCCP_PKT_DATA || + dh->dccph_type == DCCP_PKT_DATAACK) { static int discard = 0; if (discard) { @@ -1170,7 +1200,8 @@ no_dccp_socket: * Drop packet and return */ if (dh->dccph_type != DCCP_PKT_RESET) { - DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; + DCCP_SKB_CB(skb)->dccpd_reset_code = + DCCP_RESET_CODE_NO_CONNECTION; dccp_v4_ctl_send_reset(skb); } @@ -1196,8 +1227,9 @@ static int dccp_v4_init_sock(struct sock *sk) dccp_options_init(&dp->dccps_options); if (dp->dccps_options.dccpo_send_ack_vector) { - dp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, - GFP_KERNEL); + dp->dccps_hc_rx_ackpkts = + dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, + GFP_KERNEL); if (dp->dccps_hc_rx_ackpkts == NULL) return -ENOMEM; @@ -1211,8 +1243,10 @@ static int dccp_v4_init_sock(struct sock *sk) * setsockopt(CCIDs-I-want/accept). -acme */ if (likely(!dccp_ctl_socket_init)) { - dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk); - dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk); + dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, + sk); + dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, + sk); if (dp->dccps_hc_rx_ccid == NULL || dp->dccps_hc_tx_ccid == NULL) { ccid_exit(dp->dccps_hc_rx_ccid, sk); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index a6a0b270fb6..b8e67207e97 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -69,8 +69,8 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) * socket up. We've got bigger problems than * non-graceful socket closings. */ - if (net_ratelimit()) - printk(KERN_INFO "DCCP: time wait bucket table overflow\n"); + LIMIT_NETDEBUG(KERN_INFO "DCCP: time wait bucket " + "table overflow\n"); } dccp_done(sk); @@ -98,19 +98,23 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newicsk->icsk_rto = DCCP_TIMEOUT_INIT; if (newdp->dccps_options.dccpo_send_ack_vector) { - newdp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, - GFP_ATOMIC); + newdp->dccps_hc_rx_ackpkts = + dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, + GFP_ATOMIC); /* - * XXX: We're using the same CCIDs set on the parent, i.e. sk_clone - * copied the master sock and left the CCID pointers for this child, - * that is why we do the __ccid_get calls. + * XXX: We're using the same CCIDs set on the parent, + * i.e. sk_clone copied the master sock and left the + * CCID pointers for this child, that is why we do the + * __ccid_get calls. */ if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL)) goto out_free; } - if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid, newsk) != 0 || - ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, newsk) != 0)) { + if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid, + newsk) != 0 || + ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, + newsk) != 0)) { dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts); ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); @@ -129,7 +133,8 @@ out_free: * Step 3: Process LISTEN state * * Choose S.ISS (initial seqno) or set from Init Cookie - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init + * Cookie */ /* See dccp_v4_conn_request */ @@ -160,13 +165,15 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, /* Check for retransmitted REQUEST */ if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { - if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dccp_rsk(req)->dreq_isr)) { + if (after48(DCCP_SKB_CB(skb)->dccpd_seq, + dccp_rsk(req)->dreq_isr)) { struct dccp_request_sock *dreq = dccp_rsk(req); dccp_pr_debug("Retransmitted REQUEST\n"); /* Send another RESPONSE packet */ dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1); - dccp_set_seqno(&dreq->dreq_isr, DCCP_SKB_CB(skb)->dccpd_seq); + dccp_set_seqno(&dreq->dreq_isr, + DCCP_SKB_CB(skb)->dccpd_seq); req->rsk_ops->rtx_syn_ack(sk, req, NULL); } /* Network Duplicate, discard packet */ @@ -181,7 +188,8 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, /* Invalid ACK */ if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) { - dccp_pr_debug("Invalid ACK number: ack_seq=%llu, dreq_iss=%llu\n", + dccp_pr_debug("Invalid ACK number: ack_seq=%llu, " + "dreq_iss=%llu\n", (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq, (unsigned long long) @@ -223,7 +231,8 @@ int dccp_child_process(struct sock *parent, struct sock *child, const int state = child->sk_state; if (!sock_owned_by_user(child)) { - ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb), skb->len); + ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb), + skb->len); /* Wakeup parent, send SIGIO */ if (state == DCCP_RESPOND && child->sk_state != state) diff --git a/net/dccp/options.c b/net/dccp/options.c index 5bf997683a1..68d6614edcf 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -59,14 +59,15 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); #ifdef DCCP_DEBUG - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx opt: " : - "server rx opt: "; + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? + "CLIENT rx opt: " : "server rx opt: "; #endif const struct dccp_hdr *dh = dccp_hdr(skb); const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); unsigned char *opt_ptr = options; - const unsigned char *opt_end = (unsigned char *)dh + (dh->dccph_doff * 4); + const unsigned char *opt_end = (unsigned char *)dh + + (dh->dccph_doff * 4); struct dccp_options_received *opt_recv = &dp->dccps_options_received; unsigned char opt, len; unsigned char *value; @@ -106,7 +107,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) goto out_invalid_option; opt_recv->dccpor_ndp = dccp_decode_value_var(value, len); - dccp_pr_debug("%sNDP count=%d\n", debug_prefix, opt_recv->dccpor_ndp); + dccp_pr_debug("%sNDP count=%d\n", debug_prefix, + opt_recv->dccpor_ndp); break; case DCCPO_ACK_VECTOR_0: if (len > DCCP_MAX_ACK_VECTOR_LEN) @@ -124,8 +126,9 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) DCCP_SKB_CB(skb)->dccpd_ack_seq); dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); - dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, sk, - DCCP_SKB_CB(skb)->dccpd_ack_seq, + dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, + sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq, len, value); break; case DCCPO_TIMESTAMP: @@ -148,15 +151,21 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value); - dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, diff=%u\n", - debug_prefix, opt_recv->dccpor_timestamp_echo, + dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, " + "diff=%u\n", + debug_prefix, + opt_recv->dccpor_timestamp_echo, len + 2, (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq, - tcp_time_stamp - opt_recv->dccpor_timestamp_echo); - - opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value + 4, len - 4); - dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", debug_prefix, + (tcp_time_stamp - + opt_recv->dccpor_timestamp_echo)); + + opt_recv->dccpor_elapsed_time = + dccp_decode_value_var(value + 4, + len - 4); + dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", + debug_prefix, opt_recv->dccpor_elapsed_time); break; case DCCPO_ELAPSED_TIME: @@ -165,33 +174,41 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) if (pkt_type == DCCP_PKT_DATA) continue; - opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value, len); + opt_recv->dccpor_elapsed_time = + dccp_decode_value_var(value, len); dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, opt_recv->dccpor_elapsed_time); break; /* * From draft-ietf-dccp-spec-11.txt: * - * Option numbers 128 through 191 are for options sent from the HC- - * Sender to the HC-Receiver; option numbers 192 through 255 are for - * options sent from the HC-Receiver to the HC-Sender. + * Option numbers 128 through 191 are for + * options sent from the HC-Sender to the + * HC-Receiver; option numbers 192 through 255 + * are for options sent from the HC-Receiver to + * the HC-Sender. */ case 128 ... 191: { const u16 idx = value - options; - if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, opt, len, idx, value) != 0) + if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, + opt, len, idx, + value) != 0) goto out_invalid_option; } break; case 192 ... 255: { const u16 idx = value - options; - if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, opt, len, idx, value) != 0) + if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, + opt, len, idx, + value) != 0) goto out_invalid_option; } break; default: - pr_info("DCCP(%p): option %d(len=%d) not implemented, ignoring\n", + pr_info("DCCP(%p): option %d(len=%d) not " + "implemented, ignoring\n", sk, opt, len); break; } @@ -231,7 +248,8 @@ void dccp_insert_option(struct sock *sk, struct sk_buff *skb, unsigned char *to; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert %d option!\n", option); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert " + "%d option!\n", option); return; } @@ -287,8 +305,8 @@ void dccp_insert_option_elapsed_time(struct sock *sk, { #ifdef DCCP_DEBUG struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : - "server TX opt: "; + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? + "CLIENT TX opt: " : "server TX opt: "; #endif const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); const int len = 2 + elapsed_time_len; @@ -299,7 +317,8 @@ void dccp_insert_option_elapsed_time(struct sock *sk, return; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert elapsed time!\n"); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to " + "insert elapsed time!\n"); return; } @@ -323,8 +342,8 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); #ifdef DCCP_DEBUG - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : - "server TX opt: "; + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? + "CLIENT TX opt: " : "server TX opt: "; #endif struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; int len = ap->dccpap_buf_vector_len + 2; @@ -335,7 +354,8 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) dccp_insert_option_elapsed_time(sk, skb, elapsed_time); if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert ACK Vector!\n"); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to " + "insert ACK Vector!\n"); return; } @@ -360,7 +380,8 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) /* Check if buf_head wraps */ if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) { - const unsigned int tailsize = ap->dccpap_buf_len - ap->dccpap_buf_head; + const unsigned int tailsize = (ap->dccpap_buf_len - + ap->dccpap_buf_head); memcpy(to, from, tailsize); to += tailsize; @@ -375,8 +396,8 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) * For each acknowledgement it sends, the HC-Receiver will add an * acknowledgement record. ack_seqno will equal the HC-Receiver * sequence number it used for the ack packet; ack_ptr will equal - * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will equal - * buf_nonce. + * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will + * equal buf_nonce. * * This implemention uses just one ack record for now. */ @@ -386,33 +407,38 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) ap->dccpap_ack_nonce = ap->dccpap_buf_nonce; ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len; - dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu\n", + dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " + "ack_ackno=%llu\n", debug_prefix, ap->dccpap_ack_vector_len, (unsigned long long) ap->dccpap_ack_seqno, (unsigned long long) ap->dccpap_ack_ackno); } -static inline void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) +static inline void dccp_insert_option_timestamp(struct sock *sk, + struct sk_buff *skb) { const u32 now = htonl(tcp_time_stamp); dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); } -static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb) +static void dccp_insert_option_timestamp_echo(struct sock *sk, + struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); #ifdef DCCP_DEBUG - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : - "server TX opt: "; + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? + "CLIENT TX opt: " : "server TX opt: "; #endif u32 tstamp_echo; - const u32 elapsed_time = jiffies_to_usecs(jiffies - dp->dccps_timestamp_time) / 10; + const u32 elapsed_time = jiffies_to_usecs(jiffies - + dp->dccps_timestamp_time) / 10; const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); const int len = 6 + elapsed_time_len; unsigned char *to; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert timestamp echo!\n"); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert " + "timestamp echo!\n"); return; } @@ -447,7 +473,8 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) if (!dccp_packet_without_ack(skb)) { if (dp->dccps_options.dccpo_send_ack_vector && - dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1) + (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != + DCCP_MAX_SEQNO + 1)) dccp_insert_option_ack_vector(sk, skb); dccp_insert_option_timestamp(sk, skb); @@ -480,12 +507,16 @@ struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority) #ifdef DCCP_DEBUG memset(ap->dccpap_buf, 0xFF, len); #endif - ap->dccpap_buf_len = len; - ap->dccpap_buf_head = ap->dccpap_buf_tail = ap->dccpap_buf_len - 1; - ap->dccpap_buf_ackno = ap->dccpap_ack_ackno = ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; - ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; - ap->dccpap_ack_ptr = 0; - ap->dccpap_time = 0; + ap->dccpap_buf_len = len; + ap->dccpap_buf_head = + ap->dccpap_buf_tail = + ap->dccpap_buf_len - 1; + ap->dccpap_buf_ackno = + ap->dccpap_ack_ackno = + ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; + ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; + ap->dccpap_ack_ptr = 0; + ap->dccpap_time = 0; ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; } @@ -567,15 +598,16 @@ int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) * * From Appendix A: * - * Of course, the circular buffer may overflow, either when the HC- - * Sender is sending data at a very high rate, when the HC-Receiver's - * acknowledgements are not reaching the HC-Sender, or when the HC- - * Sender is forgetting to acknowledge those acks (so the HC-Receiver - * is unable to clean up old state). In this case, the HC-Receiver - * should either compress the buffer (by increasing run lengths when - * possible), transfer its state to a larger buffer, or, as a last - * resort, drop all received packets, without processing them - * whatsoever, until its buffer shrinks again. + * Of course, the circular buffer may overflow, either when the + * HC-Sender is sending data at a very high rate, when the + * HC-Receiver's acknowledgements are not reaching the HC-Sender, + * or when the HC-Sender is forgetting to acknowledge those acks + * (so the HC-Receiver is unable to clean up old state). In this + * case, the HC-Receiver should either compress the buffer (by + * increasing run lengths when possible), transfer its state to + * a larger buffer, or, as a last resort, drop all received + * packets, without processing them whatsoever, until its buffer + * shrinks again. */ /* See if this is the first ackno being inserted */ @@ -583,15 +615,17 @@ int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) ap->dccpap_buf[ap->dccpap_buf_head] = state; ap->dccpap_buf_vector_len = 1; } else if (after48(ackno, ap->dccpap_buf_ackno)) { - const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, ackno); + const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, + ackno); /* - * Look if the state of this packet is the same as the previous ackno - * and if so if we can bump the head len. + * Look if the state of this packet is the same as the + * previous ackno and if so if we can bump the head len. */ if (delta == 1 && dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state && - dccp_ackpkts_len(ap, ap->dccpap_buf_head) < DCCP_ACKPKTS_LEN_MASK) + (dccp_ackpkts_len(ap, ap->dccpap_buf_head) < + DCCP_ACKPKTS_LEN_MASK)) ap->dccpap_buf[ap->dccpap_buf_head]++; else if (dccp_ackpkts_set_buf_head_state(ap, delta, state)) return -ENOBUFS; @@ -599,9 +633,10 @@ int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) /* * A.1.2. Old Packets * - * When a packet with Sequence Number S arrives, and S <= buf_ackno, - * the HC-Receiver will scan the table for the byte corresponding to S. - * (Indexing structures could reduce the complexity of this scan.) + * When a packet with Sequence Number S arrives, and + * S <= buf_ackno, the HC-Receiver will scan the table + * for the byte corresponding to S. (Indexing structures + * could reduce the complexity of this scan.) */ u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno); unsigned int index = ap->dccpap_buf_head; @@ -610,11 +645,12 @@ int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) const u8 len = dccp_ackpkts_len(ap, index); const u8 state = dccp_ackpkts_state(ap, index); /* - * valid packets not yet in dccpap_buf have a reserved entry, with - * a len equal to 0 + * valid packets not yet in dccpap_buf have a reserved + * entry, with a len equal to 0. */ if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && - len == 0 && delta == 0) { /* Found our reserved seat! */ + len == 0 && delta == 0) { /* Found our + reserved seat! */ dccp_pr_debug("Found %llu reserved seat!\n", (unsigned long long) ackno); ap->dccpap_buf[index] = state; @@ -639,13 +675,14 @@ out: out_duplicate: /* Duplicate packet */ - dccp_pr_debug("Received a dup or already considered lost packet: %llu\n", - (unsigned long long) ackno); + dccp_pr_debug("Received a dup or already considered lost " + "packet: %llu\n", (unsigned long long) ackno); return -EILSEQ; } #ifdef DCCP_DEBUG -void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) +void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, + int len) { if (!dccp_debug) return; @@ -678,8 +715,9 @@ static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap) * As we're keeping track of the ack vector size * (dccpap_buf_vector_len) and the sent ack vector size * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but - * keep this code here as in the future we'll implement a vector of ack - * records, as suggested in draft-ietf-dccp-spec-11.txt Appendix A. -acme + * keep this code here as in the future we'll implement a vector of + * ack records, as suggested in draft-ietf-dccp-spec-11.txt + * Appendix A. -acme */ #if 0 ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1; @@ -699,10 +737,11 @@ void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, if (ackno == ap->dccpap_ack_seqno) { #ifdef DCCP_DEBUG struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " : - "server rx ack: "; + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? + "CLIENT rx ack: " : "server rx ack: "; #endif - dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", + dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " + "ack_ackno=%llu, ACKED!\n", debug_prefix, 1, (unsigned long long) ap->dccpap_ack_seqno, (unsigned long long) ap->dccpap_ack_ackno); @@ -722,20 +761,21 @@ static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) return; /* - * We're in the receiver half connection, so if the received an ACK vector - * ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're not interested. + * We're in the receiver half connection, so if the received an ACK + * vector ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're + * not interested. * * Extra explanation with example: * * if we received an ACK vector with ackno 50, it can only be acking * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). */ - // dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); + /* dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); */ if (before48(ackno, ap->dccpap_ack_seqno)) { - // dccp_pr_debug_cat("yes\n"); + /* dccp_pr_debug_cat("yes\n"); */ return; } - // dccp_pr_debug_cat("no\n"); + /* dccp_pr_debug_cat("no\n"); */ i = len; while (i--) { @@ -744,18 +784,25 @@ static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, dccp_set_seqno(&ackno_end_rl, ackno - rl); - // dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, ap->dccpap_ack_seqno, ackno); + /* + * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, + * ap->dccpap_ack_seqno, ackno); + */ if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) { - const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; - // dccp_pr_debug_cat("yes\n"); + const u8 state = (*vector & + DCCP_ACKPKTS_STATE_MASK) >> 6; + /* dccp_pr_debug_cat("yes\n"); */ if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) { #ifdef DCCP_DEBUG struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " : - "server rx ack: "; + const char *debug_prefix = + dp->dccps_role == DCCP_ROLE_CLIENT ? + "CLIENT rx ack: " : "server rx ack: "; #endif - dccp_pr_debug("%sACK vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", + dccp_pr_debug("%sACK vector 0, len=%d, " + "ack_seqno=%llu, ack_ackno=%llu, " + "ACKED!\n", debug_prefix, len, (unsigned long long) ap->dccpap_ack_seqno, @@ -764,13 +811,13 @@ static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, dccp_ackpkts_trow_away_ack_record(ap); } /* - * If dccpap_ack_seqno was not received, no problem we'll - * send another ACK vector. + * If dccpap_ack_seqno was not received, no problem + * we'll send another ACK vector. */ ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; break; } - // dccp_pr_debug_cat("no\n"); + /* dccp_pr_debug_cat("no\n"); */ dccp_set_seqno(&ackno, ackno_end_rl - 1); ++vector; diff --git a/net/dccp/output.c b/net/dccp/output.c index 50292c0605f..dcc061bed92 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -40,13 +40,13 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) /* XXX For now we're using only 48 bits sequence numbers */ const int dccp_header_size = sizeof(*dh) + sizeof(struct dccp_hdr_ext) + - dccp_packet_hdr_len(dcb->dccpd_type); + dccp_packet_hdr_len(dcb->dccpd_type); int err, set_ack = 1; u64 ackno = dp->dccps_gsr; /* - * FIXME: study DCCP_PKT_SYNC[ACK] to see what is the right thing - * to do here... + * FIXME: study DCCP_PKT_SYNC[ACK] to see what is the right + * thing to do here... */ dccp_inc_seqno(&dp->dccps_gss); @@ -65,7 +65,9 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) skb->h.raw = skb_push(skb, dccp_header_size); dh = dccp_hdr(skb); - /* Data packets are not cloned as they are never retransmitted */ + /* + * Data packets are not cloned as they are never retransmitted + */ if (skb_cloned(skb)) skb_set_owner_w(skb, sk); @@ -86,10 +88,12 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) switch (dcb->dccpd_type) { case DCCP_PKT_REQUEST: - dccp_hdr_request(skb)->dccph_req_service = dcb->dccpd_service; + dccp_hdr_request(skb)->dccph_req_service = + dcb->dccpd_service; break; case DCCP_PKT_RESET: - dccp_hdr_reset(skb)->dccph_reset_code = dcb->dccpd_reset_code; + dccp_hdr_reset(skb)->dccph_reset_code = + dcb->dccpd_reset_code; break; } @@ -123,10 +127,13 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) int mss_now; /* - * FIXME: we really should be using the af_specific thing to support IPv6. - * mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); + * FIXME: we really should be using the af_specific thing to support + * IPv6. + * mss_now = pmtu - tp->af_specific->net_header_len - + * sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); */ - mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); + mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) - + sizeof(struct dccp_hdr_ext); /* Now subtract optional transport overhead */ mss_now -= dp->dccps_ext_header_len; @@ -223,7 +230,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, dh->dccph_sport = inet_sk(sk)->sport; dh->dccph_dport = inet_rsk(req)->rmt_port; - dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; + dh->dccph_doff = (dccp_header_size + + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; dh->dccph_type = DCCP_PKT_RESPONSE; dh->dccph_x = 1; dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); @@ -271,7 +279,8 @@ struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, dh->dccph_sport = inet_sk(sk)->sport; dh->dccph_dport = inet_sk(sk)->dport; - dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; + dh->dccph_doff = (dccp_header_size + + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; dh->dccph_type = DCCP_PKT_RESET; dh->dccph_x = 1; dccp_hdr_set_seq(dh, dp->dccps_gss); @@ -348,7 +357,9 @@ void dccp_send_ack(struct sock *sk) if (skb == NULL) { inet_csk_schedule_ack(sk); inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + TCP_DELACK_MAX, + DCCP_RTO_MAX); return; } @@ -416,8 +427,10 @@ void dccp_send_sync(struct sock *sk, u64 seq) dccp_transmit_skb(sk, skb); } -/* Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This cannot be - * allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under any circumstances. +/* + * Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This + * cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under + * any circumstances. */ void dccp_send_close(struct sock *sk) { @@ -435,7 +448,8 @@ void dccp_send_close(struct sock *sk) /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, sk->sk_prot->max_header); skb->csum = 0; - DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; + DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? + DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; skb_set_owner_w(skb, sk); dccp_transmit_skb(sk, skb); diff --git a/net/dccp/packet_history.c b/net/dccp/packet_history.c index 6b414898f0c..2d9ef5ae0bf 100644 --- a/net/dccp/packet_history.c +++ b/net/dccp/packet_history.c @@ -55,7 +55,7 @@ struct dccp_rx_hist *dccp_rx_hist_new(const char *name) sprintf(slab_name, dccp_rx_hist_mask, name); hist->dccprxh_slab = kmem_cache_create(slab_name, - sizeof(struct dccp_rx_hist_entry), + sizeof(struct dccp_rx_hist_entry), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (hist->dccprxh_slab == NULL) @@ -128,7 +128,7 @@ struct dccp_tx_hist *dccp_tx_hist_new(const char *name) sprintf(slab_name, dccp_tx_hist_mask, name); hist->dccptxh_slab = kmem_cache_create(slab_name, - sizeof(struct dccp_tx_hist_entry), + sizeof(struct dccp_tx_hist_entry), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (hist->dccptxh_slab == NULL) @@ -156,8 +156,8 @@ void dccp_tx_hist_delete(struct dccp_tx_hist *hist) EXPORT_SYMBOL_GPL(dccp_tx_hist_delete); -struct dccp_tx_hist_entry *dccp_tx_hist_find_entry(const struct list_head *list, - const u64 seq) +struct dccp_tx_hist_entry * + dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq) { struct dccp_tx_hist_entry *packet = NULL, *entry; @@ -172,7 +172,8 @@ struct dccp_tx_hist_entry *dccp_tx_hist_find_entry(const struct list_head *list, EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); -void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, struct list_head *list, +void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, + struct list_head *list, struct dccp_tx_hist_entry *packet) { struct dccp_tx_hist_entry *next; diff --git a/net/dccp/packet_history.h b/net/dccp/packet_history.h index 0056525a656..489fff45ccd 100644 --- a/net/dccp/packet_history.h +++ b/net/dccp/packet_history.h @@ -115,7 +115,8 @@ extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list); -static inline struct dccp_tx_hist_entry *dccp_tx_hist_head(struct list_head *list) +static inline struct dccp_tx_hist_entry * + dccp_tx_hist_head(struct list_head *list) { struct dccp_tx_hist_entry *head = NULL; @@ -163,7 +164,8 @@ static inline void dccp_rx_hist_add_entry(struct list_head *list, list_add(&entry->dccphrx_node, list); } -static inline struct dccp_rx_hist_entry *dccp_rx_hist_head(struct list_head *list) +static inline struct dccp_rx_hist_entry * + dccp_rx_hist_head(struct list_head *list) { struct dccp_rx_hist_entry *head = NULL; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 877c1e0e3c4..46dd489f66c 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -255,12 +255,16 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* FIXME */ #if 0 - /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ + /* + * Are we at urgent data? Stop if we have read anything or + * have SIGURG pending. + */ if (tp->urg_data && tp->urg_seq == *seq) { if (copied) break; if (signal_pending(current)) { - copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; + copied = timeo ? sock_intr_errno(timeo) : + -EAGAIN; break; } } @@ -285,7 +289,8 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, dccp_pr_debug("found fin ok!\n"); goto found_fin_ok; } - dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type)); + dccp_pr_debug("packet_type=%s\n", + dccp_packet_name(dh->dccph_type)); BUG_TRAP(flags & MSG_PEEK); skb = skb->next; } while (skb != (struct sk_buff *)&sk->sk_receive_queue); @@ -439,16 +444,16 @@ out: } static const unsigned char dccp_new_state[] = { - /* current state: new state: action: */ - [0] = DCCP_CLOSED, - [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, - [DCCP_REQUESTING] = DCCP_CLOSED, - [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, - [DCCP_LISTEN] = DCCP_CLOSED, - [DCCP_RESPOND] = DCCP_CLOSED, - [DCCP_CLOSING] = DCCP_CLOSED, - [DCCP_TIME_WAIT] = DCCP_CLOSED, - [DCCP_CLOSED] = DCCP_CLOSED, + /* current state: new state: action: */ + [0] = DCCP_CLOSED, + [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, + [DCCP_REQUESTING] = DCCP_CLOSED, + [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, + [DCCP_LISTEN] = DCCP_CLOSED, + [DCCP_RESPOND] = DCCP_CLOSED, + [DCCP_CLOSING] = DCCP_CLOSED, + [DCCP_TIME_WAIT] = DCCP_CLOSED, + [DCCP_CLOSED] = DCCP_CLOSED, }; static int dccp_close_state(struct sock *sk) @@ -541,7 +546,8 @@ struct proto_ops inet_dccp_ops = { .getname = inet_getname, .poll = sock_no_poll, .ioctl = inet_ioctl, - .listen = inet_dccp_listen, /* FIXME: work on inet_listen to rename it to sock_common_listen */ + /* FIXME: work on inet_listen to rename it to sock_common_listen */ + .listen = inet_dccp_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, @@ -638,10 +644,10 @@ static int __init dccp_init(void) if (rc) goto out; - dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", - sizeof(struct inet_bind_bucket), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + dccp_hashinfo.bind_bucket_cachep = + kmem_cache_create("dccp_bind_bucket", + sizeof(struct inet_bind_bucket), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); if (!dccp_hashinfo.bind_bucket_cachep) goto out_proto_unregister; @@ -657,14 +663,16 @@ static int __init dccp_init(void) goal = num_physpages >> (23 - PAGE_SHIFT); if (thash_entries) - goal = (thash_entries * sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT; + goal = (thash_entries * + sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT; for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) ; do { dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE / sizeof(struct inet_ehash_bucket); dccp_hashinfo.ehash_size >>= 1; - while (dccp_hashinfo.ehash_size & (dccp_hashinfo.ehash_size - 1)) + while (dccp_hashinfo.ehash_size & + (dccp_hashinfo.ehash_size - 1)) dccp_hashinfo.ehash_size--; dccp_hashinfo.ehash = (struct inet_ehash_bucket *) __get_free_pages(GFP_ATOMIC, ehash_order); @@ -686,7 +694,8 @@ static int __init dccp_init(void) do { dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE / sizeof(struct inet_bind_hashbucket); - if ((dccp_hashinfo.bhash_size > (64 * 1024)) && bhash_order > 0) + if ((dccp_hashinfo.bhash_size > (64 * 1024)) && + bhash_order > 0) continue; dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) __get_free_pages(GFP_ATOMIC, bhash_order); diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 9f1f1ab9e2b..47b1616e618 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -45,11 +45,13 @@ static int dccp_write_timeout(struct sock *sk) if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) { if (icsk->icsk_retransmits != 0) dst_negative_advice(&sk->sk_dst_cache); - retry_until = icsk->icsk_syn_retries ? : /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */; + retry_until = icsk->icsk_syn_retries ? : + /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */; } else { - if (icsk->icsk_retransmits >= /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) { - /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black - hole detection. :-( + if (icsk->icsk_retransmits >= + /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) { + /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu + black hole detection. :-( It is place to make it. It is not made. I do not want to make it. It is disguisting. It does not work in any @@ -96,14 +98,17 @@ static void dccp_delack_timer(unsigned long data) /* Try again later. */ icsk->icsk_ack.blocked = 1; NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); - sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); + sk_reset_timer(sk, &icsk->icsk_delack_timer, + jiffies + TCP_DELACK_MIN); goto out; } - if (sk->sk_state == DCCP_CLOSED || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) + if (sk->sk_state == DCCP_CLOSED || + !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) goto out; if (time_after(icsk->icsk_ack.timeout, jiffies)) { - sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); + sk_reset_timer(sk, &icsk->icsk_delack_timer, + icsk->icsk_ack.timeout); goto out; } @@ -112,7 +117,8 @@ static void dccp_delack_timer(unsigned long data) if (inet_csk_ack_scheduled(sk)) { if (!icsk->icsk_ack.pingpong) { /* Delayed ACK missed: inflate ATO. */ - icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); + icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, + icsk->icsk_rto); } else { /* Delayed ACK missed: leave pingpong mode and * deflate ATO. @@ -167,7 +173,7 @@ static void dccp_retransmit_timer(struct sock *sk) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL), - TCP_RTO_MAX); + DCCP_RTO_MAX); goto out; } @@ -175,7 +181,8 @@ static void dccp_retransmit_timer(struct sock *sk) icsk->icsk_retransmits++; icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, + DCCP_RTO_MAX); if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */) __sk_dst_reset(sk); out:; @@ -190,7 +197,8 @@ static void dccp_write_timer(unsigned long data) bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later */ - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20)); + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, + jiffies + (HZ / 20)); goto out; } @@ -198,7 +206,8 @@ static void dccp_write_timer(unsigned long data) goto out; if (time_after(icsk->icsk_timeout, jiffies)) { - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, + icsk->icsk_timeout); goto out; } @@ -220,7 +229,8 @@ out: */ static void dccp_response_timer(struct sock *sk) { - inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT, DCCP_RTO_MAX); + inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT, + DCCP_RTO_MAX); } static void dccp_keepalive_timer(unsigned long data) -- cgit v1.2.3 From 531669a0a9041d60d13920973ef8aa4f743c14a0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 13 Aug 2005 20:35:17 -0300 Subject: [DCCP]: Rewrite dccp_sendmsg to be more like UDP Based on discussions with Nishida-san. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/proto.c | 214 +++++++++++++++---------------------------------------- 1 file changed, 59 insertions(+), 155 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 46dd489f66c..ed0bf58c8ae 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -214,197 +214,101 @@ out_discard: goto out_release; } -EXPORT_SYMBOL(dccp_sendmsg); - int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { const struct dccp_hdr *dh; - int copied = 0; - unsigned long used; - int err; - int target; /* Read at least this many bytes */ long timeo; lock_sock(sk); - err = -ENOTCONN; - if (sk->sk_state == DCCP_LISTEN) + if (sk->sk_state == DCCP_LISTEN) { + len = -ENOTCONN; goto out; - - timeo = sock_rcvtimeo(sk, nonblock); - - /* Urgent data needs to be handled specially. */ - if (flags & MSG_OOB) - goto recv_urg; - - /* FIXME */ -#if 0 - seq = &tp->copied_seq; - if (flags & MSG_PEEK) { - peek_seq = tp->copied_seq; - seq = &peek_seq; } -#endif - target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + timeo = sock_rcvtimeo(sk, nonblock); do { - struct sk_buff *skb; - u32 offset; + struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); - /* FIXME */ -#if 0 - /* - * Are we at urgent data? Stop if we have read anything or - * have SIGURG pending. - */ - if (tp->urg_data && tp->urg_seq == *seq) { - if (copied) - break; - if (signal_pending(current)) { - copied = timeo ? sock_intr_errno(timeo) : - -EAGAIN; - break; - } - } -#endif + if (skb == NULL) + goto verify_sock_status; - /* Next get a buffer. */ - - skb = skb_peek(&sk->sk_receive_queue); - do { - if (!skb) - break; + dh = dccp_hdr(skb); - offset = 0; - dh = dccp_hdr(skb); + if (dh->dccph_type == DCCP_PKT_DATA || + dh->dccph_type == DCCP_PKT_DATAACK) + goto found_ok_skb; - if (dh->dccph_type == DCCP_PKT_DATA || - dh->dccph_type == DCCP_PKT_DATAACK) - goto found_ok_skb; - - if (dh->dccph_type == DCCP_PKT_RESET || - dh->dccph_type == DCCP_PKT_CLOSE) { - dccp_pr_debug("found fin ok!\n"); - goto found_fin_ok; - } - dccp_pr_debug("packet_type=%s\n", - dccp_packet_name(dh->dccph_type)); - BUG_TRAP(flags & MSG_PEEK); - skb = skb->next; - } while (skb != (struct sk_buff *)&sk->sk_receive_queue); - - /* Well, if we have backlog, try to process it now yet. */ - if (copied >= target && !sk->sk_backlog.tail) + if (dh->dccph_type == DCCP_PKT_RESET || + dh->dccph_type == DCCP_PKT_CLOSE) { + dccp_pr_debug("found fin ok!\n"); + len = 0; + goto found_fin_ok; + } + dccp_pr_debug("packet_type=%s\n", + dccp_packet_name(dh->dccph_type)); + sk_eat_skb(sk, skb); +verify_sock_status: + if (sock_flag(sk, SOCK_DONE)) { + len = 0; break; + } - if (copied) { - if (sk->sk_err || - sk->sk_state == DCCP_CLOSED || - (sk->sk_shutdown & RCV_SHUTDOWN) || - !timeo || - signal_pending(current) || - (flags & MSG_PEEK)) - break; - } else { - if (sock_flag(sk, SOCK_DONE)) - break; - - if (sk->sk_err) { - copied = sock_error(sk); - break; - } - - if (sk->sk_shutdown & RCV_SHUTDOWN) - break; - - if (sk->sk_state == DCCP_CLOSED) { - if (!sock_flag(sk, SOCK_DONE)) { - /* This occurs when user tries to read - * from never connected socket. - */ - copied = -ENOTCONN; - break; - } - break; - } + if (sk->sk_err) { + len = sock_error(sk); + break; + } - if (!timeo) { - copied = -EAGAIN; - break; - } + if (sk->sk_shutdown & RCV_SHUTDOWN) { + len = 0; + break; + } - if (signal_pending(current)) { - copied = sock_intr_errno(timeo); + if (sk->sk_state == DCCP_CLOSED) { + if (!sock_flag(sk, SOCK_DONE)) { + /* This occurs when user tries to read + * from never connected socket. + */ + len = -ENOTCONN; break; } + len = 0; + break; } - /* FIXME: cleanup_rbuf(sk, copied); */ + if (!timeo) { + len = -EAGAIN; + break; + } - if (copied >= target) { - /* Do not sleep, just process backlog. */ - release_sock(sk); - lock_sock(sk); - } else - sk_wait_data(sk, &timeo); + if (signal_pending(current)) { + len = sock_intr_errno(timeo); + break; + } + sk_wait_data(sk, &timeo); continue; - found_ok_skb: - /* Ok so how much can we use? */ - used = skb->len - offset; - if (len < used) - used = len; - - if (!(flags & MSG_TRUNC)) { - err = skb_copy_datagram_iovec(skb, offset, - msg->msg_iov, used); - if (err) { - /* Exception. Bailout! */ - if (!copied) - copied = -EFAULT; - break; - } + if (len > skb->len) + len = skb->len; + else if (len < skb->len) + msg->msg_flags |= MSG_TRUNC; + + if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) { + /* Exception. Bailout! */ + len = -EFAULT; + break; } - - copied += used; - len -= used; - - /* FIXME: tcp_rcv_space_adjust(sk); */ - -//skip_copy: - if (used + offset < skb->len) - continue; - - if (!(flags & MSG_PEEK)) - sk_eat_skb(sk, skb); - continue; found_fin_ok: if (!(flags & MSG_PEEK)) sk_eat_skb(sk, skb); break; - - } while (len > 0); - - /* According to UNIX98, msg_name/msg_namelen are ignored - * on connected socket. I was just happy when found this 8) --ANK - */ - - /* Clean up data we have read: This will do ACK frames. */ - /* FIXME: cleanup_rbuf(sk, copied); */ - - release_sock(sk); - return copied; - + } while (1); out: release_sock(sk); - return err; - -recv_urg: - /* FIXME: err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); */ - goto out; + return len; } static int inet_dccp_listen(struct socket *sock, int backlog) -- cgit v1.2.3 From 725ba8eee3881e619c8e5a0116f1bdb6480ac2d9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 13 Aug 2005 20:35:39 -0300 Subject: [DCCP]: Introduce the DCCP Kernel hacking menu Only available if CONFIG_DEBUG_KERNEL is enabled in the "Kernel Hacking" Menu. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/Kconfig | 21 +++++++++++++++++++++ net/dccp/ccids/ccid3.c | 9 +++++++++ net/dccp/dccp.h | 7 +++---- net/dccp/options.c | 18 +++++++++--------- net/dccp/proto.c | 22 ++++++++++++++-------- 5 files changed, 56 insertions(+), 21 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 6760830c490..3023f702eb8 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -26,4 +26,25 @@ config INET_DCCP_DIAG source "net/dccp/ccids/Kconfig" +menu "DCCP Kernel Hacking" + depends on IP_DCCP=m && DEBUG_KERNEL=y + +config IP_DCCP_DEBUG + bool "DCCP debug messages" + ---help--- + Only use this if you're hacking DCCP. + + Just say N. + +config IP_DCCP_UNLOAD_HACK + depends on IP_DCCP_CCID3=m + bool "DCCP control sock unload hack" + ---help--- + Enable this to be able to unload the dccp module when the it + has only one refcount held, the control sock one. Just execute + "rmmod dccp_ccid3 dccp" + + Just say N. +endmenu + endmenu diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index edf9740d8d8..09274f32a33 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -2078,6 +2078,15 @@ module_init(ccid3_module_init); static __exit void ccid3_module_exit(void) { +#ifdef CONFIG_IP_DCCP_UNLOAD_HACK + /* + * Hack to use while developing, so that we get rid of the control + * sock, that is what keeps a refcount on dccp.ko -acme + */ + extern void dccp_ctl_sock_exit(void); + + dccp_ctl_sock_exit(); +#endif ccid_unregister(&ccid3); if (ccid3_tx_hist != NULL) { diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 62e735f1807..270f1943996 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -11,14 +11,13 @@ * published by the Free Software Foundation. */ +#include #include #include #include #include -#define DCCP_DEBUG - -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG extern int dccp_debug; #define dccp_pr_debug(format, a...) \ @@ -426,7 +425,7 @@ extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, u64 ackno); -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG extern void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len); extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap); diff --git a/net/dccp/options.c b/net/dccp/options.c index 68d6614edcf..fc363aaeeda 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -58,7 +58,7 @@ static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) int dccp_parse_options(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx opt: " : "server rx opt: "; #endif @@ -303,7 +303,7 @@ void dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb, u32 elapsed_time) { -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG struct dccp_sock *dp = dccp_sk(sk); const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : "server TX opt: "; @@ -341,7 +341,7 @@ EXPORT_SYMBOL(dccp_insert_option_elapsed_time); static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : "server TX opt: "; #endif @@ -425,7 +425,7 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : "server TX opt: "; #endif @@ -504,7 +504,7 @@ struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority) struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); if (ap != NULL) { -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG memset(ap->dccpap_buf, 0xFF, len); #endif ap->dccpap_buf_len = len; @@ -526,7 +526,7 @@ struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority) void dccp_ackpkts_free(struct dccp_ackpkts *ap) { if (ap != NULL) { -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len); #endif kfree(ap); @@ -680,7 +680,7 @@ out_duplicate: return -EILSEQ; } -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) { @@ -735,7 +735,7 @@ void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, return; if (ackno == ap->dccpap_ack_seqno) { -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG struct dccp_sock *dp = dccp_sk(sk); const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " : "server rx ack: "; @@ -794,7 +794,7 @@ static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, /* dccp_pr_debug_cat("yes\n"); */ if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) { -#ifdef DCCP_DEBUG +#ifdef CONFIG_IP_DCCP_DEBUG struct dccp_sock *dp = dccp_sk(sk); const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? diff --git a/net/dccp/proto.c b/net/dccp/proto.c index ed0bf58c8ae..be066924206 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -503,12 +503,16 @@ static int __init dccp_ctl_sock_init(void) return rc; } -static void __exit dccp_ctl_sock_exit(void) +#ifdef CONFIG_IP_DCCP_UNLOAD_HACK +void dccp_ctl_sock_exit(void) { if (dccp_ctl_socket != NULL) sock_release(dccp_ctl_socket); } +EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit); +#endif + static int __init init_dccp_v4_mibs(void) { int rc = -ENOMEM; @@ -655,19 +659,21 @@ static const char dccp_del_proto_err_msg[] __exitdata = static void __exit dccp_fini(void) { - dccp_ctl_sock_exit(); - inet_unregister_protosw(&dccp_v4_protosw); if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0) printk(dccp_del_proto_err_msg); - /* Free the control endpoint. */ - sock_release(dccp_ctl_socket); - - proto_unregister(&dccp_v4_prot); - + free_percpu(dccp_statistics[0]); + free_percpu(dccp_statistics[1]); + free_pages((unsigned long)dccp_hashinfo.bhash, + get_order(dccp_hashinfo.bhash_size * + sizeof(struct inet_bind_hashbucket))); + free_pages((unsigned long)dccp_hashinfo.ehash, + get_order(dccp_hashinfo.ehash_size * + sizeof(struct inet_ehash_bucket))); kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); + proto_unregister(&dccp_v4_prot); } module_init(dccp_init); -- cgit v1.2.3 From 8649b0d4166e6e80ffa298e75abd8f2afdd491a6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 13 Aug 2005 20:36:01 -0300 Subject: [DCCP]: Fix RESET handling in dccp_rcv_state_process To avoid holding TIMEWAIT state for sockets in the LISTEN state. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/input.c | 60 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 18 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/input.c b/net/dccp/input.c index 4b8638f153a..9dadfc36251 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -402,7 +402,48 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, const int old_state = sk->sk_state; int queued = 0; - if (sk->sk_state != DCCP_LISTEN && sk->sk_state != DCCP_REQUESTING) { + /* + * Step 3: Process LISTEN state + * (Continuing from dccp_v4_do_rcv and dccp_v6_do_rcv) + * + * If S.state == LISTEN, + * If P.type == Request or P contains a valid Init Cookie + * option, + * * Must scan the packet's options to check for an Init + * Cookie. Only the Init Cookie is processed here, + * however; other options are processed in Step 8. This + * scan need only be performed if the endpoint uses Init + * Cookies * + * * Generate a new socket and switch to that socket * + * Set S := new socket for this port pair + * S.state = RESPOND + * Choose S.ISS (initial seqno) or set from Init Cookie + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + * Continue with S.state == RESPOND + * * A Response packet will be generated in Step 11 * + * Otherwise, + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + * + * NOTE: the check for the packet types is done in + * dccp_rcv_state_process + */ + if (sk->sk_state == DCCP_LISTEN) { + if (dh->dccph_type == DCCP_PKT_REQUEST) { + if (dccp_v4_conn_request(sk, skb) < 0) + return 1; + + /* FIXME: do congestion control initialization */ + goto discard; + } + if (dh->dccph_type == DCCP_PKT_RESET) + goto discard; + + /* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/ + return 1; + } + + if (sk->sk_state != DCCP_REQUESTING) { if (dccp_check_seqno(sk, skb)) goto discard; @@ -484,23 +525,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, case DCCP_CLOSED: return 1; - case DCCP_LISTEN: - if (dh->dccph_type == DCCP_PKT_ACK || - dh->dccph_type == DCCP_PKT_DATAACK) - return 1; - - if (dh->dccph_type == DCCP_PKT_RESET) - goto discard; - - if (dh->dccph_type == DCCP_PKT_REQUEST) { - if (dccp_v4_conn_request(sk, skb) < 0) - return 1; - - /* FIXME: do congestion control initialization */ - goto discard; - } - goto discard; - case DCCP_REQUESTING: /* FIXME: do congestion control initialization */ -- cgit v1.2.3 From a1d3a35518779df0579dd9de0121354b49c68ddc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 13 Aug 2005 22:42:25 -0300 Subject: [DCCP]: Fix sparse warnings Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 13 ++++++++----- net/dccp/dccp.h | 13 +++++++++---- net/dccp/ipv4.c | 7 ++----- net/dccp/options.c | 3 ++- net/dccp/packet_history.h | 12 ++++++------ net/dccp/proto.c | 8 +++++--- 6 files changed, 32 insertions(+), 24 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 09274f32a33..21948d023c7 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -82,12 +82,13 @@ enum ccid3_options { static int ccid3_debug; -struct dccp_tx_hist *ccid3_tx_hist; -struct dccp_rx_hist *ccid3_rx_hist; +static struct dccp_tx_hist *ccid3_tx_hist; +static struct dccp_rx_hist *ccid3_rx_hist; static kmem_cache_t *ccid3_loss_interval_hist_slab; -static inline struct ccid3_loss_interval_hist_entry *ccid3_loss_interval_hist_entry_new(int prio) +static inline struct ccid3_loss_interval_hist_entry * + ccid3_loss_interval_hist_entry_new(const unsigned int __nocast prio) { return kmem_cache_alloc(ccid3_loss_interval_hist_slab, prio); } @@ -1593,7 +1594,9 @@ static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) * These are integers as per section 8 of RFC3448. We can then divide by 4 * * when we use it. */ -const int ccid3_hc_rx_w[TFRC_RECV_IVAL_F_LENGTH] = { 4, 4, 4, 4, 3, 2, 1, 1, }; +static const int ccid3_hc_rx_w[TFRC_RECV_IVAL_F_LENGTH] = { + 4, 4, 4, 4, 3, 2, 1, 1, +}; /* * args: fvalue - function value to match @@ -1601,7 +1604,7 @@ const int ccid3_hc_rx_w[TFRC_RECV_IVAL_F_LENGTH] = { 4, 4, 4, 4, 3, 2, 1, 1, }; * * both fvalue and p are multiplied by 1,000,000 to use ints */ -u32 calcx_reverse_lookup(u32 fvalue) { +static u32 calcx_reverse_lookup(u32 fvalue) { int ctr = 0; int small; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 270f1943996..148e8a65a10 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -231,19 +231,22 @@ extern void dccp_close(struct sock *sk, long timeout); extern struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, struct request_sock *req); +extern struct sk_buff *dccp_make_reset(struct sock *sk, + struct dst_entry *dst, + enum dccp_reset_codes code); extern int dccp_connect(struct sock *sk); extern int dccp_disconnect(struct sock *sk, int flags); extern int dccp_getsockopt(struct sock *sk, int level, int optname, - char *optval, int *optlen); + char __user *optval, int __user *optlen); +extern int dccp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen); extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size); extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); -extern int dccp_setsockopt(struct sock *sk, int level, int optname, - char *optval, int optlen); extern void dccp_shutdown(struct sock *sk, int how); extern int dccp_v4_checksum(const struct sk_buff *skb, @@ -419,7 +422,9 @@ struct dccp_ackpkts { u8 dccpap_buf[0]; }; -extern struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority); +extern struct dccp_ackpkts * + dccp_ackpkts_alloc(unsigned int len, + const unsigned int __nocast priority); extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 42d9c878d4c..bc3cfc0533c 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -617,9 +617,6 @@ out: sock_put(sk); } -extern struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, - enum dccp_reset_codes code); - int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) { struct sk_buff *skb; @@ -881,7 +878,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, return &rt->u.dst; } -void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) +static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) { int err; struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; @@ -1268,7 +1265,7 @@ static int dccp_v4_init_sock(struct sock *sk) return 0; } -int dccp_v4_destroy_sock(struct sock *sk) +static int dccp_v4_destroy_sock(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); diff --git a/net/dccp/options.c b/net/dccp/options.c index fc363aaeeda..d87d6be7ab1 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -499,7 +499,8 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) } } -struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority) +struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len, + const unsigned int __nocast priority) { struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); diff --git a/net/dccp/packet_history.h b/net/dccp/packet_history.h index 489fff45ccd..2e5ba343e3d 100644 --- a/net/dccp/packet_history.h +++ b/net/dccp/packet_history.h @@ -79,8 +79,8 @@ extern struct dccp_rx_hist_entry * dccp_rx_hist_find_data_packet(const struct list_head *list); static inline struct dccp_tx_hist_entry * - dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, - const int prio) + dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, + const unsigned int __nocast prio) { struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab, prio); @@ -127,10 +127,10 @@ static inline struct dccp_tx_hist_entry * } static inline struct dccp_rx_hist_entry * - dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, - const u32 ndp, - const struct sk_buff *skb, - const int prio) + dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, + const u32 ndp, + const struct sk_buff *skb, + const unsigned int __nocast prio) { struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab, prio); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index be066924206..0b715ceb38b 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -147,7 +147,7 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) } int dccp_setsockopt(struct sock *sk, int level, int optname, - char *optval, int optlen) + char __user *optval, int optlen) { dccp_pr_debug("entry\n"); @@ -158,7 +158,7 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, } int dccp_getsockopt(struct sock *sk, int level, int optname, - char *optval, int *optlen) + char __user *optval, int __user *optlen) { dccp_pr_debug("entry\n"); @@ -439,7 +439,7 @@ void dccp_shutdown(struct sock *sk, int how) dccp_pr_debug("entry\n"); } -struct proto_ops inet_dccp_ops = { +static struct proto_ops inet_dccp_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, @@ -539,9 +539,11 @@ static int thash_entries; module_param(thash_entries, int, 0444); MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); +#ifdef CONFIG_IP_DCCP_DEBUG int dccp_debug; module_param(dccp_debug, int, 0444); MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); +#endif static int __init dccp_init(void) { -- cgit v1.2.3 From a10cedd4b905236603c6c4fd77cf338ebbfb1a60 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 21:05:53 -0300 Subject: [DCCP]: Fix compiler warnings may be a false warning if there always is something on ccid3hcrx_hist: net/dccp/ccids/ccid3.c: In function 'ccid3_hc_rx_packet_recv': net/dccp/ccids/ccid3.c:1634: warning: 'tstamp.tv_usec' may be used uninitialized in this function net/dccp/ccids/ccid3.c:1634: warning: 'tstamp.tv_sec' may be used uninitialized in this function const on inline functions doesn't have any effect: net/dccp/dccp.h:64: warning: type qualifiers ignored on function return type net/dccp/dccp.h:70: warning: type qualifiers ignored on function return type net/dccp/dccp.h:76: warning: type qualifiers ignored on function return type Signed-off-by: Patrick McHardy Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 2 +- net/dccp/dccp.h | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 21948d023c7..2dd3e94ba8f 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -1634,7 +1634,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; struct dccp_rx_hist_entry *entry, *next, *tail = NULL; u32 rtt, delta, x_recv, fval, p, tmp2; - struct timeval tstamp, tmp_tv; + struct timeval tstamp = { 0 }, tmp_tv; int interval = 0; int win_count = 0; int step = 0; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 148e8a65a10..fff794c8dff 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -60,20 +60,19 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); extern struct proto dccp_v4_prot; /* is seq1 < seq2 ? */ -static inline const int before48(const u64 seq1, const u64 seq2) +static inline int before48(const u64 seq1, const u64 seq2) { - return (const s64)((seq1 << 16) - (seq2 << 16)) < 0; + return (s64)((seq1 << 16) - (seq2 << 16)) < 0; } /* is seq1 > seq2 ? */ -static inline const int after48(const u64 seq1, const u64 seq2) +static inline int after48(const u64 seq1, const u64 seq2) { - return (const s64)((seq2 << 16) - (seq1 << 16)) < 0; + return (s64)((seq2 << 16) - (seq1 << 16)) < 0; } /* is seq2 <= seq1 <= seq3 ? */ -static inline const int between48(const u64 seq1, const u64 seq2, - const u64 seq3) +static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3) { return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16); } -- cgit v1.2.3 From e92ae93a8aa66aea12935420cb22d4df1c18d023 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 17 Aug 2005 03:10:59 -0300 Subject: [DCCP]: Send SYNCACK packets in response to SYNC packets Also fix step 6 when receiving SYNC or SYNCACK packets, i.e. we were not using the updated swl. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/dccp.h | 3 ++- net/dccp/input.c | 33 +++++++++++++++++++++++++-------- net/dccp/ipv4.c | 18 ++++++++++-------- net/dccp/output.c | 5 +++-- 4 files changed, 40 insertions(+), 19 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index fff794c8dff..4efdce47000 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -122,7 +122,8 @@ extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); extern int dccp_send_response(struct sock *sk); extern void dccp_send_ack(struct sock *sk); extern void dccp_send_delayed_ack(struct sock *sk); -extern void dccp_send_sync(struct sock *sk, u64 seq); +extern void dccp_send_sync(struct sock *sk, const u64 seq, + const enum dccp_pkt_type pkt_type); extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, const int len); diff --git a/net/dccp/input.c b/net/dccp/input.c index 9dadfc36251..68b6e72551e 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -50,7 +50,7 @@ static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) * Drop packet and return */ if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) { - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); return; } @@ -76,8 +76,7 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); struct dccp_sock *dp = dccp_sk(sk); - u64 lswl = dp->dccps_swl; - u64 lawl = dp->dccps_awl; + u64 lswl, lawl; /* * Step 5: Prepare sequence numbers for Sync @@ -99,6 +98,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); else return -1; + } + /* * Step 6: Check sequence numbers * Let LSWL = S.SWL and LAWL = S.AWL @@ -113,7 +114,10 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) * Send Sync packet acknowledging P.seqno * Drop packet and return */ - } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ || + lswl = dp->dccps_swl; + lawl = dp->dccps_awl; + + if (dh->dccph_type == DCCP_PKT_CLOSEREQ || dh->dccph_type == DCCP_PKT_CLOSE || dh->dccph_type == DCCP_PKT_RESET) { lswl = dp->dccps_gsr; @@ -132,8 +136,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) DCCP_PKT_WITHOUT_ACK_SEQ)) dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; } else { - dccp_pr_debug("Step 6 failed, sending SYNC...\n"); - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + LIMIT_NETDEBUG("Step 6 failed, sending SYNC...\n"); + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); return -1; } @@ -242,9 +246,21 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, check_seq: if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) { send_sync: - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_PKT_SYNC); } break; + case DCCP_PKT_SYNC: + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_PKT_SYNCACK); + /* + * From the draft: + * + * As with DCCP-Ack packets, DCCP-Sync and DCCP-SyncAck packets + * MAY have non-zero-length application data areas, whose + * contents * receivers MUST ignore. + */ + goto discard; } DCCP_INC_STATS_BH(DCCP_MIB_INERRS); @@ -517,7 +533,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, dh->dccph_type == DCCP_PKT_REQUEST) || (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_PKT_SYNC); goto discard; } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index bc3cfc0533c..335e00e9631 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -376,7 +376,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, * probing, since DCCP-Sync probes do not risk application * data loss. */ - dccp_send_sync(sk, dp->dccps_gsr); + dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); } /* else let the usual retransmit timer handle it */ } @@ -1008,7 +1008,7 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) return 1; if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { - dccp_pr_debug("pskb_may_pull failed\n"); + printk(KERN_WARNING "DCCP: pskb_may_pull failed\n"); return 1; } @@ -1016,7 +1016,7 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) /* If the packet type is not understood, drop packet and return */ if (dh->dccph_type >= DCCP_PKT_INVALID) { - dccp_pr_debug("invalid packet type\n"); + printk(KERN_WARNING "DCCP: invalid packet type\n"); return 1; } @@ -1025,12 +1025,13 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) * packet, drop packet and return */ if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { - dccp_pr_debug("Offset(%u) too small 1\n", dh->dccph_doff); + printk(KERN_WARNING "DCCP: Offset(%u) too small 1\n", + dh->dccph_doff); return 1; } if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { - dccp_pr_debug("P.Data Offset(%u) too small 2\n", + printk(KERN_WARNING "DCCP: P.Data Offset(%u) too small 2\n", dh->dccph_doff); return 1; } @@ -1045,15 +1046,16 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) dh->dccph_type != DCCP_PKT_DATA && dh->dccph_type != DCCP_PKT_ACK && dh->dccph_type != DCCP_PKT_DATAACK) { - dccp_pr_debug("P.type (%s) not Data, Ack nor DataAck and " - "P.X == 0\n", dccp_packet_name(dh->dccph_type)); + printk(KERN_WARNING "DCCP: P.type (%s) not Data, Ack nor " + "DataAck and P.X == 0\n", + dccp_packet_name(dh->dccph_type)); return 1; } /* If the header checksum is incorrect, drop packet and return */ if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr, skb->nh.iph->daddr) < 0) { - dccp_pr_debug("header checksum is incorrect\n"); + printk(KERN_WARNING "DCCP: header checksum is incorrect\n"); return 1; } diff --git a/net/dccp/output.c b/net/dccp/output.c index dcc061bed92..384fd092098 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -404,7 +404,8 @@ void dccp_send_delayed_ack(struct sock *sk) sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); } -void dccp_send_sync(struct sock *sk, u64 seq) +void dccp_send_sync(struct sock *sk, const u64 seq, + const enum dccp_pkt_type pkt_type) { /* * We are not putting this on the write queue, so @@ -420,7 +421,7 @@ void dccp_send_sync(struct sock *sk, u64 seq) /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, MAX_DCCP_HEADER); skb->csum = 0; - DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_SYNC; + DCCP_SKB_CB(skb)->dccpd_type = pkt_type; DCCP_SKB_CB(skb)->dccpd_seq = seq; skb_set_owner_w(skb, sk); -- cgit v1.2.3 From b1c9fe7b818acbd36dc908c5c1ad4cab34c67b39 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Thu, 18 Aug 2005 20:45:29 -0300 Subject: [DCCP]: Fix elapsed time option as per section 13.2 of spec v11 The elapsed time can be two bytes or four bytes only. Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/options.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/options.c b/net/dccp/options.c index d87d6be7ab1..85a86bd61f4 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -293,10 +293,7 @@ static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb) static inline int dccp_elapsed_time_len(const u32 elapsed_time) { - return elapsed_time == 0 ? 0 : - elapsed_time <= 0xFF ? 1 : - elapsed_time <= 0xFFFF ? 2 : - elapsed_time <= 0xFFFFFF ? 3 : 4; + return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4; } void dccp_insert_option_elapsed_time(struct sock *sk, -- cgit v1.2.3 From 5480855bfbc125f34d9b752689bb9a64da7e1fc6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 18 Aug 2005 20:47:02 -0300 Subject: [DCCP]: Set dccp_ctl_socket to NULL in dccp_ctl_sock_exit Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/proto.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/dccp') diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 0b715ceb38b..8b613c3017c 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -506,8 +506,10 @@ static int __init dccp_ctl_sock_init(void) #ifdef CONFIG_IP_DCCP_UNLOAD_HACK void dccp_ctl_sock_exit(void) { - if (dccp_ctl_socket != NULL) + if (dccp_ctl_socket != NULL) { sock_release(dccp_ctl_socket); + dccp_ctl_socket = NULL; + } } EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit); -- cgit v1.2.3 From c59eab4637dbc3f832503be4ccb9213b0f323d92 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 18 Aug 2005 21:12:02 -0300 Subject: [DCCP]: Use LIMIT_NETDEBUG in some debugging printks Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/input.c | 11 ++++++----- net/dccp/ipv4.c | 23 +++++++++++++---------- 2 files changed, 19 insertions(+), 15 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/input.c b/net/dccp/input.c index 68b6e72551e..3c4cbff82e9 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -118,8 +118,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) lawl = dp->dccps_awl; if (dh->dccph_type == DCCP_PKT_CLOSEREQ || - dh->dccph_type == DCCP_PKT_CLOSE || - dh->dccph_type == DCCP_PKT_RESET) { + dh->dccph_type == DCCP_PKT_CLOSE || + dh->dccph_type == DCCP_PKT_RESET) { lswl = dp->dccps_gsr; dccp_inc_seqno(&lswl); lawl = dp->dccps_gar; @@ -136,7 +136,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) DCCP_PKT_WITHOUT_ACK_SEQ)) dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; } else { - LIMIT_NETDEBUG("Step 6 failed, sending SYNC...\n"); + LIMIT_NETDEBUG(KERN_WARNING "DCCP: Step 6 failed, " + "sending SYNC...\n"); dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); return -1; } @@ -168,8 +169,8 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKPKTS_STATE_RECEIVED)) { - LIMIT_NETDEBUG(KERN_INFO "DCCP: acknowledgeable " - "packets buffer full!\n"); + LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable " + "packets buffer full!\n"); ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; inet_csk_schedule_ack(sk); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 335e00e9631..cc5d60d9afa 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1008,7 +1008,7 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) return 1; if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { - printk(KERN_WARNING "DCCP: pskb_may_pull failed\n"); + LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n"); return 1; } @@ -1016,7 +1016,7 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) /* If the packet type is not understood, drop packet and return */ if (dh->dccph_type >= DCCP_PKT_INVALID) { - printk(KERN_WARNING "DCCP: invalid packet type\n"); + LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n"); return 1; } @@ -1025,14 +1025,16 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) * packet, drop packet and return */ if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { - printk(KERN_WARNING "DCCP: Offset(%u) too small 1\n", - dh->dccph_doff); + LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " + "too small 1\n", + dh->dccph_doff); return 1; } if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { - printk(KERN_WARNING "DCCP: P.Data Offset(%u) too small 2\n", - dh->dccph_doff); + LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " + "too small 2\n", + dh->dccph_doff); return 1; } @@ -1046,16 +1048,17 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) dh->dccph_type != DCCP_PKT_DATA && dh->dccph_type != DCCP_PKT_ACK && dh->dccph_type != DCCP_PKT_DATAACK) { - printk(KERN_WARNING "DCCP: P.type (%s) not Data, Ack nor " - "DataAck and P.X == 0\n", - dccp_packet_name(dh->dccph_type)); + LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data, Ack " + "nor DataAck and P.X == 0\n", + dccp_packet_name(dh->dccph_type)); return 1; } /* If the header checksum is incorrect, drop packet and return */ if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr, skb->nh.iph->daddr) < 0) { - printk(KERN_WARNING "DCCP: header checksum is incorrect\n"); + LIMIT_NETDEBUG(KERN_WARNING "DCCP: header checksum is " + "incorrect\n"); return 1; } -- cgit v1.2.3 From 1bc0986957b63a2fbbc46ab95d3d1d72830bda83 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Sat, 20 Aug 2005 00:23:43 -0300 Subject: [DCCP]: Fix the timestamp options This changes timestamp, timestamp echo, and elapsed time to use units of 10 usecs as per DCCP spec. This has been tested to verify that times are correct. Also fixed up length and used hton/ntoh more. Still to add in later patches: - actually use elapsed time to adjust RTT (commented out as was prior to this patch) - send options at times more closely following the spec (content is now correct) Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 12 ++----- net/dccp/dccp.h | 19 +++++++++-- net/dccp/options.c | 87 ++++++++++++++++++++++++++++++++++---------------- 3 files changed, 78 insertions(+), 40 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 2dd3e94ba8f..694149061b8 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -2,12 +2,12 @@ * net/dccp/ccids/ccid3.c * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005 Ian McDonald * * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND * research group. For further information please see http://www.wand.net.nz/ - * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: @@ -174,14 +174,6 @@ static inline void timeval_fix(struct timeval *tv) } } -/* returns the difference in usecs between timeval passed in and current time */ -static inline u32 now_delta(struct timeval tv) { - struct timeval now; - - do_gettimeofday(&now); - return ((now.tv_sec-tv.tv_sec)*1000000+now.tv_usec-tv.tv_usec); -} - #define CALCX_ARRSIZE 500 #define CALCX_SPLIT 50000 @@ -1110,7 +1102,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) struct ccid3_options_received *opt_recv; struct dccp_tx_hist_entry *packet; unsigned long next_tmout; - u16 t_elapsed; + u32 t_elapsed; u32 pinv; u32 x_recv; u32 r_sample; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 4efdce47000..aab72b8d070 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -4,7 +4,8 @@ * net/dccp/dccp.h * * An implementation of the DCCP protocol - * Arnaldo Carvalho de Melo + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * Copyright (c) 2005 Ian McDonald * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as @@ -404,6 +405,7 @@ extern struct socket *dccp_ctl_socket; * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. * * @dccpap_buf_len - circular buffer length + * @dccpap_time - the time in usecs * @dccpap_buf - circular buffer of acknowledgeable packets */ struct dccp_ackpkts { @@ -416,7 +418,7 @@ struct dccp_ackpkts { unsigned int dccpap_buf_vector_len; unsigned int dccpap_ack_vector_len; unsigned int dccpap_buf_len; - unsigned long dccpap_time; + struct timeval dccpap_time; u8 dccpap_buf_nonce; u8 dccpap_ack_nonce; u8 dccpap_buf[0]; @@ -430,6 +432,19 @@ extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, u64 ackno); +/* + * Returns the difference in usecs between timeval + * passed in and current time + */ +static inline u32 now_delta(struct timeval tv) +{ + struct timeval now; + + do_gettimeofday(&now); + return (now.tv_sec - tv.tv_sec) * USEC_PER_SEC + + (now.tv_usec - tv.tv_usec); +} + #ifdef CONFIG_IP_DCCP_DEBUG extern void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len); diff --git a/net/dccp/options.c b/net/dccp/options.c index 85a86bd61f4..7ecffdf8575 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -2,8 +2,9 @@ * net/dccp/options.c * * An implementation of the DCCP protocol - * Aristeu Sergio Rozanski Filho - * Arnaldo Carvalho de Melo + * Copyright (c) 2005 Aristeu Sergio Rozanski Filho + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * Copyright (c) 2005 Ian McDonald * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -138,7 +139,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_timestamp = ntohl(*(u32 *)value); dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; - dp->dccps_timestamp_time = jiffies; + do_gettimeofday(&dp->dccps_timestamp_time); dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", debug_prefix, opt_recv->dccpor_timestamp, @@ -146,36 +147,45 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) DCCP_SKB_CB(skb)->dccpd_ack_seq); break; case DCCPO_TIMESTAMP_ECHO: - if (len < 4 || len > 8) + if (len != 4 && len != 6 && len != 8) goto out_invalid_option; opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value); - dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, " - "diff=%u\n", + dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, ", debug_prefix, opt_recv->dccpor_timestamp_echo, len + 2, (unsigned long long) - DCCP_SKB_CB(skb)->dccpd_ack_seq, - (tcp_time_stamp - - opt_recv->dccpor_timestamp_echo)); - - opt_recv->dccpor_elapsed_time = - dccp_decode_value_var(value + 4, - len - 4); - dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", + DCCP_SKB_CB(skb)->dccpd_ack_seq); + + if (len > 4) { + if (len == 6) + opt_recv->dccpor_elapsed_time = + ntohs(*(u16 *)(value + 4)); + else + opt_recv->dccpor_elapsed_time = + ntohl(*(u32 *)(value + 4)); + + dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", debug_prefix, opt_recv->dccpor_elapsed_time); + } break; case DCCPO_ELAPSED_TIME: - if (len > 4) + if (len != 2 && len != 4) goto out_invalid_option; if (pkt_type == DCCP_PKT_DATA) continue; - opt_recv->dccpor_elapsed_time = - dccp_decode_value_var(value, len); + + if (len == 2) + opt_recv->dccpor_elapsed_time = + ntohs(*(u16 *)value); + else + opt_recv->dccpor_elapsed_time = + ntohl(*(u32 *)value); + dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, opt_recv->dccpor_elapsed_time); break; @@ -309,8 +319,7 @@ void dccp_insert_option_elapsed_time(struct sock *sk, const int len = 2 + elapsed_time_len; unsigned char *to; - /* If elapsed_time == 0... */ - if (elapsed_time_len == 2) + if (elapsed_time_len == 0) return; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { @@ -325,7 +334,13 @@ void dccp_insert_option_elapsed_time(struct sock *sk, *to++ = DCCPO_ELAPSED_TIME; *to++ = len; - dccp_encode_value_var(elapsed_time, to, elapsed_time_len); + if (elapsed_time_len == 2) { + const u16 var16 = htons((u16)elapsed_time); + memcpy(to, &var16, 2); + } else { + const u32 var32 = htonl(elapsed_time); + memcpy(to, &var32, 4); + } dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n", debug_prefix, elapsed_time, @@ -344,7 +359,7 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) #endif struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; int len = ap->dccpap_buf_vector_len + 2; - const u32 elapsed_time = jiffies_to_usecs(jiffies - ap->dccpap_time) / 10; + const u32 elapsed_time = now_delta(ap->dccpap_time) / 10; unsigned char *to, *from; if (elapsed_time != 0) @@ -414,7 +429,15 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) static inline void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) { - const u32 now = htonl(tcp_time_stamp); + struct timeval tv; + u32 now; + + do_gettimeofday(&tv); + now = (tv.tv_sec * USEC_PER_SEC + tv.tv_usec) / 10; + /* yes this will overflow but that is the point as we want a + * 10 usec 32 bit timer which mean it wraps every 11.9 hours */ + + now = htonl(now); dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); } @@ -427,8 +450,7 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, "CLIENT TX opt: " : "server TX opt: "; #endif u32 tstamp_echo; - const u32 elapsed_time = jiffies_to_usecs(jiffies - - dp->dccps_timestamp_time) / 10; + const u32 elapsed_time = now_delta(dp->dccps_timestamp_time) / 10; const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); const int len = 6 + elapsed_time_len; unsigned char *to; @@ -448,7 +470,14 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, tstamp_echo = htonl(dp->dccps_timestamp_echo); memcpy(to, &tstamp_echo, 4); to += 4; - dccp_encode_value_var(elapsed_time, to, elapsed_time_len); + + if (elapsed_time_len == 2) { + const u16 var16 = htons((u16)elapsed_time); + memcpy(to, &var16, 2); + } else if (elapsed_time_len == 4) { + const u32 var32 = htonl(elapsed_time); + memcpy(to, &var32, 4); + } dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n", debug_prefix, dp->dccps_timestamp_echo, @@ -456,7 +485,8 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); dp->dccps_timestamp_echo = 0; - dp->dccps_timestamp_time = 0; + dp->dccps_timestamp_time.tv_sec = 0; + dp->dccps_timestamp_time.tv_usec = 0; } void dccp_insert_options(struct sock *sk, struct sk_buff *skb) @@ -514,7 +544,8 @@ struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len, ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; ap->dccpap_ack_ptr = 0; - ap->dccpap_time = 0; + ap->dccpap_time.tv_sec = 0; + ap->dccpap_time.tv_usec = 0; ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; } @@ -665,7 +696,7 @@ int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) } ap->dccpap_buf_ackno = ackno; - ap->dccpap_time = jiffies; + do_gettimeofday(&ap->dccpap_time); out: dccp_pr_debug(""); dccp_ackpkts_print(ap); -- cgit v1.2.3 From c68e64cfb5ac675b002215b5659146b73d2e9d5d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 21 Aug 2005 05:07:37 -0300 Subject: [CCID3]: Reintroduce ccid3hctx_t_rto CCID3 keeps this variable in usecs, inet_connection_socks in jiffies, so to avoid Mars orbiter losses lets reintroduce ccid3hctx_t_rto 8) Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 10 +++++----- net/dccp/ccids/ccid3.h | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 694149061b8..ffd5b449f5f 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -905,7 +905,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) hctx->ccid3hctx_x = 10; } /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ - next_tmout = max_t(u32, inet_csk(sk)->icsk_rto, + next_tmout = max_t(u32, hctx->ccid3hctx_t_rto, 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10)); break; default: @@ -1180,8 +1180,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) r_sample); /* Update timeout interval */ - inet_csk(sk)->icsk_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, - USEC_PER_SEC); + hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, + USEC_PER_SEC); /* Update receive rate */ hctx->ccid3hctx_x_recv = x_recv; /* x_recv in bytes per second */ @@ -1227,7 +1227,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* to prevent divide by zero below */ /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ - next_tmout = max(inet_csk(sk)->icsk_rto, + next_tmout = max(hctx->ccid3hctx_t_rto, (2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10))); /* maths with 100000 and 10 is to prevent overflow with 32 bit */ @@ -1340,7 +1340,7 @@ static int ccid3_hc_tx_init(struct sock *sk) hctx->ccid3hctx_x = hctx->ccid3hctx_s; /* set transmission rate to 1 packet per second */ hctx->ccid3hctx_rtt = 4; /* See ccid3_hc_tx_packet_sent win_count calculatation */ - inet_csk(sk)->icsk_rto = USEC_PER_SEC; + hctx->ccid3hctx_t_rto = USEC_PER_SEC; hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; INIT_LIST_HEAD(&hctx->ccid3hctx_hist); init_timer(&hctx->ccid3hctx_no_feedback_timer); diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index d2705fb7419..5ef72cda7cd 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -80,6 +80,7 @@ struct ccid3_hc_tx_sock { struct timer_list ccid3hctx_no_feedback_timer; struct timeval ccid3hctx_t_ld; struct timeval ccid3hctx_t_nom; + u32 ccid3hctx_t_rto; u32 ccid3hctx_t_ipi; u32 ccid3hctx_delta; struct list_head ccid3hctx_hist; -- cgit v1.2.3 From 2807d4ffb0dccb8f932c3e1701b6b6163153d333 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 21 Aug 2005 05:33:48 -0300 Subject: [DCCP]: Fix seqno setting in dccp_v4_ctl_send_reset Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index cc5d60d9afa..02ebf1f39f3 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -887,6 +887,7 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) sizeof(struct dccp_hdr_reset); struct sk_buff *skb; struct dst_entry *dst; + u64 seqno; /* Never send a reset in response to a reset. */ if (rxdh->dccph_type == DCCP_PKT_RESET) @@ -920,7 +921,12 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) dccp_hdr_reset(skb)->dccph_reset_code = DCCP_SKB_CB(rxskb)->dccpd_reset_code; - dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); + /* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */ + seqno = 0; + if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); + + dccp_hdr_set_seq(dh, seqno); dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); -- cgit v1.2.3 From a3054d48b9b9d6290eccc9fc09c286ef450d9b1d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 21 Aug 2005 05:35:18 -0300 Subject: [DCCP]: Give more info on Step 6 failure debug printk Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/input.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/input.c b/net/dccp/input.c index 3c4cbff82e9..ce8396b126d 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -136,8 +136,15 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) DCCP_PKT_WITHOUT_ACK_SEQ)) dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; } else { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: Step 6 failed, " - "sending SYNC...\n"); + LIMIT_NETDEBUG(KERN_WARNING "DCCP: Step 6 failed for %s packet, " + "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " + "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " + "sending SYNC...\n", + dccp_packet_name(dh->dccph_type), + lswl, DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swh, + (DCCP_SKB_CB(skb)->dccpd_ack_seq == + DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists", + lawl, DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awh); dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); return -1; } -- cgit v1.2.3 From 03ace394ac9bcad38043a381ae5f4860b9c9fa1c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 21 Aug 2005 05:36:45 -0300 Subject: [DCCP]: Fix the ACK and SEQ window variables settings This is from a first audit, more eyeballs are more than welcome. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/dccp.h | 12 ++++-------- net/dccp/input.c | 14 +++++++++++++- net/dccp/ipv4.c | 10 ++++++++++ net/dccp/minisocks.c | 13 +++++++++++++ net/dccp/timer.c | 2 +- 5 files changed, 41 insertions(+), 10 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index aab72b8d070..33968a9422f 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -340,13 +340,11 @@ static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, static inline void dccp_update_gsr(struct sock *sk, u64 seq) { struct dccp_sock *dp = dccp_sk(sk); - u64 tmp_gsr; - dccp_set_seqno(&tmp_gsr, + dp->dccps_gsr = seq; + dccp_set_seqno(&dp->dccps_swl, (dp->dccps_gsr + 1 - (dp->dccps_options.dccpo_sequence_window / 4))); - dp->dccps_gsr = seq; - dccp_set_seqno(&dp->dccps_swl, max48(tmp_gsr, dp->dccps_isr)); dccp_set_seqno(&dp->dccps_swh, (dp->dccps_gsr + (3 * dp->dccps_options.dccpo_sequence_window) / 4)); @@ -355,13 +353,11 @@ static inline void dccp_update_gsr(struct sock *sk, u64 seq) static inline void dccp_update_gss(struct sock *sk, u64 seq) { struct dccp_sock *dp = dccp_sk(sk); - u64 tmp_gss; - dccp_set_seqno(&tmp_gss, + dp->dccps_awh = dp->dccps_gss = seq; + dccp_set_seqno(&dp->dccps_awl, (dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1)); - dp->dccps_awl = max48(tmp_gss, dp->dccps_iss); - dp->dccps_awh = dp->dccps_gss = seq; } extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); diff --git a/net/dccp/input.c b/net/dccp/input.c index ce8396b126d..5847cf454e2 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -314,7 +314,19 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, } dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; - dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + dccp_update_gsr(sk, dp->dccps_isr); + /* + * SWL and AWL are initially adjusted so that they are not less than + * the initial Sequence Numbers received and sent, respectively: + * SWL := max(GSR + 1 - floor(W/4), ISR), + * AWL := max(GSS - W' + 1, ISS). + * These adjustments MUST be applied only at the beginning of the + * connection. + * + * AWL was adjusted in dccp_v4_connect -acme + */ + dccp_set_seqno(&dp->dccps_swl, + max48(dp->dccps_swl, dp->dccps_isr)); if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 || ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) { diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 02ebf1f39f3..647e669a126 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -309,6 +309,16 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, usin->sin_port); dccp_update_gss(sk, dp->dccps_iss); + /* + * SWL and AWL are initially adjusted so that they are not less than + * the initial Sequence Numbers received and sent, respectively: + * SWL := max(GSR + 1 - floor(W/4), ISR), + * AWL := max(GSS - W' + 1, ISS). + * These adjustments MUST be applied only at the beginning of the + * connection. + */ + dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); + inet->id = dp->dccps_iss ^ jiffies; err = dccp_connect(sk); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index b8e67207e97..ce5dff4ac22 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -146,6 +146,19 @@ out_free: newdp->dccps_iss = dreq->dreq_iss; dccp_update_gss(newsk, dreq->dreq_iss); + /* + * SWL and AWL are initially adjusted so that they are not less than + * the initial Sequence Numbers received and sent, respectively: + * SWL := max(GSR + 1 - floor(W/4), ISR), + * AWL := max(GSS - W' + 1, ISS). + * These adjustments MUST be applied only at the beginning of the + * connection. + */ + dccp_set_seqno(&newdp->dccps_swl, + max48(newdp->dccps_swl, newdp->dccps_isr)); + dccp_set_seqno(&newdp->dccps_awl, + max48(newdp->dccps_awl, newdp->dccps_iss)); + dccp_init_xmit_timers(newsk); DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS); diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 47b1616e618..aa34b576e22 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -144,7 +144,7 @@ static void dccp_retransmit_timer(struct sock *sk) /* * sk->sk_send_head has to have one skb with * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP - * packet types (REQUEST, RESPONSE, the ACK in the 3way hanshake + * packet types (REQUEST, RESPONSE, the ACK in the 3way handshake * (PARTOPEN timer), etc). */ BUG_TRAP(sk->sk_send_head != NULL); -- cgit v1.2.3 From 24117727b753426d85ba09671c24854834f81b2c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 21 Aug 2005 05:40:16 -0300 Subject: [DCCP]: Fix ackno setting in SYNC/SYNCACK packets Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/output.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/output.c b/net/dccp/output.c index 384fd092098..708fc3c0a96 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -44,15 +44,8 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) int err, set_ack = 1; u64 ackno = dp->dccps_gsr; - /* - * FIXME: study DCCP_PKT_SYNC[ACK] to see what is the right - * thing to do here... - */ dccp_inc_seqno(&dp->dccps_gss); - dcb->dccpd_seq = dp->dccps_gss; - dccp_insert_options(sk, skb); - switch (dcb->dccpd_type) { case DCCP_PKT_DATA: set_ack = 0; @@ -62,6 +55,9 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) ackno = dcb->dccpd_seq; break; } + + dcb->dccpd_seq = dp->dccps_gss; + dccp_insert_options(sk, skb); skb->h.raw = skb_push(skb, dccp_header_size); dh = dccp_hdr(skb); -- cgit v1.2.3 From 58e45131dc269eff0983c6d44494f9e687686900 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 21 Aug 2005 23:46:01 -0700 Subject: [DCCP]: Fix printf format warnings on 64-bit. Signed-off-by: David S. Miller --- net/dccp/input.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/input.c b/net/dccp/input.c index 5847cf454e2..85402532e4e 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -141,10 +141,16 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " "sending SYNC...\n", dccp_packet_name(dh->dccph_type), - lswl, DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swh, + (unsigned long long) lswl, + (unsigned long long) + DCCP_SKB_CB(skb)->dccpd_seq, + (unsigned long long) dp->dccps_swh, (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists", - lawl, DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awh); + (unsigned long long) lawl, + (unsigned long long) + DCCP_SKB_CB(skb)->dccpd_ack_seq, + (unsigned long long) dp->dccps_awh); dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); return -1; } -- cgit v1.2.3 From 7ad07e7cf343181002c10c39d3f57a88e4903d4f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:50:06 -0700 Subject: [DCCP]: Implement the CLOSING timer So that we retransmit CLOSE/CLOSEREQ packets till they elicit an answer or we hit a timeout. Most of the machinery uses TCP approaches, this code has to be polished & audited, but this is better than we had before. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/dccp.h | 2 +- net/dccp/input.c | 26 +++++++++++--------------- net/dccp/output.c | 23 ++++++++++++----------- net/dccp/proto.c | 26 ++++++++++++++++++++++---- 4 files changed, 46 insertions(+), 31 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 33968a9422f..53994f10ced 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -255,7 +255,7 @@ extern int dccp_v4_checksum(const struct sk_buff *skb, extern int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code); -extern void dccp_send_close(struct sock *sk); +extern void dccp_send_close(struct sock *sk, const int active); struct dccp_skb_cb { __u8 dccpd_type; diff --git a/net/dccp/input.c b/net/dccp/input.c index 85402532e4e..02af05ec23a 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -31,14 +31,9 @@ static void dccp_fin(struct sock *sk, struct sk_buff *skb) static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb) { - switch (sk->sk_state) { - case DCCP_PARTOPEN: - case DCCP_OPEN: - dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED); - dccp_fin(sk, skb); - dccp_set_state(sk, DCCP_CLOSED); - break; - } + dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED); + dccp_fin(sk, skb); + dccp_set_state(sk, DCCP_CLOSED); } static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) @@ -54,13 +49,8 @@ static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) return; } - switch (sk->sk_state) { - case DCCP_PARTOPEN: - case DCCP_OPEN: - dccp_set_state(sk, DCCP_CLOSING); - dccp_send_close(sk); - break; - } + dccp_set_state(sk, DCCP_CLOSING); + dccp_send_close(sk, 0); } static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) @@ -562,6 +552,12 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); goto discard; + } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { + dccp_rcv_closereq(sk, skb); + goto discard; + } else if (dh->dccph_type == DCCP_PKT_CLOSE) { + dccp_rcv_close(sk, skb); + return 0; } switch (sk->sk_state) { diff --git a/net/dccp/output.c b/net/dccp/output.c index 708fc3c0a96..630ca774102 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -96,8 +96,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, inet->daddr); - if (dcb->dccpd_type == DCCP_PKT_ACK || - dcb->dccpd_type == DCCP_PKT_DATAACK) + if (set_ack) dccp_event_ack_sent(sk); DCCP_INC_STATS(DCCP_MIB_OUTSEGS); @@ -429,18 +428,15 @@ void dccp_send_sync(struct sock *sk, const u64 seq, * cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under * any circumstances. */ -void dccp_send_close(struct sock *sk) +void dccp_send_close(struct sock *sk, const int active) { struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; + const unsigned int prio = active ? GFP_KERNEL : GFP_ATOMIC; - /* Socket is locked, keep trying until memory is available. */ - for (;;) { - skb = alloc_skb(sk->sk_prot->max_header, GFP_KERNEL); - if (skb != NULL) - break; - yield(); - } + skb = alloc_skb(sk->sk_prot->max_header, prio); + if (skb == NULL) + return; /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, sk->sk_prot->max_header); @@ -449,7 +445,12 @@ void dccp_send_close(struct sock *sk) DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; skb_set_owner_w(skb, sk); - dccp_transmit_skb(sk, skb); + if (active) { + BUG_TRAP(sk->sk_send_head == NULL); + sk->sk_send_head = skb; + dccp_transmit_skb(sk, skb_clone(skb, prio)); + } else + dccp_transmit_skb(sk, skb); ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 8b613c3017c..a3f8a8095f8 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -402,12 +402,15 @@ void dccp_close(struct sock *sk, long timeout) /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); } else if (dccp_close_state(sk)) { - dccp_send_close(sk); + dccp_send_close(sk, 1); } sk_stream_wait_close(sk, timeout); adjudge_to_death: + /* + * It is the last release_sock in its life. It will remove backlog. + */ release_sock(sk); /* * Now socket is owned by kernel and we acquire BH lock @@ -419,11 +422,26 @@ adjudge_to_death: sock_hold(sk); sock_orphan(sk); - - if (sk->sk_state != DCCP_CLOSED) + + /* + * The last release_sock may have processed the CLOSE or RESET + * packet moving sock to CLOSED state, if not we have to fire + * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination" + * in draft-ietf-dccp-spec-11. -acme + */ + if (sk->sk_state == DCCP_CLOSING) { + /* FIXME: should start at 2 * RTT */ + /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */ + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, + DCCP_RTO_MAX); +#if 0 + /* Yeah, we should use sk->sk_prot->orphan_count, etc */ dccp_set_state(sk, DCCP_CLOSED); +#endif + } - atomic_inc(&dccp_orphan_count); + atomic_inc(sk->sk_prot->orphan_count); if (sk->sk_state == DCCP_CLOSED) inet_csk_destroy_sock(sk); -- cgit v1.2.3 From 20472af986569b0615bd77f0fd7ca9e3d33e9895 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:50:21 -0700 Subject: [DCCP]: Fix skb leak in dccp_sendmsg Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/proto.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net/dccp') diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a3f8a8095f8..2b6db18e607 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -206,6 +206,18 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out_discard; rc = dccp_write_xmit(sk, skb, len); + /* + * XXX we don't use sk_write_queue, so just discard the packet. + * Current plan however is to _use_ sk_write_queue with + * an algorith similar to tcp_sendmsg, where the main difference + * is that in DCCP we have to respect packet boundaries, so + * no coalescing of skbs. + * + * This bug was _quickly_ found & fixed by just looking at an OSTRA + * generated callgraph 8) -acme + */ + if (rc != 0) + goto out_discard; out_release: release_sock(sk); return rc ? : len; -- cgit v1.2.3 From a4beb1b64f5846e216bf2c439022df480151902a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:50:45 -0700 Subject: [DCCP]: Send a DATAACK packet when we have a TIMESTAMP_ECHO pending Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/output.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/dccp') diff --git a/net/dccp/output.c b/net/dccp/output.c index 630ca774102..f96dedd3ad5 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -171,6 +171,7 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, const int len) * dccps_ack_pending or use icsk. */ } else if (inet_csk_ack_scheduled(sk) || + dp->dccps_timestamp_echo != 0 || (dp->dccps_options.dccpo_send_ack_vector && ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) -- cgit v1.2.3 From 012e13eac7579fcc7618df4ca1d5af3cdc03748c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:51:13 -0700 Subject: [CCID]: Make ccid_hc_[rt]x_exit accept NULL arguments Just like kfree, etc it will just not call the CCID exit routines when the private data area is set to NULL. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccid.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 95eb47d8551..c6767b28224 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -97,13 +97,15 @@ static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk) static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk) { - if (ccid->ccid_hc_rx_exit != NULL) + if (ccid->ccid_hc_rx_exit != NULL && + dccp_sk(sk)->dccps_hc_rx_ccid_private != NULL) ccid->ccid_hc_rx_exit(sk); } static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk) { - if (ccid->ccid_hc_tx_exit != NULL) + if (ccid->ccid_hc_tx_exit != NULL && + dccp_sk(sk)->dccps_hc_tx_ccid_private != NULL) ccid->ccid_hc_tx_exit(sk); } -- cgit v1.2.3 From d4b81ff70547b40c9b0742b163e8354560003cc0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:51:36 -0700 Subject: [DCCP]: Export dccp_insert_option_timestamp to CCIDs And don't insert a TIMESTAMP option in all packets, leave the decision to the CCIDs. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/dccp.h | 2 ++ net/dccp/options.c | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 53994f10ced..c6ba07ea1a9 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -364,6 +364,8 @@ extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); extern void dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb, u32 elapsed_time); +extern void dccp_insert_option_timestamp(struct sock *sk, + struct sk_buff *skb); extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb, unsigned char option, const void *value, unsigned char len); diff --git a/net/dccp/options.c b/net/dccp/options.c index 7ecffdf8575..eabcc8f1c62 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -348,7 +348,7 @@ void dccp_insert_option_elapsed_time(struct sock *sk, (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); } -EXPORT_SYMBOL(dccp_insert_option_elapsed_time); +EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) { @@ -426,8 +426,7 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) (unsigned long long) ap->dccpap_ack_ackno); } -static inline void dccp_insert_option_timestamp(struct sock *sk, - struct sk_buff *skb) +void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) { struct timeval tv; u32 now; @@ -441,6 +440,8 @@ static inline void dccp_insert_option_timestamp(struct sock *sk, dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); } +EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp); + static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb) { @@ -504,7 +505,6 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) DCCP_MAX_SEQNO + 1)) dccp_insert_option_ack_vector(sk, skb); - dccp_insert_option_timestamp(sk, skb); if (dp->dccps_timestamp_echo != 0) dccp_insert_option_timestamp_echo(sk, skb); } -- cgit v1.2.3 From 4fded33b3e8177d1d2eec0ccc69af8dfe8b4c3c3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:51:59 -0700 Subject: [CCID3]: Calculate the RTT in the RX half connection Using TIMESTAMP_ECHO and ELAPSED_TIME options received. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 67 ++++++++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 29 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index ffd5b449f5f..48c36afa493 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -1010,7 +1010,6 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; struct timeval now; -// ccid3_pr_debug("%s, sk=%p, more=%d, len=%d\n", dccp_role(sk), sk, more, len); BUG_ON(hctx == NULL); if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { @@ -1562,23 +1561,27 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) { const struct dccp_sock *dp = dccp_sk(sk); + u32 x_recv, pinv; struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) return; - if (hcrx->ccid3hcrx_elapsed_time != 0 && !dccp_packet_without_ack(skb)) - dccp_insert_option_elapsed_time(sk, skb, hcrx->ccid3hcrx_elapsed_time); - - if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) { - const u32 x_recv = htonl(hcrx->ccid3hcrx_x_recv); - const u32 pinv = htonl(hcrx->ccid3hcrx_pinv); - - dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, &pinv, sizeof(pinv)); - dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, &x_recv, sizeof(x_recv)); - } - DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; + + if (dccp_packet_without_ack(skb)) + return; + + if (hcrx->ccid3hcrx_elapsed_time != 0) + dccp_insert_option_elapsed_time(sk, skb, + hcrx->ccid3hcrx_elapsed_time); + dccp_insert_option_timestamp(sk, skb); + x_recv = htonl(hcrx->ccid3hcrx_x_recv); + pinv = htonl(hcrx->ccid3hcrx_pinv); + dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, + &pinv, sizeof(pinv)); + dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, + &x_recv, sizeof(x_recv)); } /* Weights used to calculate loss event rate */ @@ -1860,8 +1863,10 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + const struct dccp_options_received *opt_recv; struct dccp_rx_hist_entry *packet; struct timeval now; + u32 now_usecs; u8 win_count; u32 p_prev; int ins; @@ -1876,24 +1881,25 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA)); + opt_recv = &dp->dccps_options_received; + switch (DCCP_SKB_CB(skb)->dccpd_type) { case DCCP_PKT_ACK: if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) return; case DCCP_PKT_DATAACK: - if (dp->dccps_options_received.dccpor_timestamp_echo == 0) + if (opt_recv->dccpor_timestamp_echo == 0) break; p_prev = hcrx->ccid3hcrx_rtt; do_gettimeofday(&now); - /* hcrx->ccid3hcrx_rtt = now - dp->dccps_options_received.dccpor_timestamp_echo - - usecs_to_jiffies(dp->dccps_options_received.dccpor_elapsed_time * 10); - FIXME - I think above code is broken - have to look at options more, will also need - to fix pr_debug below */ + now_usecs = now.tv_sec * USEC_PER_SEC + now.tv_usec; + hcrx->ccid3hcrx_rtt = now_usecs - + (opt_recv->dccpor_timestamp_echo - + opt_recv->dccpor_elapsed_time) * 10; if (p_prev != hcrx->ccid3hcrx_rtt) - ccid3_pr_debug("%s, sk=%p, New RTT estimate=%lu jiffies, tstamp_echo=%u, elapsed time=%u\n", - dccp_role(sk), sk, hcrx->ccid3hcrx_rtt, - dp->dccps_options_received.dccpor_timestamp_echo, - dp->dccps_options_received.dccpor_elapsed_time); + ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n", + dccp_role(sk), hcrx->ccid3hcrx_rtt, + opt_recv->dccpor_elapsed_time); break; case DCCP_PKT_DATA: break; @@ -1904,8 +1910,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) return; } - packet = dccp_rx_hist_entry_new(ccid3_rx_hist, - dp->dccps_options_received.dccpor_ndp, + packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp, skb, SLAB_ATOMIC); if (packet == NULL) { ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet to history (consider it lost)!", @@ -1930,9 +1935,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) case TFRC_RSTATE_DATA: hcrx->ccid3hcrx_bytes_recv += skb->len - dccp_hdr(skb)->dccph_doff * 4; if (ins == 0) { - do_gettimeofday(&now); - if ((now_delta(hcrx->ccid3hcrx_tstamp_last_ack)) >= hcrx->ccid3hcrx_rtt) { - hcrx->ccid3hcrx_tstamp_last_ack = now; + if (now_delta(hcrx->ccid3hcrx_tstamp_last_ack) >= + hcrx->ccid3hcrx_rtt) { + do_gettimeofday(&hcrx->ccid3hcrx_tstamp_last_ack); ccid3_hc_rx_send_feedback(sk); } return; @@ -1946,8 +1951,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* Dealing with packet loss */ - ccid3_pr_debug("%s, sk=%p(%s), skb=%p, data loss! Reacting...\n", - dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb); + ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n", + dccp_role(sk), sk, dccp_state_name(sk->sk_state)); ccid3_hc_rx_detect_loss(sk); p_prev = hcrx->ccid3hcrx_p; @@ -1985,7 +1990,11 @@ static int ccid3_hc_rx_init(struct sock *sk) hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); INIT_LIST_HEAD(&hcrx->ccid3hcrx_loss_interval_hist); - + /* + * XXX this seems to be paranoid, need to think more about this, for + * now start with something different than zero. -acme + */ + hcrx->ccid3hcrx_rtt = USEC_PER_SEC / 5; return 0; } -- cgit v1.2.3 From 2babe1f6fea717c36c008c878fe095d1ca5696c1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:52:35 -0700 Subject: [DCCP]: Introduce dccp_get_info And also hc_tx and hc_rx get_info functions for the CCIDs to fill in information that is specific to them. For now reusing struct tcp_info, later I'll try to figure out a better solution, for now its really nice to get this kind of info: [root@qemu ~]# ./ss -danemi State Recv-Q Send-Q Local Addr:Port Peer Addr:Port LISTEN 0 0 *:5001 *:* ino:628 sk:c1340040 mem:(r0,w0,f0,t0) cwnd:0 ssthresh:0 ESTAB 0 0 172.20.0.2:5001 172.20.0.1:32785 ino:629 sk:c13409a0 mem:(r0,w0,f0,t0) ts rto:1000 rtt:0.004/0 cwnd:0 ssthresh:0 rcv_rtt:61.377 This, for instance, shows that we're not congestion controlling ACKs, as the above output is in the ttcp receiving host, and ttcp is a one way app, i.e. the received never calls sendmsg, so ccid_hc_tx_send_packet is never called, so the TX half connection stays in TFRC_SSTATE_NO_SENT state and hctx_rtt is never calculated, stays with the value set in ccid3_hc_tx_init, 4us, as show above in milliseconds (0.004ms), upcoming patches will fix this. rcv_rtt seems sane tho, matching ping results :-) Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccid.h | 18 ++++++++++++++++++ net/dccp/ccids/ccid3.c | 27 +++++++++++++++++++++++++++ net/dccp/diag.c | 26 +++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 1 deletion(-) (limited to 'net/dccp') diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index c6767b28224..962f1e9e2f7 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -50,6 +50,10 @@ struct ccid { struct sk_buff *skb, int len); void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, int len); + void (*ccid_hc_rx_get_info)(struct sock *sk, + struct tcp_info *info); + void (*ccid_hc_tx_get_info)(struct sock *sk, + struct tcp_info *info); }; extern int ccid_register(struct ccid *ccid); @@ -159,4 +163,18 @@ static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, if (ccid->ccid_hc_rx_insert_options != NULL) ccid->ccid_hc_rx_insert_options(sk, skb); } + +static inline void ccid_hc_rx_get_info(struct ccid *ccid, struct sock *sk, + struct tcp_info *info) +{ + if (ccid->ccid_hc_rx_get_info != NULL) + ccid->ccid_hc_rx_get_info(sk, info); +} + +static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk, + struct tcp_info *info) +{ + if (ccid->ccid_hc_tx_get_info != NULL) + ccid->ccid_hc_tx_get_info(sk, info); +} #endif /* _CCID_H */ diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 48c36afa493..fe4cc85f5bc 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -2020,6 +2020,31 @@ static void ccid3_hc_rx_exit(struct sock *sk) dp->dccps_hc_rx_ccid_private = NULL; } +static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) +{ + const struct dccp_sock *dp = dccp_sk(sk); + const struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + + if (hcrx == NULL) + return; + + info->tcpi_ca_state = hcrx->ccid3hcrx_state; + info->tcpi_options |= TCPI_OPT_TIMESTAMPS; + info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; +} + +static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) +{ + const struct dccp_sock *dp = dccp_sk(sk); + const struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + + if (hctx == NULL) + return; + + info->tcpi_rto = hctx->ccid3hctx_t_rto; + info->tcpi_rtt = hctx->ccid3hctx_rtt; +} + static struct ccid ccid3 = { .ccid_id = 3, .ccid_name = "ccid3", @@ -2037,6 +2062,8 @@ static struct ccid ccid3 = { .ccid_hc_rx_exit = ccid3_hc_rx_exit, .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options, .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, + .ccid_hc_rx_get_info = ccid3_hc_rx_get_info, + .ccid_hc_tx_get_info = ccid3_hc_tx_get_info, }; module_param(ccid3_debug, int, 0444); diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 0b10c176c35..f675d8e642d 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -14,19 +14,43 @@ #include #include +#include "ccid.h" #include "dccp.h" +static void dccp_get_info(struct sock *sk, struct tcp_info *info) +{ + struct dccp_sock *dp = dccp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); + + memset(info, 0, sizeof(*info)); + + info->tcpi_state = sk->sk_state; + info->tcpi_retransmits = icsk->icsk_retransmits; + info->tcpi_probes = icsk->icsk_probes_out; + info->tcpi_backoff = icsk->icsk_backoff; + info->tcpi_pmtu = dp->dccps_pmtu_cookie; + + if (dp->dccps_options.dccpo_send_ack_vector) + info->tcpi_options |= TCPI_OPT_SACK; + + ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info); + ccid_hc_tx_get_info(dp->dccps_hc_tx_ccid, sk, info); +} + static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *_info) { r->idiag_rqueue = r->idiag_wqueue = 0; + + if (_info != NULL) + dccp_get_info(sk, _info); } static struct inet_diag_handler dccp_diag_handler = { .idiag_hashinfo = &dccp_hashinfo, .idiag_get_info = dccp_diag_get_info, .idiag_type = DCCPDIAG_GETSOCK, - .idiag_info_size = 0, + .idiag_info_size = sizeof(struct tcp_info), }; static int __init dccp_diag_init(void) -- cgit v1.2.3 From 8efa544f9c84919c047dc2f96e308c902e8fb1a4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:54:00 -0700 Subject: [DCCP]: Call the HC exit routines at dccp_v4_destroy_sock Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 647e669a126..3cf2cbcdcaf 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1303,6 +1303,8 @@ static int dccp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash != NULL) inet_put_port(&dccp_hashinfo, sk); + ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); dp->dccps_hc_rx_ackpkts = NULL; ccid_exit(dp->dccps_hc_rx_ccid, sk); -- cgit v1.2.3 From 331968bd0c1b2437f3ad773cbf55f2e0737bafc0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:54:23 -0700 Subject: [DCCP]: Initial dccp_poll implementation Tested with a patched netcat, no horror stories so far 8) Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/input.c | 1 + net/dccp/proto.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 59 insertions(+), 1 deletion(-) (limited to 'net/dccp') diff --git a/net/dccp/input.c b/net/dccp/input.c index 02af05ec23a..ef29cef1daf 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -34,6 +34,7 @@ static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb) dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED); dccp_fin(sk, skb); dccp_set_state(sk, DCCP_CLOSED); + sk_wake_async(sk, 1, POLL_HUP); } static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 2b6db18e607..600dda51d99 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -140,6 +140,62 @@ int dccp_disconnect(struct sock *sk, int flags) return err; } +/* + * Wait for a DCCP event. + * + * Note that we don't need to lock the socket, as the upper poll layers + * take care of normal races (between the test and the event) and we don't + * go look at any of the socket buffers directly. + */ +static unsigned int dccp_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + unsigned int mask; + struct sock *sk = sock->sk; + + poll_wait(file, sk->sk_sleep, wait); + if (sk->sk_state == DCCP_LISTEN) + return inet_csk_listen_poll(sk); + + /* Socket is not locked. We are protected from async events + by poll logic and correct handling of state changes + made by another threads is impossible in any case. + */ + + mask = 0; + if (sk->sk_err) + mask = POLLERR; + + if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED) + mask |= POLLHUP; + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLIN | POLLRDNORM; + + /* Connected? */ + if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) { + if (atomic_read(&sk->sk_rmem_alloc) > 0) + mask |= POLLIN | POLLRDNORM; + + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { + mask |= POLLOUT | POLLWRNORM; + } else { /* send SIGIO later */ + set_bit(SOCK_ASYNC_NOSPACE, + &sk->sk_socket->flags); + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + + /* Race breaker. If space is freed after + * wspace test but before the flags are set, + * IO signal will be lost. + */ + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) + mask |= POLLOUT | POLLWRNORM; + } + } + } + return mask; +} + int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) { dccp_pr_debug("entry\n"); @@ -478,7 +534,8 @@ static struct proto_ops inet_dccp_ops = { .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet_getname, - .poll = sock_no_poll, + /* FIXME: work on tcp_poll to rename it to inet_csk_poll */ + .poll = dccp_poll, .ioctl = inet_ioctl, /* FIXME: work on inet_listen to rename it to sock_common_listen */ .listen = inet_dccp_listen, -- cgit v1.2.3 From ba89966c1984513f4f2cc0a6c182266be44ddd03 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Aug 2005 12:05:31 -0700 Subject: [NET]: use __read_mostly on kmem_cache_t , DEFINE_SNMP_STAT pointers This patch puts mostly read only data in the right section (read_mostly), to help sharing of these data between CPUS without memory ping pongs. On one of my production machine, tcp_statistics was sitting in a heavily modified cache line, so *every* SNMP update had to force a reload. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 2 +- net/dccp/proto.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index fe4cc85f5bc..cf93b019ecb 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -85,7 +85,7 @@ static int ccid3_debug; static struct dccp_tx_hist *ccid3_tx_hist; static struct dccp_rx_hist *ccid3_rx_hist; -static kmem_cache_t *ccid3_loss_interval_hist_slab; +static kmem_cache_t *ccid3_loss_interval_hist_slab __read_mostly; static inline struct ccid3_loss_interval_hist_entry * ccid3_loss_interval_hist_entry_new(const unsigned int __nocast prio) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 600dda51d99..f97e92ea34f 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -39,7 +39,7 @@ #include "ccid.h" #include "dccp.h" -DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics); +DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; atomic_t dccp_orphan_count = ATOMIC_INIT(0); -- cgit v1.2.3 From 75b3f207b433dcb807fcf0f47de1c8398571ba5f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Aug 2005 02:35:30 -0300 Subject: [DCCP]: Make the Debug Menu available when DCCP is statically linked too Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 3023f702eb8..187ac182e24 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -27,7 +27,7 @@ config INET_DCCP_DIAG source "net/dccp/ccids/Kconfig" menu "DCCP Kernel Hacking" - depends on IP_DCCP=m && DEBUG_KERNEL=y + depends on IP_DCCP && DEBUG_KERNEL=y config IP_DCCP_DEBUG bool "DCCP debug messages" @@ -37,7 +37,7 @@ config IP_DCCP_DEBUG Just say N. config IP_DCCP_UNLOAD_HACK - depends on IP_DCCP_CCID3=m + depends on IP_DCCP=m && IP_DCCP_CCID3=m bool "DCCP control sock unload hack" ---help--- Enable this to be able to unload the dccp module when the it -- cgit v1.2.3 From d6809c12b3334a929c39bf08ea63bd819e0500f7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Aug 2005 03:06:35 -0300 Subject: [DCCP]: Introduce dccp_wait_for_ccid and use it in dccp_write_xmit This is not quite what I think we should have long term but improves performance for now, so lets use it till we get CCID3 working well, then we can think about using sk_write_queue, perhaps using some ideas from Juwen Lai's old stack for 2.4.20. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 2 +- net/dccp/dccp.h | 3 +-- net/dccp/output.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++-- net/dccp/proto.c | 2 +- 4 files changed, 62 insertions(+), 6 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index cf93b019ecb..9866dc17525 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -985,7 +985,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, ccid3_pr_debug("send_packet delay=%ld\n", delay); delay /= -1000; /* divide by -1000 is to convert to ms and get sign right */ - rc = delay > 0 ? -EAGAIN : 0; + rc = delay > 0 ? delay : 0; break; default: printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index c6ba07ea1a9..6ba21509e79 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -126,8 +126,7 @@ extern void dccp_send_delayed_ack(struct sock *sk); extern void dccp_send_sync(struct sock *sk, const u64 seq, const enum dccp_pkt_type pkt_type); -extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, - const int len); +extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo); extern void dccp_init_xmit_timers(struct sock *sk); static inline void dccp_clear_xmit_timers(struct sock *sk) diff --git a/net/dccp/output.c b/net/dccp/output.c index f96dedd3ad5..116f6db5678 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -150,14 +150,71 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) return mss_now; } -int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, const int len) +/** + * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet + * @sk: socket to wait for + * @timeo: for how long + */ +static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, + long *timeo) +{ + struct dccp_sock *dp = dccp_sk(sk); + DEFINE_WAIT(wait); + long delay; + int rc; + + while (1) { + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + + if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) + goto do_error; + if (!*timeo) + goto do_nonblock; + if (signal_pending(current)) + goto do_interrupted; + + rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, + skb->len); + if (rc <= 0) + break; + delay = msecs_to_jiffies(rc); + if (delay > *timeo || delay < 0) + goto do_nonblock; + + sk->sk_write_pending++; + release_sock(sk); + *timeo -= schedule_timeout(delay); + lock_sock(sk); + sk->sk_write_pending--; + } +out: + finish_wait(sk->sk_sleep, &wait); + return rc; + +do_error: + rc = -EPIPE; + goto out; +do_nonblock: + rc = -EAGAIN; + goto out; +do_interrupted: + rc = sock_intr_errno(*timeo); + goto out; +} + +int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) { const struct dccp_sock *dp = dccp_sk(sk); - int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, len); + int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, + skb->len); + + if (err > 0) + err = dccp_wait_for_ccid(sk, skb, timeo); if (err == 0) { const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + const int len = skb->len; if (sk->sk_state == DCCP_PARTOPEN) { /* See 8.1.5. Handshake Completion */ diff --git a/net/dccp/proto.c b/net/dccp/proto.c index f97e92ea34f..f4da6561e40 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -261,7 +261,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (rc != 0) goto out_discard; - rc = dccp_write_xmit(sk, skb, len); + rc = dccp_write_xmit(sk, skb, &timeo); /* * XXX we don't use sk_write_queue, so just discard the packet. * Current plan however is to _use_ sk_write_queue with -- cgit v1.2.3 From 1f2333aea3269e196c44ae9a220e714cc1427792 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Aug 2005 03:51:58 -0300 Subject: [CCID3]: Reflow to mostly fit under 80 columns No code changes. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 290 ++++++++++++++++++++++++++++++------------------- 1 file changed, 176 insertions(+), 114 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 9866dc17525..225c5301317 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -142,14 +142,16 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) } #endif -static inline void ccid3_hc_tx_set_state(struct sock *sk, enum ccid3_hc_tx_states state) +static inline void ccid3_hc_tx_set_state(struct sock *sk, + enum ccid3_hc_tx_states state) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", - dccp_role(sk), sk, ccid3_tx_state_name(oldstate), ccid3_tx_state_name(state)); + dccp_role(sk), sk, ccid3_tx_state_name(oldstate), + ccid3_tx_state_name(state)); WARN_ON(state == oldstate); hctx->ccid3hctx_state = state; } @@ -785,7 +787,8 @@ static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx) /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) { - hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); + hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, + TFRC_OPSYS_HALF_TIME_GRAN); } @@ -804,20 +807,25 @@ static void ccid3_hc_tx_update_x(struct sock *sk) struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; - if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { /* to avoid large error in calcX */ + /* To avoid large error in calcX */ + if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { hctx->ccid3hctx_x_calc = ccid3_calc_x(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt, hctx->ccid3hctx_p); - hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, 2 * hctx->ccid3hctx_x_recv), - hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME); + hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, + 2 * hctx->ccid3hctx_x_recv), + (hctx->ccid3hctx_s / + TFRC_MAX_BACK_OFF_TIME)); } else if (now_delta(hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) { u32 rtt = hctx->ccid3hctx_rtt; if (rtt < 10) { rtt = 10; } /* avoid divide by zero below */ - hctx->ccid3hctx_x = max_t(u32, min_t(u32, 2 * hctx->ccid3hctx_x_recv, 2 * hctx->ccid3hctx_x), - (hctx->ccid3hctx_s * 100000) / (rtt / 10)); + hctx->ccid3hctx_x = max_t(u32, min_t(u32, 2 * hctx->ccid3hctx_x_recv, + 2 * hctx->ccid3hctx_x), + ((hctx->ccid3hctx_s * 100000) / + (rtt / 10))); /* Using 100000 and 10 to avoid 32 bit overflow for jumbo frames */ do_gettimeofday(&hctx->ccid3hctx_t_ld); } @@ -840,7 +848,8 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) if (sock_owned_by_user(sk)) { /* Try again later. */ /* XXX: set some sensible MIB */ - sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + HZ / 5); + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + jiffies + HZ / 5); goto out; } @@ -858,27 +867,38 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) case TFRC_SSTATE_NO_FBACK: /* Halve send rate */ hctx->ccid3hctx_x /= 2; - if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME)) - hctx->ccid3hctx_x = hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME; + if (hctx->ccid3hctx_x < + (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME)) + hctx->ccid3hctx_x = (hctx->ccid3hctx_s / + TFRC_MAX_BACK_OFF_TIME); - ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d bytes/s\n", - dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state), + ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d " + "bytes/s\n", + dccp_role(sk), sk, + ccid3_tx_state_name(hctx->ccid3hctx_state), hctx->ccid3hctx_x); - next_tmout = max_t(u32, 2 * (hctx->ccid3hctx_s * 100000) - / (hctx->ccid3hctx_x / 10), TFRC_INITIAL_TIMEOUT); + next_tmout = max_t(u32, 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10), + TFRC_INITIAL_TIMEOUT); /* do above maths with 100000 and 10 to prevent overflow on 32 bit */ - /* FIXME - not sure above calculation is correct. See section 5 of CCID3 11 - * should adjust tx_t_ipi and double that to achieve it really */ + /* + * FIXME - not sure above calculation is correct. See section + * 5 of CCID3 11 should adjust tx_t_ipi and double that to + * achieve it really + */ break; case TFRC_SSTATE_FBACK: - /* Check if IDLE since last timeout and recv rate is less than 4 packets per RTT */ + /* + * Check if IDLE since last timeout and recv rate is less than + * 4 packets per RTT + */ rtt = hctx->ccid3hctx_rtt; if (rtt < 10) rtt = 10; /* stop divide by zero below */ - if (!hctx->ccid3hctx_idle || (hctx->ccid3hctx_x_recv >= - 4 * (hctx->ccid3hctx_s * 100000) / (rtt / 10))) { - ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", dccp_role(sk), sk, + if (!hctx->ccid3hctx_idle || + (hctx->ccid3hctx_x_recv >= 4 * (hctx->ccid3hctx_s * 100000) / (rtt / 10))) { + ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", + dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state)); /* Halve sending rate */ @@ -887,7 +907,8 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) * Else * X_recv = X_calc / 4; */ - BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P && hctx->ccid3hctx_x_calc == 0); + BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P && + hctx->ccid3hctx_x_calc == 0); /* check also if p is zero -> x_calc is infinity? */ if (hctx->ccid3hctx_p < TFRC_SMALLEST_P || @@ -916,7 +937,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) } sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, - jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); + jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); hctx->ccid3hctx_idle = 1; out: bh_unlock_sock(sk); @@ -933,24 +954,27 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, long delay; int rc = -ENOTCONN; -// ccid3_pr_debug("%s, sk=%p, skb=%p, len=%d\n", dccp_role(sk), sk, skb, len); + /* Check if pure ACK or Terminating*/ + /* - * check if pure ACK or Terminating */ - /* XXX: We only call this function for DATA and DATAACK, on, these packets can have - * zero length, but why the comment about "pure ACK"? + * XXX: We only call this function for DATA and DATAACK, on, these + * packets can have zero length, but why the comment about "pure ACK"? */ - if (hctx == NULL || len == 0 || hctx->ccid3hctx_state == TFRC_SSTATE_TERM) + if (hctx == NULL || len == 0 || + hctx->ccid3hctx_state == TFRC_SSTATE_TERM) goto out; /* See if last packet allocated was not sent */ new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); if (new_packet == NULL || new_packet->dccphtx_sent) { - new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist, SLAB_ATOMIC); + new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist, + SLAB_ATOMIC); rc = -ENOBUFS; if (new_packet == NULL) { ccid3_pr_debug("%s, sk=%p, not enough mem to add " - "to history, send refused\n", dccp_role(sk), sk); + "to history, send refused\n", + dccp_role(sk), sk); goto out; } @@ -961,12 +985,13 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_SENT: - ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n", dccp_role(sk), sk, - dp->dccps_gss); + ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n", + dccp_role(sk), sk, dp->dccps_gss); hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer; hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk; - sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)); + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)); hctx->ccid3hctx_last_win_count = 0; hctx->ccid3hctx_t_last_win_count = now; ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); @@ -981,7 +1006,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, break; case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: - delay = (now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta); + delay = now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta; ccid3_pr_debug("send_packet delay=%ld\n", delay); delay /= -1000; /* divide by -1000 is to convert to ms and get sign right */ @@ -1027,41 +1052,35 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); if (packet == NULL) { - printk(KERN_CRIT "%s: packet doesn't exists in history!\n", __FUNCTION__); + printk(KERN_CRIT "%s: packet doesn't exists in " + "history!\n", __FUNCTION__); return; } if (packet->dccphtx_sent) { - printk(KERN_CRIT "%s: no unsent packet in history!\n", __FUNCTION__); + printk(KERN_CRIT "%s: no unsent packet in history!\n", + __FUNCTION__); return; } packet->dccphtx_tstamp = now; packet->dccphtx_seqno = dp->dccps_gss; -#if 0 - ccid3_pr_debug("%s, sk=%p, seqno=%llu inserted!\n", - dccp_role(sk), sk, packet->dccphtx_seqno); -#endif /* - * Check if win_count have changed */ - /* COMPLIANCE_BEGIN - * Algorithm in "8.1. Window Counter Valuer" in draft-ietf-dccp-ccid3-11.txt + * Check if win_count have changed + * Algorithm in "8.1. Window Counter Valuer" in + * draft-ietf-dccp-ccid3-11.txt */ - quarter_rtt = now_delta(hctx->ccid3hctx_t_last_win_count) / (hctx->ccid3hctx_rtt / 4); + quarter_rtt = now_delta(hctx->ccid3hctx_t_last_win_count) / + (hctx->ccid3hctx_rtt / 4); if (quarter_rtt > 0) { hctx->ccid3hctx_t_last_win_count = now; hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count + min_t(unsigned long, quarter_rtt, 5)) % 16; - ccid3_pr_debug("%s, sk=%p, window changed from %u to %u!\n", + ccid3_pr_debug("%s, sk=%p, window changed from " + "%u to %u!\n", dccp_role(sk), sk, packet->dccphtx_ccval, hctx->ccid3hctx_last_win_count); } - /* COMPLIANCE_END */ -#if 0 - ccid3_pr_debug("%s, sk=%p, packet sent (%llu,%u)\n", - dccp_role(sk), sk, - packet->dccphtx_seqno, - packet->dccphtx_ccval); -#endif + hctx->ccid3hctx_idle = 0; packet->dccphtx_rtt = hctx->ccid3hctx_rtt; packet->dccphtx_sent = 1; @@ -1073,7 +1092,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) case TFRC_SSTATE_NO_SENT: /* if first wasn't pure ack */ if (len != 0) - printk(KERN_CRIT "%s: %s, First packet sent is noted as a data packet\n", + printk(KERN_CRIT "%s: %s, First packet sent is noted " + "as a data packet\n", __FUNCTION__, dccp_role(sk)); return; case TFRC_SSTATE_NO_FBACK: @@ -1105,16 +1125,13 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) u32 pinv; u32 x_recv; u32 r_sample; -#if 0 - ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n", - dccp_role(sk), sk, dccp_state_name(sk->sk_state), - skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); -#endif + if (hctx == NULL) return; if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { - ccid3_pr_debug("%s, sk=%p, received a packet when terminating!\n", dccp_role(sk), sk); + ccid3_pr_debug("%s, sk=%p, received a packet when " + "terminating!\n", dccp_role(sk), sk); return; } @@ -1141,8 +1158,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist, DCCP_SKB_CB(skb)->dccpd_ack_seq); if (packet == NULL) { - ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't exist in history!\n", - dccp_role(sk), sk, DCCP_SKB_CB(skb)->dccpd_ack_seq, + ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't " + "exist in history!\n", + dccp_role(sk), sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); return; } @@ -1164,7 +1183,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); hctx->ccid3hctx_rtt = r_sample; } else - hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + r_sample / 10; + hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + + r_sample / 10; /* * XXX: this is to avoid a division by zero in ccid3_hc_tx_packet_sent @@ -1173,17 +1193,16 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) if (hctx->ccid3hctx_rtt < 4) hctx->ccid3hctx_rtt = 4; - ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, r_sample=%us\n", - dccp_role(sk), sk, - hctx->ccid3hctx_rtt, - r_sample); + ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, " + "r_sample=%us\n", dccp_role(sk), sk, + hctx->ccid3hctx_rtt, r_sample); /* Update timeout interval */ hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, USEC_PER_SEC); /* Update receive rate */ - hctx->ccid3hctx_x_recv = x_recv; /* x_recv in bytes per second */ + hctx->ccid3hctx_x_recv = x_recv;/* X_recv in bytes per sec */ /* Update loss event rate */ if (pinv == ~0 || pinv == 0) @@ -1193,7 +1212,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) { hctx->ccid3hctx_p = TFRC_SMALLEST_P; - ccid3_pr_debug("%s, sk=%p, Smallest p used!\n", dccp_role(sk), sk); + ccid3_pr_debug("%s, sk=%p, Smallest p used!\n", + dccp_role(sk), sk); } } @@ -1220,22 +1240,27 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) &hctx->ccid3hctx_hist, packet); if (hctx->ccid3hctx_x < 10) { - ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx->ccid3hctx_x < 10\n"); + ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx_x < 10\n"); hctx->ccid3hctx_x = 10; } /* to prevent divide by zero below */ - /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ + /* + * Schedule no feedback timer to expire in + * max(4 * R, 2 * s / X) + */ next_tmout = max(hctx->ccid3hctx_t_rto, (2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10))); /* maths with 100000 and 10 is to prevent overflow with 32 bit */ - ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to expire in %lu jiffies (%luus)\n", - dccp_role(sk), sk, usecs_to_jiffies(next_tmout), next_tmout); + ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to " + "expire in %lu jiffies (%luus)\n", + dccp_role(sk), sk, + usecs_to_jiffies(next_tmout), next_tmout); sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, - jiffies + max_t(u32,1,usecs_to_jiffies(next_tmout))); + jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); /* set idle flag */ hctx->ccid3hctx_idle = 1; @@ -1253,14 +1278,16 @@ static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) const struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; - if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) + if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || + sk->sk_state == DCCP_PARTOPEN)) return; DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; } static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, - unsigned char len, u16 idx, unsigned char *value) + unsigned char len, u16 idx, + unsigned char *value) { int rc = 0; struct dccp_sock *dp = dccp_sk(sk); @@ -1283,7 +1310,8 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, switch (option) { case TFRC_OPT_LOSS_EVENT_RATE: if (len != 4) { - ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_LOSS_EVENT_RATE\n", + ccid3_pr_debug("%s, sk=%p, invalid len for " + "TFRC_OPT_LOSS_EVENT_RATE\n", dccp_role(sk), sk); rc = -EINVAL; } else { @@ -1303,7 +1331,8 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, break; case TFRC_OPT_RECEIVE_RATE: if (len != 4) { - ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_RECEIVE_RATE\n", + ccid3_pr_debug("%s, sk=%p, invalid len for " + "TFRC_OPT_RECEIVE_RATE\n", dccp_role(sk), sk); rc = -EINVAL; } else { @@ -1325,7 +1354,8 @@ static int ccid3_hc_tx_init(struct sock *sk) ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any()); + hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), + gfp_any()); if (hctx == NULL) return -ENOMEM; @@ -1337,8 +1367,10 @@ static int ccid3_hc_tx_init(struct sock *sk) else hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE; - hctx->ccid3hctx_x = hctx->ccid3hctx_s; /* set transmission rate to 1 packet per second */ - hctx->ccid3hctx_rtt = 4; /* See ccid3_hc_tx_packet_sent win_count calculatation */ + /* Set transmission rate to 1 packet per second */ + hctx->ccid3hctx_x = hctx->ccid3hctx_s; + /* See ccid3_hc_tx_packet_sent win_count calculatation */ + hctx->ccid3hctx_rtt = 4; hctx->ccid3hctx_t_rto = USEC_PER_SEC; hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; INIT_LIST_HEAD(&hctx->ccid3hctx_hist); @@ -1389,14 +1421,16 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) } #endif -static inline void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_states state) +static inline void ccid3_hc_rx_set_state(struct sock *sk, + enum ccid3_hc_rx_states state) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state; ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", - dccp_role(sk), sk, ccid3_rx_state_name(oldstate), ccid3_rx_state_name(state)); + dccp_role(sk), sk, ccid3_rx_state_name(oldstate), + ccid3_rx_state_name(state)); WARN_ON(state == oldstate); hcrx->ccid3hcrx_state = state; } @@ -1434,9 +1468,12 @@ static int ccid3_hc_rx_add_hist(struct sock *sk, num_later++; if (num_later == TFRC_RECV_NUM_LATE_LOSS) { - dccp_rx_hist_entry_delete(ccid3_rx_hist, packet); - ccid3_pr_debug("%s, sk=%p, packet(%llu) already lost!\n", - dccp_role(sk), sk, seqno); + dccp_rx_hist_entry_delete(ccid3_rx_hist, + packet); + ccid3_pr_debug("%s, sk=%p, packet" + "(%llu) already lost!\n", + dccp_role(sk), sk, + seqno); return 1; } } @@ -1444,12 +1481,18 @@ static int ccid3_hc_rx_add_hist(struct sock *sk, if (num_later < TFRC_RECV_NUM_LATE_LOSS) dccp_rx_hist_add_entry(&hcrx->ccid3hcrx_hist, packet); - /* FIXME: else what? should we destroy the packet like above? */ + /* + * FIXME: else what? should we destroy the packet + * like above? + */ } } trim_history: - /* Trim history (remove all packets after the NUM_LATE_LOSS + 1 data packets) */ + /* + * Trim history (remove all packets after the NUM_LATE_LOSS + 1 + * data packets) + */ num_later = TFRC_RECV_NUM_LATE_LOSS + 1; if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { @@ -1489,15 +1532,18 @@ trim_history: if (tmp < 0) tmp += TFRC_WIN_COUNT_LIMIT; if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { - /* we have found a packet older than one rtt - * remove the rest */ + /* + * We have found a packet older + * than one rtt remove the rest + */ step = 3; } else /* OK, find next data packet */ num_later = 1; break; case 3: list_del_init(&entry->dccphrx_node); - dccp_rx_hist_entry_delete(ccid3_rx_hist, entry); + dccp_rx_hist_entry_delete(ccid3_rx_hist, + entry); break; } } else if (dccp_rx_hist_entry_data_packet(entry)) @@ -1564,7 +1610,8 @@ static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) u32 x_recv, pinv; struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) + if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || + sk->sk_state == DCCP_PARTOPEN)) return; DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; @@ -1658,13 +1705,15 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) } if (step == 0) { - printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no data packets!\n", + printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no " + "data packets!\n", __FUNCTION__, dccp_role(sk), sk); return ~0; } if (interval == 0) { - ccid3_pr_debug("%s, sk=%p, Could not find a win_count interval > 0. Defaulting to 1\n", + ccid3_pr_debug("%s, sk=%p, Could not find a win_count " + "interval > 0. Defaulting to 1\n", dccp_role(sk), sk); interval = 1; } @@ -1688,8 +1737,8 @@ found: fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; /* do not alter order above or you will get overflow on 32 bit */ p = calcx_reverse_lookup(fval); - ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied loss rate=%u\n",\ - dccp_role(sk), sk, x_recv, p); + ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied " + "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); if (p == 0) return ~0; @@ -1704,25 +1753,31 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) struct ccid3_loss_interval_hist_entry *li_entry; if (seq_loss != DCCP_MAX_SEQNO + 1) { - ccid3_pr_debug("%s, sk=%p, seq_loss=%llu, win_loss=%u, packet loss detected\n", + ccid3_pr_debug("%s, sk=%p, seq_loss=%llu, win_loss=%u, " + "packet loss detected\n", dccp_role(sk), sk, seq_loss, win_loss); if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { struct ccid3_loss_interval_hist_entry *li_tail = NULL; int i; - ccid3_pr_debug("%s, sk=%p, first loss event detected, creating history\n", dccp_role(sk), sk); + ccid3_pr_debug("%s, sk=%p, first loss event detected, " + "creating history\n", + dccp_role(sk), sk); for (i = 0; i <= TFRC_RECV_IVAL_F_LENGTH; ++i) { li_entry = ccid3_loss_interval_hist_entry_new(SLAB_ATOMIC); if (li_entry == NULL) { ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist); - ccid3_pr_debug("%s, sk=%p, not enough mem for creating history\n", + ccid3_pr_debug("%s, sk=%p, not enough " + "mem for creating " + "history\n", dccp_role(sk), sk); return; } if (li_tail == NULL) li_tail = li_entry; - list_add(&li_entry->ccid3lih_node, &hcrx->ccid3hcrx_loss_interval_hist); + list_add(&li_entry->ccid3lih_node, + &hcrx->ccid3hcrx_loss_interval_hist); } li_entry->ccid3lih_seqno = seq_loss; @@ -1772,11 +1827,13 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { /* no loss event have occured yet */ ccid3_pr_debug("%s, sk=%p, TODO: find a lost data " - "packet by comparing to initial seqno\n", + "packet by comparing to initial " + "seqno\n", dccp_role(sk), sk); goto out_update_li; } else { - pr_info("%s: %s, sk=%p, ERROR! Less than 4 data packets in history", + pr_info("%s: %s, sk=%p, ERROR! Less than 4 data " + "packets in history", __FUNCTION__, dccp_role(sk), sk); return; } @@ -1831,7 +1888,9 @@ static u32 ccid3_hc_rx_calc_i_mean(struct sock *sk) u32 i_tot1 = 0; u32 w_tot = 0; - list_for_each_entry_safe(li_entry, li_next, &hcrx->ccid3hcrx_loss_interval_hist, ccid3lih_node) { + list_for_each_entry_safe(li_entry, li_next, + &hcrx->ccid3hcrx_loss_interval_hist, + ccid3lih_node) { if (i < TFRC_RECV_IVAL_F_LENGTH) { i_tot0 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i]; w_tot += ccid3_hc_rx_w[i]; @@ -1845,7 +1904,8 @@ static u32 ccid3_hc_rx_calc_i_mean(struct sock *sk) } if (i != TFRC_RECV_IVAL_F_LENGTH) { - pr_info("%s: %s, sk=%p, ERROR! Missing entry in interval history!\n", + pr_info("%s: %s, sk=%p, ERROR! Missing entry in " + "interval history!\n", __FUNCTION__, dccp_role(sk), sk); return 0; } @@ -1870,11 +1930,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) u8 win_count; u32 p_prev; int ins; -#if 0 - ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n", - dccp_role(sk), sk, dccp_state_name(sk->sk_state), - skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); -#endif + if (hcrx == NULL) return; @@ -1913,7 +1969,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp, skb, SLAB_ATOMIC); if (packet == NULL) { - ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet to history (consider it lost)!", + ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet " + "to history (consider it lost)!", dccp_role(sk), sk); return; } @@ -1927,13 +1984,16 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) switch (hcrx->ccid3hcrx_state) { case TFRC_RSTATE_NO_DATA: - ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial feedback\n", - dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb); + ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial " + "feedback\n", + dccp_role(sk), sk, + dccp_state_name(sk->sk_state), skb); ccid3_hc_rx_send_feedback(sk); ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); return; case TFRC_RSTATE_DATA: - hcrx->ccid3hcrx_bytes_recv += skb->len - dccp_hdr(skb)->dccph_doff * 4; + hcrx->ccid3hcrx_bytes_recv += skb->len - + dccp_hdr(skb)->dccph_doff * 4; if (ins == 0) { if (now_delta(hcrx->ccid3hcrx_tstamp_last_ack) >= hcrx->ccid3hcrx_rtt) { @@ -1975,7 +2035,8 @@ static int ccid3_hc_rx_init(struct sock *sk) ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any()); + hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), + gfp_any()); if (hcrx == NULL) return -ENOMEM; @@ -2135,7 +2196,8 @@ static __exit void ccid3_module_exit(void) } module_exit(ccid3_module_exit); -MODULE_AUTHOR("Ian McDonald & Arnaldo Carvalho de Melo "); +MODULE_AUTHOR("Ian McDonald , " + "Arnaldo Carvalho de Melo "); MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID"); MODULE_LICENSE("GPL"); MODULE_ALIAS("net-dccp-ccid-3"); -- cgit v1.2.3 From b6ee3d4ada4e85d9b9b9164c1327ef0850c79d5e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Aug 2005 18:18:18 -0300 Subject: [CCID3]: Reorganise timeval handling Introducing functions to add to or subtract from a timeval variable and renaming now_delta to timeval_new_delta that calls do_gettimeofday and then timeval_delta, that should be used when there are several deltas made relative to the current time or setting variables to it, so as to avoid calling do_gettimeofday excessively. I'm leaving these "timeval_" prefixed funcions internal to DCCP for a while till we're sure there are no subtle bugs in it. It also is more correct as it checks if the number of usecs added to or subtracted from a tv_usec field is more than 2 seconds. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 127 +++++++++++++++++++++---------------------------- net/dccp/dccp.h | 44 +++++++++++++++-- net/dccp/options.c | 5 +- 3 files changed, 98 insertions(+), 78 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 225c5301317..60e3a5f9fcb 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -156,26 +156,6 @@ static inline void ccid3_hc_tx_set_state(struct sock *sk, hctx->ccid3hctx_state = state; } -static void timeval_sub(struct timeval large, struct timeval small, - struct timeval *result) -{ - result->tv_sec = large.tv_sec-small.tv_sec; - if (large.tv_usec < small.tv_usec) { - (result->tv_sec)--; - result->tv_usec = USEC_PER_SEC + - large.tv_usec - small.tv_usec; - } else - result->tv_usec = large.tv_usec-small.tv_usec; -} - -static inline void timeval_fix(struct timeval *tv) -{ - if (tv->tv_usec >= USEC_PER_SEC) { - tv->tv_sec++; - tv->tv_usec -= USEC_PER_SEC; - } -} - #define CALCX_ARRSIZE 500 #define CALCX_SPLIT 50000 @@ -816,18 +796,22 @@ static void ccid3_hc_tx_update_x(struct sock *sk) 2 * hctx->ccid3hctx_x_recv), (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME)); - } else if (now_delta(hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) { - u32 rtt = hctx->ccid3hctx_rtt; - if (rtt < 10) { - rtt = 10; - } /* avoid divide by zero below */ - - hctx->ccid3hctx_x = max_t(u32, min_t(u32, 2 * hctx->ccid3hctx_x_recv, - 2 * hctx->ccid3hctx_x), - ((hctx->ccid3hctx_s * 100000) / - (rtt / 10))); - /* Using 100000 and 10 to avoid 32 bit overflow for jumbo frames */ - do_gettimeofday(&hctx->ccid3hctx_t_ld); + } else { + struct timeval now; + + do_gettimeofday(&now); + if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >= + hctx->ccid3hctx_rtt) { + /* Avoid divide by zero below */ + const u32 rtt = max_t(u32, hctx->ccid3hctx_rtt, 10); + + hctx->ccid3hctx_x = max_t(u32, min_t(u32, 2 * hctx->ccid3hctx_x_recv, + 2 * hctx->ccid3hctx_x), + ((hctx->ccid3hctx_s * 100000) / + (rtt / 10))); + /* Using 100000 and 10 to avoid 32 bit overflow for jumbo frames */ + hctx->ccid3hctx_t_ld = now; + } } if (hctx->ccid3hctx_x == 0) { @@ -999,14 +983,15 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, /* Set nominal send time for initial packet */ hctx->ccid3hctx_t_nom = now; - (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; - timeval_fix(&(hctx->ccid3hctx_t_nom)); + timeval_add_usecs(&hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); ccid3_calc_new_delta(hctx); rc = 0; break; case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: - delay = now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta; + delay = (timeval_delta(&now, &hctx->ccid3hctx_t_nom) - + hctx->ccid3hctx_delta); ccid3_pr_debug("send_packet delay=%ld\n", delay); delay /= -1000; /* divide by -1000 is to convert to ms and get sign right */ @@ -1068,7 +1053,7 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) * Algorithm in "8.1. Window Counter Valuer" in * draft-ietf-dccp-ccid3-11.txt */ - quarter_rtt = now_delta(hctx->ccid3hctx_t_last_win_count) / + quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count) / (hctx->ccid3hctx_rtt / 4); if (quarter_rtt > 0) { hctx->ccid3hctx_t_last_win_count = now; @@ -1102,8 +1087,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) hctx->ccid3hctx_t_nom = now; ccid3_calc_new_t_ipi(hctx); ccid3_calc_new_delta(hctx); - (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; - timeval_fix(&(hctx->ccid3hctx_t_nom)); + timeval_add_usecs(&hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); } break; default: @@ -1167,7 +1152,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* Update RTT */ - r_sample = now_delta(packet->dccphtx_tstamp); + r_sample = timeval_now_delta(&packet->dccphtx_tstamp); /* FIXME: */ // r_sample -= usecs_to_jiffies(t_elapsed * 10); @@ -1224,15 +1209,11 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid3_hc_tx_update_x(sk); /* Update next send time */ - if (hctx->ccid3hctx_t_ipi > (hctx->ccid3hctx_t_nom).tv_usec) { - hctx->ccid3hctx_t_nom.tv_usec += USEC_PER_SEC; - (hctx->ccid3hctx_t_nom).tv_sec--; - } - /* FIXME - if no feedback then t_ipi can go > 1 second */ - (hctx->ccid3hctx_t_nom).tv_usec -= hctx->ccid3hctx_t_ipi; + timeval_sub_usecs(&hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); ccid3_calc_new_t_ipi(hctx); - (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; - timeval_fix(&(hctx->ccid3hctx_t_nom)); + timeval_add_usecs(&hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); ccid3_calc_new_delta(hctx); /* remove all packets older than the one acked from history */ @@ -1559,20 +1540,24 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; struct dccp_rx_hist_entry *packet; + struct timeval now; ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + do_gettimeofday(&now); + switch (hcrx->ccid3hcrx_state) { case TFRC_RSTATE_NO_DATA: hcrx->ccid3hcrx_x_recv = 0; break; case TFRC_RSTATE_DATA: { - u32 delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback); + const u32 delta = timeval_delta(&now, + &hcrx->ccid3hcrx_tstamp_last_feedback); - if (delta == 0) - delta = 1; /* to prevent divide by zero */ hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * - USEC_PER_SEC) / delta; + USEC_PER_SEC); + if (likely(delta > 1)) + hcrx->ccid3hcrx_x_recv /= delta; } break; default: @@ -1590,13 +1575,14 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) return; } - do_gettimeofday(&(hcrx->ccid3hcrx_tstamp_last_feedback)); + hcrx->ccid3hcrx_tstamp_last_feedback = now; hcrx->ccid3hcrx_last_counter = packet->dccphrx_ccval; hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno; hcrx->ccid3hcrx_bytes_recv = 0; /* Convert to multiples of 10us */ - hcrx->ccid3hcrx_elapsed_time = now_delta(packet->dccphrx_tstamp) / 10; + hcrx->ccid3hcrx_elapsed_time = + timeval_delta(&now, &packet->dccphrx_tstamp) / 10; if (hcrx->ccid3hcrx_p == 0) hcrx->ccid3hcrx_pinv = ~0; else @@ -1676,7 +1662,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; struct dccp_rx_hist_entry *entry, *next, *tail = NULL; u32 rtt, delta, x_recv, fval, p, tmp2; - struct timeval tstamp = { 0 }, tmp_tv; + struct timeval tstamp = { 0, }; int interval = 0; int win_count = 0; int step = 0; @@ -1718,18 +1704,16 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) interval = 1; } found: - timeval_sub(tstamp,tail->dccphrx_tstamp,&tmp_tv); - rtt = (tmp_tv.tv_sec * USEC_PER_SEC + tmp_tv.tv_usec) * 4 / interval; + rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval; ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", dccp_role(sk), sk, rtt); if (rtt == 0) rtt = 1; - delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback); - if (delta == 0) - delta = 1; - - x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC) / delta; + delta = timeval_now_delta(&hcrx->ccid3hcrx_tstamp_last_feedback); + x_recv = hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC; + if (likely(delta > 1)) + x_recv /= delta; tmp1 = (u64)x_recv * (u64)rtt; do_div(tmp1,10000000); @@ -1926,7 +1910,6 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) const struct dccp_options_received *opt_recv; struct dccp_rx_hist_entry *packet; struct timeval now; - u32 now_usecs; u8 win_count; u32 p_prev; int ins; @@ -1948,8 +1931,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) break; p_prev = hcrx->ccid3hcrx_rtt; do_gettimeofday(&now); - now_usecs = now.tv_sec * USEC_PER_SEC + now.tv_usec; - hcrx->ccid3hcrx_rtt = now_usecs - + hcrx->ccid3hcrx_rtt = timeval_usecs(&now) - (opt_recv->dccpor_timestamp_echo - opt_recv->dccpor_elapsed_time) * 10; if (p_prev != hcrx->ccid3hcrx_rtt) @@ -1994,15 +1976,16 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) case TFRC_RSTATE_DATA: hcrx->ccid3hcrx_bytes_recv += skb->len - dccp_hdr(skb)->dccph_doff * 4; - if (ins == 0) { - if (now_delta(hcrx->ccid3hcrx_tstamp_last_ack) >= - hcrx->ccid3hcrx_rtt) { - do_gettimeofday(&hcrx->ccid3hcrx_tstamp_last_ack); - ccid3_hc_rx_send_feedback(sk); - } - return; + if (ins != 0) + break; + + do_gettimeofday(&now); + if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >= + hcrx->ccid3hcrx_rtt) { + hcrx->ccid3hcrx_tstamp_last_ack = now; + ccid3_hc_rx_send_feedback(sk); } - break; + return; default: printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 6ba21509e79..5cd9e794bbe 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -429,17 +429,53 @@ extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, u64 ackno); +static inline suseconds_t timeval_usecs(const struct timeval *tv) +{ + return tv->tv_sec * USEC_PER_SEC + tv->tv_usec; +} + +static inline suseconds_t timeval_delta(const struct timeval *large, + const struct timeval *small) +{ + time_t secs = large->tv_sec - small->tv_sec; + suseconds_t usecs = large->tv_usec - small->tv_usec; + + if (usecs < 0) { + secs--; + usecs += USEC_PER_SEC; + } + return secs * USEC_PER_SEC + usecs; +} + +static inline void timeval_add_usecs(struct timeval *tv, + const suseconds_t usecs) +{ + tv->tv_usec += usecs; + while (tv->tv_usec >= USEC_PER_SEC) { + tv->tv_sec++; + tv->tv_usec -= USEC_PER_SEC; + } +} + +static inline void timeval_sub_usecs(struct timeval *tv, + const suseconds_t usecs) +{ + tv->tv_usec -= usecs; + while (tv->tv_usec < 0) { + tv->tv_sec--; + tv->tv_usec += USEC_PER_SEC; + } +} + /* * Returns the difference in usecs between timeval * passed in and current time */ -static inline u32 now_delta(struct timeval tv) +static inline suseconds_t timeval_now_delta(const struct timeval *tv) { struct timeval now; - do_gettimeofday(&now); - return (now.tv_sec - tv.tv_sec) * USEC_PER_SEC + - (now.tv_usec - tv.tv_usec); + return timeval_delta(&now, tv); } #ifdef CONFIG_IP_DCCP_DEBUG diff --git a/net/dccp/options.c b/net/dccp/options.c index eabcc8f1c62..382c5894acb 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -359,7 +359,7 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) #endif struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; int len = ap->dccpap_buf_vector_len + 2; - const u32 elapsed_time = now_delta(ap->dccpap_time) / 10; + const u32 elapsed_time = timeval_now_delta(&ap->dccpap_time) / 10; unsigned char *to, *from; if (elapsed_time != 0) @@ -451,7 +451,8 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, "CLIENT TX opt: " : "server TX opt: "; #endif u32 tstamp_echo; - const u32 elapsed_time = now_delta(dp->dccps_timestamp_time) / 10; + const u32 elapsed_time = + timeval_now_delta(&dp->dccps_timestamp_time) / 10; const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); const int len = 6 + elapsed_time_len; unsigned char *to; -- cgit v1.2.3 From 6b5e633ab1525b4def3f36b53903b00586e9966d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Aug 2005 20:11:28 -0300 Subject: [CCID3]: Introduce usecs_div To avoid open coding this all over the place. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 109 ++++++++++++++++++------------------------------- 1 file changed, 39 insertions(+), 70 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 60e3a5f9fcb..ff41977f1ed 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -40,6 +40,15 @@ #include "../packet_history.h" #include "ccid3.h" +/* + * Reason for maths with 10 here is to avoid 32 bit overflow when a is big. + */ +static inline u32 usecs_div(const u32 a, const u32 b) +{ + const u32 tmp = a * (USEC_PER_SEC / 10); + return b > 20 ? tmp / (b / 10) : tmp; +} + #ifdef CCID3_DEBUG extern int ccid3_debug; @@ -748,20 +757,13 @@ static u32 ccid3_calc_x(u16 s, u32 R, u32 p) /* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx) { - if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) - return; - /* if no feedback spec says t_ipi is 1 second (set elsewhere and then - * doubles after every no feedback timer (separate function) */ - - if (hctx->ccid3hctx_x < 10) { - ccid3_pr_debug("ccid3_calc_new_t_ipi - ccid3hctx_x < 10\n"); - hctx->ccid3hctx_x = 10; - } - hctx->ccid3hctx_t_ipi = (hctx->ccid3hctx_s * 100000) - / (hctx->ccid3hctx_x / 10); - /* reason for above maths with 10 in there is to avoid 32 bit - * overflow for jumbo packets */ - + /* + * If no feedback spec says t_ipi is 1 second (set elsewhere and then + * doubles after every no feedback timer (separate function) + */ + if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK) + hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s, + hctx->ccid3hctx_x); } /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ @@ -769,7 +771,6 @@ static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) { hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); - } /* @@ -802,22 +803,13 @@ static void ccid3_hc_tx_update_x(struct sock *sk) do_gettimeofday(&now); if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) { - /* Avoid divide by zero below */ - const u32 rtt = max_t(u32, hctx->ccid3hctx_rtt, 10); - - hctx->ccid3hctx_x = max_t(u32, min_t(u32, 2 * hctx->ccid3hctx_x_recv, - 2 * hctx->ccid3hctx_x), - ((hctx->ccid3hctx_s * 100000) / - (rtt / 10))); - /* Using 100000 and 10 to avoid 32 bit overflow for jumbo frames */ + hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv, + hctx->ccid3hctx_x) * 2, + usecs_div(hctx->ccid3hctx_s, + hctx->ccid3hctx_rtt)); hctx->ccid3hctx_t_ld = now; } } - - if (hctx->ccid3hctx_x == 0) { - ccid3_pr_debug("ccid3hctx_x = 0!\n"); - hctx->ccid3hctx_x = 1; - } } static void ccid3_hc_tx_no_feedback_timer(unsigned long data) @@ -826,7 +818,6 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) struct dccp_sock *dp = dccp_sk(sk); unsigned long next_tmout = 0; struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; - u32 rtt; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { @@ -840,19 +831,14 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state)); - if (hctx->ccid3hctx_x < 10) { - ccid3_pr_debug("TFRC_SSTATE_NO_FBACK ccid3hctx_x < 10\n"); - hctx->ccid3hctx_x = 10; - } - switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_TERM: goto out; case TFRC_SSTATE_NO_FBACK: /* Halve send rate */ hctx->ccid3hctx_x /= 2; - if (hctx->ccid3hctx_x < - (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME)) + if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / + TFRC_MAX_BACK_OFF_TIME)) hctx->ccid3hctx_x = (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME); @@ -861,9 +847,9 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state), hctx->ccid3hctx_x); - next_tmout = max_t(u32, 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10), + next_tmout = max_t(u32, 2 * usecs_div(hctx->ccid3hctx_s, + hctx->ccid3hctx_x), TFRC_INITIAL_TIMEOUT); - /* do above maths with 100000 and 10 to prevent overflow on 32 bit */ /* * FIXME - not sure above calculation is correct. See section * 5 of CCID3 11 should adjust tx_t_ipi and double that to @@ -875,12 +861,9 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) * Check if IDLE since last timeout and recv rate is less than * 4 packets per RTT */ - rtt = hctx->ccid3hctx_rtt; - if (rtt < 10) - rtt = 10; - /* stop divide by zero below */ if (!hctx->ccid3hctx_idle || - (hctx->ccid3hctx_x_recv >= 4 * (hctx->ccid3hctx_s * 100000) / (rtt / 10))) { + (hctx->ccid3hctx_x_recv >= + 4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) { ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state)); @@ -905,13 +888,13 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) /* Update sending rate */ ccid3_hc_tx_update_x(sk); } - if (hctx->ccid3hctx_x == 0) { - ccid3_pr_debug("TFRC_SSTATE_FBACK ccid3hctx_x = 0!\n"); - hctx->ccid3hctx_x = 10; - } - /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ + /* + * Schedule no feedback timer to expire in + * max(4 * R, 2 * s / X) + */ next_tmout = max_t(u32, hctx->ccid3hctx_t_rto, - 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10)); + 2 * usecs_div(hctx->ccid3hctx_s, + hctx->ccid3hctx_x)); break; default: printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", @@ -1053,8 +1036,10 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) * Algorithm in "8.1. Window Counter Valuer" in * draft-ietf-dccp-ccid3-11.txt */ - quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count) / - (hctx->ccid3hctx_rtt / 4); + quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count); + if (likely(hctx->ccid3hctx_rtt > 8)) + quarter_rtt /= hctx->ccid3hctx_rtt / 4; + if (quarter_rtt > 0) { hctx->ccid3hctx_t_last_win_count = now; hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count + @@ -1171,13 +1156,6 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + r_sample / 10; - /* - * XXX: this is to avoid a division by zero in ccid3_hc_tx_packet_sent - * implemention of the new window count. - */ - if (hctx->ccid3hctx_rtt < 4) - hctx->ccid3hctx_rtt = 4; - ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, " "r_sample=%us\n", dccp_role(sk), sk, hctx->ccid3hctx_rtt, r_sample); @@ -1220,21 +1198,14 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) dccp_tx_hist_purge_older(ccid3_tx_hist, &hctx->ccid3hctx_hist, packet); - if (hctx->ccid3hctx_x < 10) { - ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx_x < 10\n"); - hctx->ccid3hctx_x = 10; - } - /* to prevent divide by zero below */ - /* * Schedule no feedback timer to expire in * max(4 * R, 2 * s / X) */ next_tmout = max(hctx->ccid3hctx_t_rto, - (2 * (hctx->ccid3hctx_s * 100000) / - (hctx->ccid3hctx_x / 10))); - /* maths with 100000 and 10 is to prevent overflow with 32 bit */ - + 2 * usecs_div(hctx->ccid3hctx_s, + hctx->ccid3hctx_x)); + ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to " "expire in %lu jiffies (%luus)\n", dccp_role(sk), sk, @@ -1350,8 +1321,6 @@ static int ccid3_hc_tx_init(struct sock *sk) /* Set transmission rate to 1 packet per second */ hctx->ccid3hctx_x = hctx->ccid3hctx_s; - /* See ccid3_hc_tx_packet_sent win_count calculatation */ - hctx->ccid3hctx_rtt = 4; hctx->ccid3hctx_t_rto = USEC_PER_SEC; hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; INIT_LIST_HEAD(&hctx->ccid3hctx_hist); -- cgit v1.2.3 From cfc3c525a3b434cabf92bf7054f2c6c93497fbea Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Aug 2005 20:20:37 -0300 Subject: [CCID3]: Move the CCID3 defines to ccid3.h Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 35 ++--------------------------------- net/dccp/ccids/ccid3.h | 42 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 38 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index ff41977f1ed..cfd11234d8f 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -49,9 +49,9 @@ static inline u32 usecs_div(const u32 a, const u32 b) return b > 20 ? tmp / (b / 10) : tmp; } -#ifdef CCID3_DEBUG -extern int ccid3_debug; +static int ccid3_debug; +#ifdef CCID3_DEBUG #define ccid3_pr_debug(format, a...) \ do { if (ccid3_debug) \ printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ @@ -60,37 +60,6 @@ extern int ccid3_debug; #define ccid3_pr_debug(format, a...) #endif -#define TFRC_MIN_PACKET_SIZE 16 -#define TFRC_STD_PACKET_SIZE 256 -#define TFRC_MAX_PACKET_SIZE 65535 - -#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) -/* two seconds as per CCID3 spec 11 */ - -#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) -/* above is in usecs - half the scheduling granularity as per RFC3448 4.6 */ - -#define TFRC_WIN_COUNT_PER_RTT 4 -#define TFRC_WIN_COUNT_LIMIT 16 - -#define TFRC_MAX_BACK_OFF_TIME 64 -/* above is in seconds */ - -#define TFRC_SMALLEST_P 40 - -#define TFRC_RECV_IVAL_F_LENGTH 8 /* length(w[]) */ - -/* Number of later packets received before one is considered lost */ -#define TFRC_RECV_NUM_LATE_LOSS 3 - -enum ccid3_options { - TFRC_OPT_LOSS_EVENT_RATE = 192, - TFRC_OPT_LOSS_INTERVALS = 193, - TFRC_OPT_RECEIVE_RATE = 194, -}; - -static int ccid3_debug; - static struct dccp_tx_hist *ccid3_tx_hist; static struct dccp_rx_hist *ccid3_rx_hist; diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 5ef72cda7cd..f8965700bbe 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -36,8 +36,39 @@ #ifndef _DCCP_CCID3_H_ #define _DCCP_CCID3_H_ -#include +#include #include +#include +#include + +#define TFRC_MIN_PACKET_SIZE 16 +#define TFRC_STD_PACKET_SIZE 256 +#define TFRC_MAX_PACKET_SIZE 65535 + +/* Two seconds as per CCID3 spec */ +#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) + +/* In usecs - half the scheduling granularity as per RFC3448 4.6 */ +#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) + +#define TFRC_WIN_COUNT_PER_RTT 4 +#define TFRC_WIN_COUNT_LIMIT 16 + +/* In seconds */ +#define TFRC_MAX_BACK_OFF_TIME 64 + +#define TFRC_SMALLEST_P 40 + +#define TFRC_RECV_IVAL_F_LENGTH 8 + +/* Number of later packets received before one is considered lost */ +#define TFRC_RECV_NUM_LATE_LOSS 3 + +enum ccid3_options { + TFRC_OPT_LOSS_EVENT_RATE = 192, + TFRC_OPT_LOSS_INTERVALS = 193, + TFRC_OPT_RECEIVE_RATE = 194, +}; struct ccid3_options_received { u64 ccid3or_seqno:48, @@ -47,7 +78,7 @@ struct ccid3_options_received { u32 ccid3or_receive_rate; }; -/** struct ccid3_hc_tx_sock - CCID3 sender half connection congestion control block +/** struct ccid3_hc_tx_sock - CCID3 sender half connection sock * * @ccid3hctx_state - Sender state * @ccid3hctx_x - Current sending rate @@ -57,7 +88,8 @@ struct ccid3_options_received { * @ccid3hctx_rtt - Estimate of current round trip time in usecs * @@ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 * @ccid3hctx_last_win_count - Last window counter sent - * @ccid3hctx_t_last_win_count - Timestamp of earliest packet with last_win_count value sent + * @ccid3hctx_t_last_win_count - Timestamp of earliest packet + * with last_win_count value sent * @ccid3hctx_no_feedback_timer - Handle to no feedback timer * @ccid3hctx_idle - FIXME * @ccid3hctx_t_ld - Time last doubled during slow start @@ -112,9 +144,9 @@ struct ccid3_hc_rx_sock { }; #define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \ - ((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field) + ((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field) #define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \ - ((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field) + ((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field) #endif /* _DCCP_CCID3_H_ */ -- cgit v1.2.3 From ae6706f0678b89de07ad3b456893cc883584f711 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Aug 2005 23:03:09 -0300 Subject: [CCID3]: Move the loss interval code to loss_interval.[ch] And put this into net/dccp/ccids/lib/, where packet_history.[ch] will also be moved and then we'll have a tfrc_lib.ko module that will be used by dccp_ccid3.ko and other CCIDs that are variations of TFRC (RFC 3448). Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/Makefile | 2 +- net/dccp/ccids/ccid3.c | 155 +++++++------------------------------ net/dccp/ccids/ccid3.h | 11 +-- net/dccp/ccids/lib/loss_interval.c | 144 ++++++++++++++++++++++++++++++++++ net/dccp/ccids/lib/loss_interval.h | 61 +++++++++++++++ 5 files changed, 234 insertions(+), 139 deletions(-) create mode 100644 net/dccp/ccids/lib/loss_interval.c create mode 100644 net/dccp/ccids/lib/loss_interval.h (limited to 'net/dccp') diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile index 1c720131c5d..323b68f3b60 100644 --- a/net/dccp/ccids/Makefile +++ b/net/dccp/ccids/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o -dccp_ccid3-y := ccid3.o +dccp_ccid3-y := ccid3.o lib/loss_interval.o diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index cfd11234d8f..7468928b83c 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -38,6 +38,7 @@ #include "../ccid.h" #include "../dccp.h" #include "../packet_history.h" +#include "lib/loss_interval.h" #include "ccid3.h" /* @@ -62,30 +63,7 @@ static int ccid3_debug; static struct dccp_tx_hist *ccid3_tx_hist; static struct dccp_rx_hist *ccid3_rx_hist; - -static kmem_cache_t *ccid3_loss_interval_hist_slab __read_mostly; - -static inline struct ccid3_loss_interval_hist_entry * - ccid3_loss_interval_hist_entry_new(const unsigned int __nocast prio) -{ - return kmem_cache_alloc(ccid3_loss_interval_hist_slab, prio); -} - -static inline void ccid3_loss_interval_hist_entry_delete(struct ccid3_loss_interval_hist_entry *entry) -{ - if (entry != NULL) - kmem_cache_free(ccid3_loss_interval_hist_slab, entry); -} - -static void ccid3_loss_interval_history_delete(struct list_head *hist) -{ - struct ccid3_loss_interval_hist_entry *entry, *next; - - list_for_each_entry_safe(entry, next, hist, ccid3lih_node) { - list_del_init(&entry->ccid3lih_node); - kmem_cache_free(ccid3_loss_interval_hist_slab, entry); - } -} +static struct dccp_li_hist *ccid3_li_hist; static int ccid3_init(struct sock *sk) { @@ -1414,7 +1392,7 @@ trim_history: */ num_later = TFRC_RECV_NUM_LATE_LOSS + 1; - if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { + if (!list_empty(&hcrx->ccid3hcrx_li_hist)) { list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, dccphrx_node) { if (num_later == 0) { @@ -1555,15 +1533,6 @@ static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) &x_recv, sizeof(x_recv)); } -/* Weights used to calculate loss event rate */ -/* - * These are integers as per section 8 of RFC3448. We can then divide by 4 * - * when we use it. - */ -static const int ccid3_hc_rx_w[TFRC_RECV_IVAL_F_LENGTH] = { - 4, 4, 4, 4, 3, 2, 1, 1, -}; - /* * args: fvalue - function value to match * returns: p closest to that value @@ -1672,41 +1641,17 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct ccid3_loss_interval_hist_entry *li_entry; - if (seq_loss != DCCP_MAX_SEQNO + 1) { - ccid3_pr_debug("%s, sk=%p, seq_loss=%llu, win_loss=%u, " - "packet loss detected\n", - dccp_role(sk), sk, seq_loss, win_loss); - - if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { - struct ccid3_loss_interval_hist_entry *li_tail = NULL; - int i; - - ccid3_pr_debug("%s, sk=%p, first loss event detected, " - "creating history\n", - dccp_role(sk), sk); - for (i = 0; i <= TFRC_RECV_IVAL_F_LENGTH; ++i) { - li_entry = ccid3_loss_interval_hist_entry_new(SLAB_ATOMIC); - if (li_entry == NULL) { - ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist); - ccid3_pr_debug("%s, sk=%p, not enough " - "mem for creating " - "history\n", - dccp_role(sk), sk); - return; - } - if (li_tail == NULL) - li_tail = li_entry; - list_add(&li_entry->ccid3lih_node, - &hcrx->ccid3hcrx_loss_interval_hist); - } + if (seq_loss != DCCP_MAX_SEQNO + 1 && + list_empty(&hcrx->ccid3hcrx_li_hist)) { + struct dccp_li_hist_entry *li_tail; - li_entry->ccid3lih_seqno = seq_loss; - li_entry->ccid3lih_win_count = win_loss; - - li_tail->ccid3lih_interval = ccid3_hc_rx_calc_first_li(sk); - } + li_tail = dccp_li_hist_interval_new(ccid3_li_hist, + &hcrx->ccid3hcrx_li_hist, + seq_loss, win_loss); + if (li_tail == NULL) + return; + li_tail->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); } /* FIXME: find end of interval */ } @@ -1746,12 +1691,11 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) } if (a_loss == NULL) { - if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { + if (list_empty(&hcrx->ccid3hcrx_li_hist)) { /* no loss event have occured yet */ - ccid3_pr_debug("%s, sk=%p, TODO: find a lost data " - "packet by comparing to initial " - "seqno\n", - dccp_role(sk), sk); + LIMIT_NETDEBUG("%s: TODO: find a lost data packet by " + "comparing to initial seqno\n", + dccp_role(sk)); goto out_update_li; } else { pr_info("%s: %s, sk=%p, ERROR! Less than 4 data " @@ -1799,48 +1743,6 @@ out_update_li: ccid3_hc_rx_update_li(sk, seq_loss, win_loss); } -static u32 ccid3_hc_rx_calc_i_mean(struct sock *sk) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct ccid3_loss_interval_hist_entry *li_entry, *li_next; - int i = 0; - u32 i_tot; - u32 i_tot0 = 0; - u32 i_tot1 = 0; - u32 w_tot = 0; - - list_for_each_entry_safe(li_entry, li_next, - &hcrx->ccid3hcrx_loss_interval_hist, - ccid3lih_node) { - if (i < TFRC_RECV_IVAL_F_LENGTH) { - i_tot0 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i]; - w_tot += ccid3_hc_rx_w[i]; - } - - if (i != 0) - i_tot1 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i - 1]; - - if (++i > TFRC_RECV_IVAL_F_LENGTH) - break; - } - - if (i != TFRC_RECV_IVAL_F_LENGTH) { - pr_info("%s: %s, sk=%p, ERROR! Missing entry in " - "interval history!\n", - __FUNCTION__, dccp_role(sk), sk); - return 0; - } - - i_tot = max(i_tot0, i_tot1); - - /* FIXME: Why do we do this? -Ian McDonald */ - if (i_tot * 4 < w_tot) - i_tot = w_tot * 4; - - return i_tot * 4 / w_tot; -} - static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); @@ -1939,9 +1841,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) p_prev = hcrx->ccid3hcrx_p; /* Calculate loss event rate */ - if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) + if (!list_empty(&hcrx->ccid3hcrx_li_hist)) /* Scaling up by 1000000 as fixed decimal */ - hcrx->ccid3hcrx_p = 1000000 / ccid3_hc_rx_calc_i_mean(sk); + hcrx->ccid3hcrx_p = 1000000 / dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist); if (hcrx->ccid3hcrx_p > p_prev) { ccid3_hc_rx_send_feedback(sk); @@ -1971,7 +1873,7 @@ static int ccid3_hc_rx_init(struct sock *sk) hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); - INIT_LIST_HEAD(&hcrx->ccid3hcrx_loss_interval_hist); + INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); /* * XXX this seems to be paranoid, need to think more about this, for * now start with something different than zero. -acme @@ -1996,7 +1898,7 @@ static void ccid3_hc_rx_exit(struct sock *sk) dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist); /* Empty loss interval history */ - ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist); + dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist); kfree(dp->dccps_hc_rx_ccid_private); dp->dccps_hc_rx_ccid_private = NULL; @@ -2063,11 +1965,8 @@ static __init int ccid3_module_init(void) if (ccid3_tx_hist == NULL) goto out_free_rx; - ccid3_loss_interval_hist_slab = kmem_cache_create("li_hist_ccid3", - sizeof(struct ccid3_loss_interval_hist_entry), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (ccid3_loss_interval_hist_slab == NULL) + ccid3_li_hist = dccp_li_hist_new("ccid3"); + if (ccid3_li_hist == NULL) goto out_free_tx; rc = ccid_register(&ccid3); @@ -2077,8 +1976,8 @@ out: return rc; out_free_loss_interval_history: - kmem_cache_destroy(ccid3_loss_interval_hist_slab); - ccid3_loss_interval_hist_slab = NULL; + dccp_li_hist_delete(ccid3_li_hist); + ccid3_li_hist = NULL; out_free_tx: dccp_tx_hist_delete(ccid3_tx_hist); ccid3_tx_hist = NULL; @@ -2110,9 +2009,9 @@ static __exit void ccid3_module_exit(void) dccp_rx_hist_delete(ccid3_rx_hist); ccid3_rx_hist = NULL; } - if (ccid3_loss_interval_hist_slab != NULL) { - kmem_cache_destroy(ccid3_loss_interval_hist_slab); - ccid3_loss_interval_hist_slab = NULL; + if (ccid3_li_hist != NULL) { + dccp_li_hist_delete(ccid3_li_hist); + ccid3_li_hist = NULL; } } module_exit(ccid3_module_exit); diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index f8965700bbe..f68d0b4e31e 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -59,8 +59,6 @@ #define TFRC_SMALLEST_P 40 -#define TFRC_RECV_IVAL_F_LENGTH 8 - /* Number of later packets received before one is considered lost */ #define TFRC_RECV_NUM_LATE_LOSS 3 @@ -119,13 +117,6 @@ struct ccid3_hc_tx_sock { struct ccid3_options_received ccid3hctx_options_received; }; -struct ccid3_loss_interval_hist_entry { - struct list_head ccid3lih_node; - u64 ccid3lih_seqno:48, - ccid3lih_win_count:4; - u32 ccid3lih_interval; -}; - struct ccid3_hc_rx_sock { u64 ccid3hcrx_seqno_last_counter:48, ccid3hcrx_state:8, @@ -136,7 +127,7 @@ struct ccid3_hc_rx_sock { struct timeval ccid3hcrx_tstamp_last_feedback; struct timeval ccid3hcrx_tstamp_last_ack; struct list_head ccid3hcrx_hist; - struct list_head ccid3hcrx_loss_interval_hist; + struct list_head ccid3hcrx_li_hist; u16 ccid3hcrx_s; u32 ccid3hcrx_pinv; u32 ccid3hcrx_elapsed_time; diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c new file mode 100644 index 00000000000..4c01a54143a --- /dev/null +++ b/net/dccp/ccids/lib/loss_interval.c @@ -0,0 +1,144 @@ +/* + * net/dccp/ccids/lib/loss_interval.c + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include + +#include "loss_interval.h" + +struct dccp_li_hist *dccp_li_hist_new(const char *name) +{ + struct dccp_li_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); + static const char dccp_li_hist_mask[] = "li_hist_%s"; + char *slab_name; + + if (hist == NULL) + goto out; + + slab_name = kmalloc(strlen(name) + sizeof(dccp_li_hist_mask) - 1, + GFP_ATOMIC); + if (slab_name == NULL) + goto out_free_hist; + + sprintf(slab_name, dccp_li_hist_mask, name); + hist->dccplih_slab = kmem_cache_create(slab_name, + sizeof(struct dccp_li_hist_entry), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (hist->dccplih_slab == NULL) + goto out_free_slab_name; +out: + return hist; +out_free_slab_name: + kfree(slab_name); +out_free_hist: + kfree(hist); + hist = NULL; + goto out; +} + +EXPORT_SYMBOL_GPL(dccp_li_hist_new); + +void dccp_li_hist_delete(struct dccp_li_hist *hist) +{ + const char* name = kmem_cache_name(hist->dccplih_slab); + + kmem_cache_destroy(hist->dccplih_slab); + kfree(name); + kfree(hist); +} + +EXPORT_SYMBOL_GPL(dccp_li_hist_delete); + +void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list) +{ + struct dccp_li_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, list, dccplih_node) { + list_del_init(&entry->dccplih_node); + kmem_cache_free(hist->dccplih_slab, entry); + } +} + +EXPORT_SYMBOL_GPL(dccp_li_hist_purge); + +/* Weights used to calculate loss event rate */ +/* + * These are integers as per section 8 of RFC3448. We can then divide by 4 * + * when we use it. + */ +static const int dccp_li_hist_w[DCCP_LI_HIST_IVAL_F_LENGTH] = { + 4, 4, 4, 4, 3, 2, 1, 1, +}; + +u32 dccp_li_hist_calc_i_mean(struct list_head *list) +{ + struct dccp_li_hist_entry *li_entry, *li_next; + int i = 0; + u32 i_tot; + u32 i_tot0 = 0; + u32 i_tot1 = 0; + u32 w_tot = 0; + + list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) { + if (i < DCCP_LI_HIST_IVAL_F_LENGTH) { + i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i]; + w_tot += dccp_li_hist_w[i]; + } + + if (i != 0) + i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1]; + + if (++i > DCCP_LI_HIST_IVAL_F_LENGTH) + break; + } + + if (i != DCCP_LI_HIST_IVAL_F_LENGTH) + return 0; + + i_tot = max(i_tot0, i_tot1); + + /* FIXME: Why do we do this? -Ian McDonald */ + if (i_tot * 4 < w_tot) + i_tot = w_tot * 4; + + return i_tot * 4 / w_tot; +} + +EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); + +struct dccp_li_hist_entry *dccp_li_hist_interval_new(struct dccp_li_hist *hist, + struct list_head *list, + const u64 seq_loss, + const u8 win_loss) +{ + struct dccp_li_hist_entry *tail = NULL, *entry; + int i; + + for (i = 0; i <= DCCP_LI_HIST_IVAL_F_LENGTH; ++i) { + entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC); + if (entry == NULL) { + dccp_li_hist_purge(hist, list); + return NULL; + } + if (tail == NULL) + tail = entry; + list_add(&entry->dccplih_node, list); + } + + entry->dccplih_seqno = seq_loss; + entry->dccplih_win_count = win_loss; + return tail; +} + +EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new); diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h new file mode 100644 index 00000000000..13ad47ba142 --- /dev/null +++ b/net/dccp/ccids/lib/loss_interval.h @@ -0,0 +1,61 @@ +#ifndef _DCCP_LI_HIST_ +#define _DCCP_LI_HIST_ +/* + * net/dccp/ccids/lib/loss_interval.h + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include +#include +#include +#include + +#define DCCP_LI_HIST_IVAL_F_LENGTH 8 + +struct dccp_li_hist { + kmem_cache_t *dccplih_slab; +}; + +extern struct dccp_li_hist *dccp_li_hist_new(const char *name); +extern void dccp_li_hist_delete(struct dccp_li_hist *hist); + +struct dccp_li_hist_entry { + struct list_head dccplih_node; + u64 dccplih_seqno:48, + dccplih_win_count:4; + u32 dccplih_interval; +}; + +static inline struct dccp_li_hist_entry * + dccp_li_hist_entry_new(struct dccp_li_hist *hist, + const unsigned int __nocast prio) +{ + return kmem_cache_alloc(hist->dccplih_slab, prio); +} + +static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist, + struct dccp_li_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(hist->dccplih_slab, entry); +} + +extern void dccp_li_hist_purge(struct dccp_li_hist *hist, + struct list_head *list); + +extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); + +extern struct dccp_li_hist_entry * + dccp_li_hist_interval_new(struct dccp_li_hist *hist, + struct list_head *list, + const u64 seq_loss, + const u8 win_loss); +#endif /* _DCCP_LI_HIST_ */ -- cgit v1.2.3 From 4524b259541e1eea07020af825d8e7b0e4faaec5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Aug 2005 23:18:26 -0300 Subject: [DCCP]: Just move packet_history.[ch] to net/dccp/ccids/lib/ Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/Makefile | 2 +- net/dccp/ccids/Makefile | 2 +- net/dccp/ccids/ccid3.c | 2 +- net/dccp/ccids/lib/packet_history.c | 199 ++++++++++++++++++++++++++++++++++++ net/dccp/ccids/lib/packet_history.h | 185 +++++++++++++++++++++++++++++++++ net/dccp/packet_history.c | 199 ------------------------------------ net/dccp/packet_history.h | 185 --------------------------------- 7 files changed, 387 insertions(+), 387 deletions(-) create mode 100644 net/dccp/ccids/lib/packet_history.c create mode 100644 net/dccp/ccids/lib/packet_history.h delete mode 100644 net/dccp/packet_history.c delete mode 100644 net/dccp/packet_history.h (limited to 'net/dccp') diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 44a867f2918..fb97bb04245 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -1,7 +1,7 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ - timer.o packet_history.o + timer.o obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile index 323b68f3b60..29eb1b61fdb 100644 --- a/net/dccp/ccids/Makefile +++ b/net/dccp/ccids/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o -dccp_ccid3-y := ccid3.o lib/loss_interval.o +dccp_ccid3-y := ccid3.o lib/loss_interval.o lib/packet_history.o diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 7468928b83c..12548fbde86 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -37,7 +37,7 @@ #include #include "../ccid.h" #include "../dccp.h" -#include "../packet_history.h" +#include "lib/packet_history.h" #include "lib/loss_interval.h" #include "ccid3.h" diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c new file mode 100644 index 00000000000..2d9ef5ae0bf --- /dev/null +++ b/net/dccp/ccids/lib/packet_history.c @@ -0,0 +1,199 @@ +/* + * net/dccp/packet_history.h + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * + * An implementation of the DCCP protocol + * + * This code has been developed by the University of Waikato WAND + * research group. For further information please see http://www.wand.net.nz/ + * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * + * This code also uses code from Lulea University, rereleased as GPL by its + * authors: + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * Changes to meet Linux coding standards, to make it meet latest ccid3 draft + * and to make it work as a loadable module in the DCCP stack written by + * Arnaldo Carvalho de Melo . + * + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include + +#include "packet_history.h" + +struct dccp_rx_hist *dccp_rx_hist_new(const char *name) +{ + struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); + static const char dccp_rx_hist_mask[] = "rx_hist_%s"; + char *slab_name; + + if (hist == NULL) + goto out; + + slab_name = kmalloc(strlen(name) + sizeof(dccp_rx_hist_mask) - 1, + GFP_ATOMIC); + if (slab_name == NULL) + goto out_free_hist; + + sprintf(slab_name, dccp_rx_hist_mask, name); + hist->dccprxh_slab = kmem_cache_create(slab_name, + sizeof(struct dccp_rx_hist_entry), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (hist->dccprxh_slab == NULL) + goto out_free_slab_name; +out: + return hist; +out_free_slab_name: + kfree(slab_name); +out_free_hist: + kfree(hist); + hist = NULL; + goto out; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_new); + +void dccp_rx_hist_delete(struct dccp_rx_hist *hist) +{ + const char* name = kmem_cache_name(hist->dccprxh_slab); + + kmem_cache_destroy(hist->dccprxh_slab); + kfree(name); + kfree(hist); +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_delete); + +void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list) +{ + struct dccp_rx_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, list, dccphrx_node) { + list_del_init(&entry->dccphrx_node); + kmem_cache_free(hist->dccprxh_slab, entry); + } +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_purge); + +struct dccp_rx_hist_entry * + dccp_rx_hist_find_data_packet(const struct list_head *list) +{ + struct dccp_rx_hist_entry *entry, *packet = NULL; + + list_for_each_entry(entry, list, dccphrx_node) + if (entry->dccphrx_type == DCCP_PKT_DATA || + entry->dccphrx_type == DCCP_PKT_DATAACK) { + packet = entry; + break; + } + + return packet; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet); + +struct dccp_tx_hist *dccp_tx_hist_new(const char *name) +{ + struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); + static const char dccp_tx_hist_mask[] = "tx_hist_%s"; + char *slab_name; + + if (hist == NULL) + goto out; + + slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1, + GFP_ATOMIC); + if (slab_name == NULL) + goto out_free_hist; + + sprintf(slab_name, dccp_tx_hist_mask, name); + hist->dccptxh_slab = kmem_cache_create(slab_name, + sizeof(struct dccp_tx_hist_entry), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (hist->dccptxh_slab == NULL) + goto out_free_slab_name; +out: + return hist; +out_free_slab_name: + kfree(slab_name); +out_free_hist: + kfree(hist); + hist = NULL; + goto out; +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_new); + +void dccp_tx_hist_delete(struct dccp_tx_hist *hist) +{ + const char* name = kmem_cache_name(hist->dccptxh_slab); + + kmem_cache_destroy(hist->dccptxh_slab); + kfree(name); + kfree(hist); +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_delete); + +struct dccp_tx_hist_entry * + dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq) +{ + struct dccp_tx_hist_entry *packet = NULL, *entry; + + list_for_each_entry(entry, list, dccphtx_node) + if (entry->dccphtx_seqno == seq) { + packet = entry; + break; + } + + return packet; +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); + +void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, + struct list_head *list, + struct dccp_tx_hist_entry *packet) +{ + struct dccp_tx_hist_entry *next; + + list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) { + list_del_init(&packet->dccphtx_node); + dccp_tx_hist_entry_delete(hist, packet); + } +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older); + +void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) +{ + struct dccp_tx_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, list, dccphtx_node) { + list_del_init(&entry->dccphtx_node); + dccp_tx_hist_entry_delete(hist, entry); + } +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h new file mode 100644 index 00000000000..235828d822d --- /dev/null +++ b/net/dccp/ccids/lib/packet_history.h @@ -0,0 +1,185 @@ +/* + * net/dccp/packet_history.h + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * + * An implementation of the DCCP protocol + * + * This code has been developed by the University of Waikato WAND + * research group. For further information please see http://www.wand.net.nz/ + * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * + * This code also uses code from Lulea University, rereleased as GPL by its + * authors: + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * Changes to meet Linux coding standards, to make it meet latest ccid3 draft + * and to make it work as a loadable module in the DCCP stack written by + * Arnaldo Carvalho de Melo . + * + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _DCCP_PKT_HIST_ +#define _DCCP_PKT_HIST_ + +#include +#include +#include +#include + +#include "../../dccp.h" + +struct dccp_tx_hist_entry { + struct list_head dccphtx_node; + u64 dccphtx_seqno:48, + dccphtx_ccval:4, + dccphtx_sent:1; + u32 dccphtx_rtt; + struct timeval dccphtx_tstamp; +}; + +struct dccp_rx_hist_entry { + struct list_head dccphrx_node; + u64 dccphrx_seqno:48, + dccphrx_ccval:4, + dccphrx_type:4; + u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ + struct timeval dccphrx_tstamp; +}; + +struct dccp_tx_hist { + kmem_cache_t *dccptxh_slab; +}; + +extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name); +extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist); + +struct dccp_rx_hist { + kmem_cache_t *dccprxh_slab; +}; + +extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name); +extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist); +extern struct dccp_rx_hist_entry * + dccp_rx_hist_find_data_packet(const struct list_head *list); + +static inline struct dccp_tx_hist_entry * + dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, + const unsigned int __nocast prio) +{ + struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab, + prio); + + if (entry != NULL) + entry->dccphtx_sent = 0; + + return entry; +} + +static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, + struct dccp_tx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(hist->dccptxh_slab, entry); +} + +extern struct dccp_tx_hist_entry * + dccp_tx_hist_find_entry(const struct list_head *list, + const u64 seq); + +static inline void dccp_tx_hist_add_entry(struct list_head *list, + struct dccp_tx_hist_entry *entry) +{ + list_add(&entry->dccphtx_node, list); +} + +extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, + struct list_head *list, + struct dccp_tx_hist_entry *next); + +extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist, + struct list_head *list); + +static inline struct dccp_tx_hist_entry * + dccp_tx_hist_head(struct list_head *list) +{ + struct dccp_tx_hist_entry *head = NULL; + + if (!list_empty(list)) + head = list_entry(list->next, struct dccp_tx_hist_entry, + dccphtx_node); + return head; +} + +static inline struct dccp_rx_hist_entry * + dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, + const u32 ndp, + const struct sk_buff *skb, + const unsigned int __nocast prio) +{ + struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab, + prio); + + if (entry != NULL) { + const struct dccp_hdr *dh = dccp_hdr(skb); + + entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; + entry->dccphrx_ccval = dh->dccph_ccval; + entry->dccphrx_type = dh->dccph_type; + entry->dccphrx_ndp = ndp; + do_gettimeofday(&(entry->dccphrx_tstamp)); + } + + return entry; +} + +static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, + struct dccp_rx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(hist->dccprxh_slab, entry); +} + +extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, + struct list_head *list); + +static inline void dccp_rx_hist_add_entry(struct list_head *list, + struct dccp_rx_hist_entry *entry) +{ + list_add(&entry->dccphrx_node, list); +} + +static inline struct dccp_rx_hist_entry * + dccp_rx_hist_head(struct list_head *list) +{ + struct dccp_rx_hist_entry *head = NULL; + + if (!list_empty(list)) + head = list_entry(list->next, struct dccp_rx_hist_entry, + dccphrx_node); + return head; +} + +static inline int + dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry) +{ + return entry->dccphrx_type == DCCP_PKT_DATA || + entry->dccphrx_type == DCCP_PKT_DATAACK; +} + +#endif /* _DCCP_PKT_HIST_ */ diff --git a/net/dccp/packet_history.c b/net/dccp/packet_history.c deleted file mode 100644 index 2d9ef5ae0bf..00000000000 --- a/net/dccp/packet_history.c +++ /dev/null @@ -1,199 +0,0 @@ -/* - * net/dccp/packet_history.h - * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * - * An implementation of the DCCP protocol - * - * This code has been developed by the University of Waikato WAND - * research group. For further information please see http://www.wand.net.nz/ - * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz - * - * This code also uses code from Lulea University, rereleased as GPL by its - * authors: - * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon - * - * Changes to meet Linux coding standards, to make it meet latest ccid3 draft - * and to make it work as a loadable module in the DCCP stack written by - * Arnaldo Carvalho de Melo . - * - * Copyright (c) 2005 Arnaldo Carvalho de Melo - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include - -#include "packet_history.h" - -struct dccp_rx_hist *dccp_rx_hist_new(const char *name) -{ - struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); - static const char dccp_rx_hist_mask[] = "rx_hist_%s"; - char *slab_name; - - if (hist == NULL) - goto out; - - slab_name = kmalloc(strlen(name) + sizeof(dccp_rx_hist_mask) - 1, - GFP_ATOMIC); - if (slab_name == NULL) - goto out_free_hist; - - sprintf(slab_name, dccp_rx_hist_mask, name); - hist->dccprxh_slab = kmem_cache_create(slab_name, - sizeof(struct dccp_rx_hist_entry), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (hist->dccprxh_slab == NULL) - goto out_free_slab_name; -out: - return hist; -out_free_slab_name: - kfree(slab_name); -out_free_hist: - kfree(hist); - hist = NULL; - goto out; -} - -EXPORT_SYMBOL_GPL(dccp_rx_hist_new); - -void dccp_rx_hist_delete(struct dccp_rx_hist *hist) -{ - const char* name = kmem_cache_name(hist->dccprxh_slab); - - kmem_cache_destroy(hist->dccprxh_slab); - kfree(name); - kfree(hist); -} - -EXPORT_SYMBOL_GPL(dccp_rx_hist_delete); - -void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list) -{ - struct dccp_rx_hist_entry *entry, *next; - - list_for_each_entry_safe(entry, next, list, dccphrx_node) { - list_del_init(&entry->dccphrx_node); - kmem_cache_free(hist->dccprxh_slab, entry); - } -} - -EXPORT_SYMBOL_GPL(dccp_rx_hist_purge); - -struct dccp_rx_hist_entry * - dccp_rx_hist_find_data_packet(const struct list_head *list) -{ - struct dccp_rx_hist_entry *entry, *packet = NULL; - - list_for_each_entry(entry, list, dccphrx_node) - if (entry->dccphrx_type == DCCP_PKT_DATA || - entry->dccphrx_type == DCCP_PKT_DATAACK) { - packet = entry; - break; - } - - return packet; -} - -EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet); - -struct dccp_tx_hist *dccp_tx_hist_new(const char *name) -{ - struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); - static const char dccp_tx_hist_mask[] = "tx_hist_%s"; - char *slab_name; - - if (hist == NULL) - goto out; - - slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1, - GFP_ATOMIC); - if (slab_name == NULL) - goto out_free_hist; - - sprintf(slab_name, dccp_tx_hist_mask, name); - hist->dccptxh_slab = kmem_cache_create(slab_name, - sizeof(struct dccp_tx_hist_entry), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (hist->dccptxh_slab == NULL) - goto out_free_slab_name; -out: - return hist; -out_free_slab_name: - kfree(slab_name); -out_free_hist: - kfree(hist); - hist = NULL; - goto out; -} - -EXPORT_SYMBOL_GPL(dccp_tx_hist_new); - -void dccp_tx_hist_delete(struct dccp_tx_hist *hist) -{ - const char* name = kmem_cache_name(hist->dccptxh_slab); - - kmem_cache_destroy(hist->dccptxh_slab); - kfree(name); - kfree(hist); -} - -EXPORT_SYMBOL_GPL(dccp_tx_hist_delete); - -struct dccp_tx_hist_entry * - dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq) -{ - struct dccp_tx_hist_entry *packet = NULL, *entry; - - list_for_each_entry(entry, list, dccphtx_node) - if (entry->dccphtx_seqno == seq) { - packet = entry; - break; - } - - return packet; -} - -EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); - -void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, - struct list_head *list, - struct dccp_tx_hist_entry *packet) -{ - struct dccp_tx_hist_entry *next; - - list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) { - list_del_init(&packet->dccphtx_node); - dccp_tx_hist_entry_delete(hist, packet); - } -} - -EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older); - -void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) -{ - struct dccp_tx_hist_entry *entry, *next; - - list_for_each_entry_safe(entry, next, list, dccphtx_node) { - list_del_init(&entry->dccphtx_node); - dccp_tx_hist_entry_delete(hist, entry); - } -} - -EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); diff --git a/net/dccp/packet_history.h b/net/dccp/packet_history.h deleted file mode 100644 index 2e5ba343e3d..00000000000 --- a/net/dccp/packet_history.h +++ /dev/null @@ -1,185 +0,0 @@ -/* - * net/dccp/packet_history.h - * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * - * An implementation of the DCCP protocol - * - * This code has been developed by the University of Waikato WAND - * research group. For further information please see http://www.wand.net.nz/ - * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz - * - * This code also uses code from Lulea University, rereleased as GPL by its - * authors: - * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon - * - * Changes to meet Linux coding standards, to make it meet latest ccid3 draft - * and to make it work as a loadable module in the DCCP stack written by - * Arnaldo Carvalho de Melo . - * - * Copyright (c) 2005 Arnaldo Carvalho de Melo - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef _DCCP_PKT_HIST_ -#define _DCCP_PKT_HIST_ - -#include -#include -#include -#include - -#include "dccp.h" - -struct dccp_tx_hist_entry { - struct list_head dccphtx_node; - u64 dccphtx_seqno:48, - dccphtx_ccval:4, - dccphtx_sent:1; - u32 dccphtx_rtt; - struct timeval dccphtx_tstamp; -}; - -struct dccp_rx_hist_entry { - struct list_head dccphrx_node; - u64 dccphrx_seqno:48, - dccphrx_ccval:4, - dccphrx_type:4; - u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ - struct timeval dccphrx_tstamp; -}; - -struct dccp_tx_hist { - kmem_cache_t *dccptxh_slab; -}; - -extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name); -extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist); - -struct dccp_rx_hist { - kmem_cache_t *dccprxh_slab; -}; - -extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name); -extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist); -extern struct dccp_rx_hist_entry * - dccp_rx_hist_find_data_packet(const struct list_head *list); - -static inline struct dccp_tx_hist_entry * - dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, - const unsigned int __nocast prio) -{ - struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab, - prio); - - if (entry != NULL) - entry->dccphtx_sent = 0; - - return entry; -} - -static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, - struct dccp_tx_hist_entry *entry) -{ - if (entry != NULL) - kmem_cache_free(hist->dccptxh_slab, entry); -} - -extern struct dccp_tx_hist_entry * - dccp_tx_hist_find_entry(const struct list_head *list, - const u64 seq); - -static inline void dccp_tx_hist_add_entry(struct list_head *list, - struct dccp_tx_hist_entry *entry) -{ - list_add(&entry->dccphtx_node, list); -} - -extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, - struct list_head *list, - struct dccp_tx_hist_entry *next); - -extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist, - struct list_head *list); - -static inline struct dccp_tx_hist_entry * - dccp_tx_hist_head(struct list_head *list) -{ - struct dccp_tx_hist_entry *head = NULL; - - if (!list_empty(list)) - head = list_entry(list->next, struct dccp_tx_hist_entry, - dccphtx_node); - return head; -} - -static inline struct dccp_rx_hist_entry * - dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, - const u32 ndp, - const struct sk_buff *skb, - const unsigned int __nocast prio) -{ - struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab, - prio); - - if (entry != NULL) { - const struct dccp_hdr *dh = dccp_hdr(skb); - - entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; - entry->dccphrx_ccval = dh->dccph_ccval; - entry->dccphrx_type = dh->dccph_type; - entry->dccphrx_ndp = ndp; - do_gettimeofday(&(entry->dccphrx_tstamp)); - } - - return entry; -} - -static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, - struct dccp_rx_hist_entry *entry) -{ - if (entry != NULL) - kmem_cache_free(hist->dccprxh_slab, entry); -} - -extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, - struct list_head *list); - -static inline void dccp_rx_hist_add_entry(struct list_head *list, - struct dccp_rx_hist_entry *entry) -{ - list_add(&entry->dccphrx_node, list); -} - -static inline struct dccp_rx_hist_entry * - dccp_rx_hist_head(struct list_head *list) -{ - struct dccp_rx_hist_entry *head = NULL; - - if (!list_empty(list)) - head = list_entry(list->next, struct dccp_rx_hist_entry, - dccphrx_node); - return head; -} - -static inline int - dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry) -{ - return entry->dccphrx_type == DCCP_PKT_DATA || - entry->dccphrx_type == DCCP_PKT_DATAACK; -} - -#endif /* _DCCP_PKT_HIST_ */ -- cgit v1.2.3 From 5cea0ddce56ff3406a81fbbab80ef45c65701673 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Aug 2005 23:50:46 -0300 Subject: [DCCP]: Introduce dccp_tfrc_lib module with net/dccp/ccids/lib/*.c I'll now take a look at the other proposed TFRC DCCP CCIDs to find more code that is now in ccid3.c and move to this module, the loss event rate, calc_X, etc most probably will be moved there. The main goal of these changes is to pave the way for the implementation of more TFRC based DCCP CCIDs and to shrink ccid3.c, reducing its complexity and helping in getting it rock solid. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/Kconfig | 4 ++++ net/dccp/ccids/Makefile | 4 +++- net/dccp/ccids/lib/Makefile | 3 +++ net/dccp/ccids/lib/packet_history.c | 6 ++++++ 4 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 net/dccp/ccids/lib/Makefile (limited to 'net/dccp') diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index 67f9c06bd17..7684d83946a 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -22,4 +22,8 @@ config IP_DCCP_CCID3 If in doubt, say M. +config IP_DCCP_TFRC_LIB + depends on IP_DCCP_CCID3 + def_tristate IP_DCCP_CCID3 + endmenu diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile index 29eb1b61fdb..956f79f5074 100644 --- a/net/dccp/ccids/Makefile +++ b/net/dccp/ccids/Makefile @@ -1,3 +1,5 @@ obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o -dccp_ccid3-y := ccid3.o lib/loss_interval.o lib/packet_history.o +dccp_ccid3-y := ccid3.o + +obj-y += lib/ diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile new file mode 100644 index 00000000000..e9a91e238c8 --- /dev/null +++ b/net/dccp/ccids/lib/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o + +dccp_tfrc_lib-y := loss_interval.o packet_history.o diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 2d9ef5ae0bf..f252a9555e3 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -35,6 +35,7 @@ */ #include +#include #include #include "packet_history.h" @@ -197,3 +198,8 @@ void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) } EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); + +MODULE_AUTHOR("Ian McDonald , " + "Arnaldo Carvalho de Melo "); +MODULE_DESCRIPTION("DCCP TFRC library"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 36729c1a73c354a155db18d64d9e79b86c446fcf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 28 Aug 2005 00:47:15 -0300 Subject: [DCCP]: Move the calc_X routines to dccp_tfrc_lib Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 624 +---------------------------------- net/dccp/ccids/lib/Makefile | 2 +- net/dccp/ccids/lib/tfrc.h | 22 ++ net/dccp/ccids/lib/tfrc_equation.c | 644 +++++++++++++++++++++++++++++++++++++ 4 files changed, 672 insertions(+), 620 deletions(-) create mode 100644 net/dccp/ccids/lib/tfrc.h create mode 100644 net/dccp/ccids/lib/tfrc_equation.c (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 12548fbde86..a215c46d6f1 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -39,6 +39,7 @@ #include "../dccp.h" #include "lib/packet_history.h" #include "lib/loss_interval.h" +#include "lib/tfrc.h" #include "ccid3.h" /* @@ -112,595 +113,6 @@ static inline void ccid3_hc_tx_set_state(struct sock *sk, hctx->ccid3hctx_state = state; } -#define CALCX_ARRSIZE 500 - -#define CALCX_SPLIT 50000 -/* equivalent to 0.05 */ - -static const u32 calcx_lookup[CALCX_ARRSIZE][2] = { - { 37172 , 8172 }, - { 53499 , 11567 }, - { 66664 , 14180 }, - { 78298 , 16388 }, - { 89021 , 18339 }, - { 99147 , 20108 }, - { 108858 , 21738 }, - { 118273 , 23260 }, - { 127474 , 24693 }, - { 136520 , 26052 }, - { 145456 , 27348 }, - { 154316 , 28589 }, - { 163130 , 29783 }, - { 171919 , 30935 }, - { 180704 , 32049 }, - { 189502 , 33130 }, - { 198328 , 34180 }, - { 207194 , 35202 }, - { 216114 , 36198 }, - { 225097 , 37172 }, - { 234153 , 38123 }, - { 243294 , 39055 }, - { 252527 , 39968 }, - { 261861 , 40864 }, - { 271305 , 41743 }, - { 280866 , 42607 }, - { 290553 , 43457 }, - { 300372 , 44293 }, - { 310333 , 45117 }, - { 320441 , 45929 }, - { 330705 , 46729 }, - { 341131 , 47518 }, - { 351728 , 48297 }, - { 362501 , 49066 }, - { 373460 , 49826 }, - { 384609 , 50577 }, - { 395958 , 51320 }, - { 407513 , 52054 }, - { 419281 , 52780 }, - { 431270 , 53499 }, - { 443487 , 54211 }, - { 455940 , 54916 }, - { 468635 , 55614 }, - { 481581 , 56306 }, - { 494785 , 56991 }, - { 508254 , 57671 }, - { 521996 , 58345 }, - { 536019 , 59014 }, - { 550331 , 59677 }, - { 564939 , 60335 }, - { 579851 , 60988 }, - { 595075 , 61636 }, - { 610619 , 62279 }, - { 626491 , 62918 }, - { 642700 , 63553 }, - { 659253 , 64183 }, - { 676158 , 64809 }, - { 693424 , 65431 }, - { 711060 , 66050 }, - { 729073 , 66664 }, - { 747472 , 67275 }, - { 766266 , 67882 }, - { 785464 , 68486 }, - { 805073 , 69087 }, - { 825103 , 69684 }, - { 845562 , 70278 }, - { 866460 , 70868 }, - { 887805 , 71456 }, - { 909606 , 72041 }, - { 931873 , 72623 }, - { 954614 , 73202 }, - { 977839 , 73778 }, - { 1001557 , 74352 }, - { 1025777 , 74923 }, - { 1050508 , 75492 }, - { 1075761 , 76058 }, - { 1101544 , 76621 }, - { 1127867 , 77183 }, - { 1154739 , 77741 }, - { 1182172 , 78298 }, - { 1210173 , 78852 }, - { 1238753 , 79405 }, - { 1267922 , 79955 }, - { 1297689 , 80503 }, - { 1328066 , 81049 }, - { 1359060 , 81593 }, - { 1390684 , 82135 }, - { 1422947 , 82675 }, - { 1455859 , 83213 }, - { 1489430 , 83750 }, - { 1523671 , 84284 }, - { 1558593 , 84817 }, - { 1594205 , 85348 }, - { 1630518 , 85878 }, - { 1667543 , 86406 }, - { 1705290 , 86932 }, - { 1743770 , 87457 }, - { 1782994 , 87980 }, - { 1822973 , 88501 }, - { 1863717 , 89021 }, - { 1905237 , 89540 }, - { 1947545 , 90057 }, - { 1990650 , 90573 }, - { 2034566 , 91087 }, - { 2079301 , 91600 }, - { 2124869 , 92111 }, - { 2171279 , 92622 }, - { 2218543 , 93131 }, - { 2266673 , 93639 }, - { 2315680 , 94145 }, - { 2365575 , 94650 }, - { 2416371 , 95154 }, - { 2468077 , 95657 }, - { 2520707 , 96159 }, - { 2574271 , 96660 }, - { 2628782 , 97159 }, - { 2684250 , 97658 }, - { 2740689 , 98155 }, - { 2798110 , 98651 }, - { 2856524 , 99147 }, - { 2915944 , 99641 }, - { 2976382 , 100134 }, - { 3037850 , 100626 }, - { 3100360 , 101117 }, - { 3163924 , 101608 }, - { 3228554 , 102097 }, - { 3294263 , 102586 }, - { 3361063 , 103073 }, - { 3428966 , 103560 }, - { 3497984 , 104045 }, - { 3568131 , 104530 }, - { 3639419 , 105014 }, - { 3711860 , 105498 }, - { 3785467 , 105980 }, - { 3860253 , 106462 }, - { 3936229 , 106942 }, - { 4013410 , 107422 }, - { 4091808 , 107902 }, - { 4171435 , 108380 }, - { 4252306 , 108858 }, - { 4334431 , 109335 }, - { 4417825 , 109811 }, - { 4502501 , 110287 }, - { 4588472 , 110762 }, - { 4675750 , 111236 }, - { 4764349 , 111709 }, - { 4854283 , 112182 }, - { 4945564 , 112654 }, - { 5038206 , 113126 }, - { 5132223 , 113597 }, - { 5227627 , 114067 }, - { 5324432 , 114537 }, - { 5422652 , 115006 }, - { 5522299 , 115474 }, - { 5623389 , 115942 }, - { 5725934 , 116409 }, - { 5829948 , 116876 }, - { 5935446 , 117342 }, - { 6042439 , 117808 }, - { 6150943 , 118273 }, - { 6260972 , 118738 }, - { 6372538 , 119202 }, - { 6485657 , 119665 }, - { 6600342 , 120128 }, - { 6716607 , 120591 }, - { 6834467 , 121053 }, - { 6953935 , 121514 }, - { 7075025 , 121976 }, - { 7197752 , 122436 }, - { 7322131 , 122896 }, - { 7448175 , 123356 }, - { 7575898 , 123815 }, - { 7705316 , 124274 }, - { 7836442 , 124733 }, - { 7969291 , 125191 }, - { 8103877 , 125648 }, - { 8240216 , 126105 }, - { 8378321 , 126562 }, - { 8518208 , 127018 }, - { 8659890 , 127474 }, - { 8803384 , 127930 }, - { 8948702 , 128385 }, - { 9095861 , 128840 }, - { 9244875 , 129294 }, - { 9395760 , 129748 }, - { 9548529 , 130202 }, - { 9703198 , 130655 }, - { 9859782 , 131108 }, - { 10018296 , 131561 }, - { 10178755 , 132014 }, - { 10341174 , 132466 }, - { 10505569 , 132917 }, - { 10671954 , 133369 }, - { 10840345 , 133820 }, - { 11010757 , 134271 }, - { 11183206 , 134721 }, - { 11357706 , 135171 }, - { 11534274 , 135621 }, - { 11712924 , 136071 }, - { 11893673 , 136520 }, - { 12076536 , 136969 }, - { 12261527 , 137418 }, - { 12448664 , 137867 }, - { 12637961 , 138315 }, - { 12829435 , 138763 }, - { 13023101 , 139211 }, - { 13218974 , 139658 }, - { 13417071 , 140106 }, - { 13617407 , 140553 }, - { 13819999 , 140999 }, - { 14024862 , 141446 }, - { 14232012 , 141892 }, - { 14441465 , 142339 }, - { 14653238 , 142785 }, - { 14867346 , 143230 }, - { 15083805 , 143676 }, - { 15302632 , 144121 }, - { 15523842 , 144566 }, - { 15747453 , 145011 }, - { 15973479 , 145456 }, - { 16201939 , 145900 }, - { 16432847 , 146345 }, - { 16666221 , 146789 }, - { 16902076 , 147233 }, - { 17140429 , 147677 }, - { 17381297 , 148121 }, - { 17624696 , 148564 }, - { 17870643 , 149007 }, - { 18119154 , 149451 }, - { 18370247 , 149894 }, - { 18623936 , 150336 }, - { 18880241 , 150779 }, - { 19139176 , 151222 }, - { 19400759 , 151664 }, - { 19665007 , 152107 }, - { 19931936 , 152549 }, - { 20201564 , 152991 }, - { 20473907 , 153433 }, - { 20748982 , 153875 }, - { 21026807 , 154316 }, - { 21307399 , 154758 }, - { 21590773 , 155199 }, - { 21876949 , 155641 }, - { 22165941 , 156082 }, - { 22457769 , 156523 }, - { 22752449 , 156964 }, - { 23049999 , 157405 }, - { 23350435 , 157846 }, - { 23653774 , 158287 }, - { 23960036 , 158727 }, - { 24269236 , 159168 }, - { 24581392 , 159608 }, - { 24896521 , 160049 }, - { 25214642 , 160489 }, - { 25535772 , 160929 }, - { 25859927 , 161370 }, - { 26187127 , 161810 }, - { 26517388 , 162250 }, - { 26850728 , 162690 }, - { 27187165 , 163130 }, - { 27526716 , 163569 }, - { 27869400 , 164009 }, - { 28215234 , 164449 }, - { 28564236 , 164889 }, - { 28916423 , 165328 }, - { 29271815 , 165768 }, - { 29630428 , 166208 }, - { 29992281 , 166647 }, - { 30357392 , 167087 }, - { 30725779 , 167526 }, - { 31097459 , 167965 }, - { 31472452 , 168405 }, - { 31850774 , 168844 }, - { 32232445 , 169283 }, - { 32617482 , 169723 }, - { 33005904 , 170162 }, - { 33397730 , 170601 }, - { 33792976 , 171041 }, - { 34191663 , 171480 }, - { 34593807 , 171919 }, - { 34999428 , 172358 }, - { 35408544 , 172797 }, - { 35821174 , 173237 }, - { 36237335 , 173676 }, - { 36657047 , 174115 }, - { 37080329 , 174554 }, - { 37507197 , 174993 }, - { 37937673 , 175433 }, - { 38371773 , 175872 }, - { 38809517 , 176311 }, - { 39250924 , 176750 }, - { 39696012 , 177190 }, - { 40144800 , 177629 }, - { 40597308 , 178068 }, - { 41053553 , 178507 }, - { 41513554 , 178947 }, - { 41977332 , 179386 }, - { 42444904 , 179825 }, - { 42916290 , 180265 }, - { 43391509 , 180704 }, - { 43870579 , 181144 }, - { 44353520 , 181583 }, - { 44840352 , 182023 }, - { 45331092 , 182462 }, - { 45825761 , 182902 }, - { 46324378 , 183342 }, - { 46826961 , 183781 }, - { 47333531 , 184221 }, - { 47844106 , 184661 }, - { 48358706 , 185101 }, - { 48877350 , 185541 }, - { 49400058 , 185981 }, - { 49926849 , 186421 }, - { 50457743 , 186861 }, - { 50992759 , 187301 }, - { 51531916 , 187741 }, - { 52075235 , 188181 }, - { 52622735 , 188622 }, - { 53174435 , 189062 }, - { 53730355 , 189502 }, - { 54290515 , 189943 }, - { 54854935 , 190383 }, - { 55423634 , 190824 }, - { 55996633 , 191265 }, - { 56573950 , 191706 }, - { 57155606 , 192146 }, - { 57741621 , 192587 }, - { 58332014 , 193028 }, - { 58926806 , 193470 }, - { 59526017 , 193911 }, - { 60129666 , 194352 }, - { 60737774 , 194793 }, - { 61350361 , 195235 }, - { 61967446 , 195677 }, - { 62589050 , 196118 }, - { 63215194 , 196560 }, - { 63845897 , 197002 }, - { 64481179 , 197444 }, - { 65121061 , 197886 }, - { 65765563 , 198328 }, - { 66414705 , 198770 }, - { 67068508 , 199213 }, - { 67726992 , 199655 }, - { 68390177 , 200098 }, - { 69058085 , 200540 }, - { 69730735 , 200983 }, - { 70408147 , 201426 }, - { 71090343 , 201869 }, - { 71777343 , 202312 }, - { 72469168 , 202755 }, - { 73165837 , 203199 }, - { 73867373 , 203642 }, - { 74573795 , 204086 }, - { 75285124 , 204529 }, - { 76001380 , 204973 }, - { 76722586 , 205417 }, - { 77448761 , 205861 }, - { 78179926 , 206306 }, - { 78916102 , 206750 }, - { 79657310 , 207194 }, - { 80403571 , 207639 }, - { 81154906 , 208084 }, - { 81911335 , 208529 }, - { 82672880 , 208974 }, - { 83439562 , 209419 }, - { 84211402 , 209864 }, - { 84988421 , 210309 }, - { 85770640 , 210755 }, - { 86558080 , 211201 }, - { 87350762 , 211647 }, - { 88148708 , 212093 }, - { 88951938 , 212539 }, - { 89760475 , 212985 }, - { 90574339 , 213432 }, - { 91393551 , 213878 }, - { 92218133 , 214325 }, - { 93048107 , 214772 }, - { 93883493 , 215219 }, - { 94724314 , 215666 }, - { 95570590 , 216114 }, - { 96422343 , 216561 }, - { 97279594 , 217009 }, - { 98142366 , 217457 }, - { 99010679 , 217905 }, - { 99884556 , 218353 }, - { 100764018 , 218801 }, - { 101649086 , 219250 }, - { 102539782 , 219698 }, - { 103436128 , 220147 }, - { 104338146 , 220596 }, - { 105245857 , 221046 }, - { 106159284 , 221495 }, - { 107078448 , 221945 }, - { 108003370 , 222394 }, - { 108934074 , 222844 }, - { 109870580 , 223294 }, - { 110812910 , 223745 }, - { 111761087 , 224195 }, - { 112715133 , 224646 }, - { 113675069 , 225097 }, - { 114640918 , 225548 }, - { 115612702 , 225999 }, - { 116590442 , 226450 }, - { 117574162 , 226902 }, - { 118563882 , 227353 }, - { 119559626 , 227805 }, - { 120561415 , 228258 }, - { 121569272 , 228710 }, - { 122583219 , 229162 }, - { 123603278 , 229615 }, - { 124629471 , 230068 }, - { 125661822 , 230521 }, - { 126700352 , 230974 }, - { 127745083 , 231428 }, - { 128796039 , 231882 }, - { 129853241 , 232336 }, - { 130916713 , 232790 }, - { 131986475 , 233244 }, - { 133062553 , 233699 }, - { 134144966 , 234153 }, - { 135233739 , 234608 }, - { 136328894 , 235064 }, - { 137430453 , 235519 }, - { 138538440 , 235975 }, - { 139652876 , 236430 }, - { 140773786 , 236886 }, - { 141901190 , 237343 }, - { 143035113 , 237799 }, - { 144175576 , 238256 }, - { 145322604 , 238713 }, - { 146476218 , 239170 }, - { 147636442 , 239627 }, - { 148803298 , 240085 }, - { 149976809 , 240542 }, - { 151156999 , 241000 }, - { 152343890 , 241459 }, - { 153537506 , 241917 }, - { 154737869 , 242376 }, - { 155945002 , 242835 }, - { 157158929 , 243294 }, - { 158379673 , 243753 }, - { 159607257 , 244213 }, - { 160841704 , 244673 }, - { 162083037 , 245133 }, - { 163331279 , 245593 }, - { 164586455 , 246054 }, - { 165848586 , 246514 }, - { 167117696 , 246975 }, - { 168393810 , 247437 }, - { 169676949 , 247898 }, - { 170967138 , 248360 }, - { 172264399 , 248822 }, - { 173568757 , 249284 }, - { 174880235 , 249747 }, - { 176198856 , 250209 }, - { 177524643 , 250672 }, - { 178857621 , 251136 }, - { 180197813 , 251599 }, - { 181545242 , 252063 }, - { 182899933 , 252527 }, - { 184261908 , 252991 }, - { 185631191 , 253456 }, - { 187007807 , 253920 }, - { 188391778 , 254385 }, - { 189783129 , 254851 }, - { 191181884 , 255316 }, - { 192588065 , 255782 }, - { 194001698 , 256248 }, - { 195422805 , 256714 }, - { 196851411 , 257181 }, - { 198287540 , 257648 }, - { 199731215 , 258115 }, - { 201182461 , 258582 }, - { 202641302 , 259050 }, - { 204107760 , 259518 }, - { 205581862 , 259986 }, - { 207063630 , 260454 }, - { 208553088 , 260923 }, - { 210050262 , 261392 }, - { 211555174 , 261861 }, - { 213067849 , 262331 }, - { 214588312 , 262800 }, - { 216116586 , 263270 }, - { 217652696 , 263741 }, - { 219196666 , 264211 }, - { 220748520 , 264682 }, - { 222308282 , 265153 }, - { 223875978 , 265625 }, - { 225451630 , 266097 }, - { 227035265 , 266569 }, - { 228626905 , 267041 }, - { 230226576 , 267514 }, - { 231834302 , 267986 }, - { 233450107 , 268460 }, - { 235074016 , 268933 }, - { 236706054 , 269407 }, - { 238346244 , 269881 }, - { 239994613 , 270355 }, - { 241651183 , 270830 }, - { 243315981 , 271305 } -}; - -/* Calculate the send rate as per section 3.1 of RFC3448 - -Returns send rate in bytes per second - -Integer maths and lookups are used as not allowed floating point in kernel - -The function for Xcalc as per section 3.1 of RFC3448 is: - -X = s - ------------------------------------------------------------- - R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2))) - -where -X is the trasmit rate in bytes/second -s is the packet size in bytes -R is the round trip time in seconds -p is the loss event rate, between 0 and 1.0, of the number of loss events - as a fraction of the number of packets transmitted -t_RTO is the TCP retransmission timeout value in seconds -b is the number of packets acknowledged by a single TCP acknowledgement - -we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes: - -X = s - ----------------------------------------------------------------------- - R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2))) - - -which we can break down into: - -X = s - -------- - R * f(p) - -where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p)) - -Function parameters: -s - bytes -R - RTT in usecs -p - loss rate (decimal fraction multiplied by 1,000,000) - -Returns Xcalc in bytes per second - -DON'T alter this code unless you run test cases against it as the code -has been manipulated to stop underflow/overlow. - -*/ -static u32 ccid3_calc_x(u16 s, u32 R, u32 p) -{ - int index; - u32 f; - u64 tmp1, tmp2; - - if (p < CALCX_SPLIT) - index = (p / (CALCX_SPLIT / CALCX_ARRSIZE)) - 1; - else - index = (p / (1000000 / CALCX_ARRSIZE)) - 1; - - if (index < 0) - /* p should be 0 unless there is a bug in my code */ - index = 0; - - if (R == 0) - R = 1; /* RTT can't be zero or else divide by zero */ - - BUG_ON(index >= CALCX_ARRSIZE); - - if (p >= CALCX_SPLIT) - f = calcx_lookup[index][0]; - else - f = calcx_lookup[index][1]; - - tmp1 = ((u64)s * 100000000); - tmp2 = ((u64)R * (u64)f); - do_div(tmp2,10000); - do_div(tmp1,tmp2); - /* don't alter above math unless you test due to overflow on 32 bit */ - - return (u32)tmp1; -} - /* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx) { @@ -737,9 +149,9 @@ static void ccid3_hc_tx_update_x(struct sock *sk) /* To avoid large error in calcX */ if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { - hctx->ccid3hctx_x_calc = ccid3_calc_x(hctx->ccid3hctx_s, - hctx->ccid3hctx_rtt, - hctx->ccid3hctx_p); + hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s, + hctx->ccid3hctx_rtt, + hctx->ccid3hctx_p); hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, 2 * hctx->ccid3hctx_x_recv), (hctx->ccid3hctx_s / @@ -1533,32 +945,6 @@ static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) &x_recv, sizeof(x_recv)); } -/* - * args: fvalue - function value to match - * returns: p closest to that value - * - * both fvalue and p are multiplied by 1,000,000 to use ints - */ -static u32 calcx_reverse_lookup(u32 fvalue) { - int ctr = 0; - int small; - - if (fvalue < calcx_lookup[0][1]) - return 0; - if (fvalue <= calcx_lookup[CALCX_ARRSIZE-1][1]) - small = 1; - else if (fvalue > calcx_lookup[CALCX_ARRSIZE-1][0]) - return 1000000; - else - small = 0; - while (fvalue > calcx_lookup[ctr][small]) - ctr++; - if (small) - return (CALCX_SPLIT * ctr / CALCX_ARRSIZE); - else - return (1000000 * ctr / CALCX_ARRSIZE) ; -} - /* calculate first loss interval * * returns estimated loss interval in usecs */ @@ -1627,7 +1013,7 @@ found: tmp2 = (u32)tmp1; fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; /* do not alter order above or you will get overflow on 32 bit */ - p = calcx_reverse_lookup(fval); + p = tfrc_calc_x_reverse_lookup(fval); ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied " "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile index e9a91e238c8..5f940a6cbac 100644 --- a/net/dccp/ccids/lib/Makefile +++ b/net/dccp/ccids/lib/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o -dccp_tfrc_lib-y := loss_interval.o packet_history.o +dccp_tfrc_lib-y := loss_interval.o packet_history.o tfrc_equation.o diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h new file mode 100644 index 00000000000..130c4c40cfe --- /dev/null +++ b/net/dccp/ccids/lib/tfrc.h @@ -0,0 +1,22 @@ +#ifndef _TFRC_H_ +#define _TFRC_H_ +/* + * net/dccp/ccids/lib/tfrc.h + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include + +extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); +extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); + +#endif /* _TFRC_H_ */ diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c new file mode 100644 index 00000000000..d2b5933b451 --- /dev/null +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -0,0 +1,644 @@ +/* + * net/dccp/ccids/lib/tfrc_equation.c + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include + +#include +#include + +#include "tfrc.h" + +#define TFRC_CALC_X_ARRSIZE 500 + +#define TFRC_CALC_X_SPLIT 50000 +/* equivalent to 0.05 */ + +static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = { + { 37172, 8172 }, + { 53499, 11567 }, + { 66664, 14180 }, + { 78298, 16388 }, + { 89021, 18339 }, + { 99147, 20108 }, + { 108858, 21738 }, + { 118273, 23260 }, + { 127474, 24693 }, + { 136520, 26052 }, + { 145456, 27348 }, + { 154316, 28589 }, + { 163130, 29783 }, + { 171919, 30935 }, + { 180704, 32049 }, + { 189502, 33130 }, + { 198328, 34180 }, + { 207194, 35202 }, + { 216114, 36198 }, + { 225097, 37172 }, + { 234153, 38123 }, + { 243294, 39055 }, + { 252527, 39968 }, + { 261861, 40864 }, + { 271305, 41743 }, + { 280866, 42607 }, + { 290553, 43457 }, + { 300372, 44293 }, + { 310333, 45117 }, + { 320441, 45929 }, + { 330705, 46729 }, + { 341131, 47518 }, + { 351728, 48297 }, + { 362501, 49066 }, + { 373460, 49826 }, + { 384609, 50577 }, + { 395958, 51320 }, + { 407513, 52054 }, + { 419281, 52780 }, + { 431270, 53499 }, + { 443487, 54211 }, + { 455940, 54916 }, + { 468635, 55614 }, + { 481581, 56306 }, + { 494785, 56991 }, + { 508254, 57671 }, + { 521996, 58345 }, + { 536019, 59014 }, + { 550331, 59677 }, + { 564939, 60335 }, + { 579851, 60988 }, + { 595075, 61636 }, + { 610619, 62279 }, + { 626491, 62918 }, + { 642700, 63553 }, + { 659253, 64183 }, + { 676158, 64809 }, + { 693424, 65431 }, + { 711060, 66050 }, + { 729073, 66664 }, + { 747472, 67275 }, + { 766266, 67882 }, + { 785464, 68486 }, + { 805073, 69087 }, + { 825103, 69684 }, + { 845562, 70278 }, + { 866460, 70868 }, + { 887805, 71456 }, + { 909606, 72041 }, + { 931873, 72623 }, + { 954614, 73202 }, + { 977839, 73778 }, + { 1001557, 74352 }, + { 1025777, 74923 }, + { 1050508, 75492 }, + { 1075761, 76058 }, + { 1101544, 76621 }, + { 1127867, 77183 }, + { 1154739, 77741 }, + { 1182172, 78298 }, + { 1210173, 78852 }, + { 1238753, 79405 }, + { 1267922, 79955 }, + { 1297689, 80503 }, + { 1328066, 81049 }, + { 1359060, 81593 }, + { 1390684, 82135 }, + { 1422947, 82675 }, + { 1455859, 83213 }, + { 1489430, 83750 }, + { 1523671, 84284 }, + { 1558593, 84817 }, + { 1594205, 85348 }, + { 1630518, 85878 }, + { 1667543, 86406 }, + { 1705290, 86932 }, + { 1743770, 87457 }, + { 1782994, 87980 }, + { 1822973, 88501 }, + { 1863717, 89021 }, + { 1905237, 89540 }, + { 1947545, 90057 }, + { 1990650, 90573 }, + { 2034566, 91087 }, + { 2079301, 91600 }, + { 2124869, 92111 }, + { 2171279, 92622 }, + { 2218543, 93131 }, + { 2266673, 93639 }, + { 2315680, 94145 }, + { 2365575, 94650 }, + { 2416371, 95154 }, + { 2468077, 95657 }, + { 2520707, 96159 }, + { 2574271, 96660 }, + { 2628782, 97159 }, + { 2684250, 97658 }, + { 2740689, 98155 }, + { 2798110, 98651 }, + { 2856524, 99147 }, + { 2915944, 99641 }, + { 2976382, 100134 }, + { 3037850, 100626 }, + { 3100360, 101117 }, + { 3163924, 101608 }, + { 3228554, 102097 }, + { 3294263, 102586 }, + { 3361063, 103073 }, + { 3428966, 103560 }, + { 3497984, 104045 }, + { 3568131, 104530 }, + { 3639419, 105014 }, + { 3711860, 105498 }, + { 3785467, 105980 }, + { 3860253, 106462 }, + { 3936229, 106942 }, + { 4013410, 107422 }, + { 4091808, 107902 }, + { 4171435, 108380 }, + { 4252306, 108858 }, + { 4334431, 109335 }, + { 4417825, 109811 }, + { 4502501, 110287 }, + { 4588472, 110762 }, + { 4675750, 111236 }, + { 4764349, 111709 }, + { 4854283, 112182 }, + { 4945564, 112654 }, + { 5038206, 113126 }, + { 5132223, 113597 }, + { 5227627, 114067 }, + { 5324432, 114537 }, + { 5422652, 115006 }, + { 5522299, 115474 }, + { 5623389, 115942 }, + { 5725934, 116409 }, + { 5829948, 116876 }, + { 5935446, 117342 }, + { 6042439, 117808 }, + { 6150943, 118273 }, + { 6260972, 118738 }, + { 6372538, 119202 }, + { 6485657, 119665 }, + { 6600342, 120128 }, + { 6716607, 120591 }, + { 6834467, 121053 }, + { 6953935, 121514 }, + { 7075025, 121976 }, + { 7197752, 122436 }, + { 7322131, 122896 }, + { 7448175, 123356 }, + { 7575898, 123815 }, + { 7705316, 124274 }, + { 7836442, 124733 }, + { 7969291, 125191 }, + { 8103877, 125648 }, + { 8240216, 126105 }, + { 8378321, 126562 }, + { 8518208, 127018 }, + { 8659890, 127474 }, + { 8803384, 127930 }, + { 8948702, 128385 }, + { 9095861, 128840 }, + { 9244875, 129294 }, + { 9395760, 129748 }, + { 9548529, 130202 }, + { 9703198, 130655 }, + { 9859782, 131108 }, + { 10018296, 131561 }, + { 10178755, 132014 }, + { 10341174, 132466 }, + { 10505569, 132917 }, + { 10671954, 133369 }, + { 10840345, 133820 }, + { 11010757, 134271 }, + { 11183206, 134721 }, + { 11357706, 135171 }, + { 11534274, 135621 }, + { 11712924, 136071 }, + { 11893673, 136520 }, + { 12076536, 136969 }, + { 12261527, 137418 }, + { 12448664, 137867 }, + { 12637961, 138315 }, + { 12829435, 138763 }, + { 13023101, 139211 }, + { 13218974, 139658 }, + { 13417071, 140106 }, + { 13617407, 140553 }, + { 13819999, 140999 }, + { 14024862, 141446 }, + { 14232012, 141892 }, + { 14441465, 142339 }, + { 14653238, 142785 }, + { 14867346, 143230 }, + { 15083805, 143676 }, + { 15302632, 144121 }, + { 15523842, 144566 }, + { 15747453, 145011 }, + { 15973479, 145456 }, + { 16201939, 145900 }, + { 16432847, 146345 }, + { 16666221, 146789 }, + { 16902076, 147233 }, + { 17140429, 147677 }, + { 17381297, 148121 }, + { 17624696, 148564 }, + { 17870643, 149007 }, + { 18119154, 149451 }, + { 18370247, 149894 }, + { 18623936, 150336 }, + { 18880241, 150779 }, + { 19139176, 151222 }, + { 19400759, 151664 }, + { 19665007, 152107 }, + { 19931936, 152549 }, + { 20201564, 152991 }, + { 20473907, 153433 }, + { 20748982, 153875 }, + { 21026807, 154316 }, + { 21307399, 154758 }, + { 21590773, 155199 }, + { 21876949, 155641 }, + { 22165941, 156082 }, + { 22457769, 156523 }, + { 22752449, 156964 }, + { 23049999, 157405 }, + { 23350435, 157846 }, + { 23653774, 158287 }, + { 23960036, 158727 }, + { 24269236, 159168 }, + { 24581392, 159608 }, + { 24896521, 160049 }, + { 25214642, 160489 }, + { 25535772, 160929 }, + { 25859927, 161370 }, + { 26187127, 161810 }, + { 26517388, 162250 }, + { 26850728, 162690 }, + { 27187165, 163130 }, + { 27526716, 163569 }, + { 27869400, 164009 }, + { 28215234, 164449 }, + { 28564236, 164889 }, + { 28916423, 165328 }, + { 29271815, 165768 }, + { 29630428, 166208 }, + { 29992281, 166647 }, + { 30357392, 167087 }, + { 30725779, 167526 }, + { 31097459, 167965 }, + { 31472452, 168405 }, + { 31850774, 168844 }, + { 32232445, 169283 }, + { 32617482, 169723 }, + { 33005904, 170162 }, + { 33397730, 170601 }, + { 33792976, 171041 }, + { 34191663, 171480 }, + { 34593807, 171919 }, + { 34999428, 172358 }, + { 35408544, 172797 }, + { 35821174, 173237 }, + { 36237335, 173676 }, + { 36657047, 174115 }, + { 37080329, 174554 }, + { 37507197, 174993 }, + { 37937673, 175433 }, + { 38371773, 175872 }, + { 38809517, 176311 }, + { 39250924, 176750 }, + { 39696012, 177190 }, + { 40144800, 177629 }, + { 40597308, 178068 }, + { 41053553, 178507 }, + { 41513554, 178947 }, + { 41977332, 179386 }, + { 42444904, 179825 }, + { 42916290, 180265 }, + { 43391509, 180704 }, + { 43870579, 181144 }, + { 44353520, 181583 }, + { 44840352, 182023 }, + { 45331092, 182462 }, + { 45825761, 182902 }, + { 46324378, 183342 }, + { 46826961, 183781 }, + { 47333531, 184221 }, + { 47844106, 184661 }, + { 48358706, 185101 }, + { 48877350, 185541 }, + { 49400058, 185981 }, + { 49926849, 186421 }, + { 50457743, 186861 }, + { 50992759, 187301 }, + { 51531916, 187741 }, + { 52075235, 188181 }, + { 52622735, 188622 }, + { 53174435, 189062 }, + { 53730355, 189502 }, + { 54290515, 189943 }, + { 54854935, 190383 }, + { 55423634, 190824 }, + { 55996633, 191265 }, + { 56573950, 191706 }, + { 57155606, 192146 }, + { 57741621, 192587 }, + { 58332014, 193028 }, + { 58926806, 193470 }, + { 59526017, 193911 }, + { 60129666, 194352 }, + { 60737774, 194793 }, + { 61350361, 195235 }, + { 61967446, 195677 }, + { 62589050, 196118 }, + { 63215194, 196560 }, + { 63845897, 197002 }, + { 64481179, 197444 }, + { 65121061, 197886 }, + { 65765563, 198328 }, + { 66414705, 198770 }, + { 67068508, 199213 }, + { 67726992, 199655 }, + { 68390177, 200098 }, + { 69058085, 200540 }, + { 69730735, 200983 }, + { 70408147, 201426 }, + { 71090343, 201869 }, + { 71777343, 202312 }, + { 72469168, 202755 }, + { 73165837, 203199 }, + { 73867373, 203642 }, + { 74573795, 204086 }, + { 75285124, 204529 }, + { 76001380, 204973 }, + { 76722586, 205417 }, + { 77448761, 205861 }, + { 78179926, 206306 }, + { 78916102, 206750 }, + { 79657310, 207194 }, + { 80403571, 207639 }, + { 81154906, 208084 }, + { 81911335, 208529 }, + { 82672880, 208974 }, + { 83439562, 209419 }, + { 84211402, 209864 }, + { 84988421, 210309 }, + { 85770640, 210755 }, + { 86558080, 211201 }, + { 87350762, 211647 }, + { 88148708, 212093 }, + { 88951938, 212539 }, + { 89760475, 212985 }, + { 90574339, 213432 }, + { 91393551, 213878 }, + { 92218133, 214325 }, + { 93048107, 214772 }, + { 93883493, 215219 }, + { 94724314, 215666 }, + { 95570590, 216114 }, + { 96422343, 216561 }, + { 97279594, 217009 }, + { 98142366, 217457 }, + { 99010679, 217905 }, + { 99884556, 218353 }, + { 100764018, 218801 }, + { 101649086, 219250 }, + { 102539782, 219698 }, + { 103436128, 220147 }, + { 104338146, 220596 }, + { 105245857, 221046 }, + { 106159284, 221495 }, + { 107078448, 221945 }, + { 108003370, 222394 }, + { 108934074, 222844 }, + { 109870580, 223294 }, + { 110812910, 223745 }, + { 111761087, 224195 }, + { 112715133, 224646 }, + { 113675069, 225097 }, + { 114640918, 225548 }, + { 115612702, 225999 }, + { 116590442, 226450 }, + { 117574162, 226902 }, + { 118563882, 227353 }, + { 119559626, 227805 }, + { 120561415, 228258 }, + { 121569272, 228710 }, + { 122583219, 229162 }, + { 123603278, 229615 }, + { 124629471, 230068 }, + { 125661822, 230521 }, + { 126700352, 230974 }, + { 127745083, 231428 }, + { 128796039, 231882 }, + { 129853241, 232336 }, + { 130916713, 232790 }, + { 131986475, 233244 }, + { 133062553, 233699 }, + { 134144966, 234153 }, + { 135233739, 234608 }, + { 136328894, 235064 }, + { 137430453, 235519 }, + { 138538440, 235975 }, + { 139652876, 236430 }, + { 140773786, 236886 }, + { 141901190, 237343 }, + { 143035113, 237799 }, + { 144175576, 238256 }, + { 145322604, 238713 }, + { 146476218, 239170 }, + { 147636442, 239627 }, + { 148803298, 240085 }, + { 149976809, 240542 }, + { 151156999, 241000 }, + { 152343890, 241459 }, + { 153537506, 241917 }, + { 154737869, 242376 }, + { 155945002, 242835 }, + { 157158929, 243294 }, + { 158379673, 243753 }, + { 159607257, 244213 }, + { 160841704, 244673 }, + { 162083037, 245133 }, + { 163331279, 245593 }, + { 164586455, 246054 }, + { 165848586, 246514 }, + { 167117696, 246975 }, + { 168393810, 247437 }, + { 169676949, 247898 }, + { 170967138, 248360 }, + { 172264399, 248822 }, + { 173568757, 249284 }, + { 174880235, 249747 }, + { 176198856, 250209 }, + { 177524643, 250672 }, + { 178857621, 251136 }, + { 180197813, 251599 }, + { 181545242, 252063 }, + { 182899933, 252527 }, + { 184261908, 252991 }, + { 185631191, 253456 }, + { 187007807, 253920 }, + { 188391778, 254385 }, + { 189783129, 254851 }, + { 191181884, 255316 }, + { 192588065, 255782 }, + { 194001698, 256248 }, + { 195422805, 256714 }, + { 196851411, 257181 }, + { 198287540, 257648 }, + { 199731215, 258115 }, + { 201182461, 258582 }, + { 202641302, 259050 }, + { 204107760, 259518 }, + { 205581862, 259986 }, + { 207063630, 260454 }, + { 208553088, 260923 }, + { 210050262, 261392 }, + { 211555174, 261861 }, + { 213067849, 262331 }, + { 214588312, 262800 }, + { 216116586, 263270 }, + { 217652696, 263741 }, + { 219196666, 264211 }, + { 220748520, 264682 }, + { 222308282, 265153 }, + { 223875978, 265625 }, + { 225451630, 266097 }, + { 227035265, 266569 }, + { 228626905, 267041 }, + { 230226576, 267514 }, + { 231834302, 267986 }, + { 233450107, 268460 }, + { 235074016, 268933 }, + { 236706054, 269407 }, + { 238346244, 269881 }, + { 239994613, 270355 }, + { 241651183, 270830 }, + { 243315981, 271305 } +}; + +/* Calculate the send rate as per section 3.1 of RFC3448 + +Returns send rate in bytes per second + +Integer maths and lookups are used as not allowed floating point in kernel + +The function for Xcalc as per section 3.1 of RFC3448 is: + +X = s + ------------------------------------------------------------- + R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2))) + +where +X is the trasmit rate in bytes/second +s is the packet size in bytes +R is the round trip time in seconds +p is the loss event rate, between 0 and 1.0, of the number of loss events + as a fraction of the number of packets transmitted +t_RTO is the TCP retransmission timeout value in seconds +b is the number of packets acknowledged by a single TCP acknowledgement + +we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes: + +X = s + ----------------------------------------------------------------------- + R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2))) + + +which we can break down into: + +X = s + -------- + R * f(p) + +where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p)) + +Function parameters: +s - bytes +R - RTT in usecs +p - loss rate (decimal fraction multiplied by 1,000,000) + +Returns Xcalc in bytes per second + +DON'T alter this code unless you run test cases against it as the code +has been manipulated to stop underflow/overlow. + +*/ +u32 tfrc_calc_x(u16 s, u32 R, u32 p) +{ + int index; + u32 f; + u64 tmp1, tmp2; + + if (p < TFRC_CALC_X_SPLIT) + index = (p / (TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE)) - 1; + else + index = (p / (1000000 / TFRC_CALC_X_ARRSIZE)) - 1; + + if (index < 0) + /* p should be 0 unless there is a bug in my code */ + index = 0; + + if (R == 0) + R = 1; /* RTT can't be zero or else divide by zero */ + + BUG_ON(index >= TFRC_CALC_X_ARRSIZE); + + if (p >= TFRC_CALC_X_SPLIT) + f = tfrc_calc_x_lookup[index][0]; + else + f = tfrc_calc_x_lookup[index][1]; + + tmp1 = ((u64)s * 100000000); + tmp2 = ((u64)R * (u64)f); + do_div(tmp2, 10000); + do_div(tmp1, tmp2); + /* Don't alter above math unless you test due to overflow on 32 bit */ + + return (u32)tmp1; +} + +EXPORT_SYMBOL_GPL(tfrc_calc_x); + +/* + * args: fvalue - function value to match + * returns: p closest to that value + * + * both fvalue and p are multiplied by 1,000,000 to use ints + */ +u32 tfrc_calc_x_reverse_lookup(u32 fvalue) +{ + int ctr = 0; + int small; + + if (fvalue < tfrc_calc_x_lookup[0][1]) + return 0; + + if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1]) + small = 1; + else if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0]) + return 1000000; + else + small = 0; + + while (fvalue > tfrc_calc_x_lookup[ctr][small]) + ctr++; + + if (small) + return TFRC_CALC_X_SPLIT * ctr / TFRC_CALC_X_ARRSIZE; + else + return 1000000 * ctr / TFRC_CALC_X_ARRSIZE; +} + +EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup); -- cgit v1.2.3 From 072ab6c68e3dd158b68d97eaff16734474d2f8f8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 28 Aug 2005 01:19:14 -0300 Subject: [CCID3]: Move ccid3_hc_rx_add_hist to packet_history.c Renaming it to dccp_rx_hist_add_packet. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 122 +----------------------------------- net/dccp/ccids/ccid3.h | 6 -- net/dccp/ccids/lib/packet_history.c | 111 ++++++++++++++++++++++++++++++++ net/dccp/ccids/lib/packet_history.h | 11 ++++ 4 files changed, 124 insertions(+), 126 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index a215c46d6f1..849f5580efb 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -744,125 +744,6 @@ static inline void ccid3_hc_rx_set_state(struct sock *sk, hcrx->ccid3hcrx_state = state; } -static int ccid3_hc_rx_add_hist(struct sock *sk, - struct dccp_rx_hist_entry *packet) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct dccp_rx_hist_entry *entry, *next, *iter; - u8 num_later = 0; - - iter = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); - if (iter == NULL) - dccp_rx_hist_add_entry(&hcrx->ccid3hcrx_hist, packet); - else { - const u64 seqno = packet->dccphrx_seqno; - - if (after48(seqno, iter->dccphrx_seqno)) - dccp_rx_hist_add_entry(&hcrx->ccid3hcrx_hist, packet); - else { - if (dccp_rx_hist_entry_data_packet(iter)) - num_later = 1; - - list_for_each_entry_continue(iter, - &hcrx->ccid3hcrx_hist, - dccphrx_node) { - if (after48(seqno, iter->dccphrx_seqno)) { - dccp_rx_hist_add_entry(&iter->dccphrx_node, - packet); - goto trim_history; - } - - if (dccp_rx_hist_entry_data_packet(iter)) - num_later++; - - if (num_later == TFRC_RECV_NUM_LATE_LOSS) { - dccp_rx_hist_entry_delete(ccid3_rx_hist, - packet); - ccid3_pr_debug("%s, sk=%p, packet" - "(%llu) already lost!\n", - dccp_role(sk), sk, - seqno); - return 1; - } - } - - if (num_later < TFRC_RECV_NUM_LATE_LOSS) - dccp_rx_hist_add_entry(&hcrx->ccid3hcrx_hist, - packet); - /* - * FIXME: else what? should we destroy the packet - * like above? - */ - } - } - -trim_history: - /* - * Trim history (remove all packets after the NUM_LATE_LOSS + 1 - * data packets) - */ - num_later = TFRC_RECV_NUM_LATE_LOSS + 1; - - if (!list_empty(&hcrx->ccid3hcrx_li_hist)) { - list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, - dccphrx_node) { - if (num_later == 0) { - list_del_init(&entry->dccphrx_node); - dccp_rx_hist_entry_delete(ccid3_rx_hist, entry); - } else if (dccp_rx_hist_entry_data_packet(entry)) - --num_later; - } - } else { - int step = 0; - u8 win_count = 0; /* Not needed, but lets shut up gcc */ - int tmp; - /* - * We have no loss interval history so we need at least one - * rtt:s of data packets to approximate rtt. - */ - list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, - dccphrx_node) { - if (num_later == 0) { - switch (step) { - case 0: - step = 1; - /* OK, find next data packet */ - num_later = 1; - break; - case 1: - step = 2; - /* OK, find next data packet */ - num_later = 1; - win_count = entry->dccphrx_ccval; - break; - case 2: - tmp = win_count - entry->dccphrx_ccval; - if (tmp < 0) - tmp += TFRC_WIN_COUNT_LIMIT; - if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { - /* - * We have found a packet older - * than one rtt remove the rest - */ - step = 3; - } else /* OK, find next data packet */ - num_later = 1; - break; - case 3: - list_del_init(&entry->dccphrx_node); - dccp_rx_hist_entry_delete(ccid3_rx_hist, - entry); - break; - } - } else if (dccp_rx_hist_entry_data_packet(entry)) - --num_later; - } - } - - return 0; -} - static void ccid3_hc_rx_send_feedback(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); @@ -1185,7 +1066,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) win_count = packet->dccphrx_ccval; - ins = ccid3_hc_rx_add_hist(sk, packet); + ins = dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist, + &hcrx->ccid3hcrx_li_hist, packet); if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) return; diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index f68d0b4e31e..ee8cbace663 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -51,17 +51,11 @@ /* In usecs - half the scheduling granularity as per RFC3448 4.6 */ #define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) -#define TFRC_WIN_COUNT_PER_RTT 4 -#define TFRC_WIN_COUNT_LIMIT 16 - /* In seconds */ #define TFRC_MAX_BACK_OFF_TIME 64 #define TFRC_SMALLEST_P 40 -/* Number of later packets received before one is considered lost */ -#define TFRC_RECV_NUM_LATE_LOSS 3 - enum ccid3_options { TFRC_OPT_LOSS_EVENT_RATE = 192, TFRC_OPT_LOSS_INTERVALS = 193, diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index f252a9555e3..e2576b45ac0 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -113,6 +113,117 @@ struct dccp_rx_hist_entry * EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet); +int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, + struct list_head *rx_list, + struct list_head *li_list, + struct dccp_rx_hist_entry *packet) +{ + struct dccp_rx_hist_entry *entry, *next, *iter; + u8 num_later = 0; + + iter = dccp_rx_hist_head(rx_list); + if (iter == NULL) + dccp_rx_hist_add_entry(rx_list, packet); + else { + const u64 seqno = packet->dccphrx_seqno; + + if (after48(seqno, iter->dccphrx_seqno)) + dccp_rx_hist_add_entry(rx_list, packet); + else { + if (dccp_rx_hist_entry_data_packet(iter)) + num_later = 1; + + list_for_each_entry_continue(iter, rx_list, + dccphrx_node) { + if (after48(seqno, iter->dccphrx_seqno)) { + dccp_rx_hist_add_entry(&iter->dccphrx_node, + packet); + goto trim_history; + } + + if (dccp_rx_hist_entry_data_packet(iter)) + num_later++; + + if (num_later == TFRC_RECV_NUM_LATE_LOSS) { + dccp_rx_hist_entry_delete(hist, packet); + return 1; + } + } + + if (num_later < TFRC_RECV_NUM_LATE_LOSS) + dccp_rx_hist_add_entry(rx_list, packet); + /* + * FIXME: else what? should we destroy the packet + * like above? + */ + } + } + +trim_history: + /* + * Trim history (remove all packets after the NUM_LATE_LOSS + 1 + * data packets) + */ + num_later = TFRC_RECV_NUM_LATE_LOSS + 1; + + if (!list_empty(li_list)) { + list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { + if (num_later == 0) { + list_del_init(&entry->dccphrx_node); + dccp_rx_hist_entry_delete(hist, entry); + } else if (dccp_rx_hist_entry_data_packet(entry)) + --num_later; + } + } else { + int step = 0; + u8 win_count = 0; /* Not needed, but lets shut up gcc */ + int tmp; + /* + * We have no loss interval history so we need at least one + * rtt:s of data packets to approximate rtt. + */ + list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { + if (num_later == 0) { + switch (step) { + case 0: + step = 1; + /* OK, find next data packet */ + num_later = 1; + break; + case 1: + step = 2; + /* OK, find next data packet */ + num_later = 1; + win_count = entry->dccphrx_ccval; + break; + case 2: + tmp = win_count - entry->dccphrx_ccval; + if (tmp < 0) + tmp += TFRC_WIN_COUNT_LIMIT; + if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { + /* + * We have found a packet older + * than one rtt remove the rest + */ + step = 3; + } else /* OK, find next data packet */ + num_later = 1; + break; + case 3: + list_del_init(&entry->dccphrx_node); + dccp_rx_hist_entry_delete(hist, entry); + break; + } + } else if (dccp_rx_hist_entry_data_packet(entry)) + --num_later; + } + } + + return 0; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet); + struct dccp_tx_hist *dccp_tx_hist_new(const char *name) { struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 235828d822d..ebfcb8e2c67 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -44,6 +44,12 @@ #include "../../dccp.h" +/* Number of later packets received before one is considered lost */ +#define TFRC_RECV_NUM_LATE_LOSS 3 + +#define TFRC_WIN_COUNT_PER_RTT 4 +#define TFRC_WIN_COUNT_LIMIT 16 + struct dccp_tx_hist_entry { struct list_head dccphtx_node; u64 dccphtx_seqno:48, @@ -182,4 +188,9 @@ static inline int entry->dccphrx_type == DCCP_PKT_DATAACK; } +extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, + struct list_head *rx_list, + struct list_head *li_list, + struct dccp_rx_hist_entry *packet); + #endif /* _DCCP_PKT_HIST_ */ -- cgit v1.2.3 From 29e4f8b3c340c4b2a0c6dd197b985e03826afd13 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 28 Aug 2005 02:00:28 -0300 Subject: [CCID3]: Move ccid3_hc_rx_detect_loss to packet_history.c Renaming it to dccp_rx_hist_detect_loss. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 83 ++----------------------------------- net/dccp/ccids/lib/packet_history.c | 82 ++++++++++++++++++++++++++++++++++++ net/dccp/ccids/lib/packet_history.h | 3 ++ 3 files changed, 89 insertions(+), 79 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 849f5580efb..4ff6ede0f07 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -927,86 +927,11 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct dccp_rx_hist_entry *entry, *next, *packet; - struct dccp_rx_hist_entry *a_loss = NULL; - struct dccp_rx_hist_entry *b_loss = NULL; - u64 seq_loss = DCCP_MAX_SEQNO + 1; - u8 win_loss = 0; - u8 num_later = TFRC_RECV_NUM_LATE_LOSS; + u8 win_loss; + const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist, + &hcrx->ccid3hcrx_li_hist, + &win_loss); - list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, - dccphrx_node) { - if (num_later == 0) { - b_loss = entry; - break; - } else if (dccp_rx_hist_entry_data_packet(entry)) - --num_later; - } - - if (b_loss == NULL) - goto out_update_li; - - num_later = 1; - - list_for_each_entry_safe_continue(entry, next, &hcrx->ccid3hcrx_hist, - dccphrx_node) { - if (num_later == 0) { - a_loss = entry; - break; - } else if (dccp_rx_hist_entry_data_packet(entry)) - --num_later; - } - - if (a_loss == NULL) { - if (list_empty(&hcrx->ccid3hcrx_li_hist)) { - /* no loss event have occured yet */ - LIMIT_NETDEBUG("%s: TODO: find a lost data packet by " - "comparing to initial seqno\n", - dccp_role(sk)); - goto out_update_li; - } else { - pr_info("%s: %s, sk=%p, ERROR! Less than 4 data " - "packets in history", - __FUNCTION__, dccp_role(sk), sk); - return; - } - } - - /* Locate a lost data packet */ - entry = packet = b_loss; - list_for_each_entry_safe_continue(entry, next, &hcrx->ccid3hcrx_hist, - dccphrx_node) { - u64 delta = dccp_delta_seqno(entry->dccphrx_seqno, - packet->dccphrx_seqno); - - if (delta != 0) { - if (dccp_rx_hist_entry_data_packet(packet)) - --delta; - /* - * FIXME: check this, probably this % usage is because - * in earlier drafts the ndp count was just 8 bits - * long, but now it cam be up to 24 bits long. - */ -#if 0 - if (delta % DCCP_NDP_LIMIT != - (packet->dccphrx_ndp - - entry->dccphrx_ndp) % DCCP_NDP_LIMIT) -#endif - if (delta != - packet->dccphrx_ndp - entry->dccphrx_ndp) { - seq_loss = entry->dccphrx_seqno; - dccp_inc_seqno(&seq_loss); - } - } - packet = entry; - if (packet == a_loss) - break; - } - - if (seq_loss != DCCP_MAX_SEQNO + 1) - win_loss = a_loss->dccphrx_ccval; - -out_update_li: ccid3_hc_rx_update_li(sk, seq_loss, win_loss); } diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index e2576b45ac0..d3f9d205383 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -224,6 +224,88 @@ trim_history: EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet); +u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, + struct list_head *li_list, u8 *win_loss) +{ + struct dccp_rx_hist_entry *entry, *next, *packet; + struct dccp_rx_hist_entry *a_loss = NULL; + struct dccp_rx_hist_entry *b_loss = NULL; + u64 seq_loss = DCCP_MAX_SEQNO + 1; + u8 num_later = TFRC_RECV_NUM_LATE_LOSS; + + list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { + if (num_later == 0) { + b_loss = entry; + break; + } else if (dccp_rx_hist_entry_data_packet(entry)) + --num_later; + } + + if (b_loss == NULL) + goto out; + + num_later = 1; + list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) { + if (num_later == 0) { + a_loss = entry; + break; + } else if (dccp_rx_hist_entry_data_packet(entry)) + --num_later; + } + + if (a_loss == NULL) { + if (list_empty(li_list)) { + /* no loss event have occured yet */ + LIMIT_NETDEBUG("%s: TODO: find a lost data packet by " + "comparing to initial seqno\n", + __FUNCTION__); + goto out; + } else { + LIMIT_NETDEBUG("%s: Less than 4 data pkts in history!", + __FUNCTION__); + goto out; + } + } + + /* Locate a lost data packet */ + entry = packet = b_loss; + list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) { + u64 delta = dccp_delta_seqno(entry->dccphrx_seqno, + packet->dccphrx_seqno); + + if (delta != 0) { + if (dccp_rx_hist_entry_data_packet(packet)) + --delta; + /* + * FIXME: check this, probably this % usage is because + * in earlier drafts the ndp count was just 8 bits + * long, but now it cam be up to 24 bits long. + */ +#if 0 + if (delta % DCCP_NDP_LIMIT != + (packet->dccphrx_ndp - + entry->dccphrx_ndp) % DCCP_NDP_LIMIT) +#endif + if (delta != packet->dccphrx_ndp - entry->dccphrx_ndp) { + seq_loss = entry->dccphrx_seqno; + dccp_inc_seqno(&seq_loss); + } + } + packet = entry; + if (packet == a_loss) + break; + } +out: + if (seq_loss != DCCP_MAX_SEQNO + 1) + *win_loss = a_loss->dccphrx_ccval; + else + *win_loss = 0; /* Paranoia */ + + return seq_loss; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_detect_loss); + struct dccp_tx_hist *dccp_tx_hist_new(const char *name) { struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index ebfcb8e2c67..fb90a91aa93 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -193,4 +193,7 @@ extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, struct list_head *li_list, struct dccp_rx_hist_entry *packet); +extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, + struct list_head *li_list, u8 *win_loss); + #endif /* _DCCP_PKT_HIST_ */ -- cgit v1.2.3 From a84ffe430342db6ee585a5038f3242a6b4112d69 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 28 Aug 2005 04:51:32 -0300 Subject: [DCCP]: Introduce DCCP_SOCKOPT_PACKET_SIZE So that applications can set dccp_sock->dccps_pkt_size, that in turn is used in the CCID3 half connection init routines to set ccid3hc[tr]x_s and use it in its rate calculations. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 12 ++++++------ net/dccp/proto.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 54 insertions(+), 10 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 4ff6ede0f07..e22b0eefdbf 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -672,9 +672,9 @@ static int ccid3_hc_tx_init(struct sock *sk) memset(hctx, 0, sizeof(*hctx)); - if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE && - dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE) - hctx->ccid3hctx_s = (u16)dp->dccps_avg_packet_size; + if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && + dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE) + hctx->ccid3hctx_s = dp->dccps_packet_size; else hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE; @@ -1058,9 +1058,9 @@ static int ccid3_hc_rx_init(struct sock *sk) memset(hcrx, 0, sizeof(*hcrx)); - if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE && - dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE) - hcrx->ccid3hcrx_s = (u16)dp->dccps_avg_packet_size; + if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && + dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE) + hcrx->ccid3hcrx_s = dp->dccps_packet_size; else hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index f4da6561e40..18a0e69c9dc 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -205,23 +205,67 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) int dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - dccp_pr_debug("entry\n"); + struct dccp_sock *dp; + int err; + int val; if (level != SOL_DCCP) return ip_setsockopt(sk, level, optname, optval, optlen); - return -EOPNOTSUPP; + if (optlen < sizeof(int)) + return -EINVAL; + + if (get_user(val, (int __user *)optval)) + return -EFAULT; + + lock_sock(sk); + + dp = dccp_sk(sk); + err = 0; + + switch (optname) { + case DCCP_SOCKOPT_PACKET_SIZE: + dp->dccps_packet_size = val; + break; + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; } int dccp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - dccp_pr_debug("entry\n"); + struct dccp_sock *dp; + int val, len; if (level != SOL_DCCP) return ip_getsockopt(sk, level, optname, optval, optlen); - return -EOPNOTSUPP; + if (get_user(len, optlen)) + return -EFAULT; + + len = min_t(unsigned int, len, sizeof(int)); + if (len < 0) + return -EINVAL; + + dp = dccp_sk(sk); + + switch (optname) { + case DCCP_SOCKOPT_PACKET_SIZE: + val = dp->dccps_packet_size; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen) || copy_to_user(optval, &val, len)) + return -EFAULT; + + return 0; } int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, -- cgit v1.2.3 From c530cfb1ce1e8f230744c3f3bd86771f50725053 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 29 Aug 2005 02:15:54 -0300 Subject: [CCID3]: Call sk->sk_write_space(sk) when receiving a feedback packet This makes the send rate calculations behave way more closely to what is specified, with the jitter previously seen on x and x_recv disappearing completely on non lossy setups. This resembles the tcp_data_snd_check code, that possibly we'll end up using in DCCP as well, perhaps moving this code to inet_connection_sock. For now I'm doing the simplest implementation tho. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 5 +++++ net/dccp/dccp.h | 1 + net/dccp/ipv4.c | 1 + net/dccp/output.c | 13 +++++++++++++ 4 files changed, 20 insertions(+) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index e22b0eefdbf..7bf3b3a91e9 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -556,6 +556,11 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* remove all packets older than the one acked from history */ dccp_tx_hist_purge_older(ccid3_tx_hist, &hctx->ccid3hctx_hist, packet); + /* + * As we have calculated new ipi, delta, t_nom it is possible that + * we now can send a packet, so wake up dccp_wait_for_ccids. + */ + sk->sk_write_space(sk); /* * Schedule no feedback timer to expire in diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 5cd9e794bbe..33456c0d593 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -127,6 +127,7 @@ extern void dccp_send_sync(struct sock *sk, const u64 seq, const enum dccp_pkt_type pkt_type); extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo); +extern void dccp_write_space(struct sock *sk); extern void dccp_init_xmit_timers(struct sock *sk); static inline void dccp_clear_xmit_timers(struct sock *sk) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3cf2cbcdcaf..3fc75dbee4b 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1280,6 +1280,7 @@ static int dccp_v4_init_sock(struct sock *sk) dccp_init_xmit_timers(sk); inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT; sk->sk_state = DCCP_CLOSED; + sk->sk_write_space = dccp_write_space; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; diff --git a/net/dccp/output.c b/net/dccp/output.c index 116f6db5678..28de157a432 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -150,6 +150,19 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) return mss_now; } +void dccp_write_space(struct sock *sk) +{ + read_lock(&sk->sk_callback_lock); + + if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + wake_up_interruptible(sk->sk_sleep); + /* Should agree with poll, otherwise some programs break */ + if (sock_writeable(sk)) + sk_wake_async(sk, 2, POLL_OUT); + + read_unlock(&sk->sk_callback_lock); +} + /** * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet * @sk: socket to wait for -- cgit v1.2.3