author     Jeff Garzik <jeff@garzik.org>   2006-09-26 13:13:19 -0400
committer  Jeff Garzik <jeff@garzik.org>   2006-09-26 13:13:19 -0400
commit     c226951b93f7cd7c3a10b17384535b617bd43fd0
tree       07b8796a5c99fbbf587b8d0dbcbc173cfe5e381e /net/ipv4
parent     b0df3bd1e553e901ec7297267611a5db88240b38
parent     e8216dee838c09776680a6f1a2e54d81f3cdfa14
Merge branch 'master' into upstream
Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/Kconfig           |  53
-rw-r--r--  net/ipv4/cipso_ipv4.c      | 267
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c |   6
-rw-r--r--  net/ipv4/tcp_cong.c        |   2
4 files changed, 116 insertions, 212 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 1650b64415a..30af4a4dfcc 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -448,24 +448,22 @@ config INET_TCP_DIAG
 	depends on INET_DIAG
 	def_tristate INET_DIAG
 
-config TCP_CONG_ADVANCED
+menuconfig TCP_CONG_ADVANCED
 	bool "TCP: advanced congestion control"
 	---help---
 	  Support for selection of various TCP congestion control
 	  modules.
 
 	  Nearly all users can safely say no here, and a safe default
-	  selection will be made (BIC-TCP with new Reno as a fallback).
+	  selection will be made (CUBIC with new Reno as a fallback).
 
 	  If unsure, say N.
 
-# TCP Reno is builtin (required as fallback)
-menu "TCP congestion control"
-	depends on TCP_CONG_ADVANCED
+if TCP_CONG_ADVANCED
 
 config TCP_CONG_BIC
 	tristate "Binary Increase Congestion (BIC) control"
-	default y
+	default m
 	---help---
 	BIC-TCP is a sender-side only change that ensures a linear RTT
 	fairness under large windows while offering both scalability and
@@ -479,7 +477,7 @@ config TCP_CONG_BIC
 
 config TCP_CONG_CUBIC
 	tristate "CUBIC TCP"
-	default m
+	default y
 	---help---
 	This is version 2.0 of BIC-TCP which uses a cubic growth function
 	among other techniques.
@@ -574,12 +572,49 @@ config TCP_CONG_VENO
 	loss packets.
 	See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
 
-endmenu
+choice
+	prompt "Default TCP congestion control"
+	default DEFAULT_CUBIC
+	help
+	  Select the TCP congestion control that will be used by default
+	  for all connections.
 
-config TCP_CONG_BIC
+	config DEFAULT_BIC
+		bool "Bic" if TCP_CONG_BIC=y
+
+	config DEFAULT_CUBIC
+		bool "Cubic" if TCP_CONG_CUBIC=y
+
+	config DEFAULT_HTCP
+		bool "Htcp" if TCP_CONG_HTCP=y
+
+	config DEFAULT_VEGAS
+		bool "Vegas" if TCP_CONG_VEGAS=y
+
+	config DEFAULT_WESTWOOD
+		bool "Westwood" if TCP_CONG_WESTWOOD=y
+
+	config DEFAULT_RENO
+		bool "Reno"
+
+endchoice
+
+endif
+
+config TCP_CONG_CUBIC
 	tristate
 	depends on !TCP_CONG_ADVANCED
 	default y
 
+config DEFAULT_TCP_CONG
+	string
+	default "bic" if DEFAULT_BIC
+	default "cubic" if DEFAULT_CUBIC
+	default "htcp" if DEFAULT_HTCP
+	default "vegas" if DEFAULT_VEGAS
+	default "westwood" if DEFAULT_WESTWOOD
+	default "reno" if DEFAULT_RENO
+	default "cubic"
+
 source "net/ipv4/ipvs/Kconfig"
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 80a2a0911b4..e6ce0b3ba62 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -259,7 +259,7 @@ void cipso_v4_cache_invalidate(void)
 	u32 iter;
 
 	for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) {
-		spin_lock(&cipso_v4_cache[iter].lock);
+		spin_lock_bh(&cipso_v4_cache[iter].lock);
 		list_for_each_entry_safe(entry,
 					 tmp_entry,
 					 &cipso_v4_cache[iter].list, list) {
@@ -267,7 +267,7 @@ void cipso_v4_cache_invalidate(void)
 			cipso_v4_cache_entry_free(entry);
 		}
 		cipso_v4_cache[iter].size = 0;
-		spin_unlock(&cipso_v4_cache[iter].lock);
+		spin_unlock_bh(&cipso_v4_cache[iter].lock);
 	}
 
 	return;
@@ -309,7 +309,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
 	hash = cipso_v4_map_cache_hash(key, key_len);
 	bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
-	spin_lock(&cipso_v4_cache[bkt].lock);
+	spin_lock_bh(&cipso_v4_cache[bkt].lock);
 	list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) {
 		if (entry->hash == hash &&
 		    entry->key_len == key_len &&
@@ -318,7 +318,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
 			secattr->cache.free = entry->lsm_data.free;
 			secattr->cache.data = entry->lsm_data.data;
 			if (prev_entry == NULL) {
-				spin_unlock(&cipso_v4_cache[bkt].lock);
+				spin_unlock_bh(&cipso_v4_cache[bkt].lock);
 				return 0;
 			}
@@ -333,12 +333,12 @@ static int cipso_v4_cache_check(const unsigned char *key,
 					   &prev_entry->list);
 			}
-			spin_unlock(&cipso_v4_cache[bkt].lock);
+			spin_unlock_bh(&cipso_v4_cache[bkt].lock);
 			return 0;
 		}
 		prev_entry = entry;
 	}
-	spin_unlock(&cipso_v4_cache[bkt].lock);
+	spin_unlock_bh(&cipso_v4_cache[bkt].lock);
 
 	return -ENOENT;
 }
@@ -387,7 +387,7 @@ int cipso_v4_cache_add(const struct sk_buff *skb,
 	entry->lsm_data.data = secattr->cache.data;
 
 	bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
-	spin_lock(&cipso_v4_cache[bkt].lock);
+	spin_lock_bh(&cipso_v4_cache[bkt].lock);
 	if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
 		list_add(&entry->list, &cipso_v4_cache[bkt].list);
 		cipso_v4_cache[bkt].size += 1;
@@ -398,7 +398,7 @@ int cipso_v4_cache_add(const struct sk_buff *skb,
 		list_add(&entry->list, &cipso_v4_cache[bkt].list);
 		cipso_v4_cache_entry_free(old_entry);
 	}
-	spin_unlock(&cipso_v4_cache[bkt].lock);
+	spin_unlock_bh(&cipso_v4_cache[bkt].lock);
 
 	return 0;
@@ -530,197 +530,42 @@ struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
 }
 
 /**
- * cipso_v4_doi_dump_all - Dump all the CIPSO DOI definitions into a sk_buff
- * @headroom: the amount of headroom to allocate for the sk_buff
+ * cipso_v4_doi_walk - Iterate through the DOI definitions
+ * @skip_cnt: skip past this number of DOI definitions, updated
+ * @callback: callback for each DOI definition
+ * @cb_arg: argument for the callback function
  *
  * Description:
- * Dump a list of all the configured DOI values into a sk_buff. The returned
- * sk_buff has room at the front of the sk_buff for @headroom bytes. See
- * net/netlabel/netlabel_cipso_v4.h for the LISTALL message format. This
- * function may fail if another process is changing the DOI list at the same
- * time. Returns a pointer to a sk_buff on success, NULL on error.
+ * Iterate over the DOI definition list, skipping the first @skip_cnt entries.
+ * For each entry call @callback, if @callback returns a negative value stop
+ * 'walking' through the list and return. Updates the value in @skip_cnt upon
+ * return. Returns zero on success, negative values on failure.
  *
  */
-struct sk_buff *cipso_v4_doi_dump_all(size_t headroom)
+int cipso_v4_doi_walk(u32 *skip_cnt,
+		      int (*callback) (struct cipso_v4_doi *doi_def, void *arg),
+		      void *cb_arg)
 {
-	struct sk_buff *skb = NULL;
-	struct cipso_v4_doi *iter;
+	int ret_val = -ENOENT;
 	u32 doi_cnt = 0;
-	ssize_t buf_len;
+	struct cipso_v4_doi *iter_doi;
 
-	buf_len = NETLBL_LEN_U32;
 	rcu_read_lock();
-	list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
-		if (iter->valid) {
-			doi_cnt += 1;
-			buf_len += 2 * NETLBL_LEN_U32;
-		}
-
-	skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
-	if (skb == NULL)
-		goto doi_dump_all_failure;
-
-	if (nla_put_u32(skb, NLA_U32, doi_cnt) != 0)
-		goto doi_dump_all_failure;
-	buf_len -= NETLBL_LEN_U32;
-	list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
-		if (iter->valid) {
-			if (buf_len < 2 * NETLBL_LEN_U32)
-				goto doi_dump_all_failure;
-			if (nla_put_u32(skb, NLA_U32, iter->doi) != 0)
-				goto doi_dump_all_failure;
-			if (nla_put_u32(skb, NLA_U32, iter->type) != 0)
-				goto doi_dump_all_failure;
-			buf_len -= 2 * NETLBL_LEN_U32;
-		}
-	rcu_read_unlock();
-
-	return skb;
-
-doi_dump_all_failure:
-	rcu_read_unlock();
-	kfree(skb);
-	return NULL;
-}
-
-/**
- * cipso_v4_doi_dump - Dump a CIPSO DOI definition into a sk_buff
- * @doi: the DOI value
- * @headroom: the amount of headroom to allocate for the sk_buff
- *
- * Description:
- * Lookup the DOI definition matching @doi and dump it's contents into a
- * sk_buff. The returned sk_buff has room at the front of the sk_buff for
- * @headroom bytes. See net/netlabel/netlabel_cipso_v4.h for the LIST message
- * format. This function may fail if another process is changing the DOI list
- * at the same time. Returns a pointer to a sk_buff on success, NULL on error.
- *
- */
-struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom)
-{
-	struct sk_buff *skb = NULL;
-	struct cipso_v4_doi *iter;
-	u32 tag_cnt = 0;
-	u32 lvl_cnt = 0;
-	u32 cat_cnt = 0;
-	ssize_t buf_len;
-	ssize_t tmp;
-
-	rcu_read_lock();
-	iter = cipso_v4_doi_getdef(doi);
-	if (iter == NULL)
-		goto doi_dump_failure;
-	buf_len = NETLBL_LEN_U32;
-	switch (iter->type) {
-	case CIPSO_V4_MAP_PASS:
-		buf_len += NETLBL_LEN_U32;
-		while(tag_cnt < CIPSO_V4_TAG_MAXCNT &&
-		      iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) {
-			tag_cnt += 1;
-			buf_len += NETLBL_LEN_U8;
-		}
-		break;
-	case CIPSO_V4_MAP_STD:
-		buf_len += 3 * NETLBL_LEN_U32;
-		while (tag_cnt < CIPSO_V4_TAG_MAXCNT &&
-		       iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) {
-			tag_cnt += 1;
-			buf_len += NETLBL_LEN_U8;
-		}
-		for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++)
-			if (iter->map.std->lvl.local[tmp] !=
-			    CIPSO_V4_INV_LVL) {
-				lvl_cnt += 1;
-				buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U8;
-			}
-		for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++)
-			if (iter->map.std->cat.local[tmp] !=
-			    CIPSO_V4_INV_CAT) {
-				cat_cnt += 1;
-				buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U16;
+	list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list)
+		if (iter_doi->valid) {
+			if (doi_cnt++ < *skip_cnt)
+				continue;
+			ret_val = callback(iter_doi, cb_arg);
+			if (ret_val < 0) {
+				doi_cnt--;
+				goto doi_walk_return;
 			}
-		break;
-	}
-
-	skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
-	if (skb == NULL)
-		goto doi_dump_failure;
-
-	if (nla_put_u32(skb, NLA_U32, iter->type) != 0)
-		goto doi_dump_failure;
-	buf_len -= NETLBL_LEN_U32;
-	if (iter != cipso_v4_doi_getdef(doi))
-		goto doi_dump_failure;
-	switch (iter->type) {
-	case CIPSO_V4_MAP_PASS:
-		if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0)
-			goto doi_dump_failure;
-		buf_len -= NETLBL_LEN_U32;
-		for (tmp = 0;
-		     tmp < CIPSO_V4_TAG_MAXCNT &&
-		     iter->tags[tmp] != CIPSO_V4_TAG_INVALID;
-		     tmp++) {
-			if (buf_len < NETLBL_LEN_U8)
-				goto doi_dump_failure;
-			if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0)
-				goto doi_dump_failure;
-			buf_len -= NETLBL_LEN_U8;
 		}
-		break;
-	case CIPSO_V4_MAP_STD:
-		if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0)
-			goto doi_dump_failure;
-		if (nla_put_u32(skb, NLA_U32, lvl_cnt) != 0)
-			goto doi_dump_failure;
-		if (nla_put_u32(skb, NLA_U32, cat_cnt) != 0)
-			goto doi_dump_failure;
-		buf_len -= 3 * NETLBL_LEN_U32;
-		for (tmp = 0;
-		     tmp < CIPSO_V4_TAG_MAXCNT &&
-		     iter->tags[tmp] != CIPSO_V4_TAG_INVALID;
-		     tmp++) {
-			if (buf_len < NETLBL_LEN_U8)
-				goto doi_dump_failure;
-			if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0)
-				goto doi_dump_failure;
-			buf_len -= NETLBL_LEN_U8;
-		}
-		for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++)
-			if (iter->map.std->lvl.local[tmp] !=
-			    CIPSO_V4_INV_LVL) {
-				if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U8)
-					goto doi_dump_failure;
-				if (nla_put_u32(skb, NLA_U32, tmp) != 0)
-					goto doi_dump_failure;
-				if (nla_put_u8(skb,
-					       NLA_U8,
-					       iter->map.std->lvl.local[tmp]) != 0)
-					goto doi_dump_failure;
-				buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U8;
-			}
-		for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++)
-			if (iter->map.std->cat.local[tmp] !=
-			    CIPSO_V4_INV_CAT) {
-				if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U16)
-					goto doi_dump_failure;
-				if (nla_put_u32(skb, NLA_U32, tmp) != 0)
-					goto doi_dump_failure;
-				if (nla_put_u16(skb,
-						NLA_U16,
-						iter->map.std->cat.local[tmp]) != 0)
-					goto doi_dump_failure;
-				buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U16;
-			}
-		break;
-	}
-	rcu_read_unlock();
-
-	return skb;
 
-doi_dump_failure:
+doi_walk_return:
 	rcu_read_unlock();
-	kfree(skb);
-	return NULL;
+	*skip_cnt = doi_cnt;
+	return ret_val;
 }
 
 /**
@@ -1486,43 +1331,40 @@ socket_setattr_failure:
 }
 
 /**
- * cipso_v4_socket_getattr - Get the security attributes from a socket
- * @sock: the socket
+ * cipso_v4_sock_getattr - Get the security attributes from a sock
+ * @sk: the sock
  * @secattr: the security attributes
  *
  * Description:
- * Query @sock to see if there is a CIPSO option attached to the socket and if
- * there is return the CIPSO security attributes in @secattr. Returns zero on
- * success and negative values on failure.
+ * Query @sk to see if there is a CIPSO option attached to the sock and if
+ * there is return the CIPSO security attributes in @secattr. This function
+ * requires that @sk be locked, or privately held, but it does not do any
+ * locking itself. Returns zero on success and negative values on failure.
  *
  */
-int cipso_v4_socket_getattr(const struct socket *sock,
-			    struct netlbl_lsm_secattr *secattr)
+int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
 {
 	int ret_val = -ENOMSG;
-	struct sock *sk;
 	struct inet_sock *sk_inet;
 	unsigned char *cipso_ptr;
 	u32 doi;
 	struct cipso_v4_doi *doi_def;
 
-	sk = sock->sk;
-	lock_sock(sk);
 	sk_inet = inet_sk(sk);
 	if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0)
-		goto socket_getattr_return;
+		return -ENOMSG;
 	cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso -
 		sizeof(struct iphdr);
 	ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr);
 	if (ret_val == 0)
-		goto socket_getattr_return;
+		return ret_val;
 
 	doi = ntohl(*(u32 *)&cipso_ptr[2]);
 	rcu_read_lock();
 	doi_def = cipso_v4_doi_getdef(doi);
 	if (doi_def == NULL) {
 		rcu_read_unlock();
-		goto socket_getattr_return;
+		return -ENOMSG;
 	}
 	switch (cipso_ptr[6]) {
 	case CIPSO_V4_TAG_RBITMAP:
@@ -1533,8 +1375,29 @@ int cipso_v4_socket_getattr(const struct socket *sock,
 	}
 	rcu_read_unlock();
 
-socket_getattr_return:
-	release_sock(sk);
+	return ret_val;
+}
+
+/**
+ * cipso_v4_socket_getattr - Get the security attributes from a socket
+ * @sock: the socket
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Query @sock to see if there is a CIPSO option attached to the socket and if
+ * there is return the CIPSO security attributes in @secattr. Returns zero on
+ * success and negative values on failure.
+ *
+ */
+int cipso_v4_socket_getattr(const struct socket *sock,
+			    struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+
+	lock_sock(sock->sk);
+	ret_val = cipso_v4_sock_getattr(sock->sk, secattr);
+	release_sock(sock->sk);
+
 	return ret_val;
 }
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 19b2071ff31..e82a5be894b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -129,6 +129,12 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name,
 	return ret;
 }
 
+static int __init tcp_congestion_default(void)
+{
+	return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
+}
+
+late_initcall(tcp_congestion_default);
 
 ctl_table ipv4_table[] = {
 	{
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 7ff2e4273a7..af0aca1e6be 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -48,7 +48,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
 		printk(KERN_NOTICE "TCP %s already registered\n", ca->name);
 		ret = -EEXIST;
 	} else {
-		list_add_rcu(&ca->list, &tcp_cong_list);
+		list_add_tail_rcu(&ca->list, &tcp_cong_list);
 		printk(KERN_INFO "TCP %s registered\n", ca->name);
 	}
 	spin_unlock(&tcp_cong_list_lock);
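The cipso_v4_doi_walk() interface added in the cipso_ipv4.c hunk above replaces the old sk_buff dump helpers with callback-driven iteration over the DOI list. The sketch below is a hypothetical caller, not part of this commit: the names doi_count_cb and example_count_dois are invented for illustration, and the code only exercises the contract stated in the kernel-doc comment (skip the first *skip_cnt valid entries, stop when the callback returns a negative value, update *skip_cnt on return).

/* Hypothetical caller sketch (not from this commit): count the valid DOI
 * definitions via the new walk interface. */
#include <linux/types.h>
#include <net/cipso_ipv4.h>

static int doi_count_cb(struct cipso_v4_doi *doi_def, void *arg)
{
	u32 *count = arg;

	(*count)++;	/* returning a negative value here would stop the walk */
	return 0;
}

static u32 example_count_dois(void)
{
	u32 skip = 0;	/* do not skip any valid definitions */
	u32 count = 0;

	/* On return @skip holds how many entries were visited, so a paged
	 * dumper could pass it back in later to resume where it left off. */
	cipso_v4_doi_walk(&skip, doi_count_cb, &count);
	return count;
}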