aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig4
-rw-r--r--net/ipv4/fib_trie.c6
-rw-r--r--net/ipv4/ipconfig.c12
-rw-r--r--net/ipv4/netfilter/arp_tables.c125
-rw-r--r--net/ipv4/netfilter/ip_tables.c126
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c3
-rw-r--r--net/ipv4/route.c62
-rw-r--r--net/ipv4/tcp.c11
-rw-r--r--net/ipv4/tcp_input.c13
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv4/tcp_vegas.c11
-rw-r--r--net/ipv4/udp.c3
12 files changed, 135 insertions, 243 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index b2cf91e4cca..5b919f7b45d 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -407,8 +407,8 @@ config INET_XFRM_MODE_BEET
If unsure, say Y.
config INET_LRO
- tristate "Large Receive Offload (ipv4/tcp)"
-
+ bool "Large Receive Offload (ipv4/tcp)"
+ default y
---help---
Support for Large Receive Offload (ipv4/tcp).
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index ec0ae490f0b..33c7c85dfe4 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -986,9 +986,12 @@ fib_find_node(struct trie *t, u32 key)
static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
{
int wasfull;
- t_key cindex, key = tn->key;
+ t_key cindex, key;
struct tnode *tp;
+ preempt_disable();
+ key = tn->key;
+
while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
cindex = tkey_extract_bits(key, tp->pos, tp->bits);
wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
@@ -1007,6 +1010,7 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
if (IS_TNODE(tn))
tn = (struct tnode *)resize(t, (struct tnode *)tn);
+ preempt_enable();
return (struct node *)tn;
}
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 90d22ae0a41..88bf051d0cb 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -139,6 +139,8 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */
__be32 root_server_addr = NONE; /* Address of NFS server */
u8 root_server_path[256] = { 0, }; /* Path to mount as root */
+u32 ic_dev_xid; /* Device under configuration */
+
/* vendor class identifier */
static char vendor_class_identifier[253] __initdata;
@@ -932,6 +934,13 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
goto drop_unlock;
}
+ /* Is it a reply for the device we are configuring? */
+ if (b->xid != ic_dev_xid) {
+ if (net_ratelimit())
+ printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet \n");
+ goto drop_unlock;
+ }
+
/* Parse extensions */
if (ext_len >= 4 &&
!memcmp(b->exten, ic_bootp_cookie, 4)) { /* Check magic cookie */
@@ -1115,6 +1124,9 @@ static int __init ic_dynamic(void)
get_random_bytes(&timeout, sizeof(timeout));
timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM);
for (;;) {
+ /* Track the device we are configuring */
+ ic_dev_xid = d->xid;
+
#ifdef IPCONFIG_BOOTP
if (do_bootp && (d->able & IC_BOOTP))
ic_bootp_send_if(d, jiffies - start_jiffies);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 5ba533d234d..831fe1879dc 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -253,9 +253,9 @@ unsigned int arpt_do_table(struct sk_buff *skb,
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
- rcu_read_lock_bh();
- private = rcu_dereference(table->private);
- table_base = rcu_dereference(private->entries[smp_processor_id()]);
+ xt_info_rdlock_bh();
+ private = table->private;
+ table_base = private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
back = get_entry(table_base, private->underflow[hook]);
@@ -273,6 +273,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
(2 * skb->dev->addr_len);
+
ADD_COUNTER(e->counters, hdr_len, 1);
t = arpt_get_target(e);
@@ -328,8 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
e = (void *)e + e->next_offset;
}
} while (!hotdrop);
-
- rcu_read_unlock_bh();
+ xt_info_rdunlock_bh();
if (hotdrop)
return NF_DROP;
@@ -711,9 +711,12 @@ static void get_counters(const struct xt_table_info *t,
/* Instead of clearing (by a previous call to memset())
* the counters and using adds, we set the counters
* with data used by 'current' CPU
- * We dont care about preemption here.
+ *
+ * Bottom half has to be disabled to prevent deadlock
+ * if new softirq were to run and call ipt_do_table
*/
- curcpu = raw_smp_processor_id();
+ local_bh_disable();
+ curcpu = smp_processor_id();
i = 0;
ARPT_ENTRY_ITERATE(t->entries[curcpu],
@@ -726,73 +729,22 @@ static void get_counters(const struct xt_table_info *t,
if (cpu == curcpu)
continue;
i = 0;
+ xt_info_wrlock(cpu);
ARPT_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_entry_to_counter,
counters,
&i);
+ xt_info_wrunlock(cpu);
}
-}
-
-
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct arpt_entry *e,
- const struct xt_counters addme[],
- unsigned int *i)
-{
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
- (*i)++;
- return 0;
-}
-
-/* Take values from counters and add them back onto the current cpu */
-static void put_counters(struct xt_table_info *t,
- const struct xt_counters counters[])
-{
- unsigned int i, cpu;
-
- local_bh_disable();
- cpu = smp_processor_id();
- i = 0;
- ARPT_ENTRY_ITERATE(t->entries[cpu],
- t->size,
- add_counter_to_entry,
- counters,
- &i);
local_bh_enable();
}
-static inline int
-zero_entry_counter(struct arpt_entry *e, void *arg)
-{
- e->counters.bcnt = 0;
- e->counters.pcnt = 0;
- return 0;
-}
-
-static void
-clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
-{
- unsigned int cpu;
- const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
-
- memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
- for_each_possible_cpu(cpu) {
- memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
- ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
- zero_entry_counter, NULL);
- }
-}
-
static struct xt_counters *alloc_counters(struct xt_table *table)
{
unsigned int countersize;
struct xt_counters *counters;
struct xt_table_info *private = table->private;
- struct xt_table_info *info;
/* We need atomic snapshot of counters: rest doesn't change
* (other than comefrom, which userspace doesn't care
@@ -802,30 +754,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
counters = vmalloc_node(countersize, numa_node_id());
if (counters == NULL)
- goto nomem;
-
- info = xt_alloc_table_info(private->size);
- if (!info)
- goto free_counters;
-
- clone_counters(info, private);
-
- mutex_lock(&table->lock);
- xt_table_entry_swap_rcu(private, info);
- synchronize_net(); /* Wait until smoke has cleared */
+ return ERR_PTR(-ENOMEM);
- get_counters(info, counters);
- put_counters(private, counters);
- mutex_unlock(&table->lock);
-
- xt_free_table_info(info);
+ get_counters(private, counters);
return counters;
-
- free_counters:
- vfree(counters);
- nomem:
- return ERR_PTR(-ENOMEM);
}
static int copy_entries_to_user(unsigned int total_size,
@@ -1094,8 +1027,9 @@ static int __do_replace(struct net *net, const char *name,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters. */
+ /* Get the old counters, and synchronize with replace */
get_counters(oldinfo, counters);
+
/* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1165,10 +1099,23 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
return ret;
}
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct arpt_entry *e,
+ const struct xt_counters addme[],
+ unsigned int *i)
+{
+ ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+ (*i)++;
+ return 0;
+}
+
static int do_add_counters(struct net *net, void __user *user, unsigned int len,
int compat)
{
- unsigned int i;
+ unsigned int i, curcpu;
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
@@ -1224,26 +1171,26 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
goto free;
}
- mutex_lock(&t->lock);
+ local_bh_disable();
private = t->private;
if (private->number != num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
- preempt_disable();
i = 0;
/* Choose the copy that is on our node */
- loc_cpu_entry = private->entries[smp_processor_id()];
+ curcpu = smp_processor_id();
+ loc_cpu_entry = private->entries[curcpu];
+ xt_info_wrlock(curcpu);
ARPT_ENTRY_ITERATE(loc_cpu_entry,
private->size,
add_counter_to_entry,
paddc,
&i);
- preempt_enable();
+ xt_info_wrunlock(curcpu);
unlock_up_free:
- mutex_unlock(&t->lock);
-
+ local_bh_enable();
xt_table_unlock(t);
module_put(t->me);
free:
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 810c0b62c7d..2ec8d7290c4 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -338,10 +338,9 @@ ipt_do_table(struct sk_buff *skb,
tgpar.hooknum = hook;
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-
- rcu_read_lock_bh();
- private = rcu_dereference(table->private);
- table_base = rcu_dereference(private->entries[smp_processor_id()]);
+ xt_info_rdlock_bh();
+ private = table->private;
+ table_base = private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
@@ -436,8 +435,7 @@ ipt_do_table(struct sk_buff *skb,
e = (void *)e + e->next_offset;
}
} while (!hotdrop);
-
- rcu_read_unlock_bh();
+ xt_info_rdunlock_bh();
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
@@ -896,10 +894,13 @@ get_counters(const struct xt_table_info *t,
/* Instead of clearing (by a previous call to memset())
* the counters and using adds, we set the counters
- * with data used by 'current' CPU
- * We dont care about preemption here.
+ * with data used by 'current' CPU.
+ *
+ * Bottom half has to be disabled to prevent deadlock
+ * if new softirq were to run and call ipt_do_table
*/
- curcpu = raw_smp_processor_id();
+ local_bh_disable();
+ curcpu = smp_processor_id();
i = 0;
IPT_ENTRY_ITERATE(t->entries[curcpu],
@@ -912,74 +913,22 @@ get_counters(const struct xt_table_info *t,
if (cpu == curcpu)
continue;
i = 0;
+ xt_info_wrlock(cpu);
IPT_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_entry_to_counter,
counters,
&i);
+ xt_info_wrunlock(cpu);
}
-
-}
-
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct ipt_entry *e,
- const struct xt_counters addme[],
- unsigned int *i)
-{
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
- (*i)++;
- return 0;
-}
-
-/* Take values from counters and add them back onto the current cpu */
-static void put_counters(struct xt_table_info *t,
- const struct xt_counters counters[])
-{
- unsigned int i, cpu;
-
- local_bh_disable();
- cpu = smp_processor_id();
- i = 0;
- IPT_ENTRY_ITERATE(t->entries[cpu],
- t->size,
- add_counter_to_entry,
- counters,
- &i);
local_bh_enable();
}
-
-static inline int
-zero_entry_counter(struct ipt_entry *e, void *arg)
-{
- e->counters.bcnt = 0;
- e->counters.pcnt = 0;
- return 0;
-}
-
-static void
-clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
-{
- unsigned int cpu;
- const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
-
- memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
- for_each_possible_cpu(cpu) {
- memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
- IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
- zero_entry_counter, NULL);
- }
-}
-
static struct xt_counters * alloc_counters(struct xt_table *table)
{
unsigned int countersize;
struct xt_counters *counters;
struct xt_table_info *private = table->private;
- struct xt_table_info *info;
/* We need atomic snapshot of counters: rest doesn't change
(other than comefrom, which userspace doesn't care
@@ -988,30 +937,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table)
counters = vmalloc_node(countersize, numa_node_id());
if (counters == NULL)
- goto nomem;
+ return ERR_PTR(-ENOMEM);
- info = xt_alloc_table_info(private->size);
- if (!info)
- goto free_counters;
-
- clone_counters(info, private);
-
- mutex_lock(&table->lock);
- xt_table_entry_swap_rcu(private, info);
- synchronize_net(); /* Wait until smoke has cleared */
-
- get_counters(info, counters);
- put_counters(private, counters);
- mutex_unlock(&table->lock);
-
- xt_free_table_info(info);
+ get_counters(private, counters);
return counters;
-
- free_counters:
- vfree(counters);
- nomem:
- return ERR_PTR(-ENOMEM);
}
static int
@@ -1306,8 +1236,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters. */
+ /* Get the old counters, and synchronize with replace */
get_counters(oldinfo, counters);
+
/* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1377,11 +1308,23 @@ do_replace(struct net *net, void __user *user, unsigned int len)
return ret;
}
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ipt_entry *e,
+ const struct xt_counters addme[],
+ unsigned int *i)
+{
+ ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+ (*i)++;
+ return 0;
+}
static int
do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
{
- unsigned int i;
+ unsigned int i, curcpu;
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
@@ -1437,25 +1380,26 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
goto free;
}
- mutex_lock(&t->lock);
+ local_bh_disable();
private = t->private;
if (private->number != num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
- preempt_disable();
i = 0;
/* Choose the copy that is on our node */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ curcpu = smp_processor_id();
+ loc_cpu_entry = private->entries[curcpu];
+ xt_info_wrlock(curcpu);
IPT_ENTRY_ITERATE(loc_cpu_entry,
private->size,
add_counter_to_entry,
paddc,
&i);
- preempt_enable();
+ xt_info_wrunlock(curcpu);
unlock_up_free:
- mutex_unlock(&t->lock);
+ local_bh_enable();
xt_table_unlock(t);
module_put(t->me);
free:
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index fe65187810f..3229e0a81ba 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -211,7 +211,8 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple,
minip = ntohl(range->min_ip);
maxip = ntohl(range->max_ip);
j = jhash_2words((__force u32)tuple->src.u3.ip,
- (__force u32)tuple->dst.u3.ip, 0);
+ range->flags & IP_NAT_RANGE_PERSISTENT ?
+ (__force u32)tuple->dst.u3.ip : 0, 0);
j = ((u64)j * (maxip - minip + 1)) >> 32;
*var_ipp = htonl(minip + j);
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c40debe51b3..28205e5bfa9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -784,8 +784,8 @@ static void rt_check_expire(void)
{
static unsigned int rover;
unsigned int i = rover, goal;
- struct rtable *rth, **rthp;
- unsigned long length = 0, samples = 0;
+ struct rtable *rth, *aux, **rthp;
+ unsigned long samples = 0;
unsigned long sum = 0, sum2 = 0;
u64 mult;
@@ -795,9 +795,9 @@ static void rt_check_expire(void)
goal = (unsigned int)mult;
if (goal > rt_hash_mask)
goal = rt_hash_mask + 1;
- length = 0;
for (; goal > 0; goal--) {
unsigned long tmo = ip_rt_gc_timeout;
+ unsigned long length;
i = (i + 1) & rt_hash_mask;
rthp = &rt_hash_table[i].chain;
@@ -809,8 +809,10 @@ static void rt_check_expire(void)
if (*rthp == NULL)
continue;
+ length = 0;
spin_lock_bh(rt_hash_lock_addr(i));
while ((rth = *rthp) != NULL) {
+ prefetch(rth->u.dst.rt_next);
if (rt_is_expired(rth)) {
*rthp = rth->u.dst.rt_next;
rt_free(rth);
@@ -819,33 +821,30 @@ static void rt_check_expire(void)
if (rth->u.dst.expires) {
/* Entry is expired even if it is in use */
if (time_before_eq(jiffies, rth->u.dst.expires)) {
+nofree:
tmo >>= 1;
rthp = &rth->u.dst.rt_next;
/*
- * Only bump our length if the hash
- * inputs on entries n and n+1 are not
- * the same, we only count entries on
+ * We only count entries on
* a chain with equal hash inputs once
* so that entries for different QOS
* levels, and other non-hash input
* attributes don't unfairly skew
* the length computation
*/
- if ((*rthp == NULL) ||
- !compare_hash_inputs(&(*rthp)->fl,
- &rth->fl))
- length += ONE;
+ for (aux = rt_hash_table[i].chain;;) {
+ if (aux == rth) {
+ length += ONE;
+ break;
+ }
+ if (compare_hash_inputs(&aux->fl, &rth->fl))
+ break;
+ aux = aux->u.dst.rt_next;
+ }
continue;
}
- } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) {
- tmo >>= 1;
- rthp = &rth->u.dst.rt_next;
- if ((*rthp == NULL) ||
- !compare_hash_inputs(&(*rthp)->fl,
- &rth->fl))
- length += ONE;
- continue;
- }
+ } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
+ goto nofree;
/* Cleanup aged off entries. */
*rthp = rth->u.dst.rt_next;
@@ -1068,7 +1067,6 @@ out: return 0;
static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
{
struct rtable *rth, **rthp;
- struct rtable *rthi;
unsigned long now;
struct rtable *cand, **candp;
u32 min_score;
@@ -1088,7 +1086,6 @@ restart:
}
rthp = &rt_hash_table[hash].chain;
- rthi = NULL;
spin_lock_bh(rt_hash_lock_addr(hash));
while ((rth = *rthp) != NULL) {
@@ -1134,17 +1131,6 @@ restart:
chain_length++;
rthp = &rth->u.dst.rt_next;
-
- /*
- * check to see if the next entry in the chain
- * contains the same hash input values as rt. If it does
- * This is where we will insert into the list, instead of
- * at the head. This groups entries that differ by aspects not
- * relvant to the hash function together, which we use to adjust
- * our chain length
- */
- if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl))
- rthi = rth;
}
if (cand) {
@@ -1205,10 +1191,7 @@ restart:
}
}
- if (rthi)
- rt->u.dst.rt_next = rthi->u.dst.rt_next;
- else
- rt->u.dst.rt_next = rt_hash_table[hash].chain;
+ rt->u.dst.rt_next = rt_hash_table[hash].chain;
#if RT_CACHE_DEBUG >= 2
if (rt->u.dst.rt_next) {
@@ -1224,10 +1207,7 @@ restart:
* previous writes to rt are comitted to memory
* before making rt visible to other CPUS.
*/
- if (rthi)
- rcu_assign_pointer(rthi->u.dst.rt_next, rt);
- else
- rcu_assign_pointer(rt_hash_table[hash].chain, rt);
+ rcu_assign_pointer(rt_hash_table[hash].chain, rt);
spin_unlock_bh(rt_hash_lock_addr(hash));
*rp = rt;
@@ -3397,7 +3377,7 @@ int __init ip_rt_init(void)
0,
&rt_hash_log,
&rt_hash_mask,
- 0);
+ rhash_entries ? 0 : 512 * 1024);
memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket));
rt_hash_lock_init();
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index fafbec8b073..7a0f0b27bf1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1321,6 +1321,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
struct task_struct *user_recv = NULL;
int copied_early = 0;
struct sk_buff *skb;
+ u32 urg_hole = 0;
lock_sock(sk);
@@ -1532,7 +1533,8 @@ do_prequeue:
}
}
}
- if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) {
+ if ((flags & MSG_PEEK) &&
+ (peek_seq - copied - urg_hole != tp->copied_seq)) {
if (net_ratelimit())
printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",
current->comm, task_pid_nr(current));
@@ -1553,6 +1555,7 @@ do_prequeue:
if (!urg_offset) {
if (!sock_flag(sk, SOCK_URGINLINE)) {
++*seq;
+ urg_hole++;
offset++;
used--;
if (!used)
@@ -2511,6 +2514,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
struct sk_buff *p;
struct tcphdr *th;
struct tcphdr *th2;
+ unsigned int len;
unsigned int thlen;
unsigned int flags;
unsigned int mss = 1;
@@ -2531,6 +2535,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
skb_gro_pull(skb, thlen);
+ len = skb_gro_len(skb);
flags = tcp_flag_word(th);
for (; (p = *head); head = &p->next) {
@@ -2561,7 +2566,7 @@ found:
mss = skb_shinfo(p)->gso_size;
- flush |= (skb_gro_len(skb) > mss) | !skb_gro_len(skb);
+ flush |= (len > mss) | !len;
flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
if (flush || skb_gro_receive(head, skb)) {
@@ -2574,7 +2579,7 @@ found:
tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
out_check_final:
- flush = skb_gro_len(skb) < mss;
+ flush = len < mss;
flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST |
TCP_FLAG_SYN | TCP_FLAG_FIN);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2bc8e27a163..eec3e6f9956 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -597,16 +597,6 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
tcp_grow_window(sk, skb);
}
-static u32 tcp_rto_min(struct sock *sk)
-{
- struct dst_entry *dst = __sk_dst_get(sk);
- u32 rto_min = TCP_RTO_MIN;
-
- if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
- rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
- return rto_min;
-}
-
/* Called to compute a smoothed rtt estimate. The data fed to this
* routine either comes from timestamps, or from segments that were
* known _not_ to have been retransmitted [see Karn/Partridge
@@ -928,6 +918,8 @@ static void tcp_init_metrics(struct sock *sk)
tcp_set_rto(sk);
if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
goto reset;
+
+cwnd:
tp->snd_cwnd = tcp_init_cwnd(tp, dst);
tp->snd_cwnd_stamp = tcp_time_stamp;
return;
@@ -942,6 +934,7 @@ reset:
tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
}
+ goto cwnd;
}
static void tcp_update_reordering(struct sock *sk, const int metric,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 53300fa2359..59aec609cec 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -778,7 +778,7 @@ static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr)
if (tp->lost_skb_hint &&
before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
- (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked))
+ (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
tp->lost_cnt_hint -= decr;
tcp_verify_left_out(tp);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index a453aac91bd..c6743eec9b7 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -158,6 +158,11 @@ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
}
EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
+static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
+{
+ return min(tp->snd_ssthresh, tp->snd_cwnd-1);
+}
+
static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -221,11 +226,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
*/
diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT;
- if (diff > gamma && tp->snd_ssthresh > 2 ) {
+ if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) {
/* Going too fast. Time to slow down
* and switch to congestion avoidance.
*/
- tp->snd_ssthresh = 2;
/* Set cwnd to match the actual rate
* exactly:
@@ -235,6 +239,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
* utilization.
*/
tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
+ tp->snd_ssthresh = tcp_vegas_ssthresh(tp);
} else if (tp->snd_cwnd <= tp->snd_ssthresh) {
/* Slow start. */
@@ -250,6 +255,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
* we slow down.
*/
tp->snd_cwnd--;
+ tp->snd_ssthresh
+ = tcp_vegas_ssthresh(tp);
} else if (diff < alpha) {
/* We don't have enough extra packets
* in the network, so speed up.
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index bda08a09357..7a1d1ce22e6 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -222,7 +222,7 @@ fail:
return error;
}
-int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
{
struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
@@ -1823,7 +1823,6 @@ EXPORT_SYMBOL(udp_lib_getsockopt);
EXPORT_SYMBOL(udp_lib_setsockopt);
EXPORT_SYMBOL(udp_poll);
EXPORT_SYMBOL(udp_lib_get_port);
-EXPORT_SYMBOL(ipv4_rcv_saddr_equal);
#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(udp_proc_register);