From b000cd3707e7b25d76745f9c0e261c23d21fa578 Mon Sep 17 00:00:00 2001 From: Vitaliy Gusev Date: Tue, 15 Apr 2008 00:33:38 -0700 Subject: [TCP]: Fix never pruned tcp out-of-order queue. tcp_prune_queue() doesn't prune an out-of-order queue at all. Therefore sk_rmem_schedule() can fail but the out-of-order queue isn't pruned. This can lead to a TCP deadlock state if the following two conditions hold: 1. There is a sequence hole between the last segment received in order and the segments enqueued to the out-of-order queue. 2. The total size of all segments in the out-of-order queue is more than tcp_mem[2]. Signed-off-by: Vitaliy Gusev Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 72 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 26 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5119856017a..61db7b1eb99 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3841,8 +3841,26 @@ static void tcp_ofo_queue(struct sock *sk) } } +static void tcp_prune_ofo_queue(struct sock *sk); static int tcp_prune_queue(struct sock *sk); +static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) +{ + if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || + !sk_rmem_schedule(sk, size)) { + + if (tcp_prune_queue(sk) < 0) + return -1; + + if (!sk_rmem_schedule(sk, size)) { + tcp_prune_ofo_queue(sk); + if (!sk_rmem_schedule(sk, size)) + return -1; + } + } + return 0; +} + static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { struct tcphdr *th = tcp_hdr(skb); @@ -3892,12 +3910,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (eaten <= 0) { queue_and_out: if (eaten < 0 && - (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - !sk_rmem_schedule(sk, skb->truesize))) { - if (tcp_prune_queue(sk) < 0 || - !sk_rmem_schedule(sk, skb->truesize)) - goto drop; - } + tcp_try_rmem_schedule(sk, skb->truesize)) + goto drop; + skb_set_owner_r(skb, 
sk); __skb_queue_tail(&sk->sk_receive_queue, skb); } @@ -3966,12 +3981,8 @@ drop: TCP_ECN_check_ce(tp, skb); - if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - !sk_rmem_schedule(sk, skb->truesize)) { - if (tcp_prune_queue(sk) < 0 || - !sk_rmem_schedule(sk, skb->truesize)) - goto drop; - } + if (tcp_try_rmem_schedule(sk, skb->truesize)) + goto drop; /* Disable header prediction. */ tp->pred_flags = 0; @@ -4198,6 +4209,28 @@ static void tcp_collapse_ofo_queue(struct sock *sk) } } +/* + * Purge the out-of-order queue. + */ +static void tcp_prune_ofo_queue(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (!skb_queue_empty(&tp->out_of_order_queue)) { + NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED); + __skb_queue_purge(&tp->out_of_order_queue); + + /* Reset SACK state. A conforming SACK implementation will + * do the same at a timeout based retransmit. When a connection + * is in a sad state like this, we care only about integrity + * of the connection not performance. + */ + if (tp->rx_opt.sack_ok) + tcp_sack_reset(&tp->rx_opt); + sk_mem_reclaim(sk); + } +} + /* Reduce allocated memory if we can, trying to get * the socket within its memory limits again. * @@ -4231,20 +4264,7 @@ static int tcp_prune_queue(struct sock *sk) /* Collapsing did not help, destructive actions follow. * This must not ever occur. */ - /* First, purge the out_of_order queue. */ - if (!skb_queue_empty(&tp->out_of_order_queue)) { - NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED); - __skb_queue_purge(&tp->out_of_order_queue); - - /* Reset SACK state. A conforming SACK implementation will - * do the same at a timeout based retransmit. When a connection - * is in a sad state like this, we care only about integrity - * of the connection not performance. 
- */ - if (tcp_is_sack(tp)) - tcp_sack_reset(&tp->rx_opt); - sk_mem_reclaim(sk); - } + tcp_prune_ofo_queue(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) return 0; -- cgit v1.2.3 From 56f367bbfd5a7439961499ca6a2f0822d2074d83 Mon Sep 17 00:00:00 2001 From: Vitaliy Gusev Date: Tue, 15 Apr 2008 20:26:34 -0700 Subject: [TCP]: Add return value indication to tcp_prune_ofo_queue(). Returns non-zero if tp->out_of_order_queue was seen non-empty. This allows tcp_try_rmem_schedule() to return early. Signed-off-by: Vitaliy Gusev Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 61db7b1eb99..bbb7d88a16b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3841,7 +3841,7 @@ static void tcp_ofo_queue(struct sock *sk) } } -static void tcp_prune_ofo_queue(struct sock *sk); +static int tcp_prune_ofo_queue(struct sock *sk); static int tcp_prune_queue(struct sock *sk); static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) @@ -3853,7 +3853,9 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) return -1; if (!sk_rmem_schedule(sk, size)) { - tcp_prune_ofo_queue(sk); + if (!tcp_prune_ofo_queue(sk)) + return -1; + if (!sk_rmem_schedule(sk, size)) return -1; } @@ -4211,10 +4213,12 @@ static void tcp_collapse_ofo_queue(struct sock *sk) /* * Purge the out-of-order queue. + * Return true if queue was pruned. 
*/ -static void tcp_prune_ofo_queue(struct sock *sk) +static int tcp_prune_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + int res = 0; if (!skb_queue_empty(&tp->out_of_order_queue)) { NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED); @@ -4228,7 +4232,9 @@ static void tcp_prune_ofo_queue(struct sock *sk) if (tp->rx_opt.sack_ok) tcp_sack_reset(&tp->rx_opt); sk_mem_reclaim(sk); + res = 1; } + return res; } /* Reduce allocated memory if we can, trying to get -- cgit v1.2.3