author	Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>	2007-12-24 21:33:45 -0800
committer	David S. Miller <davem@davemloft.net>	2008-01-28 15:00:06 -0800
commit	0e3a4803aa06cd7bc2cfc1d04289df4f6027640a (patch)
tree	c3af99ceea81cd14e14c96fe0c85f39236de933b /net/ipv4
parent	7ffc49a6ee92b7138c2ee28073a8e10e58335d62 (diff)
[TCP]: Force TSO splits to MSS boundaries
If snd_wnd - snd_nxt wasn't a multiple of MSS, the skb was split on an odd boundary by the callers of tcp_window_allows. We try really hard to avoid unnecessary modulos. Therefore the old caller-side check "if (skb->len < limit)" was too wide as well, because limit is not bound in any way to skb->len; it could cause spurious testing for trimming in the middle of the queue, while we only wanted that to happen at the tail of the queue.

A simple additional caller-side check for tcp_write_queue_tail would likely have resulted in 2 x modulos, because the limit would first have to be calculated from the window; however, doing that unnecessary modulo is not mandatory. After a minor change to the algorithm, we simply determine first whether a modulo is needed at all, and at that point immediately decide from which value it should be calculated.

This approach also kills some duplicated code.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
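As a rough illustration of the reworked algorithm, here is a minimal userspace sketch (not kernel code; the function name split_point and all values below are made-up stand-ins for the real tcp_mss_split_point() and live queue state). It shows that the modulo is taken only when the skb is actually bounded by the window or by its own length, and that the split then lands on an MSS boundary:

#include <stdio.h>

static unsigned int min_u32(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

/* Mirrors the new tcp_mss_split_point(): decide first whether a
 * modulo is needed at all, then take it from the right value. */
static unsigned int split_point(unsigned int skb_len, unsigned int window,
				unsigned int mss_now, unsigned int cwnd,
				int is_queue_tail)
{
	unsigned int needed, cwnd_len = mss_now * cwnd;

	/* Common case: cwnd caps us and more data follows; cwnd_len
	 * is already a multiple of mss_now, so no modulo is needed. */
	if (cwnd_len <= window && !is_queue_tail)
		return cwnd_len;

	/* Tail skb large enough that cwnd still caps us. */
	if (is_queue_tail && cwnd_len <= skb_len)
		return cwnd_len;

	/* Only here is the (single) modulo unavoidable. */
	needed = min_u32(skb_len, window);
	return needed - needed % mss_now;
}

int main(void)
{
	/* window (10000) is not a multiple of mss (1448); the skb is
	 * still split on an MSS boundary: 6 * 1448 = 8688. */
	printf("%u\n", split_point(64000, 10000, 1448, 10, 0));
	return 0;
}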
Diffstat (limited to 'net/ipv4')
-rw-r--r--	net/ipv4/tcp_output.c	51
1 file changed, 25 insertions(+), 26 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9a9510acb14..9058e0a2510 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1017,13 +1017,29 @@ static void tcp_cwnd_validate(struct sock *sk)
 	}
 }
 
-static unsigned int tcp_window_allows(struct tcp_sock *tp, struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd)
+/* Returns the portion of skb which can be sent right away without
+ * introducing MSS oddities to segment boundaries. In rare cases where
+ * mss_now != mss_cache, we will request caller to create a small skb
+ * per input skb which could be mostly avoided here (if desired).
+ */
+static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
+					unsigned int mss_now,
+					unsigned int cwnd)
 {
-	u32 window, cwnd_len;
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 needed, window, cwnd_len;
 
 	window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq);
 	cwnd_len = mss_now * cwnd;
-	return min(window, cwnd_len);
+
+	if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
+		return cwnd_len;
+
+	if (skb == tcp_write_queue_tail(sk) && cwnd_len <= skb->len)
+		return cwnd_len;
+
+	needed = min(skb->len, window);
+	return needed - needed % mss_now;
 }
 
 /* Can at least one segment of SKB be sent right now, according to the
@@ -1458,17 +1474,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 		}
 
 		limit = mss_now;
-		if (tso_segs > 1) {
-			limit = tcp_window_allows(tp, skb,
-						  mss_now, cwnd_quota);
-
-			if (skb->len < limit) {
-				unsigned int trim = skb->len % mss_now;
-
-				if (trim)
-					limit = skb->len - trim;
-			}
-		}
+		if (tso_segs > 1)
+			limit = tcp_mss_split_point(sk, skb, mss_now,
+						    cwnd_quota);
 
 		if (skb->len > limit &&
 		    unlikely(tso_fragment(sk, skb, limit, mss_now)))
@@ -1515,7 +1523,6 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
  */
 void tcp_push_one(struct sock *sk, unsigned int mss_now)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb = tcp_send_head(sk);
 	unsigned int tso_segs, cwnd_quota;
 
@@ -1530,17 +1537,9 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
 	BUG_ON(!tso_segs);
 
 	limit = mss_now;
-	if (tso_segs > 1) {
-		limit = tcp_window_allows(tp, skb,
-					  mss_now, cwnd_quota);
-
-		if (skb->len < limit) {
-			unsigned int trim = skb->len % mss_now;
-
-			if (trim)
-				limit = skb->len - trim;
-		}
-	}
+	if (tso_segs > 1)
+		limit = tcp_mss_split_point(sk, skb, mss_now,
+						    cwnd_quota);
 
 	if (skb->len > limit &&
 	    unlikely(tso_fragment(sk, skb, limit, mss_now)))
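For contrast, the caller-side logic removed above can be sketched like this (again a simplified userspace stand-in, not the kernel source): because the old check compared skb->len against a limit derived only from the window and cwnd, a short skb in the middle of the queue could trigger the modulo, and a trim, that the new code reserves for the queue tail.

/* Simplified stand-in for the old tcp_window_allows() plus the
 * caller-side trimming that this patch removes. */
static unsigned int old_limit(unsigned int skb_len, unsigned int window,
			      unsigned int mss_now, unsigned int cwnd)
{
	unsigned int cwnd_len = mss_now * cwnd;
	unsigned int limit = window < cwnd_len ? window : cwnd_len;

	/* Too wide: fires for any skb shorter than limit, even in
	 * the middle of the queue, forcing a spurious modulo. */
	if (skb_len < limit) {
		unsigned int trim = skb_len % mss_now;

		if (trim)
			limit = skb_len - trim;
	}
	return limit;
}

With skb_len = 5000, window = 20000, mss_now = 1448 and cwnd = 10, old_limit() returns 4344 (5000 - 5000 % 1448), so the caller would see skb->len > limit and call tso_fragment() on a mid-queue skb; split_point() in the earlier sketch instead returns cwnd_len (14480) for the same inputs and leaves the skb alone.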