diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 219 | ||||
-rw-r--r-- | net/core/skbuff.c | 85 | ||||
-rw-r--r-- | net/core/sock.c | 8 |
3 files changed, 209 insertions, 103 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index a17e0066236..709a9a92225 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -135,6 +135,14 @@ /* This should be increased if a protocol with a bigger head is added. */ #define GRO_MAX_HEAD (MAX_HEADER + 128) +enum { + GRO_MERGED, + GRO_MERGED_FREE, + GRO_HELD, + GRO_NORMAL, + GRO_DROP, +}; + /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. @@ -207,6 +215,13 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; } +static inline void *skb_gro_mac_header(struct sk_buff *skb) +{ + return skb_mac_header(skb) < skb->data ? skb_mac_header(skb) : + page_address(skb_shinfo(skb)->frags[0].page) + + skb_shinfo(skb)->frags[0].page_offset; +} + /* Device list insertion */ static int list_netdevice(struct net_device *dev) { @@ -1708,56 +1723,26 @@ out_kfree_skb: return 0; } -static u32 simple_tx_hashrnd; -static int simple_tx_hashrnd_initialized = 0; +static u32 skb_tx_hashrnd; +static int skb_tx_hashrnd_initialized = 0; -static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) +static u16 skb_tx_hash(struct net_device *dev, struct sk_buff *skb) { - u32 addr1, addr2, ports; - u32 hash, ihl; - u8 ip_proto = 0; + u32 hash; - if (unlikely(!simple_tx_hashrnd_initialized)) { - get_random_bytes(&simple_tx_hashrnd, 4); - simple_tx_hashrnd_initialized = 1; + if (unlikely(!skb_tx_hashrnd_initialized)) { + get_random_bytes(&skb_tx_hashrnd, 4); + skb_tx_hashrnd_initialized = 1; } - switch (skb->protocol) { - case htons(ETH_P_IP): - if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) - ip_proto = ip_hdr(skb)->protocol; - addr1 = ip_hdr(skb)->saddr; - addr2 = ip_hdr(skb)->daddr; - ihl = ip_hdr(skb)->ihl; - break; - case htons(ETH_P_IPV6): - ip_proto = ipv6_hdr(skb)->nexthdr; - addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; - addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; - ihl = (40 >> 2); - break; - default: - return 0; - } - - - switch (ip_proto) { - case IPPROTO_TCP: - case IPPROTO_UDP: - case IPPROTO_DCCP: - case IPPROTO_ESP: - case IPPROTO_AH: - case IPPROTO_SCTP: - case IPPROTO_UDPLITE: - ports = *((u32 *) (skb_network_header(skb) + (ihl * 4))); - break; - - default: - ports = 0; - break; - } + if (skb_rx_queue_recorded(skb)) { + hash = skb_get_rx_queue(skb); + } else if (skb->sk && skb->sk->sk_hash) { + hash = skb->sk->sk_hash; + } else + hash = skb->protocol; - hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd); + hash = jhash_1word(hash, skb_tx_hashrnd); return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); } @@ -1771,7 +1756,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, if (ops->ndo_select_queue) queue_index = ops->ndo_select_queue(dev, skb); else if (dev->real_num_tx_queues > 1) - queue_index = simple_tx_hash(dev, skb); + queue_index = skb_tx_hash(dev, skb); skb_set_queue_mapping(skb, queue_index); return netdev_get_tx_queue(dev, queue_index); @@ -2303,6 +2288,8 @@ ncls: if (!skb) goto out; + skb_orphan(skb); + type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { @@ -2372,7 +2359,6 @@ static int napi_gro_complete(struct sk_buff *skb) out: skb_shinfo(skb)->gso_size = 0; - __skb_push(skb, -skb_network_offset(skb)); return netif_receive_skb(skb); } @@ -2390,6 +2376,26 @@ void napi_gro_flush(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_flush); +void *skb_gro_header(struct sk_buff *skb, unsigned int hlen) +{ + unsigned int offset = skb_gro_offset(skb); + + hlen += offset; + if (hlen <= skb_headlen(skb)) + return skb->data + offset; + + if (unlikely(!skb_shinfo(skb)->nr_frags || + skb_shinfo(skb)->frags[0].size <= + hlen - skb_headlen(skb) || + PageHighMem(skb_shinfo(skb)->frags[0].page))) + return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL; + + return page_address(skb_shinfo(skb)->frags[0].page) + + skb_shinfo(skb)->frags[0].page_offset + + offset - skb_headlen(skb); +} +EXPORT_SYMBOL(skb_gro_header); + int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; @@ -2399,7 +2405,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) int count = 0; int same_flow; int mac_len; - int free; + int ret; if (!(skb->dev->features & NETIF_F_GRO)) goto normal; @@ -2410,11 +2416,13 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { struct sk_buff *p; + void *mac; if (ptype->type != type || ptype->dev || !ptype->gro_receive) continue; - skb_reset_network_header(skb); + skb_set_network_header(skb, skb_gro_offset(skb)); + mac = skb_gro_mac_header(skb); mac_len = skb->network_header - skb->mac_header; skb->mac_len = mac_len; NAPI_GRO_CB(skb)->same_flow = 0; @@ -2428,8 +2436,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) continue; if (p->mac_len != mac_len || - memcmp(skb_mac_header(p), skb_mac_header(skb), - mac_len)) + memcmp(skb_mac_header(p), mac, mac_len)) NAPI_GRO_CB(p)->same_flow = 0; } @@ -2442,7 +2449,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) goto normal; same_flow = NAPI_GRO_CB(skb)->same_flow; - free = NAPI_GRO_CB(skb)->free; + ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; if (pp) { struct sk_buff *nskb = *pp; @@ -2456,21 +2463,28 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) if (same_flow) goto ok; - if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) { - __skb_push(skb, -skb_network_offset(skb)); + if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) goto normal; - } NAPI_GRO_CB(skb)->count = 1; - skb_shinfo(skb)->gso_size = skb->len; + skb_shinfo(skb)->gso_size = skb_gro_len(skb); skb->next = napi->gro_list; napi->gro_list = skb; + ret = GRO_HELD; + +pull: + if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) { + if (napi->gro_list == skb) + napi->gro_list = skb->next; + ret = GRO_DROP; + } ok: - return free; + return ret; normal: - return -1; + ret = GRO_NORMAL; + goto pull; } EXPORT_SYMBOL(dev_gro_receive); @@ -2486,18 +2500,32 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) return dev_gro_receive(napi, skb); } -int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +int napi_skb_finish(int ret, struct sk_buff *skb) { - switch (__napi_gro_receive(napi, skb)) { - case -1: + int err = NET_RX_SUCCESS; + + switch (ret) { + case GRO_NORMAL: return netif_receive_skb(skb); - case 1: + case GRO_DROP: + err = NET_RX_DROP; + /* fall through */ + + case GRO_MERGED_FREE: kfree_skb(skb); break; } - return NET_RX_SUCCESS; + return err; +} +EXPORT_SYMBOL(napi_skb_finish); + +int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +{ + skb_gro_reset_offset(skb); + + return napi_skb_finish(__napi_gro_receive(napi, skb), skb); } EXPORT_SYMBOL(napi_gro_receive); @@ -2515,6 +2543,9 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, { struct net_device *dev = napi->dev; struct sk_buff *skb = napi->skb; + struct ethhdr *eth; + skb_frag_t *frag; + int i; napi->skb = NULL; @@ -2527,20 +2558,36 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, } BUG_ON(info->nr_frags > MAX_SKB_FRAGS); + frag = &info->frags[info->nr_frags - 1]; + + for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) { + skb_fill_page_desc(skb, i, frag->page, frag->page_offset, + frag->size); + frag++; + } skb_shinfo(skb)->nr_frags = info->nr_frags; - memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags)); skb->data_len = info->len; skb->len += info->len; skb->truesize += info->len; - if (!pskb_may_pull(skb, ETH_HLEN)) { + skb_reset_mac_header(skb); + skb_gro_reset_offset(skb); + + eth = skb_gro_header(skb, sizeof(*eth)); + if (!eth) { napi_reuse_skb(napi, skb); skb = NULL; goto out; } - skb->protocol = eth_type_trans(skb, dev); + skb_gro_pull(skb, sizeof(*eth)); + + /* + * This works because the only protocols we care about don't require + * special handling. We'll fix it up properly at the end. + */ + skb->protocol = eth->h_proto; skb->ip_summed = info->ip_summed; skb->csum = info->csum; @@ -2550,29 +2597,43 @@ out: } EXPORT_SYMBOL(napi_fraginfo_skb); -int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) +int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) { - struct sk_buff *skb = napi_fraginfo_skb(napi, info); - int err = NET_RX_DROP; + int err = NET_RX_SUCCESS; - if (!skb) - goto out; + switch (ret) { + case GRO_NORMAL: + case GRO_HELD: + skb->protocol = eth_type_trans(skb, napi->dev); - err = NET_RX_SUCCESS; + if (ret == GRO_NORMAL) + return netif_receive_skb(skb); - switch (__napi_gro_receive(napi, skb)) { - case -1: - return netif_receive_skb(skb); + skb_gro_pull(skb, -ETH_HLEN); + break; - case 0: - goto out; - } + case GRO_DROP: + err = NET_RX_DROP; + /* fall through */ - napi_reuse_skb(napi, skb); + case GRO_MERGED_FREE: + napi_reuse_skb(napi, skb); + break; + } -out: return err; } +EXPORT_SYMBOL(napi_frags_finish); + +int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) +{ + struct sk_buff *skb = napi_fraginfo_skb(napi, info); + + if (!skb) + return NET_RX_DROP; + + return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb)); +} EXPORT_SYMBOL(napi_gro_frags); static int process_backlog(struct napi_struct *napi, int quota) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index da74b844f4e..67f2a2f8582 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1333,14 +1333,39 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) put_page(spd->pages[i]); } -static inline struct page *linear_to_page(struct page *page, unsigned int len, - unsigned int offset) -{ - struct page *p = alloc_pages(GFP_KERNEL, 0); +static inline struct page *linear_to_page(struct page *page, unsigned int *len, + unsigned int *offset, + struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + struct page *p = sk->sk_sndmsg_page; + unsigned int off; + + if (!p) { +new_page: + p = sk->sk_sndmsg_page = alloc_pages(sk->sk_allocation, 0); + if (!p) + return NULL; - if (!p) - return NULL; - memcpy(page_address(p) + offset, page_address(page) + offset, len); + off = sk->sk_sndmsg_off = 0; + /* hold one ref to this page until it's full */ + } else { + unsigned int mlen; + + off = sk->sk_sndmsg_off; + mlen = PAGE_SIZE - off; + if (mlen < 64 && mlen < *len) { + put_page(p); + goto new_page; + } + + *len = min_t(unsigned int, *len, mlen); + } + + memcpy(page_address(p) + off, page_address(page) + *offset, *len); + sk->sk_sndmsg_off += *len; + *offset = off; + get_page(p); return p; } @@ -1349,21 +1374,21 @@ static inline struct page *linear_to_page(struct page *page, unsigned int len, * Fill page/offset/length into spd, if it can hold more pages. */ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, - unsigned int len, unsigned int offset, + unsigned int *len, unsigned int offset, struct sk_buff *skb, int linear) { if (unlikely(spd->nr_pages == PIPE_BUFFERS)) return 1; if (linear) { - page = linear_to_page(page, len, offset); + page = linear_to_page(page, len, &offset, skb); if (!page) return 1; } else get_page(page); spd->pages[spd->nr_pages] = page; - spd->partial[spd->nr_pages].len = len; + spd->partial[spd->nr_pages].len = *len; spd->partial[spd->nr_pages].offset = offset; spd->nr_pages++; @@ -1405,7 +1430,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff, /* the linear region may spread across several pages */ flen = min_t(unsigned int, flen, PAGE_SIZE - poff); - if (spd_fill_page(spd, page, flen, poff, skb, linear)) + if (spd_fill_page(spd, page, &flen, poff, skb, linear)) return 1; __segment_seek(&page, &poff, &plen, flen); @@ -2585,17 +2610,23 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) struct sk_buff *p = *head; struct sk_buff *nskb; unsigned int headroom; - unsigned int hlen = p->data - skb_mac_header(p); - unsigned int len = skb->len; + unsigned int len = skb_gro_len(skb); - if (hlen + p->len + len >= 65536) + if (p->len + len >= 65536) return -E2BIG; if (skb_shinfo(p)->frag_list) goto merge; - else if (!skb_headlen(p) && !skb_headlen(skb) && - skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags < - MAX_SKB_FRAGS) { + else if (skb_headlen(skb) <= skb_gro_offset(skb)) { + if (skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags > + MAX_SKB_FRAGS) + return -E2BIG; + + skb_shinfo(skb)->frags[0].page_offset += + skb_gro_offset(skb) - skb_headlen(skb); + skb_shinfo(skb)->frags[0].size -= + skb_gro_offset(skb) - skb_headlen(skb); + memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags, skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); @@ -2612,7 +2643,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) } headroom = skb_headroom(p); - nskb = netdev_alloc_skb(p->dev, headroom); + nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p)); if (unlikely(!nskb)) return -ENOMEM; @@ -2620,12 +2651,15 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) nskb->mac_len = p->mac_len; skb_reserve(nskb, headroom); + __skb_put(nskb, skb_gro_offset(p)); - skb_set_mac_header(nskb, -hlen); + skb_set_mac_header(nskb, skb_mac_header(p) - p->data); skb_set_network_header(nskb, skb_network_offset(p)); skb_set_transport_header(nskb, skb_transport_offset(p)); - memcpy(skb_mac_header(nskb), skb_mac_header(p), hlen); + __skb_pull(p, skb_gro_offset(p)); + memcpy(skb_mac_header(nskb), skb_mac_header(p), + p->data - skb_mac_header(p)); *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); skb_shinfo(nskb)->frag_list = p; @@ -2644,6 +2678,17 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) p = nskb; merge: + if (skb_gro_offset(skb) > skb_headlen(skb)) { + skb_shinfo(skb)->frags[0].page_offset += + skb_gro_offset(skb) - skb_headlen(skb); + skb_shinfo(skb)->frags[0].size -= + skb_gro_offset(skb) - skb_headlen(skb); + skb_gro_reset_offset(skb); + skb_gro_pull(skb, skb_headlen(skb)); + } + + __skb_pull(skb, skb_gro_offset(skb)); + p->prev->next = skb; p->prev = skb; skb_header_release(skb); diff --git a/net/core/sock.c b/net/core/sock.c index f3a0d08cbb4..c64996f8a27 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1254,10 +1254,9 @@ static long sock_wait_for_wmem(struct sock * sk, long timeo) * Generic send/receive buffer handlers */ -static struct sk_buff *sock_alloc_send_pskb(struct sock *sk, - unsigned long header_len, - unsigned long data_len, - int noblock, int *errcode) +struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, + unsigned long data_len, int noblock, + int *errcode) { struct sk_buff *skb; gfp_t gfp_mask; @@ -1337,6 +1336,7 @@ failure: *errcode = err; return NULL; } +EXPORT_SYMBOL(sock_alloc_send_pskb); struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, int noblock, int *errcode) |