diff options
Diffstat (limited to 'drivers/net/s2io.c')
-rw-r--r-- | drivers/net/s2io.c | 619 |
1 files changed, 542 insertions, 77 deletions
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index b7f00d6eb6a..79208f434ac 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -57,23 +57,27 @@ #include <linux/ethtool.h> #include <linux/workqueue.h> #include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <net/tcp.h> #include <asm/system.h> #include <asm/uaccess.h> #include <asm/io.h> +#include <asm/div64.h> /* local include */ #include "s2io.h" #include "s2io-regs.h" -#define DRV_VERSION "Version 2.0.9.4" +#define DRV_VERSION "2.0.11.2" /* S2io Driver name & version. */ static char s2io_driver_name[] = "Neterion"; static char s2io_driver_version[] = DRV_VERSION; -int rxd_size[4] = {32,48,48,64}; -int rxd_count[4] = {127,85,85,63}; +static int rxd_size[4] = {32,48,48,64}; +static int rxd_count[4] = {127,85,85,63}; static inline int RXD_IS_UP2DT(RxD_t *rxdp) { @@ -168,6 +172,11 @@ static char ethtool_stats_keys[][ETH_GSTRING_LEN] = { {"\n DRIVER STATISTICS"}, {"single_bit_ecc_errs"}, {"double_bit_ecc_errs"}, + ("lro_aggregated_pkts"), + ("lro_flush_both_count"), + ("lro_out_of_sequence_pkts"), + ("lro_flush_due_to_max_pkts"), + ("lro_avg_aggr_pkts"), }; #define S2IO_STAT_LEN sizeof(ethtool_stats_keys)/ ETH_GSTRING_LEN @@ -214,7 +223,7 @@ static void s2io_vlan_rx_kill_vid(struct net_device *dev, unsigned long vid) #define SWITCH_SIGN 0xA5A5A5A5A5A5A5A5ULL #define END_SIGN 0x0 -static u64 herc_act_dtx_cfg[] = { +static const u64 herc_act_dtx_cfg[] = { /* Set address */ 0x8000051536750000ULL, 0x80000515367500E0ULL, /* Write data */ @@ -235,7 +244,7 @@ static u64 herc_act_dtx_cfg[] = { END_SIGN }; -static u64 xena_mdio_cfg[] = { +static const u64 xena_mdio_cfg[] = { /* Reset PMA PLL */ 0xC001010000000000ULL, 0xC0010100000000E0ULL, 0xC0010100008000E4ULL, @@ -245,7 +254,7 @@ static u64 xena_mdio_cfg[] = { END_SIGN }; -static u64 xena_dtx_cfg[] = { +static const u64 xena_dtx_cfg[] = { 0x8000051500000000ULL, 0x80000515000000E0ULL, 0x80000515D93500E4ULL, 0x8001051500000000ULL, 0x80010515000000E0ULL, 0x80010515001E00E4ULL, @@ -273,7 +282,7 @@ static u64 xena_dtx_cfg[] = { * Constants for Fixing the MacAddress problem seen mostly on * Alpha machines. */ -static u64 fix_mac[] = { +static const u64 fix_mac[] = { 0x0060000000000000ULL, 0x0060600000000000ULL, 0x0040600000000000ULL, 0x0000600000000000ULL, 0x0020600000000000ULL, 0x0060600000000000ULL, @@ -317,6 +326,12 @@ static unsigned int indicate_max_pkts; static unsigned int rxsync_frequency = 3; /* Interrupt type. Values can be 0(INTA), 1(MSI), 2(MSI_X) */ static unsigned int intr_type = 0; +/* Large receive offload feature */ +static unsigned int lro = 0; +/* Max pkts to be aggregated by LRO at one time. If not specified, + * aggregation happens until we hit max IP pkt size(64K) + */ +static unsigned int lro_max_pkts = 0xFFFF; /* * S2IO device table. @@ -1476,6 +1491,19 @@ static int init_nic(struct s2io_nic *nic) writel((u32) (val64 >> 32), (add + 4)); val64 = readq(&bar0->mac_cfg); + /* Enable FCS stripping by adapter */ + add = &bar0->mac_cfg; + val64 = readq(&bar0->mac_cfg); + val64 |= MAC_CFG_RMAC_STRIP_FCS; + if (nic->device_type == XFRAME_II_DEVICE) + writeq(val64, &bar0->mac_cfg); + else { + writeq(RMAC_CFG_KEY(0x4C0D), &bar0->rmac_cfg_key); + writel((u32) (val64), add); + writeq(RMAC_CFG_KEY(0x4C0D), &bar0->rmac_cfg_key); + writel((u32) (val64 >> 32), (add + 4)); + } + /* * Set the time value to be inserted in the pause frame * generated by xena. @@ -2127,7 +2155,7 @@ static void stop_nic(struct s2io_nic *nic) } } -int fill_rxd_3buf(nic_t *nic, RxD_t *rxdp, struct sk_buff *skb) +static int fill_rxd_3buf(nic_t *nic, RxD_t *rxdp, struct sk_buff *skb) { struct net_device *dev = nic->dev; struct sk_buff *frag_list; @@ -2569,6 +2597,8 @@ static void rx_intr_handler(ring_info_t *ring_data) #ifndef CONFIG_S2IO_NAPI int pkt_cnt = 0; #endif + int i; + spin_lock(&nic->rx_lock); if (atomic_read(&nic->card_state) == CARD_DOWN) { DBG_PRINT(INTR_DBG, "%s: %s going down for reset\n", @@ -2661,6 +2691,18 @@ static void rx_intr_handler(ring_info_t *ring_data) break; #endif } + if (nic->lro) { + /* Clear all LRO sessions before exiting */ + for (i=0; i<MAX_LRO_SESSIONS; i++) { + lro_t *lro = &nic->lro0_n[i]; + if (lro->in_use) { + update_L3L4_header(nic, lro); + queue_rx_frame(lro->parent); + clear_lro_session(lro); + } + } + } + spin_unlock(&nic->rx_lock); } @@ -2852,7 +2894,7 @@ static int wait_for_cmd_complete(nic_t * sp) * void. */ -void s2io_reset(nic_t * sp) +static void s2io_reset(nic_t * sp) { XENA_dev_config_t __iomem *bar0 = sp->bar0; u64 val64; @@ -2940,7 +2982,7 @@ void s2io_reset(nic_t * sp) * SUCCESS on success and FAILURE on failure. */ -int s2io_set_swapper(nic_t * sp) +static int s2io_set_swapper(nic_t * sp) { struct net_device *dev = sp->dev; XENA_dev_config_t __iomem *bar0 = sp->bar0; @@ -3089,7 +3131,7 @@ static int wait_for_msix_trans(nic_t *nic, int i) return ret; } -void restore_xmsi_data(nic_t *nic) +static void restore_xmsi_data(nic_t *nic) { XENA_dev_config_t __iomem *bar0 = nic->bar0; u64 val64; @@ -3180,7 +3222,7 @@ int s2io_enable_msi(nic_t *nic) return 0; } -int s2io_enable_msi_x(nic_t *nic) +static int s2io_enable_msi_x(nic_t *nic) { XENA_dev_config_t __iomem *bar0 = nic->bar0; u64 tx_mat, rx_mat; @@ -3668,23 +3710,32 @@ s2io_msi_handle(int irq, void *dev_id, struct pt_regs *regs) * else schedule a tasklet to reallocate the buffers. */ for (i = 0; i < config->rx_ring_num; i++) { - int rxb_size = atomic_read(&sp->rx_bufs_left[i]); - int level = rx_buffer_level(sp, rxb_size, i); - - if ((level == PANIC) && (!TASKLET_IN_USE)) { - DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", dev->name); - DBG_PRINT(INTR_DBG, "PANIC levels\n"); - if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) { - DBG_PRINT(ERR_DBG, "%s:Out of memory", - dev->name); - DBG_PRINT(ERR_DBG, " in ISR!!\n"); + if (!sp->lro) { + int rxb_size = atomic_read(&sp->rx_bufs_left[i]); + int level = rx_buffer_level(sp, rxb_size, i); + + if ((level == PANIC) && (!TASKLET_IN_USE)) { + DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", + dev->name); + DBG_PRINT(INTR_DBG, "PANIC levels\n"); + if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) { + DBG_PRINT(ERR_DBG, "%s:Out of memory", + dev->name); + DBG_PRINT(ERR_DBG, " in ISR!!\n"); + clear_bit(0, (&sp->tasklet_status)); + atomic_dec(&sp->isr_cnt); + return IRQ_HANDLED; + } clear_bit(0, (&sp->tasklet_status)); - atomic_dec(&sp->isr_cnt); - return IRQ_HANDLED; + } else if (level == LOW) { + tasklet_schedule(&sp->task); } - clear_bit(0, (&sp->tasklet_status)); - } else if (level == LOW) { - tasklet_schedule(&sp->task); + } + else if (fill_rx_buffers(sp, i) == -ENOMEM) { + DBG_PRINT(ERR_DBG, "%s:Out of memory", + dev->name); + DBG_PRINT(ERR_DBG, " in Rx Intr!!\n"); + break; } } @@ -3697,29 +3748,37 @@ s2io_msix_ring_handle(int irq, void *dev_id, struct pt_regs *regs) { ring_info_t *ring = (ring_info_t *)dev_id; nic_t *sp = ring->nic; + struct net_device *dev = (struct net_device *) dev_id; int rxb_size, level, rng_n; atomic_inc(&sp->isr_cnt); rx_intr_handler(ring); rng_n = ring->ring_no; - rxb_size = atomic_read(&sp->rx_bufs_left[rng_n]); - level = rx_buffer_level(sp, rxb_size, rng_n); - - if ((level == PANIC) && (!TASKLET_IN_USE)) { - int ret; - DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", __FUNCTION__); - DBG_PRINT(INTR_DBG, "PANIC levels\n"); - if ((ret = fill_rx_buffers(sp, rng_n)) == -ENOMEM) { - DBG_PRINT(ERR_DBG, "Out of memory in %s", - __FUNCTION__); + if (!sp->lro) { + rxb_size = atomic_read(&sp->rx_bufs_left[rng_n]); + level = rx_buffer_level(sp, rxb_size, rng_n); + + if ((level == PANIC) && (!TASKLET_IN_USE)) { + int ret; + DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", __FUNCTION__); + DBG_PRINT(INTR_DBG, "PANIC levels\n"); + if ((ret = fill_rx_buffers(sp, rng_n)) == -ENOMEM) { + DBG_PRINT(ERR_DBG, "Out of memory in %s", + __FUNCTION__); + clear_bit(0, (&sp->tasklet_status)); + return IRQ_HANDLED; + } clear_bit(0, (&sp->tasklet_status)); - return IRQ_HANDLED; + } else if (level == LOW) { + tasklet_schedule(&sp->task); } - clear_bit(0, (&sp->tasklet_status)); - } else if (level == LOW) { - tasklet_schedule(&sp->task); } + else if (fill_rx_buffers(sp, rng_n) == -ENOMEM) { + DBG_PRINT(ERR_DBG, "%s:Out of memory", dev->name); + DBG_PRINT(ERR_DBG, " in Rx Intr!!\n"); + } + atomic_dec(&sp->isr_cnt); return IRQ_HANDLED; @@ -3875,24 +3934,33 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) */ #ifndef CONFIG_S2IO_NAPI for (i = 0; i < config->rx_ring_num; i++) { - int ret; - int rxb_size = atomic_read(&sp->rx_bufs_left[i]); - int level = rx_buffer_level(sp, rxb_size, i); - - if ((level == PANIC) && (!TASKLET_IN_USE)) { - DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", dev->name); - DBG_PRINT(INTR_DBG, "PANIC levels\n"); - if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) { - DBG_PRINT(ERR_DBG, "%s:Out of memory", - dev->name); - DBG_PRINT(ERR_DBG, " in ISR!!\n"); + if (!sp->lro) { + int ret; + int rxb_size = atomic_read(&sp->rx_bufs_left[i]); + int level = rx_buffer_level(sp, rxb_size, i); + + if ((level == PANIC) && (!TASKLET_IN_USE)) { + DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", + dev->name); + DBG_PRINT(INTR_DBG, "PANIC levels\n"); + if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) { + DBG_PRINT(ERR_DBG, "%s:Out of memory", + dev->name); + DBG_PRINT(ERR_DBG, " in ISR!!\n"); + clear_bit(0, (&sp->tasklet_status)); + atomic_dec(&sp->isr_cnt); + return IRQ_HANDLED; + } clear_bit(0, (&sp->tasklet_status)); - atomic_dec(&sp->isr_cnt); - return IRQ_HANDLED; + } else if (level == LOW) { + tasklet_schedule(&sp->task); } - clear_bit(0, (&sp->tasklet_status)); - } else if (level == LOW) { - tasklet_schedule(&sp->task); + } + else if (fill_rx_buffers(sp, i) == -ENOMEM) { + DBG_PRINT(ERR_DBG, "%s:Out of memory", + dev->name); + DBG_PRINT(ERR_DBG, " in Rx intr!!\n"); + break; } } #endif @@ -4129,7 +4197,7 @@ static void s2io_set_multicast(struct net_device *dev) * as defined in errno.h file on failure. */ -int s2io_set_mac_addr(struct net_device *dev, u8 * addr) +static int s2io_set_mac_addr(struct net_device *dev, u8 * addr) { nic_t *sp = dev->priv; XENA_dev_config_t __iomem *bar0 = sp->bar0; @@ -5044,6 +5112,7 @@ static void s2io_get_ethtool_stats(struct net_device *dev, int i = 0; nic_t *sp = dev->priv; StatInfo_t *stat_info = sp->mac_control.stats_info; + u64 tmp; s2io_updt_stats(sp); tmp_stats[i++] = @@ -5135,6 +5204,16 @@ static void s2io_get_ethtool_stats(struct net_device *dev, tmp_stats[i++] = 0; tmp_stats[i++] = stat_info->sw_stat.single_ecc_errs; tmp_stats[i++] = stat_info->sw_stat.double_ecc_errs; + tmp_stats[i++] = stat_info->sw_stat.clubbed_frms_cnt; + tmp_stats[i++] = stat_info->sw_stat.sending_both; + tmp_stats[i++] = stat_info->sw_stat.outof_sequence_pkts; + tmp_stats[i++] = stat_info->sw_stat.flush_max_pkts; + tmp = 0; + if (stat_info->sw_stat.num_aggregations) { + tmp = stat_info->sw_stat.sum_avg_pkts_aggregated; + do_div(tmp, stat_info->sw_stat.num_aggregations); + } + tmp_stats[i++] = tmp; } static int s2io_ethtool_get_regs_len(struct net_device *dev) @@ -5516,6 +5595,14 @@ static int s2io_card_up(nic_t * sp) /* Setting its receive mode */ s2io_set_multicast(dev); + if (sp->lro) { + /* Initialize max aggregatable pkts based on MTU */ + sp->lro_max_aggr_per_sess = ((1<<16) - 1) / dev->mtu; + /* Check if we can use(if specified) user provided value */ + if (lro_max_pkts < sp->lro_max_aggr_per_sess) + sp->lro_max_aggr_per_sess = lro_max_pkts; + } + /* Enable tasklet for the device */ tasklet_init(&sp->task, s2io_tasklet, (unsigned long) dev); @@ -5608,6 +5695,7 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) ((unsigned long) rxdp->Host_Control); int ring_no = ring_data->ring_no; u16 l3_csum, l4_csum; + lro_t *lro; skb->dev = dev; if (rxdp->Control_1 & RXD_T_CODE) { @@ -5656,7 +5744,8 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) skb_put(skb, buf2_len); } - if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && + if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && ((!sp->lro) || + (sp->lro && (!(rxdp->Control_1 & RXD_FRAME_IP_FRAG)))) && (sp->rx_csum)) { l3_csum = RXD_GET_L3_CKSUM(rxdp->Control_1); l4_csum = RXD_GET_L4_CKSUM(rxdp->Control_1); @@ -5667,6 +5756,54 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) * a flag in the RxD. */ skb->ip_summed = CHECKSUM_UNNECESSARY; + if (sp->lro) { + u32 tcp_len; + u8 *tcp; + int ret = 0; + + ret = s2io_club_tcp_session(skb->data, &tcp, + &tcp_len, &lro, rxdp, sp); + switch (ret) { + case 3: /* Begin anew */ + lro->parent = skb; + goto aggregate; + case 1: /* Aggregate */ + { + lro_append_pkt(sp, lro, + skb, tcp_len); + goto aggregate; + } + case 4: /* Flush session */ + { + lro_append_pkt(sp, lro, + skb, tcp_len); + queue_rx_frame(lro->parent); + clear_lro_session(lro); + sp->mac_control.stats_info-> + sw_stat.flush_max_pkts++; + goto aggregate; + } + case 2: /* Flush both */ + lro->parent->data_len = + lro->frags_len; + sp->mac_control.stats_info-> + sw_stat.sending_both++; + queue_rx_frame(lro->parent); + clear_lro_session(lro); + goto send_up; + case 0: /* sessions exceeded */ + case 5: /* + * First pkt in session not + * L3/L4 aggregatable + */ + break; + default: + DBG_PRINT(ERR_DBG, + "%s: Samadhana!!\n", + __FUNCTION__); + BUG(); + } + } } else { /* * Packet with erroneous checksum, let the @@ -5678,25 +5815,31 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) skb->ip_summed = CHECKSUM_NONE; } - skb->protocol = eth_type_trans(skb, dev); + if (!sp->lro) { + skb->protocol = eth_type_trans(skb, dev); #ifdef CONFIG_S2IO_NAPI - if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) { - /* Queueing the vlan frame to the upper layer */ - vlan_hwaccel_receive_skb(skb, sp->vlgrp, - RXD_GET_VLAN_TAG(rxdp->Control_2)); - } else { - netif_receive_skb(skb); - } + if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) { + /* Queueing the vlan frame to the upper layer */ + vlan_hwaccel_receive_skb(skb, sp->vlgrp, + RXD_GET_VLAN_TAG(rxdp->Control_2)); + } else { + netif_receive_skb(skb); + } #else - if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) { - /* Queueing the vlan frame to the upper layer */ - vlan_hwaccel_rx(skb, sp->vlgrp, - RXD_GET_VLAN_TAG(rxdp->Control_2)); - } else { - netif_rx(skb); - } + if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) { + /* Queueing the vlan frame to the upper layer */ + vlan_hwaccel_rx(skb, sp->vlgrp, + RXD_GET_VLAN_TAG(rxdp->Control_2)); + } else { + netif_rx(skb); + } #endif + } else { +send_up: + queue_rx_frame(skb); + } dev->last_rx = jiffies; +aggregate: atomic_dec(&sp->rx_bufs_left[ring_no]); return SUCCESS; } @@ -5714,7 +5857,7 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) * void. */ -void s2io_link(nic_t * sp, int link) +static void s2io_link(nic_t * sp, int link) { struct net_device *dev = (struct net_device *) sp->dev; @@ -5739,7 +5882,7 @@ void s2io_link(nic_t * sp, int link) * returns the revision ID of the device. */ -int get_xena_rev_id(struct pci_dev *pdev) +static int get_xena_rev_id(struct pci_dev *pdev) { u8 id = 0; int ret; @@ -5808,6 +5951,8 @@ module_param(indicate_max_pkts, int, 0); #endif module_param(rxsync_frequency, int, 0); module_param(intr_type, int, 0); +module_param(lro, int, 0); +module_param(lro_max_pkts, int, 0); /** * s2io_init_nic - Initialization of the adapter . @@ -5939,6 +6084,7 @@ Defaulting to INTA\n"); else sp->device_type = XFRAME_I_DEVICE; + sp->lro = lro; /* Initialize some PCI/PCI-X fields of the NIC. */ s2io_init_pci(sp); @@ -6242,6 +6388,10 @@ Defaulting to INTA\n"); DBG_PRINT(ERR_DBG, "%s: 3-Buffer mode support has been " "enabled\n",dev->name); + if (sp->lro) + DBG_PRINT(ERR_DBG, "%s: Large receive offload enabled\n", + dev->name); + /* Initialize device name */ strcpy(sp->name, dev->name); if (sp->device_type & XFRAME_II_DEVICE) @@ -6344,7 +6494,7 @@ int __init s2io_starter(void) * Description: This function is the cleanup routine for the driver. It unregist * ers the driver. */ -void s2io_closer(void) +static void s2io_closer(void) { pci_unregister_driver(&s2io_driver); DBG_PRINT(INIT_DBG, "cleanup done\n"); @@ -6352,3 +6502,318 @@ void s2io_closer(void) module_init(s2io_starter); module_exit(s2io_closer); + +static int check_L2_lro_capable(u8 *buffer, struct iphdr **ip, + struct tcphdr **tcp, RxD_t *rxdp) +{ + int ip_off; + u8 l2_type = (u8)((rxdp->Control_1 >> 37) & 0x7), ip_len; + + if (!(rxdp->Control_1 & RXD_FRAME_PROTO_TCP)) { + DBG_PRINT(INIT_DBG,"%s: Non-TCP frames not supported for LRO\n", + __FUNCTION__); + return -1; + } + + /* TODO: + * By default the VLAN field in the MAC is stripped by the card, if this + * feature is turned off in rx_pa_cfg register, then the ip_off field + * has to be shifted by a further 2 bytes + */ + switch (l2_type) { + case 0: /* DIX type */ + case 4: /* DIX type with VLAN */ + ip_off = HEADER_ETHERNET_II_802_3_SIZE; + break; + /* LLC, SNAP etc are considered non-mergeable */ + default: + return -1; + } + + *ip = (struct iphdr *)((u8 *)buffer + ip_off); + ip_len = (u8)((*ip)->ihl); + ip_len <<= 2; + *tcp = (struct tcphdr *)((unsigned long)*ip + ip_len); + + return 0; +} + +static int check_for_socket_match(lro_t *lro, struct iphdr *ip, + struct tcphdr *tcp) +{ + DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__); + if ((lro->iph->saddr != ip->saddr) || (lro->iph->daddr != ip->daddr) || + (lro->tcph->source != tcp->source) || (lro->tcph->dest != tcp->dest)) + return -1; + return 0; +} + +static inline int get_l4_pyld_length(struct iphdr *ip, struct tcphdr *tcp) +{ + return(ntohs(ip->tot_len) - (ip->ihl << 2) - (tcp->doff << 2)); +} + +static void initiate_new_session(lro_t *lro, u8 *l2h, + struct iphdr *ip, struct tcphdr *tcp, u32 tcp_pyld_len) +{ + DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__); + lro->l2h = l2h; + lro->iph = ip; + lro->tcph = tcp; + lro->tcp_next_seq = tcp_pyld_len + ntohl(tcp->seq); + lro->tcp_ack = ntohl(tcp->ack_seq); + lro->sg_num = 1; + lro->total_len = ntohs(ip->tot_len); + lro->frags_len = 0; + /* + * check if we saw TCP timestamp. Other consistency checks have + * already been done. + */ + if (tcp->doff == 8) { + u32 *ptr; + ptr = (u32 *)(tcp+1); + lro->saw_ts = 1; + lro->cur_tsval = *(ptr+1); + lro->cur_tsecr = *(ptr+2); + } + lro->in_use = 1; +} + +static void update_L3L4_header(nic_t *sp, lro_t *lro) +{ + struct iphdr *ip = lro->iph; + struct tcphdr *tcp = lro->tcph; + u16 nchk; + StatInfo_t *statinfo = sp->mac_control.stats_info; + DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__); + + /* Update L3 header */ + ip->tot_len = htons(lro->total_len); + ip->check = 0; + nchk = ip_fast_csum((u8 *)lro->iph, ip->ihl); + ip->check = nchk; + + /* Update L4 header */ + tcp->ack_seq = lro->tcp_ack; + tcp->window = lro->window; + + /* Update tsecr field if this session has timestamps enabled */ + if (lro->saw_ts) { + u32 *ptr = (u32 *)(tcp + 1); + *(ptr+2) = lro->cur_tsecr; + } + + /* Update counters required for calculation of + * average no. of packets aggregated. + */ + statinfo->sw_stat.sum_avg_pkts_aggregated += lro->sg_num; + statinfo->sw_stat.num_aggregations++; +} + +static void aggregate_new_rx(lro_t *lro, struct iphdr *ip, + struct tcphdr *tcp, u32 l4_pyld) +{ + DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__); + lro->total_len += l4_pyld; + lro->frags_len += l4_pyld; + lro->tcp_next_seq += l4_pyld; + lro->sg_num++; + + /* Update ack seq no. and window ad(from this pkt) in LRO object */ + lro->tcp_ack = tcp->ack_seq; + lro->window = tcp->window; + + if (lro->saw_ts) { + u32 *ptr; + /* Update tsecr and tsval from this packet */ + ptr = (u32 *) (tcp + 1); + lro->cur_tsval = *(ptr + 1); + lro->cur_tsecr = *(ptr + 2); + } +} + +static int verify_l3_l4_lro_capable(lro_t *l_lro, struct iphdr *ip, + struct tcphdr *tcp, u32 tcp_pyld_len) +{ + u8 *ptr; + + DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__); + + if (!tcp_pyld_len) { + /* Runt frame or a pure ack */ + return -1; + } + + if (ip->ihl != 5) /* IP has options */ + return -1; + + if (tcp->urg || tcp->psh || tcp->rst || tcp->syn || tcp->fin || + !tcp->ack) { + /* + * Currently recognize only the ack control word and + * any other control field being set would result in + * flushing the LRO session + */ + return -1; + } + + /* + * Allow only one TCP timestamp option. Don't aggregate if + * any other options are detected. + */ + if (tcp->doff != 5 && tcp->doff != 8) + return -1; + + if (tcp->doff == 8) { + ptr = (u8 *)(tcp + 1); + while (*ptr == TCPOPT_NOP) + ptr++; + if (*ptr != TCPOPT_TIMESTAMP || *(ptr+1) != TCPOLEN_TIMESTAMP) + return -1; + + /* Ensure timestamp value increases monotonically */ + if (l_lro) + if (l_lro->cur_tsval > *((u32 *)(ptr+2))) + return -1; + + /* timestamp echo reply should be non-zero */ + if (*((u32 *)(ptr+6)) == 0) + return -1; + } + + return 0; +} + +static int +s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, lro_t **lro, + RxD_t *rxdp, nic_t *sp) +{ + struct iphdr *ip; + struct tcphdr *tcph; + int ret = 0, i; + + if (!(ret = check_L2_lro_capable(buffer, &ip, (struct tcphdr **)tcp, + rxdp))) { + DBG_PRINT(INFO_DBG,"IP Saddr: %x Daddr: %x\n", + ip->saddr, ip->daddr); + } else { + return ret; + } + + tcph = (struct tcphdr *)*tcp; + *tcp_len = get_l4_pyld_length(ip, tcph); + for (i=0; i<MAX_LRO_SESSIONS; i++) { + lro_t *l_lro = &sp->lro0_n[i]; + if (l_lro->in_use) { + if (check_for_socket_match(l_lro, ip, tcph)) + continue; + /* Sock pair matched */ + *lro = l_lro; + + if ((*lro)->tcp_next_seq != ntohl(tcph->seq)) { + DBG_PRINT(INFO_DBG, "%s:Out of order. expected " + "0x%x, actual 0x%x\n", __FUNCTION__, + (*lro)->tcp_next_seq, + ntohl(tcph->seq)); + + sp->mac_control.stats_info-> + sw_stat.outof_sequence_pkts++; + ret = 2; + break; + } + + if (!verify_l3_l4_lro_capable(l_lro, ip, tcph,*tcp_len)) + ret = 1; /* Aggregate */ + else + ret = 2; /* Flush both */ + break; + } + } + + if (ret == 0) { + /* Before searching for available LRO objects, + * check if the pkt is L3/L4 aggregatable. If not + * don't create new LRO session. Just send this + * packet up. + */ + if (verify_l3_l4_lro_capable(NULL, ip, tcph, *tcp_len)) { + return 5; + } + + for (i=0; i<MAX_LRO_SESSIONS; i++) { + lro_t *l_lro = &sp->lro0_n[i]; + if (!(l_lro->in_use)) { + *lro = l_lro; + ret = 3; /* Begin anew */ + break; + } + } + } + + if (ret == 0) { /* sessions exceeded */ + DBG_PRINT(INFO_DBG,"%s:All LRO sessions already in use\n", + __FUNCTION__); + *lro = NULL; + return ret; + } + + switch (ret) { + case 3: + initiate_new_session(*lro, buffer, ip, tcph, *tcp_len); + break; + case 2: + update_L3L4_header(sp, *lro); + break; + case 1: + aggregate_new_rx(*lro, ip, tcph, *tcp_len); + if ((*lro)->sg_num == sp->lro_max_aggr_per_sess) { + update_L3L4_header(sp, *lro); + ret = 4; /* Flush the LRO */ + } + break; + default: + DBG_PRINT(ERR_DBG,"%s:Dont know, can't say!!\n", + __FUNCTION__); + break; + } + + return ret; +} + +static void clear_lro_session(lro_t *lro) +{ + static u16 lro_struct_size = sizeof(lro_t); + + memset(lro, 0, lro_struct_size); +} + +static void queue_rx_frame(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + + skb->protocol = eth_type_trans(skb, dev); +#ifdef CONFIG_S2IO_NAPI + netif_receive_skb(skb); +#else + netif_rx(skb); +#endif +} + +static void lro_append_pkt(nic_t *sp, lro_t *lro, struct sk_buff *skb, + u32 tcp_len) +{ + struct sk_buff *tmp, *first = lro->parent; + + first->len += tcp_len; + first->data_len = lro->frags_len; + skb_pull(skb, (skb->len - tcp_len)); + if ((tmp = skb_shinfo(first)->frag_list)) { + while (tmp->next) + tmp = tmp->next; + tmp->next = skb; + } + else + skb_shinfo(first)->frag_list = skb; + sp->mac_control.stats_info->sw_stat.clubbed_frms_cnt++; + return; +} |