Diffstat (limited to 'drivers/staging/octeon/ethernet-tx.c')
 drivers/staging/octeon/ethernet-tx.c | 441 ++++++++++++++++++++------------
 1 file changed, 260 insertions(+), 181 deletions(-)
diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c
index 535294105f6..afc2b734d55 100644
--- a/drivers/staging/octeon/ethernet-tx.c
+++ b/drivers/staging/octeon/ethernet-tx.c
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2007 Cavium Networks
+ * Copyright (c) 2003-2010 Cavium Networks
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -31,10 +31,6 @@
 #include <linux/etherdevice.h>
 #include <linux/ip.h>
 #include <linux/string.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/seq_file.h>
-#include <linux/proc_fs.h>
 #include <net/dst.h>
 #ifdef CONFIG_XFRM
 #include <linux/xfrm.h>
@@ -52,11 +48,14 @@
 #include "cvmx-wqe.h"
 #include "cvmx-fau.h"
+#include "cvmx-pip.h"
 #include "cvmx-pko.h"
 #include "cvmx-helper.h"
 
 #include "cvmx-gmxx-defs.h"
 
+#define CVM_OCT_SKB_CB(skb)	((u64 *)((skb)->cb))
+
 /*
  * You can define GET_SKBUFF_QOS() to override how the skbuff output
  * function determines which output queue is used. The default
@@ -68,12 +67,81 @@
 #define GET_SKBUFF_QOS(skb) 0
 #endif
 
+static void cvm_oct_tx_do_cleanup(unsigned long arg);
+static DECLARE_TASKLET(cvm_oct_tx_cleanup_tasklet, cvm_oct_tx_do_cleanup, 0);
+
+/* Maximum number of SKBs to try to free per xmit packet. */
+#define MAX_SKB_TO_FREE (MAX_OUT_QUEUE_DEPTH * 2)
+
+static inline int32_t cvm_oct_adjust_skb_to_free(int32_t skb_to_free, int fau)
+{
+        int32_t undo;
+        undo = skb_to_free > 0 ? MAX_SKB_TO_FREE : skb_to_free + MAX_SKB_TO_FREE;
+        if (undo > 0)
+                cvmx_fau_atomic_add32(fau, -undo);
+        skb_to_free = -skb_to_free > MAX_SKB_TO_FREE ? MAX_SKB_TO_FREE : -skb_to_free;
+        return skb_to_free;
+}
+
+static void cvm_oct_kick_tx_poll_watchdog(void)
+{
+        union cvmx_ciu_timx ciu_timx;
+        ciu_timx.u64 = 0;
+        ciu_timx.s.one_shot = 1;
+        ciu_timx.s.len = cvm_oct_tx_poll_interval;
+        cvmx_write_csr(CVMX_CIU_TIMX(1), ciu_timx.u64);
+}
+
+void cvm_oct_free_tx_skbs(struct net_device *dev)
+{
+        int32_t skb_to_free;
+        int qos, queues_per_port;
+        int total_freed = 0;
+        int total_remaining = 0;
+        unsigned long flags;
+        struct octeon_ethernet *priv = netdev_priv(dev);
+
+        queues_per_port = cvmx_pko_get_num_queues(priv->port);
+        /* Drain any pending packets in the free list */
+        for (qos = 0; qos < queues_per_port; qos++) {
+                if (skb_queue_len(&priv->tx_free_list[qos]) == 0)
+                        continue;
+                skb_to_free = cvmx_fau_fetch_and_add32(priv->fau+qos*4, MAX_SKB_TO_FREE);
+                skb_to_free = cvm_oct_adjust_skb_to_free(skb_to_free, priv->fau+qos*4);
+
+
+                total_freed += skb_to_free;
+                if (skb_to_free > 0) {
+                        struct sk_buff *to_free_list = NULL;
+                        spin_lock_irqsave(&priv->tx_free_list[qos].lock, flags);
+                        while (skb_to_free > 0) {
+                                struct sk_buff *t = __skb_dequeue(&priv->tx_free_list[qos]);
+                                t->next = to_free_list;
+                                to_free_list = t;
+                                skb_to_free--;
+                        }
+                        spin_unlock_irqrestore(&priv->tx_free_list[qos].lock, flags);
+                        /* Do the actual freeing outside of the lock. */
+                        while (to_free_list) {
+                                struct sk_buff *t = to_free_list;
+                                to_free_list = to_free_list->next;
+                                dev_kfree_skb_any(t);
+                        }
+                }
+                total_remaining += skb_queue_len(&priv->tx_free_list[qos]);
+        }
+        if (total_freed >= 0 && netif_queue_stopped(dev))
+                netif_wake_queue(dev);
+        if (total_remaining)
+                cvm_oct_kick_tx_poll_watchdog();
+}
+
 /**
- * Packet transmit
- *
+ * cvm_oct_xmit - transmit a packet
  * @skb:    Packet to send
  * @dev:    Device info structure
- * Returns Always returns zero
+ *
+ * Returns Always returns NETDEV_TX_OK
  */
 int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
 {
@@ -81,13 +149,15 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
         union cvmx_buf_ptr hw_buffer;
         uint64_t old_scratch;
         uint64_t old_scratch2;
-        int dropped;
         int qos;
-        int queue_it_up;
+        int i;
+        enum {QUEUE_CORE, QUEUE_HW, QUEUE_DROP} queue_type;
         struct octeon_ethernet *priv = netdev_priv(dev);
+        struct sk_buff *to_free_list;
         int32_t skb_to_free;
-        int32_t undo;
         int32_t buffers_to_free;
+        u32 total_to_clean;
+        unsigned long flags;
 #if REUSE_SKBUFFS_WITHOUT_FREE
         unsigned char *fpa_head;
 #endif
@@ -98,9 +168,6 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
          */
         prefetch(priv);
 
-        /* Start off assuming no drop */
-        dropped = 0;
-
         /*
          * The check on CVMX_PKO_QUEUES_PER_PORT_* is designed to
          * completely remove "qos" in the event neither interface
@@ -135,6 +202,28 @@
         }
 
         /*
+         * We have space for 6 segment pointers, If there will be more
+         * than that, we must linearize.
+         */
+        if (unlikely(skb_shinfo(skb)->nr_frags > 5)) {
+                if (unlikely(__skb_linearize(skb))) {
+                        queue_type = QUEUE_DROP;
+                        if (USE_ASYNC_IOBDMA) {
+                                /* Get the number of skbuffs in use by the hardware */
+                                CVMX_SYNCIOBDMA;
+                                skb_to_free = cvmx_scratch_read64(CVMX_SCR_SCRATCH);
+                        } else {
+                                /* Get the number of skbuffs in use by the hardware */
+                                skb_to_free = cvmx_fau_fetch_and_add32(priv->fau + qos * 4,
+                                                                       MAX_SKB_TO_FREE);
+                        }
+                        skb_to_free = cvm_oct_adjust_skb_to_free(skb_to_free, priv->fau + qos * 4);
+                        spin_lock_irqsave(&priv->tx_free_list[qos].lock, flags);
+                        goto skip_xmit;
+                }
+        }
+
+        /*
          * The CN3XXX series of parts has an errata (GMX-401) which
          * causes the GMX block to hang if a collision occurs towards
          * the end of a <68 byte packet. As a workaround for this, we
@@ -162,13 +251,6 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
                 }
         }
 
-        /* Build the PKO buffer pointer */
-        hw_buffer.u64 = 0;
-        hw_buffer.s.addr = cvmx_ptr_to_phys(skb->data);
-        hw_buffer.s.pool = 0;
-        hw_buffer.s.size =
-            (unsigned long)skb_end_pointer(skb) - (unsigned long)skb->head;
-
         /* Build the PKO command */
         pko_command.u64 = 0;
         pko_command.s.n2 = 1;   /* Don't pollute L2 with the outgoing packet */
@@ -178,7 +260,31 @@
         pko_command.s.subone0 = 1;
 
         pko_command.s.dontfree = 1;
-        pko_command.s.reg0 = priv->fau + qos * 4;
+
+        /* Build the PKO buffer pointer */
+        hw_buffer.u64 = 0;
+        if (skb_shinfo(skb)->nr_frags == 0) {
+                hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)skb->data);
+                hw_buffer.s.pool = 0;
+                hw_buffer.s.size = skb->len;
+        } else {
+                hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)skb->data);
+                hw_buffer.s.pool = 0;
+                hw_buffer.s.size = skb_headlen(skb);
+                CVM_OCT_SKB_CB(skb)[0] = hw_buffer.u64;
+                for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+                        struct skb_frag_struct *fs = skb_shinfo(skb)->frags + i;
+                        hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)(page_address(fs->page) + fs->page_offset));
+                        hw_buffer.s.size = fs->size;
+                        CVM_OCT_SKB_CB(skb)[i + 1] = hw_buffer.u64;
+                }
+                hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)CVM_OCT_SKB_CB(skb));
+                hw_buffer.s.size = skb_shinfo(skb)->nr_frags + 1;
+                pko_command.s.segs = skb_shinfo(skb)->nr_frags + 1;
+                pko_command.s.gather = 1;
+                goto dont_put_skbuff_in_hw;
+        }
+
         /*
          * See if we can put this skb in the FPA pool. Any strange
          * behavior from the Linux networking stack will most likely
@@ -190,7 +296,7 @@
          * shown a 25% increase in performance under some loads.
          */
 #if REUSE_SKBUFFS_WITHOUT_FREE
-        fpa_head = skb->head + 128 - ((unsigned long)skb->head & 0x7f);
+        fpa_head = skb->head + 256 - ((unsigned long)skb->head & 0x7f);
         if (unlikely(skb->data < fpa_head)) {
                 /*
                  * printk("TX buffer beginning can't meet FPA
@@ -248,10 +354,9 @@
          * We can use this buffer in the FPA.  We don't need the FAU
          * update anymore
          */
-        pko_command.s.reg0 = 0;
         pko_command.s.dontfree = 0;
 
-        hw_buffer.s.back = (skb->data - fpa_head) >> 7;
+        hw_buffer.s.back = ((unsigned long)skb->data >> 7) - ((unsigned long)fpa_head >> 7);
         *(struct sk_buff **)(fpa_head - sizeof(void *)) = skb;
 
         /*
@@ -272,16 +377,16 @@
         skb->tc_verd = 0;
 #endif /* CONFIG_NET_CLS_ACT */
 #endif /* CONFIG_NET_SCHED */
+#endif /* REUSE_SKBUFFS_WITHOUT_FREE */
 
 dont_put_skbuff_in_hw:
-#endif /* REUSE_SKBUFFS_WITHOUT_FREE */
 
         /* Check if we can use the hardware checksumming */
         if (USE_HW_TCPUDP_CHECKSUM && (skb->protocol == htons(ETH_P_IP)) &&
             (ip_hdr(skb)->version == 4) && (ip_hdr(skb)->ihl == 5) &&
             ((ip_hdr(skb)->frag_off == 0) || (ip_hdr(skb)->frag_off == 1 << 14))
-            && ((ip_hdr(skb)->protocol == IP_PROTOCOL_TCP)
-                || (ip_hdr(skb)->protocol == IP_PROTOCOL_UDP))) {
+            && ((ip_hdr(skb)->protocol == IPPROTO_TCP)
+                || (ip_hdr(skb)->protocol == IPPROTO_UDP))) {
                 /* Use hardware checksum calc */
                 pko_command.s.ipoffp1 = sizeof(struct ethhdr) + 1;
         }
@@ -299,89 +404,116 @@ dont_put_skbuff_in_hw:
                     cvmx_fau_fetch_and_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE, 0);
         }
 
-        /*
-         * We try to claim MAX_SKB_TO_FREE buffers.  If there were not
-         * that many available, we have to un-claim (undo) any that
-         * were in excess.  If skb_to_free is positive we will free
-         * that many buffers.
-         */
-        undo = skb_to_free > 0 ?
-                MAX_SKB_TO_FREE : skb_to_free + MAX_SKB_TO_FREE;
-        if (undo > 0)
-                cvmx_fau_atomic_add32(priv->fau+qos*4, -undo);
-        skb_to_free = -skb_to_free > MAX_SKB_TO_FREE ?
-                MAX_SKB_TO_FREE : -skb_to_free;
+        skb_to_free = cvm_oct_adjust_skb_to_free(skb_to_free, priv->fau+qos*4);
 
         /*
          * If we're sending faster than the receive can free them then
          * don't do the HW free.
          */
-        if ((buffers_to_free < -100) && !pko_command.s.dontfree) {
+        if ((buffers_to_free < -100) && !pko_command.s.dontfree)
                 pko_command.s.dontfree = 1;
-                pko_command.s.reg0 = priv->fau + qos * 4;
+
+        if (pko_command.s.dontfree) {
+                queue_type = QUEUE_CORE;
+                pko_command.s.reg0 = priv->fau+qos*4;
+        } else {
+                queue_type = QUEUE_HW;
         }
+        if (USE_ASYNC_IOBDMA)
+                cvmx_fau_async_fetch_and_add32(CVMX_SCR_SCRATCH, FAU_TOTAL_TX_TO_CLEAN, 1);
 
-        cvmx_pko_send_packet_prepare(priv->port, priv->queue + qos,
-                                     CVMX_PKO_LOCK_CMD_QUEUE);
+        spin_lock_irqsave(&priv->tx_free_list[qos].lock, flags);
 
         /* Drop this packet if we have too many already queued to the HW */
-        if (unlikely
-            (skb_queue_len(&priv->tx_free_list[qos]) >= MAX_OUT_QUEUE_DEPTH)) {
-                /*
-                   DEBUGPRINT("%s: Tx dropped. Too many queued\n", dev->name);
-                 */
-                dropped = 1;
+        if (unlikely(skb_queue_len(&priv->tx_free_list[qos]) >= MAX_OUT_QUEUE_DEPTH)) {
+                if (dev->tx_queue_len != 0) {
+                        /* Drop the lock when notifying the core.  */
+                        spin_unlock_irqrestore(&priv->tx_free_list[qos].lock, flags);
+                        netif_stop_queue(dev);
+                        spin_lock_irqsave(&priv->tx_free_list[qos].lock, flags);
+                } else {
+                        /* If not using normal queueing.  */
+                        queue_type = QUEUE_DROP;
+                        goto skip_xmit;
+                }
         }
+
+        cvmx_pko_send_packet_prepare(priv->port, priv->queue + qos,
+                                     CVMX_PKO_LOCK_NONE);
+
         /* Send the packet to the output queue */
-        else if (unlikely
-                 (cvmx_pko_send_packet_finish
-                  (priv->port, priv->queue + qos, pko_command, hw_buffer,
-                   CVMX_PKO_LOCK_CMD_QUEUE))) {
+        if (unlikely(cvmx_pko_send_packet_finish(priv->port,
+                                                 priv->queue + qos,
+                                                 pko_command, hw_buffer,
+                                                 CVMX_PKO_LOCK_NONE))) {
                 DEBUGPRINT("%s: Failed to send the packet\n", dev->name);
-                dropped = 1;
+                queue_type = QUEUE_DROP;
+        }
+skip_xmit:
+        to_free_list = NULL;
+
+        switch (queue_type) {
+        case QUEUE_DROP:
+                skb->next = to_free_list;
+                to_free_list = skb;
+                priv->stats.tx_dropped++;
+                break;
+        case QUEUE_HW:
+                cvmx_fau_atomic_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE, -1);
+                break;
+        case QUEUE_CORE:
+                __skb_queue_tail(&priv->tx_free_list[qos], skb);
+                break;
+        default:
+                BUG();
+        }
+
+        while (skb_to_free > 0) {
+                struct sk_buff *t = __skb_dequeue(&priv->tx_free_list[qos]);
+                t->next = to_free_list;
+                to_free_list = t;
+                skb_to_free--;
+        }
+
+        spin_unlock_irqrestore(&priv->tx_free_list[qos].lock, flags);
+
+        /* Do the actual freeing outside of the lock. */
+        while (to_free_list) {
+                struct sk_buff *t = to_free_list;
+                to_free_list = to_free_list->next;
+                dev_kfree_skb_any(t);
         }
 
         if (USE_ASYNC_IOBDMA) {
+                CVMX_SYNCIOBDMA;
+                total_to_clean = cvmx_scratch_read64(CVMX_SCR_SCRATCH);
                 /* Restore the scratch area */
                 cvmx_scratch_write64(CVMX_SCR_SCRATCH, old_scratch);
                 cvmx_scratch_write64(CVMX_SCR_SCRATCH + 8, old_scratch2);
-        }
-
-        queue_it_up = 0;
-        if (unlikely(dropped)) {
-                dev_kfree_skb_any(skb);
-                priv->stats.tx_dropped++;
         } else {
-                if (USE_SKBUFFS_IN_HW) {
-                        /* Put this packet on the queue to be freed later */
-                        if (pko_command.s.dontfree)
-                                queue_it_up = 1;
-                        else
-                                cvmx_fau_atomic_add32
-                                    (FAU_NUM_PACKET_BUFFERS_TO_FREE, -1);
-                } else {
-                        /* Put this packet on the queue to be freed later */
-                        queue_it_up = 1;
-                }
+                total_to_clean = cvmx_fau_fetch_and_add32(FAU_TOTAL_TX_TO_CLEAN, 1);
         }
 
-        if (queue_it_up) {
-                spin_lock(&priv->tx_free_list[qos].lock);
-                __skb_queue_tail(&priv->tx_free_list[qos], skb);
-                cvm_oct_free_tx_skbs(priv, skb_to_free, qos, 0);
-                spin_unlock(&priv->tx_free_list[qos].lock);
-        } else {
-                cvm_oct_free_tx_skbs(priv, skb_to_free, qos, 1);
+        if (total_to_clean & 0x3ff) {
+                /*
+                 * Schedule the cleanup tasklet every 1024 packets for
+                 * the pathological case of high traffic on one port
+                 * delaying clean up of packets on a different port
+                 * that is blocked waiting for the cleanup.
+                 */
+                tasklet_schedule(&cvm_oct_tx_cleanup_tasklet);
         }
 
-        return 0;
+        cvm_oct_kick_tx_poll_watchdog();
+
+        return NETDEV_TX_OK;
 }
 
 /**
- * Packet transmit to the POW
- *
+ * cvm_oct_xmit_pow - transmit a packet to the POW
  * @skb:    Packet to send
  * @dev:    Device info structure
+ *
  * Returns Always returns zero
  */
 int cvm_oct_xmit_pow(struct sk_buff *skb, struct net_device *dev)
@@ -459,8 +591,8 @@ int cvm_oct_xmit_pow(struct sk_buff *skb, struct net_device *dev)
         work->word2.s.dec_ipcomp = 0;   /* FIXME */
 #endif
         work->word2.s.tcp_or_udp =
-            (ip_hdr(skb)->protocol == IP_PROTOCOL_TCP)
-            || (ip_hdr(skb)->protocol == IP_PROTOCOL_UDP);
+            (ip_hdr(skb)->protocol == IPPROTO_TCP)
+            || (ip_hdr(skb)->protocol == IPPROTO_UDP);
 #if 0
         /* FIXME */
         work->word2.s.dec_ipsec = 0;
@@ -529,116 +661,63 @@
 }
 
 /**
- * Transmit a work queue entry out of the ethernet port. Both
- * the work queue entry and the packet data can optionally be
- * freed. The work will be freed on error as well.
- *
- * @dev:     Device to transmit out.
- * @work_queue_entry:
- *           Work queue entry to send
- * @do_free: True if the work queue entry and packet data should be
- *           freed. If false, neither will be freed.
- * @qos:     Index into the queues for this port to transmit on. This
- *           is used to implement QoS if their are multiple queues per
- *           port. This parameter must be between 0 and the number of
- *           queues per port minus 1. Values outside of this range will
- *           be change to zero.
+ * cvm_oct_tx_shutdown_dev - free all skb that are currently queued for TX.
+ * @dev: Device being shutdown
  *
- * Returns Zero on success, negative on failure.
  */
-int cvm_oct_transmit_qos(struct net_device *dev, void *work_queue_entry,
-                         int do_free, int qos)
+void cvm_oct_tx_shutdown_dev(struct net_device *dev)
 {
-        unsigned long flags;
-        union cvmx_buf_ptr hw_buffer;
-        cvmx_pko_command_word0_t pko_command;
-        int dropped;
         struct octeon_ethernet *priv = netdev_priv(dev);
-        cvmx_wqe_t *work = work_queue_entry;
+        unsigned long flags;
+        int qos;
 
-        if (!(dev->flags & IFF_UP)) {
-                DEBUGPRINT("%s: Device not up\n", dev->name);
-                if (do_free)
-                        cvm_oct_free_work(work);
-                return -1;
+        for (qos = 0; qos < 16; qos++) {
+                spin_lock_irqsave(&priv->tx_free_list[qos].lock, flags);
+                while (skb_queue_len(&priv->tx_free_list[qos]))
+                        dev_kfree_skb_any(__skb_dequeue
+                                          (&priv->tx_free_list[qos]));
+                spin_unlock_irqrestore(&priv->tx_free_list[qos].lock, flags);
         }
+}
 
-        /* The check on CVMX_PKO_QUEUES_PER_PORT_* is designed to completely
-           remove "qos" in the event neither interface supports
-           multiple queues per port */
-        if ((CVMX_PKO_QUEUES_PER_PORT_INTERFACE0 > 1) ||
-            (CVMX_PKO_QUEUES_PER_PORT_INTERFACE1 > 1)) {
-                if (qos <= 0)
-                        qos = 0;
-                else if (qos >= cvmx_pko_get_num_queues(priv->port))
-                        qos = 0;
-        } else
-                qos = 0;
-
-        /* Start off assuming no drop */
-        dropped = 0;
-
-        local_irq_save(flags);
-        cvmx_pko_send_packet_prepare(priv->port, priv->queue + qos,
-                                     CVMX_PKO_LOCK_CMD_QUEUE);
-
-        /* Build the PKO buffer pointer */
-        hw_buffer.u64 = 0;
-        hw_buffer.s.addr = work->packet_ptr.s.addr;
-        hw_buffer.s.pool = CVMX_FPA_PACKET_POOL;
-        hw_buffer.s.size = CVMX_FPA_PACKET_POOL_SIZE;
-        hw_buffer.s.back = work->packet_ptr.s.back;
+static void cvm_oct_tx_do_cleanup(unsigned long arg)
+{
+        int port;
 
-        /* Build the PKO command */
-        pko_command.u64 = 0;
-        pko_command.s.n2 = 1;   /* Don't pollute L2 with the outgoing packet */
-        pko_command.s.dontfree = !do_free;
-        pko_command.s.segs = work->word2.s.bufs;
-        pko_command.s.total_bytes = work->len;
+        for (port = 0; port < TOTAL_NUMBER_OF_PORTS; port++) {
+                if (cvm_oct_device[port]) {
+                        struct net_device *dev = cvm_oct_device[port];
+                        cvm_oct_free_tx_skbs(dev);
+                }
+        }
+}
 
-        /* Check if we can use the hardware checksumming */
-        if (unlikely(work->word2.s.not_IP || work->word2.s.IP_exc))
-                pko_command.s.ipoffp1 = 0;
-        else
-                pko_command.s.ipoffp1 = sizeof(struct ethhdr) + 1;
+static irqreturn_t cvm_oct_tx_cleanup_watchdog(int cpl, void *dev_id)
+{
+        /* Disable the interrupt. */
+        cvmx_write_csr(CVMX_CIU_TIMX(1), 0);
+        /* Do the work in the tasklet. */
+        tasklet_schedule(&cvm_oct_tx_cleanup_tasklet);
+        return IRQ_HANDLED;
+}
 
-        /* Send the packet to the output queue */
-        if (unlikely
-            (cvmx_pko_send_packet_finish
-             (priv->port, priv->queue + qos, pko_command, hw_buffer,
-              CVMX_PKO_LOCK_CMD_QUEUE))) {
-                DEBUGPRINT("%s: Failed to send the packet\n", dev->name);
-                dropped = -1;
-        }
-        local_irq_restore(flags);
+void cvm_oct_tx_initialize(void)
+{
+        int i;
 
-        if (unlikely(dropped)) {
-                if (do_free)
-                        cvm_oct_free_work(work);
-                priv->stats.tx_dropped++;
-        } else if (do_free)
-                cvmx_fpa_free(work, CVMX_FPA_WQE_POOL, DONT_WRITEBACK(1));
+        /* Disable the interrupt. */
+        cvmx_write_csr(CVMX_CIU_TIMX(1), 0);
+        /* Register an IRQ hander for to receive CIU_TIMX(1) interrupts */
+        i = request_irq(OCTEON_IRQ_TIMER1,
+                        cvm_oct_tx_cleanup_watchdog, 0,
                        "Ethernet", cvm_oct_device);
 
-        return dropped;
+        if (i)
+                panic("Could not acquire Ethernet IRQ %d\n", OCTEON_IRQ_TIMER1);
 }
-EXPORT_SYMBOL(cvm_oct_transmit_qos);
 
-/**
- * This function frees all skb that are currently queued for TX.
- *
- * @dev:    Device being shutdown
- */
-void cvm_oct_tx_shutdown(struct net_device *dev)
+void cvm_oct_tx_shutdown(void)
 {
-        struct octeon_ethernet *priv = netdev_priv(dev);
-        unsigned long flags;
-        int qos;
-
-        for (qos = 0; qos < 16; qos++) {
-                spin_lock_irqsave(&priv->tx_free_list[qos].lock, flags);
-                while (skb_queue_len(&priv->tx_free_list[qos]))
-                        dev_kfree_skb_any(__skb_dequeue
-                                          (&priv->tx_free_list[qos]));
-                spin_unlock_irqrestore(&priv->tx_free_list[qos].lock, flags);
-        }
+        /* Free the interrupt handler */
+        free_irq(OCTEON_IRQ_TIMER1, cvm_oct_device);
 }
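
A note on the new linearization test: the nr_frags > 5 limit follows directly from the new CVM_OCT_SKB_CB() macro. skb->cb is 48 bytes, which is room for exactly six u64 PKO buffer words: one for the linear head plus up to five page fragments. A compile-time sketch of that arithmetic (the OCT_* names are hypothetical; 48 mirrors the size of sk_buff's cb[] array):

#include <stdint.h>

#define OCT_SKB_CB_BYTES 48	/* assumed: sizeof(((struct sk_buff *)0)->cb) */
#define OCT_MAX_SEGS (OCT_SKB_CB_BYTES / sizeof(uint64_t))

/* One u64 segment pointer for the linear head leaves room for five
 * fragments, hence the unlikely(skb_shinfo(skb)->nr_frags > 5) test
 * that forces __skb_linearize() in cvm_oct_xmit(). */
_Static_assert(OCT_MAX_SEGS == 6, "head + 5 frags fill the control block");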
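The claim/undo arithmetic that the patch hoists into cvm_oct_adjust_skb_to_free() is the subtlest part of the change. Below is a stand-alone model of it, not the driver's code: counter, claim() and adjust() are hypothetical stand-ins for the FAU register, cvmx_fau_fetch_and_add32() and the helper, and MAX_SKB_TO_FREE is shrunk to 64 for the example.

#include <assert.h>
#include <stdint.h>

#define MAX_SKB_TO_FREE 64	/* stands in for MAX_OUT_QUEUE_DEPTH * 2 */

static int32_t counter;		/* in-memory stand-in for the FAU register */

/* Models cvmx_fau_fetch_and_add32(fau, MAX_SKB_TO_FREE): return the old
 * value while speculatively claiming up to MAX_SKB_TO_FREE completions. */
static int32_t claim(void)
{
	int32_t old = counter;
	counter += MAX_SKB_TO_FREE;
	return old;
}

/* Same arithmetic as cvm_oct_adjust_skb_to_free(): give back the part of
 * the claim that was not available, and report how many skbs to free. */
static int32_t adjust(int32_t skb_to_free)
{
	int32_t undo = skb_to_free > 0 ?
		MAX_SKB_TO_FREE : skb_to_free + MAX_SKB_TO_FREE;
	if (undo > 0)
		counter -= undo;
	return -skb_to_free > MAX_SKB_TO_FREE ? MAX_SKB_TO_FREE : -skb_to_free;
}

int main(void)
{
	/* Hardware subtracts 1 per completed transmit, so -10 means ten
	 * skbs are waiting to be freed. */
	counter = -10;
	assert(adjust(claim()) == 10);	/* free all ten... */
	assert(counter == 0);		/* ...and undo the 54 over-claimed */

	counter = -100;			/* more completions than one pass takes */
	assert(adjust(claim()) == MAX_SKB_TO_FREE);
	assert(counter == -36);		/* 36 remain for the next pass */
	return 0;
}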
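Both cvm_oct_free_tx_skbs() and the skip_xmit path in cvm_oct_xmit() use the same locking discipline: unlink the completed skbs onto a private singly linked to_free_list while holding the queue spinlock, then call dev_kfree_skb_any() only after the lock is dropped. A generic sketch of that pattern (plain C with pthreads; node, pending and free_completed() are hypothetical stand-ins for sk_buff, the tx_free_list and the drain loop):

#include <pthread.h>
#include <stdlib.h>

struct node {			/* stands in for struct sk_buff */
	struct node *next;
};

static struct node *pending;	/* stands in for priv->tx_free_list[qos] */
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void free_completed(int budget)
{
	struct node *to_free_list = NULL;

	/* Hold the lock only long enough to unlink the nodes. */
	pthread_mutex_lock(&list_lock);
	while (budget-- > 0 && pending) {
		struct node *t = pending;
		pending = t->next;
		t->next = to_free_list;
		to_free_list = t;
	}
	pthread_mutex_unlock(&list_lock);

	/* The potentially slow frees run with the lock dropped, so a
	 * concurrent transmit path is never blocked behind them. */
	while (to_free_list) {
		struct node *t = to_free_list;
		to_free_list = to_free_list->next;
		free(t);
	}
}

int main(void)
{
	for (int i = 0; i < 8; i++) {	/* queue a few fake completions */
		struct node *n = malloc(sizeof(*n));
		n->next = pending;
		pending = n;
	}
	free_completed(8);
	return pending == NULL ? 0 : 1;
}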