From ea866e6526b8a2ead92875732d41b26fdb470312 Mon Sep 17 00:00:00 2001 From: Santiago Leon Date: Thu, 24 Jul 2008 04:34:23 +1000 Subject: ibmveth: Automatically enable larger rx buffer pools for larger mtu Activates larger rx buffer pools when the MTU is changed to a larger value. This patch de-activates the large rx buffer pools when the MTU changes to a smaller value. Signed-off-by: Santiago Leon Signed-off-by: Robert Jennings Acked-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- drivers/net/ibmveth.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'drivers/net/ibmveth.c') diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 00527805e4f..007ca8735a9 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -1054,7 +1054,6 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) { struct ibmveth_adapter *adapter = dev->priv; int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH; - int reinit = 0; int i, rc; if (new_mtu < IBMVETH_MAX_MTU) @@ -1067,15 +1066,21 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) if (i == IbmVethNumBufferPools) return -EINVAL; + /* Deactivate all the buffer pools so that the next loop can activate + only the buffer pools necessary to hold the new MTU */ + for (i = 0; i < IbmVethNumBufferPools; i++) + if (adapter->rx_buff_pool[i].active) { + ibmveth_free_buffer_pool(adapter, + &adapter->rx_buff_pool[i]); + adapter->rx_buff_pool[i].active = 0; + } + /* Look for an active buffer pool that can hold the new MTU */ for(i = 0; irx_buff_pool[i].active) { - adapter->rx_buff_pool[i].active = 1; - reinit = 1; - } + adapter->rx_buff_pool[i].active = 1; if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) { - if (reinit && netif_running(adapter->netdev)) { + if (netif_running(adapter->netdev)) { adapter->pool_config = 1; ibmveth_close(adapter->netdev); adapter->pool_config = 0; @@ -1402,14 +1407,15 @@ const char * buf, size_t count) return -EPERM; } - pool->active = 0; if (netif_running(netdev)) { adapter->pool_config = 1; ibmveth_close(netdev); + pool->active = 0; adapter->pool_config = 0; if ((rc = ibmveth_open(netdev))) return rc; } + pool->active = 0; } } else if (attr == &veth_num_attr) { if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) -- cgit v1.2.3 From 1096d63d8e7d226630706e15648705d0187787e4 Mon Sep 17 00:00:00 2001 From: Robert Jennings Date: Thu, 24 Jul 2008 04:34:52 +1000 Subject: ibmveth: enable driver for CMO Enable ibmveth for Cooperative Memory Overcommitment (CMO). For this driver it means calculating a desired amount of IO memory based on the current MTU and updating this value with the bus when MTU changes occur. Because DMA mappings can fail, we have added a bounce buffer for temporary cases where the driver can not map IO memory for the buffer pool. The following changes are made to enable the driver for CMO: * DMA mapping errors will not result in error messages if entitlement has been exceeded and resources were not available. * DMA mapping errors are handled gracefully, ibmveth_replenish_buffer_pool() is corrected to check the return from dma_map_single and fail gracefully. * The driver will have a get_desired_dma function defined to function in a CMO environment. * When the MTU is changed, the driver will update the device IO entitlement Signed-off-by: Robert Jennings Signed-off-by: Brian King Signed-off-by: Santiago Leon Acked-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- drivers/net/ibmveth.c | 169 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 135 insertions(+), 34 deletions(-) (limited to 'drivers/net/ibmveth.c') diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 007ca8735a9..e5a6e2e8454 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -33,6 +33,7 @@ */ #include +#include #include #include #include @@ -52,7 +53,9 @@ #include #include #include +#include #include +#include #include #include "ibmveth.h" @@ -94,8 +97,10 @@ static void ibmveth_proc_register_adapter(struct ibmveth_adapter *adapter); static void ibmveth_proc_unregister_adapter(struct ibmveth_adapter *adapter); static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance); static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter); +static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev); static struct kobj_type ktype_veth_pool; + #ifdef CONFIG_PROC_FS #define IBMVETH_PROC_DIR "ibmveth" static struct proc_dir_entry *ibmveth_proc_dir; @@ -226,16 +231,16 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc u32 i; u32 count = pool->size - atomic_read(&pool->available); u32 buffers_added = 0; + struct sk_buff *skb; + unsigned int free_index, index; + u64 correlator; + unsigned long lpar_rc; + dma_addr_t dma_addr; mb(); for(i = 0; i < count; ++i) { - struct sk_buff *skb; - unsigned int free_index, index; - u64 correlator; union ibmveth_buf_desc desc; - unsigned long lpar_rc; - dma_addr_t dma_addr; skb = alloc_skb(pool->buff_size, GFP_ATOMIC); @@ -255,6 +260,9 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, pool->buff_size, DMA_FROM_DEVICE); + if (dma_mapping_error(dma_addr)) + goto failure; + pool->free_map[free_index] = IBM_VETH_INVALID_MAP; pool->dma_addr[index] = dma_addr; pool->skbuff[index] = skb; @@ -267,25 +275,32 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc); - if(lpar_rc != H_SUCCESS) { - pool->free_map[free_index] = index; - pool->skbuff[index] = NULL; - if (pool->consumer_index == 0) - pool->consumer_index = pool->size - 1; - else - pool->consumer_index--; - dma_unmap_single(&adapter->vdev->dev, - pool->dma_addr[index], pool->buff_size, - DMA_FROM_DEVICE); - dev_kfree_skb_any(skb); - adapter->replenish_add_buff_failure++; - break; - } else { + if (lpar_rc != H_SUCCESS) + goto failure; + else { buffers_added++; adapter->replenish_add_buff_success++; } } + mb(); + atomic_add(buffers_added, &(pool->available)); + return; + +failure: + pool->free_map[free_index] = index; + pool->skbuff[index] = NULL; + if (pool->consumer_index == 0) + pool->consumer_index = pool->size - 1; + else + pool->consumer_index--; + if (!dma_mapping_error(dma_addr)) + dma_unmap_single(&adapter->vdev->dev, + pool->dma_addr[index], pool->buff_size, + DMA_FROM_DEVICE); + dev_kfree_skb_any(skb); + adapter->replenish_add_buff_failure++; + mb(); atomic_add(buffers_added, &(pool->available)); } @@ -297,7 +312,7 @@ static void ibmveth_replenish_task(struct ibmveth_adapter *adapter) adapter->replenish_task_cycles++; - for(i = 0; i < IbmVethNumBufferPools; i++) + for (i = (IbmVethNumBufferPools - 1); i >= 0; i--) if(adapter->rx_buff_pool[i].active) ibmveth_replenish_buffer_pool(adapter, &adapter->rx_buff_pool[i]); @@ -472,6 +487,18 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter) if (adapter->rx_buff_pool[i].active) ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[i]); + + if (adapter->bounce_buffer != NULL) { + if (!dma_mapping_error(adapter->bounce_buffer_dma)) { + dma_unmap_single(&adapter->vdev->dev, + adapter->bounce_buffer_dma, + adapter->netdev->mtu + IBMVETH_BUFF_OH, + DMA_BIDIRECTIONAL); + adapter->bounce_buffer_dma = DMA_ERROR_CODE; + } + kfree(adapter->bounce_buffer); + adapter->bounce_buffer = NULL; + } } static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter, @@ -607,6 +634,24 @@ static int ibmveth_open(struct net_device *netdev) return rc; } + adapter->bounce_buffer = + kmalloc(netdev->mtu + IBMVETH_BUFF_OH, GFP_KERNEL); + if (!adapter->bounce_buffer) { + ibmveth_error_printk("unable to allocate bounce buffer\n"); + ibmveth_cleanup(adapter); + napi_disable(&adapter->napi); + return -ENOMEM; + } + adapter->bounce_buffer_dma = + dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer, + netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL); + if (dma_mapping_error(adapter->bounce_buffer_dma)) { + ibmveth_error_printk("unable to map bounce buffer\n"); + ibmveth_cleanup(adapter); + napi_disable(&adapter->napi); + return -ENOMEM; + } + ibmveth_debug_printk("initial replenish cycle\n"); ibmveth_interrupt(netdev->irq, netdev); @@ -853,10 +898,12 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) unsigned int tx_packets = 0; unsigned int tx_send_failed = 0; unsigned int tx_map_failed = 0; + int used_bounce = 0; + unsigned long data_dma_addr; desc.fields.flags_len = IBMVETH_BUF_VALID | skb->len; - desc.fields.address = dma_map_single(&adapter->vdev->dev, skb->data, - skb->len, DMA_TO_DEVICE); + data_dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, + skb->len, DMA_TO_DEVICE); if (skb->ip_summed == CHECKSUM_PARTIAL && ip_hdr(skb)->protocol != IPPROTO_TCP && skb_checksum_help(skb)) { @@ -875,12 +922,16 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) buf[1] = 0; } - if (dma_mapping_error(desc.fields.address)) { - ibmveth_error_printk("tx: unable to map xmit buffer\n"); + if (dma_mapping_error(data_dma_addr)) { + if (!firmware_has_feature(FW_FEATURE_CMO)) + ibmveth_error_printk("tx: unable to map xmit buffer\n"); + skb_copy_from_linear_data(skb, adapter->bounce_buffer, + skb->len); + desc.fields.address = adapter->bounce_buffer_dma; tx_map_failed++; - tx_dropped++; - goto out; - } + used_bounce = 1; + } else + desc.fields.address = data_dma_addr; /* send the frame. Arbitrarily set retrycount to 1024 */ correlator = 0; @@ -904,8 +955,9 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) netdev->trans_start = jiffies; } - dma_unmap_single(&adapter->vdev->dev, desc.fields.address, - skb->len, DMA_TO_DEVICE); + if (!used_bounce) + dma_unmap_single(&adapter->vdev->dev, data_dma_addr, + skb->len, DMA_TO_DEVICE); out: spin_lock_irqsave(&adapter->stats_lock, flags); netdev->stats.tx_dropped += tx_dropped; @@ -1053,8 +1105,9 @@ static void ibmveth_set_multicast_list(struct net_device *netdev) static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) { struct ibmveth_adapter *adapter = dev->priv; + struct vio_dev *viodev = adapter->vdev; int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH; - int i, rc; + int i; if (new_mtu < IBMVETH_MAX_MTU) return -EINVAL; @@ -1085,10 +1138,15 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) ibmveth_close(adapter->netdev); adapter->pool_config = 0; dev->mtu = new_mtu; - if ((rc = ibmveth_open(adapter->netdev))) - return rc; - } else - dev->mtu = new_mtu; + vio_cmo_set_dev_desired(viodev, + ibmveth_get_desired_dma + (viodev)); + return ibmveth_open(adapter->netdev); + } + dev->mtu = new_mtu; + vio_cmo_set_dev_desired(viodev, + ibmveth_get_desired_dma + (viodev)); return 0; } } @@ -1103,6 +1161,46 @@ static void ibmveth_poll_controller(struct net_device *dev) } #endif +/** + * ibmveth_get_desired_dma - Calculate IO memory desired by the driver + * + * @vdev: struct vio_dev for the device whose desired IO mem is to be returned + * + * Return value: + * Number of bytes of IO data the driver will need to perform well. + */ +static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev) +{ + struct net_device *netdev = dev_get_drvdata(&vdev->dev); + struct ibmveth_adapter *adapter; + unsigned long ret; + int i; + int rxqentries = 1; + + /* netdev inits at probe time along with the structures we need below*/ + if (netdev == NULL) + return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT); + + adapter = netdev_priv(netdev); + + ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE; + ret += IOMMU_PAGE_ALIGN(netdev->mtu); + + for (i = 0; i < IbmVethNumBufferPools; i++) { + /* add the size of the active receive buffers */ + if (adapter->rx_buff_pool[i].active) + ret += + adapter->rx_buff_pool[i].size * + IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i]. + buff_size); + rxqentries += adapter->rx_buff_pool[i].size; + } + /* add the size of the receive queue entries */ + ret += IOMMU_PAGE_ALIGN(rxqentries * sizeof(struct ibmveth_rx_q_entry)); + + return ret; +} + static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) { int rc, i; @@ -1247,6 +1345,8 @@ static int __devexit ibmveth_remove(struct vio_dev *dev) ibmveth_proc_unregister_adapter(adapter); free_netdev(netdev); + dev_set_drvdata(&dev->dev, NULL); + return 0; } @@ -1491,6 +1591,7 @@ static struct vio_driver ibmveth_driver = { .id_table = ibmveth_device_table, .probe = ibmveth_probe, .remove = ibmveth_remove, + .get_desired_dma = ibmveth_get_desired_dma, .driver = { .name = ibmveth_driver_name, .owner = THIS_MODULE, -- cgit v1.2.3 From 8d8bb39b9eba32dd70e87fd5ad5c5dd4ba118e06 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 25 Jul 2008 19:44:49 -0700 Subject: dma-mapping: add the device argument to dma_mapping_error() Add per-device dma_mapping_ops support for CONFIG_X86_64 as POWER architecture does: This enables us to cleanly fix the Calgary IOMMU issue that some devices are not behind the IOMMU (http://lkml.org/lkml/2008/5/8/423). I think that per-device dma_mapping_ops support would be also helpful for KVM people to support PCI passthrough but Andi thinks that this makes it difficult to support the PCI passthrough (see the above thread). So I CC'ed this to KVM camp. Comments are appreciated. A pointer to dma_mapping_ops to struct dev_archdata is added. If the pointer is non NULL, DMA operations in asm/dma-mapping.h use it. If it's NULL, the system-wide dma_ops pointer is used as before. If it's useful for KVM people, I plan to implement a mechanism to register a hook called when a new pci (or dma capable) device is created (it works with hot plugging). It enables IOMMUs to set up an appropriate dma_mapping_ops per device. The major obstacle is that dma_mapping_error doesn't take a pointer to the device unlike other DMA operations. So x86 can't have dma_mapping_ops per device. Note all the POWER IOMMUs use the same dma_mapping_error function so this is not a problem for POWER but x86 IOMMUs use different dma_mapping_error functions. The first patch adds the device argument to dma_mapping_error. The patch is trivial but large since it touches lots of drivers and dma-mapping.h in all the architecture. This patch: dma_mapping_error() doesn't take a pointer to the device unlike other DMA operations. So we can't have dma_mapping_ops per device. Note that POWER already has dma_mapping_ops per device but all the POWER IOMMUs use the same dma_mapping_error function. x86 IOMMUs use device argument. [akpm@linux-foundation.org: fix sge] [akpm@linux-foundation.org: fix svc_rdma] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix bnx2x] [akpm@linux-foundation.org: fix s2io] [akpm@linux-foundation.org: fix pasemi_mac] [akpm@linux-foundation.org: fix sdhci] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix sparc] [akpm@linux-foundation.org: fix ibmvscsi] Signed-off-by: FUJITA Tomonori Cc: Muli Ben-Yehuda Cc: Andi Kleen Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Avi Kivity Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/net/ibmveth.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) (limited to 'drivers/net/ibmveth.c') diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index e5a6e2e8454..91ec9fdc718 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -260,7 +260,7 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, pool->buff_size, DMA_FROM_DEVICE); - if (dma_mapping_error(dma_addr)) + if (dma_mapping_error((&adapter->vdev->dev, dma_addr)) goto failure; pool->free_map[free_index] = IBM_VETH_INVALID_MAP; @@ -294,7 +294,7 @@ failure: pool->consumer_index = pool->size - 1; else pool->consumer_index--; - if (!dma_mapping_error(dma_addr)) + if (!dma_mapping_error((&adapter->vdev->dev, dma_addr)) dma_unmap_single(&adapter->vdev->dev, pool->dma_addr[index], pool->buff_size, DMA_FROM_DEVICE); @@ -448,11 +448,11 @@ static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter) static void ibmveth_cleanup(struct ibmveth_adapter *adapter) { int i; + struct device *dev = &adapter->vdev->dev; if(adapter->buffer_list_addr != NULL) { - if(!dma_mapping_error(adapter->buffer_list_dma)) { - dma_unmap_single(&adapter->vdev->dev, - adapter->buffer_list_dma, 4096, + if (!dma_mapping_error(dev, adapter->buffer_list_dma)) { + dma_unmap_single(dev, adapter->buffer_list_dma, 4096, DMA_BIDIRECTIONAL); adapter->buffer_list_dma = DMA_ERROR_CODE; } @@ -461,9 +461,8 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter) } if(adapter->filter_list_addr != NULL) { - if(!dma_mapping_error(adapter->filter_list_dma)) { - dma_unmap_single(&adapter->vdev->dev, - adapter->filter_list_dma, 4096, + if (!dma_mapping_error(dev, adapter->filter_list_dma)) { + dma_unmap_single(dev, adapter->filter_list_dma, 4096, DMA_BIDIRECTIONAL); adapter->filter_list_dma = DMA_ERROR_CODE; } @@ -472,8 +471,8 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter) } if(adapter->rx_queue.queue_addr != NULL) { - if(!dma_mapping_error(adapter->rx_queue.queue_dma)) { - dma_unmap_single(&adapter->vdev->dev, + if (!dma_mapping_error(dev, adapter->rx_queue.queue_dma)) { + dma_unmap_single(dev, adapter->rx_queue.queue_dma, adapter->rx_queue.queue_len, DMA_BIDIRECTIONAL); @@ -535,6 +534,7 @@ static int ibmveth_open(struct net_device *netdev) int rc; union ibmveth_buf_desc rxq_desc; int i; + struct device *dev; ibmveth_debug_printk("open starting\n"); @@ -563,17 +563,19 @@ static int ibmveth_open(struct net_device *netdev) return -ENOMEM; } - adapter->buffer_list_dma = dma_map_single(&adapter->vdev->dev, + dev = &adapter->vdev->dev; + + adapter->buffer_list_dma = dma_map_single(dev, adapter->buffer_list_addr, 4096, DMA_BIDIRECTIONAL); - adapter->filter_list_dma = dma_map_single(&adapter->vdev->dev, + adapter->filter_list_dma = dma_map_single(dev, adapter->filter_list_addr, 4096, DMA_BIDIRECTIONAL); - adapter->rx_queue.queue_dma = dma_map_single(&adapter->vdev->dev, + adapter->rx_queue.queue_dma = dma_map_single(dev, adapter->rx_queue.queue_addr, adapter->rx_queue.queue_len, DMA_BIDIRECTIONAL); - if((dma_mapping_error(adapter->buffer_list_dma) ) || - (dma_mapping_error(adapter->filter_list_dma)) || - (dma_mapping_error(adapter->rx_queue.queue_dma))) { + if ((dma_mapping_error(dev, adapter->buffer_list_dma)) || + (dma_mapping_error(dev, adapter->filter_list_dma)) || + (dma_mapping_error(dev, adapter->rx_queue.queue_dma))) { ibmveth_error_printk("unable to map filter or buffer list pages\n"); ibmveth_cleanup(adapter); napi_disable(&adapter->napi); @@ -645,7 +647,7 @@ static int ibmveth_open(struct net_device *netdev) adapter->bounce_buffer_dma = dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer, netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL); - if (dma_mapping_error(adapter->bounce_buffer_dma)) { + if (dma_mapping_error(dev, adapter->bounce_buffer_dma)) { ibmveth_error_printk("unable to map bounce buffer\n"); ibmveth_cleanup(adapter); napi_disable(&adapter->napi); @@ -922,7 +924,7 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) buf[1] = 0; } - if (dma_mapping_error(data_dma_addr)) { + if (dma_mapping_error((&adapter->vdev->dev, data_dma_addr)) { if (!firmware_has_feature(FW_FEATURE_CMO)) ibmveth_error_printk("tx: unable to map xmit buffer\n"); skb_copy_from_linear_data(skb, adapter->bounce_buffer, -- cgit v1.2.3