From 5b6810e048435de508ef66aebd6b78db13d651b8 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 11 Oct 2005 11:08:24 -0700 Subject: [IPoIB] Rename ipoib_create_qp() -> ipoib_init_qp() and fix error cleanup ipoib_create_qp() no longer creates IPoIB's QP, so it shouldn't destroy the QP on failure -- that unwinding happens elsewhere, so the current code can cause a double free. While we're at it, the function's name should match what it actually does, so rename it to ipoib_init_qp(). Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 +- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 4 ++-- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 9 +++++---- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 4ea1c1ca85b..6b14bd1c60a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -277,7 +277,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid); -int ipoib_qp_create(struct net_device *dev); +int ipoib_init_qp(struct net_device *dev); int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); void ipoib_transport_dev_cleanup(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f7440096b5e..02d0e000657 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -387,9 +387,9 @@ int ipoib_ib_dev_open(struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; - ret = ipoib_qp_create(dev); + ret = ipoib_init_qp(dev); if (ret) { - ipoib_warn(priv, "ipoib_qp_create returned %d\n", ret); + ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret); return -1; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 79f59d0563e..b5902a7ec24 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -92,7 +92,7 @@ int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid) return ret; } -int ipoib_qp_create(struct net_device *dev) +int ipoib_init_qp(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; @@ -149,10 +149,11 @@ int ipoib_qp_create(struct net_device *dev) return 0; out_fail: - ib_destroy_qp(priv->qp); - priv->qp = NULL; + qp_attr.qp_state = IB_QPS_RESET; + if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) + ipoib_warn(priv, "Failed to modify QP to RESET state\n"); - return -EINVAL; + return ret; } int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) -- cgit v1.2.3 From 4b2d319b53810ab00ef3d8fdfc1c1ab0647ab0a7 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 18 Oct 2005 12:20:06 -0700 Subject: [IPoIB] Improve ipoib_timeout() output Use jiffies_to_msecs() so we print a human-readable time so we don't have to worry about what HZ is configured to, and print out a few values to make post-mortem analysis easier. Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 6c5bf07489f..ee303859b04 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -637,8 +637,11 @@ static void ipoib_timeout(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - ipoib_warn(priv, "transmit timeout: latency %ld\n", - jiffies - dev->trans_start); + ipoib_warn(priv, "transmit timeout: latency %d msecs\n", + jiffies_to_msecs(jiffies - dev->trans_start)); + ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n", + netif_queue_stopped(dev), + priv->tx_head, priv->tx_tail); /* XXX reset QP, etc. */ } -- cgit v1.2.3 From 1993d683f39f77ddb46a662d7146247877d50b8f Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 28 Oct 2005 15:30:34 -0700 Subject: [IPoIB] Drop RX packets when out of memory Change the way IPoIB handles RX packets when it can't allocate a new receive skbuff. If the allocation of a new receive skb fails, we now drop the packet we just received and repost the original receive skb. This means that the receive ring always stays full and we don't have to monkey around with trying to schedule a refill task for later. Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 21 +++--- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 116 +++++++++++++++++------------- drivers/infiniband/ulp/ipoib/ipoib_main.c | 8 +-- 3 files changed, 85 insertions(+), 60 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 6b14bd1c60a..c994a916a58 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -100,7 +100,12 @@ struct ipoib_pseudoheader { struct ipoib_mcast; -struct ipoib_buf { +struct ipoib_rx_buf { + struct sk_buff *skb; + dma_addr_t mapping; +}; + +struct ipoib_tx_buf { struct sk_buff *skb; DECLARE_PCI_UNMAP_ADDR(mapping) }; @@ -150,14 +155,14 @@ struct ipoib_dev_priv { unsigned int admin_mtu; unsigned int mcast_mtu; - struct ipoib_buf *rx_ring; + struct ipoib_rx_buf *rx_ring; - spinlock_t tx_lock; - struct ipoib_buf *tx_ring; - unsigned tx_head; - unsigned tx_tail; - struct ib_sge tx_sge; - struct ib_send_wr tx_wr; + spinlock_t tx_lock; + struct ipoib_tx_buf *tx_ring; + unsigned tx_head; + unsigned tx_tail; + struct ib_sge tx_sge; + struct ib_send_wr tx_wr; struct ib_wc ibwc[IPOIB_NUM_WC]; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 02d0e000657..192fef884e2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -95,57 +95,65 @@ void ipoib_free_ah(struct kref *kref) } } -static inline int ipoib_ib_receive(struct ipoib_dev_priv *priv, - unsigned int wr_id, - dma_addr_t addr) +static int ipoib_ib_post_receive(struct net_device *dev, int id) { - struct ib_sge list = { - .addr = addr, - .length = IPOIB_BUF_SIZE, - .lkey = priv->mr->lkey, - }; - struct ib_recv_wr param = { - .wr_id = wr_id | IPOIB_OP_RECV, - .sg_list = &list, - .num_sge = 1, - }; + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_sge list; + struct ib_recv_wr param; struct ib_recv_wr *bad_wr; + int ret; + + list.addr = priv->rx_ring[id].mapping; + list.length = IPOIB_BUF_SIZE; + list.lkey = priv->mr->lkey; + + param.next = NULL; + param.wr_id = id | IPOIB_OP_RECV; + param.sg_list = &list; + param.num_sge = 1; + + ret = ib_post_recv(priv->qp, ¶m, &bad_wr); + if (unlikely(ret)) { + ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); + dma_unmap_single(priv->ca->dma_device, + priv->rx_ring[id].mapping, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + dev_kfree_skb_any(priv->rx_ring[id].skb); + priv->rx_ring[id].skb = NULL; + } - return ib_post_recv(priv->qp, ¶m, &bad_wr); + return ret; } -static int ipoib_ib_post_receive(struct net_device *dev, int id) +static int ipoib_alloc_rx_skb(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct sk_buff *skb; dma_addr_t addr; - int ret; skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4); - if (!skb) { - ipoib_warn(priv, "failed to allocate receive buffer\n"); - - priv->rx_ring[id].skb = NULL; + if (!skb) return -ENOMEM; - } - skb_reserve(skb, 4); /* 16 byte align IP header */ - priv->rx_ring[id].skb = skb; + + /* + * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte + * header. So we need 4 more bytes to get to 48 and align the + * IP header to a multiple of 16. + */ + skb_reserve(skb, 4); + addr = dma_map_single(priv->ca->dma_device, skb->data, IPOIB_BUF_SIZE, DMA_FROM_DEVICE); - pci_unmap_addr_set(&priv->rx_ring[id], mapping, addr); - - ret = ipoib_ib_receive(priv, id, addr); - if (ret) { - ipoib_warn(priv, "ipoib_ib_receive failed for buf %d (%d)\n", - id, ret); - dma_unmap_single(priv->ca->dma_device, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(addr))) { dev_kfree_skb_any(skb); - priv->rx_ring[id].skb = NULL; + return -EIO; } - return ret; + priv->rx_ring[id].skb = skb; + priv->rx_ring[id].mapping = addr; + + return 0; } static int ipoib_ib_post_receives(struct net_device *dev) @@ -154,6 +162,10 @@ static int ipoib_ib_post_receives(struct net_device *dev) int i; for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) { + if (ipoib_alloc_rx_skb(dev, i)) { + ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); + return -ENOMEM; + } if (ipoib_ib_post_receive(dev, i)) { ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i); return -EIO; @@ -176,28 +188,36 @@ static void ipoib_ib_handle_wc(struct net_device *dev, wr_id &= ~IPOIB_OP_RECV; if (wr_id < IPOIB_RX_RING_SIZE) { - struct sk_buff *skb = priv->rx_ring[wr_id].skb; - - priv->rx_ring[wr_id].skb = NULL; + struct sk_buff *skb = priv->rx_ring[wr_id].skb; + dma_addr_t addr = priv->rx_ring[wr_id].mapping; - dma_unmap_single(priv->ca->dma_device, - pci_unmap_addr(&priv->rx_ring[wr_id], - mapping), - IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); - - if (wc->status != IB_WC_SUCCESS) { + if (unlikely(wc->status != IB_WC_SUCCESS)) { if (wc->status != IB_WC_WR_FLUSH_ERR) ipoib_warn(priv, "failed recv event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); + dma_unmap_single(priv->ca->dma_device, addr, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(skb); + priv->rx_ring[wr_id].skb = NULL; return; } + /* + * If we can't allocate a new RX buffer, dump + * this packet and reuse the old buffer. + */ + if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { + ++priv->stats.rx_dropped; + goto repost; + } + ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); + dma_unmap_single(priv->ca->dma_device, addr, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + skb_put(skb, wc->byte_len); skb_pull(skb, IB_GRH_BYTES); @@ -220,8 +240,8 @@ static void ipoib_ib_handle_wc(struct net_device *dev, dev_kfree_skb_any(skb); } - /* repost receive */ - if (ipoib_ib_post_receive(dev, wr_id)) + repost: + if (unlikely(ipoib_ib_post_receive(dev, wr_id))) ipoib_warn(priv, "ipoib_ib_post_receive failed " "for buf %d\n", wr_id); } else @@ -229,7 +249,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev, wr_id); } else { - struct ipoib_buf *tx_req; + struct ipoib_tx_buf *tx_req; unsigned long flags; if (wr_id >= IPOIB_TX_RING_SIZE) { @@ -302,7 +322,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_ah *address, u32 qpn) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_buf *tx_req; + struct ipoib_tx_buf *tx_req; dma_addr_t addr; if (skb->len > dev->mtu + INFINIBAND_ALEN) { @@ -468,7 +488,7 @@ int ipoib_ib_dev_stop(struct net_device *dev) struct ib_qp_attr qp_attr; int attr_mask; unsigned long begin; - struct ipoib_buf *tx_req; + struct ipoib_tx_buf *tx_req; int i; /* Kill the existing QP and allocate a new one */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index ee303859b04..cd4f42328db 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -732,7 +732,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) /* Allocate RX/TX "rings" to hold queued skbs */ - priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf), + priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf), GFP_KERNEL); if (!priv->rx_ring) { printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", @@ -740,9 +740,9 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) goto out; } memset(priv->rx_ring, 0, - IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf)); + IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf)); - priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf), + priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf), GFP_KERNEL); if (!priv->tx_ring) { printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", @@ -750,7 +750,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) goto out_rx_ring_cleanup; } memset(priv->tx_ring, 0, - IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf)); + IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf)); /* priv->tx_head & tx_tail are already 0 */ -- cgit v1.2.3