From 140277e9a710202608914b5b731948d2769399bc Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 14 Dec 2007 01:53:56 -0800 Subject: IB/ipath: Fix IB compliance problems with link state vs physical state Subnet manager SetPortinfo messages distingush between changing the link state (DOWN, ARM, ACTIVE) and the link physical state (POLL, SLEEP, DISABLED). These are somewhat independent commands and affect when link width and speed changes take effect. Without this patch, a link DOWN physical state NOP command was causing the link width and speed settings to take effect which should only happen when the link physical state is goes down (either by a SMP or some link physical error like link errors exceeding the threshold). Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_common.h | 2 +- drivers/infiniband/hw/ipath/ipath_driver.c | 28 ++++++++++++--------------- drivers/infiniband/hw/ipath/ipath_kernel.h | 1 + drivers/infiniband/hw/ipath/ipath_mad.c | 7 +++---- drivers/infiniband/hw/ipath/ipath_registers.h | 2 +- 5 files changed, 18 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index 41462109554..591901aab6b 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -75,7 +75,7 @@ #define IPATH_IB_LINKDOWN 0 #define IPATH_IB_LINKARM 1 #define IPATH_IB_LINKACTIVE 2 -#define IPATH_IB_LINKINIT 3 +#define IPATH_IB_LINKDOWN_ONLY 3 #define IPATH_IB_LINKDOWN_SLEEP 4 #define IPATH_IB_LINKDOWN_DISABLE 5 #define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */ diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index d5ff6ca2db3..ca4d0acc678 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -851,8 +851,7 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, * -ETIMEDOUT state can have multiple states set, for any of several * transitions. */ -static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, - int msecs) +int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs) { dd->ipath_state_wanted = state; wait_event_interruptible_timeout(ipath_state_wait, @@ -1656,8 +1655,8 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) { static const char *what[4] = { - [0] = "DOWN", - [INFINIPATH_IBCC_LINKCMD_INIT] = "INIT", + [0] = "NOP", + [INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN", [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED", [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE" }; @@ -1672,9 +1671,9 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) (dd, dd->ipath_kregs->kr_ibcstatus) >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]); - /* flush all queued sends when going to DOWN or INIT, to be sure that + /* flush all queued sends when going to DOWN to be sure that * they don't block MAD packets */ - if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) + if (linkcmd == INFINIPATH_IBCC_LINKCMD_DOWN) ipath_cancel_sends(dd, 1); ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, @@ -1687,6 +1686,13 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) int ret; switch (newstate) { + case IPATH_IB_LINKDOWN_ONLY: + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN << + INFINIPATH_IBCC_LINKCMD_SHIFT); + /* don't wait */ + ret = 0; + goto bail; + case IPATH_IB_LINKDOWN: ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL << INFINIPATH_IBCC_LINKINITCMD_SHIFT); @@ -1709,16 +1715,6 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) ret = 0; goto bail; - case IPATH_IB_LINKINIT: - if (dd->ipath_flags & IPATH_LINKINIT) { - ret = 0; - goto bail; - } - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT << - INFINIPATH_IBCC_LINKCMD_SHIFT); - lstate = IPATH_LINKINIT; - break; - case IPATH_IB_LINKARM: if (dd->ipath_flags & IPATH_LINKARMED) { ret = 0; diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 4cc0f95ea87..ecf3f7ff771 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -767,6 +767,7 @@ void ipath_kreceive(struct ipath_portdata *); int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned); int ipath_reset_device(int); void ipath_get_faststats(unsigned long); +int ipath_wait_linkstate(struct ipath_devdata *, u32, int); int ipath_set_linkstate(struct ipath_devdata *, u8); int ipath_set_mtu(struct ipath_devdata *, u16); int ipath_set_lid(struct ipath_devdata *, u32, u8); diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index d98d5f10370..b34b91d3723 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -555,10 +555,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, /* FALLTHROUGH */ case IB_PORT_DOWN: if (lstate == 0) - if (get_linkdowndefaultstate(dd)) - lstate = IPATH_IB_LINKDOWN_SLEEP; - else - lstate = IPATH_IB_LINKDOWN; + lstate = IPATH_IB_LINKDOWN_ONLY; else if (lstate == 1) lstate = IPATH_IB_LINKDOWN_SLEEP; else if (lstate == 2) @@ -568,6 +565,8 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, else goto err; ipath_set_linkstate(dd, lstate); + ipath_wait_linkstate(dd, IPATH_LINKINIT | IPATH_LINKARMED | + IPATH_LINKACTIVE, 1000); break; case IB_PORT_ARMED: ipath_set_linkstate(dd, IPATH_IB_LINKARM); diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h index 6d2a17f9c1d..92ad73a7fff 100644 --- a/drivers/infiniband/hw/ipath/ipath_registers.h +++ b/drivers/infiniband/hw/ipath/ipath_registers.h @@ -185,7 +185,7 @@ #define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3 #define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16 #define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL -#define INFINIPATH_IBCC_LINKCMD_INIT 1 /* move to 0x11 */ +#define INFINIPATH_IBCC_LINKCMD_DOWN 1 /* move to 0x11 */ #define INFINIPATH_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */ #define INFINIPATH_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */ #define INFINIPATH_IBCC_LINKCMD_SHIFT 18 -- cgit v1.2.3 From 87d5aed85b2d79e4075ad2ca1449e9b98f657a09 Mon Sep 17 00:00:00 2001 From: Patrick Marchand Latifi Date: Mon, 7 Jan 2008 23:43:04 -0800 Subject: IB/ipath: Fix potentially wrong RNR retry counter returned in ipath_query_qp() There can be a case where the requester's rnr retry counter (s_rnr_retry) is less than the number of rnr retries allowed per QP (s_rnr_retry_cnt). This can happen if the s_rnr_retry counter is being decremented and an ipath_query_qp call is issued during that time frame. The fix is to always return the number of rnr retries allowed per QP instead of the requester's rnr counter. Found by code review. Signed-off-by: Patrick Marchand Latifi Acked-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 80dc623cee4..8214c0905c7 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -647,7 +647,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->port_num = 1; attr->timeout = qp->timeout; attr->retry_cnt = qp->s_retry_cnt; - attr->rnr_retry = qp->s_rnr_retry; + attr->rnr_retry = qp->s_rnr_retry_cnt; attr->alt_port_num = 0; attr->alt_timeout = 0; -- cgit v1.2.3 From 4cd5060cf7c2207c31e2e368f8a6343355362e51 Mon Sep 17 00:00:00 2001 From: Patrick Marchand Latifi Date: Fri, 18 Jan 2008 20:10:48 -0800 Subject: IB/ipath: Fix RC QP initialization This patch fixes the initialization of RC QPs, since we would rely on the queue pair type (ibqp->qp_type) being set, but this field is only initialized when we return from ipath_create_qp (it is initialized by the user-level verbs library). The fix is to not depend on this field to initialize the send and the receive state of the RC QP. Signed-off-by: Patrick Marchand Latifi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_qp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 8214c0905c7..553d9007cf0 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -329,8 +329,9 @@ struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn) /** * ipath_reset_qp - initialize the QP state to the reset state * @qp: the QP to reset + * @type: the QP type */ -static void ipath_reset_qp(struct ipath_qp *qp) +static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) { qp->remote_qpn = 0; qp->qkey = 0; @@ -342,7 +343,7 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->s_psn = 0; qp->r_psn = 0; qp->r_msn = 0; - if (qp->ibqp.qp_type == IB_QPT_RC) { + if (type == IB_QPT_RC) { qp->s_state = IB_OPCODE_RC_SEND_LAST; qp->r_state = IB_OPCODE_RC_SEND_LAST; } else { @@ -534,7 +535,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, switch (new_state) { case IB_QPS_RESET: - ipath_reset_qp(qp); + ipath_reset_qp(qp, ibqp->qp_type); break; case IB_QPS_ERR: @@ -839,7 +840,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, goto bail_qp; } qp->ip = NULL; - ipath_reset_qp(qp); + ipath_reset_qp(qp, init_attr->qp_type); break; default: -- cgit v1.2.3 From 2a049e514b890c8b70b965bbd9f4e3c963af69c9 Mon Sep 17 00:00:00 2001 From: Patrick Marchand Latifi Date: Thu, 31 Jan 2008 00:24:37 -0800 Subject: IB/ipath: Fix error completion put on send CQ instead of recv CQ A work completion entry could be placed on the wrong completion queue when an RC QP is placed in the error state. Signed-off-by: Patrick Marchand Latifi Acked-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 553d9007cf0..087ed316647 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -415,7 +415,7 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) wc.wr_id = qp->r_wr_id; wc.opcode = IB_WC_RECV; wc.status = err; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); } wc.status = IB_WC_WR_FLUSH_ERR; -- cgit v1.2.3 From 450bb3875f5f5ab3679823c941d6045d16967370 Mon Sep 17 00:00:00 2001 From: Patrick Marchand Latifi Date: Wed, 20 Feb 2008 19:08:10 -0800 Subject: IB/ipath: Reset the retry counter for RDMA_READ_RESPONSE_MIDDLE packets Reset the retry counter when we get a good RDMA_READ_RESPONSE_MIDDLE packet. This fix will prevent the requester from reporting a retry exceeded error too early. Signed-off-by: Patrick Marchand Latifi --- drivers/infiniband/hw/ipath/ipath_rc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 459e46e2c01..40f3e37d7ad 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -1196,6 +1196,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, list_move_tail(&qp->timerwait, &dev->pending[dev->pending_index]); spin_unlock(&dev->pending_lock); + + if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE)) + qp->s_retry = qp->s_retry_cnt; + /* * Update the RDMA receive state but do the copy w/o * holding the locks and blocking interrupts. -- cgit v1.2.3 From b3e2749bf32f61e7beb259eb7cfb066d2ec6ad65 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 11 Mar 2008 16:10:02 +0200 Subject: IPoIB: Don't drop multicast sends when they can be queued When set_multicast_list() is called the multicast task is restarted and the IPOIB_MCAST_STARTED bit is cleared. As a result for some window of time, multicast packets are not transmitted nor queued but rather dropped by ipoib_mcast_send(). These dropped packets are painful in two cases: - bonding fail-over which both calls set_multicast_list() on the new active slave and sends Gratuitous ARP through that slave. - IP_DROP_MEMBERSHIP code which both calls set_multicast_list() on the device and issues IGMP leave. In both these cases, depending on the scheduling of the IPoIB multicast task, the packets would be dropped. As a result, in the bonding case, the failover would not be detected by the peers until their neighbour is renewed the neighbour (which takes a few tens of seconds). In the IGMP case, the IP router doesn't get an IGMP leave and would only learn on that from further probes on the group (also a delay of at least a few tens of seconds). Fix this by allowing transmission (or queuing) depending on the IPOIB_FLAG_OPER_UP flag instead of the IPOIB_MCAST_STARTED flag. Signed-off-by: Olga Shern Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 2628339e3a9..31a53c5bcb1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -650,7 +650,7 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) */ spin_lock(&priv->lock); - if (!test_bit(IPOIB_MCAST_STARTED, &priv->flags) || + if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) || !priv->broadcast || !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { ++dev->stats.tx_dropped; -- cgit v1.2.3 From 4200406b8fbbf309f4fffb339bd16c4553ae0c30 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 11 Mar 2008 18:35:20 -0700 Subject: IPoIB/cm: Set tx_wr.num_sge in connected mode post_send() Commit 7143740d ("IPoIB: Add send gather support") made it possible for tx_wr.num_sge to be != 1 -- this happens if send gather support is enabled. However, the code in the connected mode post_send() function assumes the old invariant, namely that tx_wr.num_sge is always 1. Fix this by explicitly setting tx_wr.num_sge to 1 in the CM post_send(). Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 52b1bebfa74..4e8d0281f8b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -637,6 +637,7 @@ static inline int post_send(struct ipoib_dev_priv *priv, priv->tx_sge[0].addr = addr; priv->tx_sge[0].length = len; + priv->tx_wr.num_sge = 1; priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM; return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr); -- cgit v1.2.3 From 10313cbb92206450b450e14f2b3f6ccde42d9a34 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 12 Mar 2008 07:51:03 -0700 Subject: IPoIB: Allocate priv->tx_ring with vmalloc() Commit 7143740d ("IPoIB: Add send gather support") made struct ipoib_tx_buf significantly larger, since the mapping member changed from a single u64 to an array with MAX_SKB_FRAGS + 1 entries. This means that allocating tx_rings with kzalloc() may fail because there is not enough contiguous memory for the new, much bigger size. Fix this regression by allocating the rings with vmalloc() instead. Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 8 +++++--- drivers/infiniband/ulp/ipoib/ipoib_main.c | 9 +++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 4e8d0281f8b..2490b2d79db 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "ipoib.h" @@ -1031,13 +1032,13 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, struct ipoib_dev_priv *priv = netdev_priv(p->dev); int ret; - p->tx_ring = kzalloc(ipoib_sendq_size * sizeof *p->tx_ring, - GFP_KERNEL); + p->tx_ring = vmalloc(ipoib_sendq_size * sizeof *p->tx_ring); if (!p->tx_ring) { ipoib_warn(priv, "failed to allocate tx ring\n"); ret = -ENOMEM; goto err_tx; } + memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring); p->qp = ipoib_cm_create_tx_qp(p->dev, p); if (IS_ERR(p->qp)) { @@ -1078,6 +1079,7 @@ err_id: ib_destroy_qp(p->qp); err_qp: p->qp = NULL; + vfree(p->tx_ring); err_tx: return ret; } @@ -1128,7 +1130,7 @@ timeout: if (p->qp) ib_destroy_qp(p->qp); - kfree(p->tx_ring); + vfree(p->tx_ring); kfree(p); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index f96477a8ca5..57282048865 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -41,6 +41,7 @@ #include #include #include +#include #include /* For ARPHRD_xxx */ @@ -887,13 +888,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) goto out; } - priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, - GFP_KERNEL); + priv->tx_ring = vmalloc(ipoib_sendq_size * sizeof *priv->tx_ring); if (!priv->tx_ring) { printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", ca->name, ipoib_sendq_size); goto out_rx_ring_cleanup; } + memset(priv->tx_ring, 0, ipoib_sendq_size * sizeof *priv->tx_ring); /* priv->tx_head, tx_tail & tx_outstanding are already 0 */ @@ -903,7 +904,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) return 0; out_tx_ring_cleanup: - kfree(priv->tx_ring); + vfree(priv->tx_ring); out_rx_ring_cleanup: kfree(priv->rx_ring); @@ -928,7 +929,7 @@ void ipoib_dev_cleanup(struct net_device *dev) ipoib_ib_dev_cleanup(dev); kfree(priv->rx_ring); - kfree(priv->tx_ring); + vfree(priv->tx_ring); priv->rx_ring = NULL; priv->tx_ring = NULL; -- cgit v1.2.3