From e509be898d8937634437caa474b57ac12795e5bc Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 13 May 2008 11:41:29 -0700 Subject: IB/ipath: Fix many locking issues when switching to error state The send DMA hardware queue voided a number of prior assumptions about when a send is complete which led to completions being generated out of order. There were also a number of locking issues when switching the QP to the error or reset states, and we implement the IB_QPS_SQD state. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_ruc.c | 168 ++++++++++++++++++++++---------- 1 file changed, 115 insertions(+), 53 deletions(-) (limited to 'drivers/infiniband/hw/ipath/ipath_ruc.c') diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index c716a03dd39..a4b5521567f 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -78,6 +78,7 @@ const u32 ib_ipath_rnr_table[32] = { * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device * @qp: the QP * + * Called with the QP s_lock held and interrupts disabled. * XXX Use a simple list for now. We might need a priority * queue if we have lots of QPs waiting for RNR timeouts * but that should be rare. @@ -85,9 +86,9 @@ const u32 ib_ipath_rnr_table[32] = { void ipath_insert_rnr_queue(struct ipath_qp *qp) { struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - unsigned long flags; - spin_lock_irqsave(&dev->pending_lock, flags); + /* We already did a spin_lock_irqsave(), so just use spin_lock */ + spin_lock(&dev->pending_lock); if (list_empty(&dev->rnrwait)) list_add(&qp->timerwait, &dev->rnrwait); else { @@ -109,7 +110,7 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp) nqp->s_rnr_timeout -= qp->s_rnr_timeout; list_add(&qp->timerwait, l); } - spin_unlock_irqrestore(&dev->pending_lock, flags); + spin_unlock(&dev->pending_lock); } /** @@ -185,6 +186,11 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) } spin_lock_irqsave(&rq->lock, flags); + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { + ret = 0; + goto unlock; + } + wq = rq->wq; tail = wq->tail; /* Validate tail before using it since it is user writable. */ @@ -192,9 +198,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) tail = 0; do { if (unlikely(tail == wq->head)) { - spin_unlock_irqrestore(&rq->lock, flags); ret = 0; - goto bail; + goto unlock; } /* Make sure entry is read after head index is read. */ smp_rmb(); @@ -207,7 +212,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) wq->tail = tail; ret = 1; - qp->r_wrid_valid = 1; + set_bit(IPATH_R_WRID_VALID, &qp->r_aflags); if (handler) { u32 n; @@ -234,8 +239,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) goto bail; } } +unlock: spin_unlock_irqrestore(&rq->lock, flags); - bail: return ret; } @@ -263,35 +268,59 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) atomic64_t *maddr; enum ib_wc_status send_status; + /* + * Note that we check the responder QP state after + * checking the requester's state. + */ qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); - if (!qp) { - dev->n_pkt_drops++; - return; - } -again: spin_lock_irqsave(&sqp->s_lock, flags); - if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) || - sqp->s_rnr_timeout) { - spin_unlock_irqrestore(&sqp->s_lock, flags); - goto done; - } + /* Return if we are already busy processing a work request. */ + if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || + !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) + goto unlock; - /* Get the next send request. */ - if (sqp->s_last == sqp->s_head) { - /* Send work queue is empty. */ - spin_unlock_irqrestore(&sqp->s_lock, flags); - goto done; + sqp->s_flags |= IPATH_S_BUSY; + +again: + if (sqp->s_last == sqp->s_head) + goto clr_busy; + wqe = get_swqe_ptr(sqp, sqp->s_last); + + /* Return if it is not OK to start a new work reqeust. */ + if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { + if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND)) + goto clr_busy; + /* We are in the error state, flush the work request. */ + send_status = IB_WC_WR_FLUSH_ERR; + goto flush_send; } /* * We can rely on the entry not changing without the s_lock * being held until we update s_last. + * We increment s_cur to indicate s_last is in progress. */ - wqe = get_swqe_ptr(sqp, sqp->s_last); + if (sqp->s_last == sqp->s_cur) { + if (++sqp->s_cur >= sqp->s_size) + sqp->s_cur = 0; + } spin_unlock_irqrestore(&sqp->s_lock, flags); + if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { + dev->n_pkt_drops++; + /* + * For RC, the requester would timeout and retry so + * shortcut the timeouts and just signal too many retries. + */ + if (sqp->ibqp.qp_type == IB_QPT_RC) + send_status = IB_WC_RETRY_EXC_ERR; + else + send_status = IB_WC_SUCCESS; + goto serr; + } + memset(&wc, 0, sizeof wc); send_status = IB_WC_SUCCESS; @@ -396,8 +425,7 @@ again: sqp->s_len -= len; } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE || - wqe->wr.opcode == IB_WR_RDMA_READ) + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) goto send_comp; if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) @@ -417,6 +445,8 @@ again: wqe->wr.send_flags & IB_SEND_SOLICITED); send_comp: + spin_lock_irqsave(&sqp->s_lock, flags); +flush_send: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; ipath_send_complete(sqp, wqe, send_status); goto again; @@ -437,11 +467,12 @@ rnr_nak: sqp->s_rnr_retry--; spin_lock_irqsave(&sqp->s_lock, flags); if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK)) - goto unlock; + goto clr_busy; + sqp->s_flags |= IPATH_S_WAITING; dev->n_rnr_naks++; sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer]; ipath_insert_rnr_queue(sqp); - goto unlock; + goto clr_busy; inv_err: send_status = IB_WC_REM_INV_REQ_ERR; @@ -473,17 +504,19 @@ serr: } goto done; } +clr_busy: + sqp->s_flags &= ~IPATH_S_BUSY; unlock: spin_unlock_irqrestore(&sqp->s_lock, flags); done: - if (atomic_dec_and_test(&qp->refcount)) + if (qp && atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); } static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp) { if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) || - qp->ibqp.qp_type == IB_QPT_SMI) { + qp->ibqp.qp_type == IB_QPT_SMI) { unsigned long flags; spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); @@ -501,26 +534,36 @@ static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp) * @dev: the device we ran out of buffers on * * Called when we run out of PIO buffers. + * If we are now in the error state, return zero to flush the + * send work request. */ -static void ipath_no_bufs_available(struct ipath_qp *qp, +static int ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) { unsigned long flags; + int ret = 1; /* * Note that as soon as want_buffer() is called and * possibly before it returns, ipath_ib_piobufavail() - * could be called. If we are still in the tasklet function, - * tasklet_hi_schedule() will not call us until the next time - * tasklet_hi_schedule() is called. - * We leave the busy flag set so that another post send doesn't - * try to put the same QP on the piowait list again. + * could be called. Therefore, put QP on the piowait list before + * enabling the PIO avail interrupt. */ - spin_lock_irqsave(&dev->pending_lock, flags); - list_add_tail(&qp->piowait, &dev->piowait); - spin_unlock_irqrestore(&dev->pending_lock, flags); - want_buffer(dev->dd, qp); - dev->n_piowait++; + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { + dev->n_piowait++; + qp->s_flags |= IPATH_S_WAITING; + qp->s_flags &= ~IPATH_S_BUSY; + spin_lock(&dev->pending_lock); + if (list_empty(&qp->piowait)) + list_add_tail(&qp->piowait, &dev->piowait); + spin_unlock(&dev->pending_lock); + } else + ret = 0; + spin_unlock_irqrestore(&qp->s_lock, flags); + if (ret) + want_buffer(dev->dd, qp); + return ret; } /** @@ -596,15 +639,13 @@ void ipath_do_send(unsigned long data) struct ipath_qp *qp = (struct ipath_qp *)data; struct ipath_ibdev *dev = to_idev(qp->ibqp.device); int (*make_req)(struct ipath_qp *qp); - - if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy)) - goto bail; + unsigned long flags; if ((qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) && qp->remote_ah_attr.dlid == dev->dd->ipath_lid) { ipath_ruc_loopback(qp); - goto clear; + goto bail; } if (qp->ibqp.qp_type == IB_QPT_RC) @@ -614,6 +655,19 @@ void ipath_do_send(unsigned long data) else make_req = ipath_make_ud_req; + spin_lock_irqsave(&qp->s_lock, flags); + + /* Return if we are already busy processing a work request. */ + if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || + !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) { + spin_unlock_irqrestore(&qp->s_lock, flags); + goto bail; + } + + qp->s_flags |= IPATH_S_BUSY; + + spin_unlock_irqrestore(&qp->s_lock, flags); + again: /* Check for a constructed packet to be sent. */ if (qp->s_hdrwords != 0) { @@ -623,8 +677,8 @@ again: */ if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, qp->s_cur_sge, qp->s_cur_size)) { - ipath_no_bufs_available(qp, dev); - goto bail; + if (ipath_no_bufs_available(qp, dev)) + goto bail; } dev->n_unicast_xmit++; /* Record that we sent the packet and s_hdr is empty. */ @@ -633,16 +687,20 @@ again: if (make_req(qp)) goto again; -clear: - clear_bit(IPATH_S_BUSY, &qp->s_busy); + bail:; } +/* + * This should be called with s_lock held. + */ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, enum ib_wc_status status) { - unsigned long flags; - u32 last; + u32 old_last, last; + + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) + return; /* See ch. 11.2.4.1 and 10.7.3.1 */ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || @@ -661,10 +719,14 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, status != IB_WC_SUCCESS); } - spin_lock_irqsave(&qp->s_lock, flags); - last = qp->s_last; + old_last = last = qp->s_last; if (++last >= qp->s_size) last = 0; qp->s_last = last; - spin_unlock_irqrestore(&qp->s_lock, flags); + if (qp->s_cur == old_last) + qp->s_cur = last; + if (qp->s_tail == old_last) + qp->s_tail = last; + if (qp->state == IB_QPS_SQD && last == qp->s_cur) + qp->s_draining = 0; } -- cgit v1.2.3