From e509be898d8937634437caa474b57ac12795e5bc Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 13 May 2008 11:41:29 -0700 Subject: IB/ipath: Fix many locking issues when switching to error state The send DMA hardware queue voided a number of prior assumptions about when a send is complete which led to completions being generated out of order. There were also a number of locking issues when switching the QP to the error or reset states, and we implement the IB_QPS_SQD state. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_rc.c | 151 +++++++++++++++++++++------------ 1 file changed, 97 insertions(+), 54 deletions(-) (limited to 'drivers/infiniband/hw/ipath/ipath_rc.c') diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index b4b26c3aa61..5b5276a270b 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -92,6 +92,10 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp, u32 bth0; u32 bth2; + /* Don't send an ACK if we aren't supposed to. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto bail; + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; @@ -238,14 +242,25 @@ int ipath_make_rc_req(struct ipath_qp *qp) ipath_make_rc_ack(dev, qp, ohdr, pmtu)) goto done; - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || - qp->s_rnr_timeout || qp->s_wait_credit) - goto bail; + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + goto done; + } - /* Limit the number of packets sent without an ACK. */ - if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { - qp->s_wait_credit = 1; - dev->n_rc_stalls++; + /* Leave BUSY set until RNR timeout. */ + if (qp->s_rnr_timeout) { + qp->s_flags |= IPATH_S_WAITING; goto bail; } @@ -257,6 +272,9 @@ int ipath_make_rc_req(struct ipath_qp *qp) wqe = get_swqe_ptr(qp, qp->s_cur); switch (qp->s_state) { default: + if (!(ib_ipath_state_ops[qp->state] & + IPATH_PROCESS_NEXT_SEND_OK)) + goto bail; /* * Resend an old request or start a new one. * @@ -294,8 +312,10 @@ int ipath_make_rc_req(struct ipath_qp *qp) case IB_WR_SEND_WITH_IMM: /* If no credit, return. */ if (qp->s_lsn != (u32) -1 && - ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) + ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { + qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT; goto bail; + } wqe->lpsn = wqe->psn; if (len > pmtu) { wqe->lpsn += (len - 1) / pmtu; @@ -325,8 +345,10 @@ int ipath_make_rc_req(struct ipath_qp *qp) case IB_WR_RDMA_WRITE_WITH_IMM: /* If no credit, return. */ if (qp->s_lsn != (u32) -1 && - ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) + ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { + qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT; goto bail; + } ohdr->u.rc.reth.vaddr = cpu_to_be64(wqe->wr.wr.rdma.remote_addr); ohdr->u.rc.reth.rkey = @@ -570,7 +592,11 @@ int ipath_make_rc_req(struct ipath_qp *qp) ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2); done: ret = 1; + goto unlock; + bail: + qp->s_flags &= ~IPATH_S_BUSY; +unlock: spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -606,7 +632,11 @@ static void send_rc_ack(struct ipath_qp *qp) spin_unlock_irqrestore(&qp->s_lock, flags); + /* Don't try to send ACKs if the link isn't ACTIVE */ dd = dev->dd; + if (!(dd->ipath_flags & IPATH_LINKACTIVE)) + goto done; + piobuf = ipath_getpiobuf(dd, 0, NULL); if (!piobuf) { /* @@ -668,15 +698,16 @@ static void send_rc_ack(struct ipath_qp *qp) goto done; queue_ack: - dev->n_rc_qacks++; - qp->s_flags |= IPATH_S_ACK_PENDING; - qp->s_nak_state = qp->r_nak_state; - qp->s_ack_psn = qp->r_ack_psn; + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) { + dev->n_rc_qacks++; + qp->s_flags |= IPATH_S_ACK_PENDING; + qp->s_nak_state = qp->r_nak_state; + qp->s_ack_psn = qp->r_ack_psn; + + /* Schedule the send tasklet. */ + ipath_schedule_send(qp); + } spin_unlock_irqrestore(&qp->s_lock, flags); - - /* Call ipath_do_rc_send() in another thread. */ - tasklet_hi_schedule(&qp->s_task); - done: return; } @@ -735,7 +766,7 @@ static void reset_psn(struct ipath_qp *qp, u32 psn) /* * Set the state to restart in the middle of a request. * Don't change the s_sge, s_cur_sge, or s_cur_size. - * See ipath_do_rc_send(). + * See ipath_make_rc_req(). */ switch (opcode) { case IB_WR_SEND: @@ -801,7 +832,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn) dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK; reset_psn(qp, psn); - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); bail: return; @@ -809,13 +840,7 @@ bail: static inline void update_last_psn(struct ipath_qp *qp, u32 psn) { - if (qp->s_last_psn != psn) { - qp->s_last_psn = psn; - if (qp->s_wait_credit) { - qp->s_wait_credit = 0; - tasklet_hi_schedule(&qp->s_task); - } - } + qp->s_last_psn = psn; } /** @@ -915,14 +940,10 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { qp->s_num_rd_atomic--; /* Restart sending task if fence is complete */ - if ((qp->s_flags & IPATH_S_FENCE_PENDING) && - !qp->s_num_rd_atomic) { - qp->s_flags &= ~IPATH_S_FENCE_PENDING; - tasklet_hi_schedule(&qp->s_task); - } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) { - qp->s_flags &= ~IPATH_S_RDMAR_PENDING; - tasklet_hi_schedule(&qp->s_task); - } + if (((qp->s_flags & IPATH_S_FENCE_PENDING) && + !qp->s_num_rd_atomic) || + qp->s_flags & IPATH_S_RDMAR_PENDING) + ipath_schedule_send(qp); } /* Post a send completion queue entry if requested. */ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || @@ -956,6 +977,8 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, } else { if (++qp->s_last >= qp->s_size) qp->s_last = 0; + if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur) + qp->s_draining = 0; if (qp->s_last == qp->s_tail) break; wqe = get_swqe_ptr(qp, qp->s_last); @@ -979,7 +1002,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, */ if (ipath_cmp24(qp->s_psn, psn) <= 0) { reset_psn(qp, psn + 1); - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); } } else if (ipath_cmp24(qp->s_psn, psn) <= 0) { qp->s_state = OP(SEND_LAST); @@ -1018,6 +1041,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK]; ipath_insert_rnr_queue(qp); + ipath_schedule_send(qp); goto bail; case 3: /* NAK */ @@ -1108,6 +1132,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this now that we hold the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto ack_done; + /* Ignore invalid responses. */ if (ipath_cmp24(psn, qp->s_next_psn) >= 0) goto ack_done; @@ -1343,7 +1371,12 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, psn &= IPATH_PSN_MASK; e = NULL; old_req = 1; + spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this now that we hold the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock_done; + for (i = qp->r_head_ack_queue; ; i = prev) { if (i == qp->s_tail_ack_queue) old_req = 0; @@ -1471,7 +1504,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, break; } qp->r_nak_state = 0; - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); unlock_done: spin_unlock_irqrestore(&qp->s_lock, flags); @@ -1503,18 +1536,15 @@ void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n) { - unsigned long flags; unsigned next; next = n + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; - spin_lock_irqsave(&qp->s_lock, flags); if (n == qp->s_tail_ack_queue) { qp->s_tail_ack_queue = next; qp->s_ack_state = OP(ACKNOWLEDGE); } - spin_unlock_irqrestore(&qp->s_lock, flags); } /** @@ -1543,6 +1573,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int diff; struct ib_reth *reth; int header_in_data; + unsigned long flags; /* Validate the SLID. See Ch. 9.6.1.5 */ if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) @@ -1690,9 +1721,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto nack_inv; ipath_copy_sge(&qp->r_sge, data, tlen); qp->r_msn++; - if (!qp->r_wrid_valid) + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) break; - qp->r_wrid_valid = 0; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) || @@ -1764,9 +1794,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, next = qp->r_head_ack_queue + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; + spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this while holding the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock; if (unlikely(next == qp->s_tail_ack_queue)) { if (!qp->s_ack_queue[next].sent) - goto nack_inv; + goto nack_inv_unlck; ipath_update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; @@ -1787,7 +1821,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey, IB_ACCESS_REMOTE_READ); if (unlikely(!ok)) - goto nack_acc; + goto nack_acc_unlck; /* * Update the next expected PSN. We add 1 later * below, so only add the remainder here. @@ -1814,13 +1848,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, qp->r_psn++; qp->r_state = opcode; qp->r_nak_state = 0; - barrier(); qp->r_head_ack_queue = next; - /* Call ipath_do_rc_send() in another thread. */ - tasklet_hi_schedule(&qp->s_task); + /* Schedule the send tasklet. */ + ipath_schedule_send(qp); - goto done; + goto unlock; } case OP(COMPARE_SWAP): @@ -1839,9 +1872,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, next = qp->r_head_ack_queue + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; + spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this while holding the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock; if (unlikely(next == qp->s_tail_ack_queue)) { if (!qp->s_ack_queue[next].sent) - goto nack_inv; + goto nack_inv_unlck; ipath_update_ack_queue(qp, next); } if (!header_in_data) @@ -1851,13 +1888,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | be32_to_cpu(ateth->vaddr[1]); if (unlikely(vaddr & (sizeof(u64) - 1))) - goto nack_inv; + goto nack_inv_unlck; rkey = be32_to_cpu(ateth->rkey); /* Check rkey & NAK */ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), vaddr, rkey, IB_ACCESS_REMOTE_ATOMIC))) - goto nack_acc; + goto nack_acc_unlck; /* Perform atomic OP and save result. */ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; sdata = be64_to_cpu(ateth->swap_data); @@ -1874,13 +1911,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, qp->r_psn++; qp->r_state = opcode; qp->r_nak_state = 0; - barrier(); qp->r_head_ack_queue = next; - /* Call ipath_do_rc_send() in another thread. */ - tasklet_hi_schedule(&qp->s_task); + /* Schedule the send tasklet. */ + ipath_schedule_send(qp); - goto done; + goto unlock; } default: @@ -1901,19 +1937,26 @@ rnr_nak: qp->r_ack_psn = qp->r_psn; goto send_ack; +nack_inv_unlck: + spin_unlock_irqrestore(&qp->s_lock, flags); nack_inv: ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR); qp->r_nak_state = IB_NAK_INVALID_REQUEST; qp->r_ack_psn = qp->r_psn; goto send_ack; +nack_acc_unlck: + spin_unlock_irqrestore(&qp->s_lock, flags); nack_acc: ipath_rc_error(qp, IB_WC_LOC_PROT_ERR); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; send_ack: send_rc_ack(qp); + goto done; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); done: return; } -- cgit v1.2.3