From ab8403c424a35364a3a2c753f7c5917fcbb4d809 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sun, 14 Oct 2007 20:40:27 -0700 Subject: IB/mthca: Avoid alignment traps when writing doorbells Architectures such as ia64 see alignment traps when doing a 64-bit read from __be32 doorbell[2] arrays to do doorbell writes in mthca_write64(). Fix this by just passing the two halves of the doorbell value into mthca_write64(). This actually improves the generated code by allowing the compiler to see what's going on better. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_cq.c | 53 ++++++++++------------------ drivers/infiniband/hw/mthca/mthca_doorbell.h | 13 ++++--- drivers/infiniband/hw/mthca/mthca_eq.c | 21 ++--------- drivers/infiniband/hw/mthca/mthca_qp.c | 45 ++++++++--------------- drivers/infiniband/hw/mthca/mthca_srq.c | 11 ++---- 5 files changed, 47 insertions(+), 96 deletions(-) (limited to 'drivers/infiniband/hw/mthca') diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index be6e1e03bda..6bd9f139334 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -204,16 +204,11 @@ static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr) static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq, int incr) { - __be32 doorbell[2]; - if (mthca_is_memfree(dev)) { *cq->set_ci_db = cpu_to_be32(cq->cons_index); wmb(); } else { - doorbell[0] = cpu_to_be32(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn); - doorbell[1] = cpu_to_be32(incr - 1); - - mthca_write64(doorbell, + mthca_write64(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn, incr - 1, dev->kar + MTHCA_CQ_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); /* @@ -731,17 +726,12 @@ repoll: int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags) { - __be32 doorbell[2]; + u32 dbhi = ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? + MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL : + MTHCA_TAVOR_CQ_DB_REQ_NOT) | + to_mcq(cq)->cqn; - doorbell[0] = cpu_to_be32(((flags & IB_CQ_SOLICITED_MASK) == - IB_CQ_SOLICITED ? - MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL : - MTHCA_TAVOR_CQ_DB_REQ_NOT) | - to_mcq(cq)->cqn); - doorbell[1] = (__force __be32) 0xffffffff; - - mthca_write64(doorbell, - to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL, + mthca_write64(dbhi, 0xffffffff, to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&to_mdev(cq->device)->doorbell_lock)); return 0; @@ -750,19 +740,16 @@ int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags) int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct mthca_cq *cq = to_mcq(ibcq); - __be32 doorbell[2]; - u32 sn; - __be32 ci; - - sn = cq->arm_sn & 3; - ci = cpu_to_be32(cq->cons_index); + __be32 db_rec[2]; + u32 dbhi; + u32 sn = cq->arm_sn & 3; - doorbell[0] = ci; - doorbell[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) | - ((flags & IB_CQ_SOLICITED_MASK) == - IB_CQ_SOLICITED ? 1 : 2)); + db_rec[0] = cpu_to_be32(cq->cons_index); + db_rec[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) | + ((flags & IB_CQ_SOLICITED_MASK) == + IB_CQ_SOLICITED ? 1 : 2)); - mthca_write_db_rec(doorbell, cq->arm_db); + mthca_write_db_rec(db_rec, cq->arm_db); /* * Make sure that the doorbell record in host memory is @@ -770,14 +757,12 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) */ wmb(); - doorbell[0] = cpu_to_be32((sn << 28) | - ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? - MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL : - MTHCA_ARBEL_CQ_DB_REQ_NOT) | - cq->cqn); - doorbell[1] = ci; + dbhi = (sn << 28) | + ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? + MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL : + MTHCA_ARBEL_CQ_DB_REQ_NOT) | cq->cqn; - mthca_write64(doorbell, + mthca_write64(dbhi, cq->cons_index, to_mdev(ibcq->device)->kar + MTHCA_CQ_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->doorbell_lock)); diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h index dd9a44d170c..b374dc395be 100644 --- a/drivers/infiniband/hw/mthca/mthca_doorbell.h +++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h @@ -58,10 +58,10 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest) __raw_writeq((__force u64) val, dest); } -static inline void mthca_write64(__be32 val[2], void __iomem *dest, +static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest, spinlock_t *doorbell_lock) { - __raw_writeq(*(u64 *) val, dest); + __raw_writeq((__force u64) cpu_to_be64((u64) hi << 32 | lo), dest); } static inline void mthca_write_db_rec(__be32 val[2], __be32 *db) @@ -87,14 +87,17 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest) __raw_writel(((__force u32 *) &val)[1], dest + 4); } -static inline void mthca_write64(__be32 val[2], void __iomem *dest, +static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest, spinlock_t *doorbell_lock) { unsigned long flags; + hi = (__force u32) cpu_to_be32(hi); + lo = (__force u32) cpu_to_be32(lo); + spin_lock_irqsave(doorbell_lock, flags); - __raw_writel((__force u32) val[0], dest); - __raw_writel((__force u32) val[1], dest + 4); + __raw_writel(hi, dest); + __raw_writel(lo, dest + 4); spin_unlock_irqrestore(doorbell_lock, flags); } diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 8592b26dc4e..b29de51b7f3 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -173,11 +173,6 @@ static inline u64 async_mask(struct mthca_dev *dev) static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) { - __be32 doorbell[2]; - - doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn); - doorbell[1] = cpu_to_be32(ci & (eq->nent - 1)); - /* * This barrier makes sure that all updates to ownership bits * done by set_eqe_hw() hit memory before the consumer index @@ -187,7 +182,7 @@ static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u * having set_eqe_hw() overwrite the owner field. */ wmb(); - mthca_write64(doorbell, + mthca_write64(MTHCA_EQ_DB_SET_CI | eq->eqn, ci & (eq->nent - 1), dev->kar + MTHCA_EQ_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); } @@ -212,12 +207,7 @@ static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn) { - __be32 doorbell[2]; - - doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_REQ_NOT | eqn); - doorbell[1] = 0; - - mthca_write64(doorbell, + mthca_write64(MTHCA_EQ_DB_REQ_NOT | eqn, 0, dev->kar + MTHCA_EQ_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); } @@ -230,12 +220,7 @@ static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask) static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn) { if (!mthca_is_memfree(dev)) { - __be32 doorbell[2]; - - doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_DISARM_CQ | eqn); - doorbell[1] = cpu_to_be32(cqn); - - mthca_write64(doorbell, + mthca_write64(MTHCA_EQ_DB_DISARM_CQ | eqn, cqn, dev->kar + MTHCA_EQ_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); } diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index df01b2026a6..0e5461c6573 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1799,15 +1799,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, out: if (likely(nreq)) { - __be32 doorbell[2]; - - doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) + - qp->send_wqe_offset) | f0 | op0); - doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0); - wmb(); - mthca_write64(doorbell, + mthca_write64(((qp->sq.next_ind << qp->sq.wqe_shift) + + qp->send_wqe_offset) | f0 | op0, + (qp->qpn << 8) | size0, dev->kar + MTHCA_SEND_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); /* @@ -1829,7 +1825,6 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); - __be32 doorbell[2]; unsigned long flags; int err = 0; int nreq; @@ -1907,13 +1902,10 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { nreq = 0; - doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); - doorbell[1] = cpu_to_be32(qp->qpn << 8); - wmb(); - mthca_write64(doorbell, - dev->kar + MTHCA_RECEIVE_DOORBELL, + mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0, + qp->qpn << 8, dev->kar + MTHCA_RECEIVE_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); qp->rq.next_ind = ind; @@ -1923,13 +1915,10 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, out: if (likely(nreq)) { - doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); - doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq); - wmb(); - mthca_write64(doorbell, - dev->kar + MTHCA_RECEIVE_DOORBELL, + mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0, + qp->qpn << 8 | nreq, dev->kar + MTHCA_RECEIVE_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); } @@ -1951,7 +1940,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); - __be32 doorbell[2]; + u32 dbhi; void *wqe; void *prev_wqe; unsigned long flags; @@ -1981,10 +1970,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) { nreq = 0; - doorbell[0] = cpu_to_be32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) | - ((qp->sq.head & 0xffff) << 8) | - f0 | op0); - doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0); + dbhi = (MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) | + ((qp->sq.head & 0xffff) << 8) | f0 | op0; qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB; @@ -2000,7 +1987,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, * write MMIO send doorbell. */ wmb(); - mthca_write64(doorbell, + + mthca_write64(dbhi, (qp->qpn << 8) | size0, dev->kar + MTHCA_SEND_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); } @@ -2154,10 +2142,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, out: if (likely(nreq)) { - doorbell[0] = cpu_to_be32((nreq << 24) | - ((qp->sq.head & 0xffff) << 8) | - f0 | op0); - doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0); + dbhi = (nreq << 24) | ((qp->sq.head & 0xffff) << 8) | f0 | op0; qp->sq.head += nreq; @@ -2173,8 +2158,8 @@ out: * write MMIO send doorbell. */ wmb(); - mthca_write64(doorbell, - dev->kar + MTHCA_SEND_DOORBELL, + + mthca_write64(dbhi, (qp->qpn << 8) | size0, dev->kar + MTHCA_SEND_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); } diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 3f58c11a62b..553d681f681 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -491,7 +491,6 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, { struct mthca_dev *dev = to_mdev(ibsrq->device); struct mthca_srq *srq = to_msrq(ibsrq); - __be32 doorbell[2]; unsigned long flags; int err = 0; int first_ind; @@ -563,16 +562,13 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { nreq = 0; - doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift); - doorbell[1] = cpu_to_be32(srq->srqn << 8); - /* * Make sure that descriptors are written * before doorbell is rung. */ wmb(); - mthca_write64(doorbell, + mthca_write64(first_ind << srq->wqe_shift, srq->srqn << 8, dev->kar + MTHCA_RECEIVE_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); @@ -581,16 +577,13 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, } if (likely(nreq)) { - doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift); - doorbell[1] = cpu_to_be32((srq->srqn << 8) | nreq); - /* * Make sure that descriptors are written before * doorbell is rung. */ wmb(); - mthca_write64(doorbell, + mthca_write64(first_ind << srq->wqe_shift, (srq->srqn << 8) | nreq, dev->kar + MTHCA_RECEIVE_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); } -- cgit v1.2.3