diff options
-rw-r--r-- | drivers/infiniband/hw/mlx4/qp.c | 42 | ||||
-rw-r--r-- | include/linux/mlx4/qp.h | 4 |
2 files changed, 43 insertions, 3 deletions
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 355a31f9c03..28a08bdd180 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -192,6 +192,8 @@ static int send_wqe_overhead(enum ib_qp_type type) case IB_QPT_GSI: return sizeof (struct mlx4_wqe_ctrl_seg) + ALIGN(MLX4_IB_UD_HEADER_SIZE + + DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE, + MLX4_INLINE_ALIGN) * sizeof (struct mlx4_wqe_inline_seg), sizeof (struct mlx4_wqe_data_seg)) + ALIGN(4 + @@ -1049,6 +1051,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, u16 pkey; int send_size; int header_size; + int spc; int i; send_size = 0; @@ -1124,10 +1127,43 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, printk("\n"); } - inl->byte_count = cpu_to_be32(1 << 31 | header_size); - memcpy(inl + 1, sqp->header_buf, header_size); + /* + * Inline data segments may not cross a 64 byte boundary. If + * our UD header is bigger than the space available up to the + * next 64 byte boundary in the WQE, use two inline data + * segments to hold the UD header. + */ + spc = MLX4_INLINE_ALIGN - + ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); + if (header_size <= spc) { + inl->byte_count = cpu_to_be32(1 << 31 | header_size); + memcpy(inl + 1, sqp->header_buf, header_size); + i = 1; + } else { + inl->byte_count = cpu_to_be32(1 << 31 | spc); + memcpy(inl + 1, sqp->header_buf, spc); + + inl = (void *) (inl + 1) + spc; + memcpy(inl + 1, sqp->header_buf + spc, header_size - spc); + /* + * Need a barrier here to make sure all the data is + * visible before the byte_count field is set. + * Otherwise the HCA prefetcher could grab the 64-byte + * chunk with this inline segment and get a valid (!= + * 0xffffffff) byte count but stale data, and end up + * generating a packet with bad headers. + * + * The first inline segment's byte_count field doesn't + * need a barrier, because it comes after a + * control/MLX segment and therefore is at an offset + * of 16 mod 64. + */ + wmb(); + inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc)); + i = 2; + } - return ALIGN(sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); + return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); } static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq) diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 9eeb61adf6a..10c57d27914 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -269,6 +269,10 @@ struct mlx4_wqe_data_seg { __be64 addr; }; +enum { + MLX4_INLINE_ALIGN = 64, +}; + struct mlx4_wqe_inline_seg { __be32 byte_count; }; |