From 8b8c8bca3a63073bac20f0fca178e00fdf7f5a09 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sat, 19 May 2007 08:51:53 -0700 Subject: IB/ipath: Fix potential deadlock with multicast spinlocks Lockdep found the following potential deadlock between mcast_lock and n_mcast_grps_lock: mcast_lock is taken from both interrupt context and process context, so spin_lock_irqsave() must be used to take it. n_mcast_grps_lock is only taken from process context, so at first it seems safe to take it with plain spin_lock(); however, it also nests inside mcast_lock, and hence we could deadlock: cpu A cpu B ipath_mcast_add(): spin_lock_irq(&mcast_lock); ipath_mcast_detach(): spin_lock(&n_mcast_grps_lock); ipath_mcast_find(): spin_lock_irqsave(&mcast_lock); spin_lock(&n_mcast_grps_lock); Fix this by using spin_lock_irq() to take n_mcast_grps_lock. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_verbs_mcast.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c index 085e28b939e..dd691cfa507 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c @@ -165,10 +165,9 @@ static int ipath_mcast_add(struct ipath_ibdev *dev, { struct rb_node **n = &mcast_tree.rb_node; struct rb_node *pn = NULL; - unsigned long flags; int ret; - spin_lock_irqsave(&mcast_lock, flags); + spin_lock_irq(&mcast_lock); while (*n) { struct ipath_mcast *tmcast; @@ -228,7 +227,7 @@ static int ipath_mcast_add(struct ipath_ibdev *dev, ret = 0; bail: - spin_unlock_irqrestore(&mcast_lock, flags); + spin_unlock_irq(&mcast_lock); return ret; } @@ -289,17 +288,16 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) struct ipath_mcast *mcast = NULL; struct ipath_mcast_qp *p, *tmp; struct rb_node *n; - unsigned long flags; int last = 0; int ret; - spin_lock_irqsave(&mcast_lock, flags); + spin_lock_irq(&mcast_lock); /* Find the GID in the mcast table. */ n = mcast_tree.rb_node; while (1) { if (n == NULL) { - spin_unlock_irqrestore(&mcast_lock, flags); + spin_unlock_irq(&mcast_lock); ret = -EINVAL; goto bail; } @@ -334,7 +332,7 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) break; } - spin_unlock_irqrestore(&mcast_lock, flags); + spin_unlock_irq(&mcast_lock); if (p) { /* @@ -348,9 +346,9 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) atomic_dec(&mcast->refcount); wait_event(mcast->wait, !atomic_read(&mcast->refcount)); ipath_mcast_free(mcast); - spin_lock(&dev->n_mcast_grps_lock); + spin_lock_irq(&dev->n_mcast_grps_lock); dev->n_mcast_grps_allocated--; - spin_unlock(&dev->n_mcast_grps_lock); + spin_unlock_irq(&dev->n_mcast_grps_lock); } ret = 0; -- cgit v1.2.3 From bd5a6ccc0e6d8eed3047b4af0e5c1e7168869cd8 Mon Sep 17 00:00:00 2001 From: Hoang-Nam Nguyen Date: Wed, 16 May 2007 14:50:55 +0200 Subject: IB/ehca: Return proper error code if register_mr fails Set the return code of ehca_register_mr() to ENOMEM if the corresponding firmware call fails due to out of resources. Some other error codes were explicitly mapped to EINVAL -- just remove those cases so they get mapped to the default case, which already returns EINVAL anyway. Signed-off-by: Hoang-Nam Nguyen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_mrmw.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index 84c5bb49856..add79bd44e3 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -2050,13 +2050,10 @@ int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc) switch (hipz_rc) { case H_SUCCESS: /* successful completion */ return 0; - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RT_PARM: /* invalid resource type */ case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ - case H_MLENGTH_PARM: /* invalid memory length */ - case H_MEM_ACCESS_PARM: /* invalid access controls */ case H_CONSTRAINED: /* resource constraint */ - return -EINVAL; + case H_NO_MEM: + return -ENOMEM; case H_BUSY: /* long busy */ return -EBUSY; default: -- cgit v1.2.3 From de57c9f102ad7bdc8afa5a1560748cf4f1c18b8e Mon Sep 17 00:00:00 2001 From: Ali Ayoub Date: Thu, 17 May 2007 20:58:30 +0300 Subject: IB/mthca: Fix use-after-free on device restart Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 773145e2994..aa563e61de6 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -1250,12 +1250,14 @@ static void __mthca_remove_one(struct pci_dev *pdev) int __mthca_restart_one(struct pci_dev *pdev) { struct mthca_dev *mdev; + int hca_type; mdev = pci_get_drvdata(pdev); if (!mdev) return -ENODEV; + hca_type = mdev->hca_type; __mthca_remove_one(pdev); - return __mthca_init_one(pdev, mdev->hca_type); + return __mthca_init_one(pdev, hca_type); } static int __devinit mthca_init_one(struct pci_dev *pdev, -- cgit v1.2.3 From 1f8f7b7a7b885a0041a21b3d93c507269baf57c8 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 17 May 2007 16:32:39 +0300 Subject: IB/mlx4: Fix check of max_qp_dest_rdma in modify QP max_qp_dest_rdma is already in natural units - no need to shift. This was discovered by a test that deliberately requests more outstanding atomic operation than the device supports. Found by Sagi Rotem at Mellanox. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 5cd70690845..9c362fa7146 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -694,7 +694,7 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && - attr->max_dest_rd_atomic > 1 << dev->dev->caps.max_qp_dest_rdma) { + attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) { goto out; } -- cgit v1.2.3 From 3f37cae6944de9d380c83f717f73d91ec6255d98 Mon Sep 17 00:00:00 2001 From: Rolf Manderscheid Date: Thu, 17 May 2007 09:45:48 -0600 Subject: IB/mthca: Set GRH:HopLimit when building MLX headers Global CM packets used by rmda_cm were being sent with a GRH:hopLimit of zero, causing them to be dropped by the router. The problem is a missing initialization of the hop_limit field in mthca_read_ah(), which was called by build_mlx_header() when sending a MAD on QP1. Signed-off-by: Rolf Manderscheid Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_av.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index 27caf3b0648..4b111a852ff 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -279,6 +279,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, (be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20) & 0xff; header->grh.flow_label = ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff); + header->grh.hop_limit = ah->av->hop_limit; ib_get_cached_gid(&dev->ib_dev, be32_to_cpu(ah->av->port_pd) >> 24, ah->av->gid_index % dev->limits.gid_table_len, -- cgit v1.2.3 From 1526130351b31c792ced90c6c5ee08df955696c1 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sat, 19 May 2007 08:51:57 -0700 Subject: IB/mlx4: Set GRH:HopLimit when sending globally routed MADs This is the same issue discovered in mthca by Rolf Manderscheid . Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 9c362fa7146..0cf8b95128f 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -952,6 +952,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff; sqp->ud_header.grh.flow_label = ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff); + sqp->ud_header.grh.hop_limit = ah->av.hop_limit; ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24, ah->av.gid_index, &sqp->ud_header.grh.source_gid); memcpy(sqp->ud_header.grh.destination_gid.raw, -- cgit v1.2.3 From b18aad7150c85cc86a66be8a1c744b63b41b36e0 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 14 May 2007 07:26:51 +0300 Subject: IB/mthca: Fix RESET to ERROR transition According to the IB spec, a QP can be moved from RESET to the ERROR state, but mthca firmware does not support this and returns an error if we try. Work around this FW limitation by moving the QP from RESET to INIT with dummy parameters and then transitioning from INIT to ERROR. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 158 ++++++++++++++++++++------------- 1 file changed, 98 insertions(+), 60 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 72fabb822f1..a20e2f0c9a5 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -295,7 +295,7 @@ static int to_mthca_st(int transport) } } -static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr, +static void store_attrs(struct mthca_sqp *sqp, const struct ib_qp_attr *attr, int attr_mask) { if (attr_mask & IB_QP_PKEY_INDEX) @@ -327,7 +327,7 @@ static void init_port(struct mthca_dev *dev, int port) mthca_warn(dev, "INIT_IB returned status %02x.\n", status); } -static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr, +static __be32 get_hw_access_flags(struct mthca_qp *qp, const struct ib_qp_attr *attr, int attr_mask) { u8 dest_rd_atomic; @@ -510,7 +510,7 @@ out: return err; } -static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah, +static int mthca_path_set(struct mthca_dev *dev, const struct ib_ah_attr *ah, struct mthca_qp_path *path, u8 port) { path->g_mylmc = ah->src_path_bits & 0x7f; @@ -538,12 +538,12 @@ static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah, return 0; } -int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, - struct ib_udata *udata) +static int __mthca_modify_qp(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, int attr_mask, + enum ib_qp_state cur_state, enum ib_qp_state new_state) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); - enum ib_qp_state cur_state, new_state; struct mthca_mailbox *mailbox; struct mthca_qp_param *qp_param; struct mthca_qp_context *qp_context; @@ -551,60 +551,6 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, u8 status; int err = -EINVAL; - mutex_lock(&qp->mutex); - - if (attr_mask & IB_QP_CUR_STATE) { - cur_state = attr->cur_qp_state; - } else { - spin_lock_irq(&qp->sq.lock); - spin_lock(&qp->rq.lock); - cur_state = qp->state; - spin_unlock(&qp->rq.lock); - spin_unlock_irq(&qp->sq.lock); - } - - new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; - - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) { - mthca_dbg(dev, "Bad QP transition (transport %d) " - "%d->%d with attr 0x%08x\n", - qp->transport, cur_state, new_state, - attr_mask); - goto out; - } - - if (cur_state == new_state && cur_state == IB_QPS_RESET) { - err = 0; - goto out; - } - - if ((attr_mask & IB_QP_PKEY_INDEX) && - attr->pkey_index >= dev->limits.pkey_table_len) { - mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n", - attr->pkey_index, dev->limits.pkey_table_len-1); - goto out; - } - - if ((attr_mask & IB_QP_PORT) && - (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) { - mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num); - goto out; - } - - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && - attr->max_rd_atomic > dev->limits.max_qp_init_rdma) { - mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n", - attr->max_rd_atomic, dev->limits.max_qp_init_rdma); - goto out; - } - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && - attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) { - mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n", - attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift); - goto out; - } - mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) { err = PTR_ERR(mailbox); @@ -891,6 +837,98 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, out_mailbox: mthca_free_mailbox(dev, mailbox); +out: + return err; +} + +static const struct ib_qp_attr dummy_init_attr = { .port_num = 1 }; +static const int dummy_init_attr_mask[] = { + [IB_QPT_UD] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_RC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), +}; + +int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_qp *qp = to_mqp(ibqp); + enum ib_qp_state cur_state, new_state; + int err = -EINVAL; + + mutex_lock(&qp->mutex); + if (attr_mask & IB_QP_CUR_STATE) { + cur_state = attr->cur_qp_state; + } else { + spin_lock_irq(&qp->sq.lock); + spin_lock(&qp->rq.lock); + cur_state = qp->state; + spin_unlock(&qp->rq.lock); + spin_unlock_irq(&qp->sq.lock); + } + + new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) { + mthca_dbg(dev, "Bad QP transition (transport %d) " + "%d->%d with attr 0x%08x\n", + qp->transport, cur_state, new_state, + attr_mask); + goto out; + } + + if ((attr_mask & IB_QP_PKEY_INDEX) && + attr->pkey_index >= dev->limits.pkey_table_len) { + mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n", + attr->pkey_index, dev->limits.pkey_table_len-1); + goto out; + } + + if ((attr_mask & IB_QP_PORT) && + (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) { + mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num); + goto out; + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && + attr->max_rd_atomic > dev->limits.max_qp_init_rdma) { + mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n", + attr->max_rd_atomic, dev->limits.max_qp_init_rdma); + goto out; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && + attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) { + mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n", + attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift); + goto out; + } + + if (cur_state == new_state && cur_state == IB_QPS_RESET) { + err = 0; + goto out; + } + + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) { + err = __mthca_modify_qp(ibqp, &dummy_init_attr, + dummy_init_attr_mask[ibqp->qp_type], + IB_QPS_RESET, IB_QPS_INIT); + if (err) + goto out; + cur_state = IB_QPS_INIT; + } + + err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); out: mutex_unlock(&qp->mutex); -- cgit v1.2.3 From 65adfa911a3522c1e40e55afd472dd571dc2431b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 14 May 2007 07:26:51 +0300 Subject: IB/mlx4: Fix RESET to RESET and RESET to ERROR transitions According to the IB spec, a QP can be moved from RESET back to RESET or to the ERROR state, but mlx4 firmware does not support this and returns an error if we try. Fix the RESET to RESET transition by just returning 0 without doing anything, and fix RESET to ERROR by moving the QP from RESET to INIT with dummy parameters and then transitioning from INIT to ERROR. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 115 ++++++++++++++++++++++++++++------------ 1 file changed, 80 insertions(+), 35 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 0cf8b95128f..bd28af5753d 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -573,7 +573,7 @@ static int to_mlx4_st(enum ib_qp_type type) } } -static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, struct ib_qp_attr *attr, +static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr, int attr_mask) { u8 dest_rd_atomic; @@ -603,7 +603,7 @@ static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, struct ib_qp_attr *att return cpu_to_be32(hw_access_flags); } -static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, struct ib_qp_attr *attr, +static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr, int attr_mask) { if (attr_mask & IB_QP_PKEY_INDEX) @@ -619,7 +619,7 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port) path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6); } -static int mlx4_set_path(struct mlx4_ib_dev *dev, struct ib_ah_attr *ah, +static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, struct mlx4_qp_path *path, u8 port) { path->grh_mylmc = ah->src_path_bits & 0x7f; @@ -655,14 +655,14 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, struct ib_ah_attr *ah, return 0; } -int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata) +static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, int attr_mask, + enum ib_qp_state cur_state, enum ib_qp_state new_state) { struct mlx4_ib_dev *dev = to_mdev(ibqp->device); struct mlx4_ib_qp *qp = to_mqp(ibqp); struct mlx4_qp_context *context; enum mlx4_qp_optpar optpar = 0; - enum ib_qp_state cur_state, new_state; int sqd_event; int err = -EINVAL; @@ -670,34 +670,6 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (!context) return -ENOMEM; - mutex_lock(&qp->mutex); - - cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; - new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; - - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) - goto out; - - if ((attr_mask & IB_QP_PKEY_INDEX) && - attr->pkey_index >= dev->dev->caps.pkey_table_len) { - goto out; - } - - if ((attr_mask & IB_QP_PORT) && - (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) { - goto out; - } - - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && - attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) { - goto out; - } - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && - attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) { - goto out; - } - context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | (to_mlx4_st(ibqp->qp_type) << 16)); context->flags |= cpu_to_be32(1 << 8); /* DE? */ @@ -920,11 +892,84 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } out: - mutex_unlock(&qp->mutex); kfree(context); return err; } +static const struct ib_qp_attr mlx4_ib_qp_attr = { .port_num = 1 }; +static const int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1] = { + [IB_QPT_UD] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_RC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), +}; + +int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(ibqp->device); + struct mlx4_ib_qp *qp = to_mqp(ibqp); + enum ib_qp_state cur_state, new_state; + int err = -EINVAL; + + mutex_lock(&qp->mutex); + + cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; + new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) + goto out; + + if ((attr_mask & IB_QP_PKEY_INDEX) && + attr->pkey_index >= dev->dev->caps.pkey_table_len) { + goto out; + } + + if ((attr_mask & IB_QP_PORT) && + (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) { + goto out; + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && + attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) { + goto out; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && + attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) { + goto out; + } + + if (cur_state == new_state && cur_state == IB_QPS_RESET) { + err = 0; + goto out; + } + + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) { + err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr, + mlx4_ib_qp_attr_mask_table[ibqp->qp_type], + IB_QPS_RESET, IB_QPS_INIT); + if (err) + goto out; + cur_state = IB_QPS_INIT; + } + + err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); + +out: + mutex_unlock(&qp->mutex); + return err; +} + static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, void *wqe) { -- cgit v1.2.3 From 59b0ed121297b57abb2352bdc8313959e7cb5635 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sat, 19 May 2007 08:51:58 -0700 Subject: IB/mlx4: Fix check of opcode in mlx4_ib_post_send() wr->opcode is invalid if it's >= ARRAY_SIZE(mlx4_ib_opcode), not just strictly >. This was spotted by the Coverity checker (CID 1643). Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index bd28af5753d..5706f988e2e 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1238,7 +1238,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, */ wmb(); - if (wr->opcode < 0 || wr->opcode > ARRAY_SIZE(mlx4_ib_opcode)) { + if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) { err = -EINVAL; goto out; } -- cgit v1.2.3 From 2446304dd687488c054d0437f2aeef1ef2bfbd02 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 17 May 2007 10:32:41 +0300 Subject: IB/mlx4: Pass send queue sizes from userspace to kernel Pass the number of WQEs for the send queue and their size from userspace to the kernel to avoid having to keep the QP size calculations in sync between the kernel driver and libmlx4. This fixes a bug seen with the current mlx4_ib driver and current libmlx4 caused by a difference in the calculated sizes for SQ WQEs. Also, this gives more flexibility for userspace to experiment with using multiple WQE BBs for a single SQ WQE. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 63 +++++++++++++++++++++++++++++---------- drivers/infiniband/hw/mlx4/user.h | 5 +++- 2 files changed, 51 insertions(+), 17 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 5706f988e2e..a824bc5f79f 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -188,14 +188,32 @@ static int send_wqe_overhead(enum ib_qp_type type) } } -static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - enum ib_qp_type type, struct mlx4_ib_qp *qp) +static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, + struct mlx4_ib_qp *qp) { - /* Sanity check QP size before proceeding */ + /* Sanity check RQ size before proceeding */ + if (cap->max_recv_wr > dev->dev->caps.max_wqes || + cap->max_recv_sge > dev->dev->caps.max_rq_sg) + return -EINVAL; + + qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0; + + qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge * + sizeof (struct mlx4_wqe_data_seg))); + qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg); + + cap->max_recv_wr = qp->rq.max; + cap->max_recv_sge = qp->rq.max_gs; + + return 0; +} + +static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, + enum ib_qp_type type, struct mlx4_ib_qp *qp) +{ + /* Sanity check SQ size before proceeding */ if (cap->max_send_wr > dev->dev->caps.max_wqes || - cap->max_recv_wr > dev->dev->caps.max_wqes || cap->max_send_sge > dev->dev->caps.max_sq_sg || - cap->max_recv_sge > dev->dev->caps.max_rq_sg || cap->max_inline_data + send_wqe_overhead(type) + sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) return -EINVAL; @@ -208,12 +226,7 @@ static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) return -EINVAL; - qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0; - qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 0; - - qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge * - sizeof (struct mlx4_wqe_data_seg))); - qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg); + qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 1; qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg), @@ -233,16 +246,26 @@ static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, qp->sq.offset = 0; } - cap->max_send_wr = qp->sq.max; - cap->max_recv_wr = qp->rq.max; - cap->max_send_sge = qp->sq.max_gs; - cap->max_recv_sge = qp->rq.max_gs; + cap->max_send_wr = qp->sq.max; + cap->max_send_sge = qp->sq.max_gs; cap->max_inline_data = (1 << qp->sq.wqe_shift) - send_wqe_overhead(type) - sizeof (struct mlx4_wqe_inline_seg); return 0; } +static int set_user_sq_size(struct mlx4_ib_qp *qp, + struct mlx4_ib_create_qp *ucmd) +{ + qp->sq.max = 1 << ucmd->log_sq_bb_count; + qp->sq.wqe_shift = ucmd->log_sq_stride; + + qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) + + (qp->sq.max << qp->sq.wqe_shift); + + return 0; +} + static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp) @@ -264,7 +287,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->sq.head = 0; qp->sq.tail = 0; - err = set_qp_size(dev, &init_attr->cap, init_attr->qp_type, qp); + err = set_rq_size(dev, &init_attr->cap, qp); if (err) goto err; @@ -276,6 +299,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; } + err = set_user_sq_size(qp, &ucmd); + if (err) + goto err; + qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, qp->buf_size, 0); if (IS_ERR(qp->umem)) { @@ -297,6 +324,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_mtt; } else { + err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp); + if (err) + goto err; + err = mlx4_ib_db_alloc(dev, &qp->db, 0); if (err) goto err; diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h index 5b8eddc9fa8..88c72d56368 100644 --- a/drivers/infiniband/hw/mlx4/user.h +++ b/drivers/infiniband/hw/mlx4/user.h @@ -39,7 +39,7 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define MLX4_IB_UVERBS_ABI_VERSION 1 +#define MLX4_IB_UVERBS_ABI_VERSION 2 /* * Make sure that all structs defined in this file remain laid out so @@ -87,6 +87,9 @@ struct mlx4_ib_create_srq_resp { struct mlx4_ib_create_qp { __u64 buf_addr; __u64 db_addr; + __u8 log_sq_bb_count; + __u8 log_sq_stride; + __u8 reserved[6]; }; #endif /* MLX4_IB_USER_H */ -- cgit v1.2.3 From 56a8c8b6ac4d6edba5153d17730aaf96ba8f1f8c Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sun, 20 May 2007 20:19:24 -0700 Subject: IB/mlx4: Check if SRQ is full when posting receive Make mlx4_post_srq_recv() fail if the SRQ is full (head == tail). Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/srq.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 42ab4a801d6..12fac1c8989 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -297,6 +297,12 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, break; } + if (unlikely(srq->head == srq->tail)) { + err = -ENOMEM; + *bad_wr = wr; + break; + } + srq->wrid[srq->head] = wr->wr_id; next = get_wqe(srq, srq->head); -- cgit v1.2.3