aboutsummaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/amso1100/c2_provider.c2
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c125
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.h13
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_resource.c36
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_wr.h21
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.c1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.h1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c173
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.h2
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_mem.c75
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c72
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h11
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c73
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes.h7
-rw-r--r--drivers/infiniband/hw/ehca/ehca_cq.c11
-rw-r--r--drivers/infiniband/hw/ehca/ehca_eq.c35
-rw-r--r--drivers/infiniband/hw/ehca/ehca_hca.c7
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.c4
-rw-r--r--drivers/infiniband/hw/ehca/ehca_main.c36
-rw-r--r--drivers/infiniband/hw/ehca/ehca_mrmw.c2
-rw-r--r--drivers/infiniband/hw/ehca/ehca_qp.c31
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c158
-rw-r--r--drivers/infiniband/hw/ipath/ipath_file_ops.c91
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba7220.c26
-rw-r--r--drivers/infiniband/hw/ipath/ipath_init_chip.c95
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c80
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h18
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mr.c3
-rw-r--r--drivers/infiniband/hw/ipath/ipath_qp.c237
-rw-r--r--drivers/infiniband/hw/ipath/ipath_rc.c291
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ruc.c332
-rw-r--r--drivers/infiniband/hw/ipath/ipath_sdma.c44
-rw-r--r--drivers/infiniband/hw/ipath/ipath_uc.c57
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ud.c66
-rw-r--r--drivers/infiniband/hw/ipath/ipath_user_sdma.h2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c178
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.h64
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c8
-rw-r--r--drivers/infiniband/hw/mlx4/doorbell.c2
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c3
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c2
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mr.c13
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c20
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.h1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_user.h10
-rw-r--r--drivers/infiniband/hw/nes/Kconfig1
-rw-r--r--drivers/infiniband/hw/nes/nes.h4
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c8
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c375
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.h19
-rw-r--r--drivers/infiniband/hw/nes/nes_nic.c180
-rw-r--r--drivers/infiniband/hw/nes/nes_utils.c10
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c4
54 files changed, 1979 insertions, 1163 deletions
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index 6af2c0f79a6..2acf9b62cf9 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -452,7 +452,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(-ENOMEM);
c2mr->pd = c2pd;
- c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+ c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(c2mr->umem)) {
err = PTR_ERR(c2mr->umem);
kfree(c2mr);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 66eb7030aea..3f441fc57c1 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -359,9 +359,10 @@ static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
cq->sw_wptr++;
}
-void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
+int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
{
u32 ptr;
+ int flushed = 0;
PDBG("%s wq %p cq %p\n", __func__, wq, cq);
@@ -369,8 +370,11 @@ void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__,
wq->rq_rptr, wq->rq_wptr, count);
ptr = wq->rq_rptr + count;
- while (ptr++ != wq->rq_wptr)
+ while (ptr++ != wq->rq_wptr) {
insert_recv_cqe(wq, cq);
+ flushed++;
+ }
+ return flushed;
}
static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
@@ -394,18 +398,21 @@ static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
cq->sw_wptr++;
}
-void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
+int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
{
__u32 ptr;
+ int flushed = 0;
struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2);
ptr = wq->sq_rptr + count;
- sqp += count;
+ sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
while (ptr != wq->sq_wptr) {
insert_sq_cqe(wq, cq, sqp);
- sqp++;
ptr++;
+ sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
+ flushed++;
}
+ return flushed;
}
/*
@@ -456,7 +463,8 @@ void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
ptr = cq->sw_rptr;
while (!Q_EMPTY(ptr, cq->sw_wptr)) {
cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
- if ((SQ_TYPE(*cqe) || (CQE_OPCODE(*cqe) == T3_READ_RESP)) &&
+ if ((SQ_TYPE(*cqe) ||
+ ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) &&
(CQE_QPID(*cqe) == wq->qpid))
(*count)++;
ptr++;
@@ -580,7 +588,7 @@ static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
* caller aquires the ctrl_qp lock before the call
*/
static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
- u32 len, void *data, int completion)
+ u32 len, void *data)
{
u32 i, nr_wqe, copy_len;
u8 *copy_data;
@@ -616,7 +624,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
flag = 0;
if (i == (nr_wqe - 1)) {
/* last WQE */
- flag = completion ? T3_COMPLETION_FLAG : 0;
+ flag = T3_COMPLETION_FLAG;
if (len % 32)
utx_len = len / 32 + 1;
else
@@ -675,21 +683,20 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
return 0;
}
-/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl, and pbl_size
- * OUT: stag index, actual pbl_size, pbl_addr allocated.
+/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr
+ * OUT: stag index
* TBD: shared memory region support
*/
static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
u32 *stag, u8 stag_state, u32 pdid,
enum tpt_mem_type type, enum tpt_mem_perm perm,
- u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl,
- u32 *pbl_size, u32 *pbl_addr)
+ u32 zbva, u64 to, u32 len, u8 page_size,
+ u32 pbl_size, u32 pbl_addr)
{
int err;
struct tpt_entry tpt;
u32 stag_idx;
u32 wptr;
- int rereg = (*stag != T3_STAG_UNSET);
stag_state = stag_state > 0;
stag_idx = (*stag) >> 8;
@@ -703,30 +710,8 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
__func__, stag_state, type, pdid, stag_idx);
- if (reset_tpt_entry)
- cxio_hal_pblpool_free(rdev_p, *pbl_addr, *pbl_size << 3);
- else if (!rereg) {
- *pbl_addr = cxio_hal_pblpool_alloc(rdev_p, *pbl_size << 3);
- if (!*pbl_addr) {
- return -ENOMEM;
- }
- }
-
mutex_lock(&rdev_p->ctrl_qp.lock);
- /* write PBL first if any - update pbl only if pbl list exist */
- if (pbl) {
-
- PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
- __func__, *pbl_addr, rdev_p->rnic_info.pbl_base,
- *pbl_size);
- err = cxio_hal_ctrl_qp_write_mem(rdev_p,
- (*pbl_addr >> 5),
- (*pbl_size << 3), pbl, 0);
- if (err)
- goto ret;
- }
-
/* write TPT entry */
if (reset_tpt_entry)
memset(&tpt, 0, sizeof(tpt));
@@ -741,23 +726,23 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
V_TPT_PAGE_SIZE(page_size));
tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
- cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, *pbl_addr)>>3));
+ cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
tpt.len = cpu_to_be32(len);
tpt.va_hi = cpu_to_be32((u32) (to >> 32));
tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
tpt.rsvd_bind_cnt_or_pstag = 0;
tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
- cpu_to_be32(V_TPT_PBL_SIZE((*pbl_size) >> 2));
+ cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
}
err = cxio_hal_ctrl_qp_write_mem(rdev_p,
stag_idx +
(rdev_p->rnic_info.tpt_base >> 5),
- sizeof(tpt), &tpt, 1);
+ sizeof(tpt), &tpt);
/* release the stag index to free pool */
if (reset_tpt_entry)
cxio_hal_put_stag(rdev_p->rscp, stag_idx);
-ret:
+
wptr = rdev_p->ctrl_qp.wptr;
mutex_unlock(&rdev_p->ctrl_qp.lock);
if (!err)
@@ -768,44 +753,67 @@ ret:
return err;
}
+int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
+ u32 pbl_addr, u32 pbl_size)
+{
+ u32 wptr;
+ int err;
+
+ PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
+ __func__, pbl_addr, rdev_p->rnic_info.pbl_base,
+ pbl_size);
+
+ mutex_lock(&rdev_p->ctrl_qp.lock);
+ err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3,
+ pbl);
+ wptr = rdev_p->ctrl_qp.wptr;
+ mutex_unlock(&rdev_p->ctrl_qp.lock);
+ if (err)
+ return err;
+
+ if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
+ SEQ32_GE(rdev_p->ctrl_qp.rptr,
+ wptr)))
+ return -ERESTARTSYS;
+
+ return 0;
+}
+
int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, __be64 *pbl, u32 *pbl_size,
- u32 *pbl_addr)
+ u8 page_size, u32 pbl_size, u32 pbl_addr)
{
*stag = T3_STAG_UNSET;
return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
- zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
+ zbva, to, len, page_size, pbl_size, pbl_addr);
}
int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, __be64 *pbl, u32 *pbl_size,
- u32 *pbl_addr)
+ u8 page_size, u32 pbl_size, u32 pbl_addr)
{
return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
- zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
+ zbva, to, len, page_size, pbl_size, pbl_addr);
}
int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
u32 pbl_addr)
{
- return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
- &pbl_size, &pbl_addr);
+ return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
+ pbl_size, pbl_addr);
}
int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
{
- u32 pbl_size = 0;
*stag = T3_STAG_UNSET;
return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
- NULL, &pbl_size, NULL);
+ 0, 0);
}
int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
{
- return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
- NULL, NULL);
+ return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
+ 0, 0);
}
int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
@@ -829,7 +837,8 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
wqe->mpaattrs = attr->mpaattrs;
wqe->qpcaps = attr->qpcaps;
wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
- wqe->flags = cpu_to_be32(attr->flags);
+ wqe->rqe_count = cpu_to_be16(attr->rqe_count);
+ wqe->flags_rtr_type = cpu_to_be16(attr->flags|V_RTR_TYPE(attr->rtr_type));
wqe->ord = cpu_to_be32(attr->ord);
wqe->ird = cpu_to_be32(attr->ird);
wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
@@ -1135,6 +1144,18 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) {
/*
+ * If this is an unsolicited read response, then the read
+ * was generated by the kernel driver as part of peer-2-peer
+ * connection setup. So ignore the completion.
+ */
+ if (!wq->oldest_read) {
+ if (CQE_STATUS(*hw_cqe))
+ wq->error = 1;
+ ret = -1;
+ goto skip_cqe;
+ }
+
+ /*
* Don't write to the HWCQ, so create a new read req CQE
* in local memory.
*/
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 99543d63470..6e128f6bab0 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -53,6 +53,7 @@
#define T3_MAX_PBL_SIZE 256
#define T3_MAX_RQ_SIZE 1024
#define T3_MAX_NUM_STAG (1<<15)
+#define T3_MAX_MR_SIZE 0x100000000ULL
#define T3_STAG_UNSET 0xffffffff
@@ -153,14 +154,14 @@ int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
struct cxio_ucontext *uctx);
int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
+int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
+ u32 pbl_addr, u32 pbl_size);
int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, __be64 *pbl, u32 *pbl_size,
- u32 *pbl_addr);
+ u8 page_size, u32 pbl_size, u32 pbl_addr);
int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, __be64 *pbl, u32 *pbl_size,
- u32 *pbl_addr);
+ u8 page_size, u32 pbl_size, u32 pbl_addr);
int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
u32 pbl_addr);
int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
@@ -172,8 +173,8 @@ u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
int __init cxio_hal_init(void);
void __exit cxio_hal_exit(void);
-void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
-void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
+int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
+int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
void cxio_flush_hw_cq(struct t3_cq *cq);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index 45ed4f25ef7..bd233c08765 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -250,7 +250,6 @@ void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
*/
#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */
-#define PBL_CHUNK 2*1024*1024
u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size)
{
@@ -267,14 +266,35 @@ void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p)
{
- unsigned long i;
+ unsigned pbl_start, pbl_chunk;
+
rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1);
- if (rdev_p->pbl_pool)
- for (i = rdev_p->rnic_info.pbl_base;
- i <= rdev_p->rnic_info.pbl_top - PBL_CHUNK + 1;
- i += PBL_CHUNK)
- gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1);
- return rdev_p->pbl_pool ? 0 : -ENOMEM;
+ if (!rdev_p->pbl_pool)
+ return -ENOMEM;
+
+ pbl_start = rdev_p->rnic_info.pbl_base;
+ pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1;
+
+ while (pbl_start < rdev_p->rnic_info.pbl_top) {
+ pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1,
+ pbl_chunk);
+ if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) {
+ PDBG("%s failed to add PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
+ if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) {
+ printk(KERN_WARNING MOD "%s: Failed to add all PBL chunks (%x/%x)\n",
+ __func__, pbl_start, rdev_p->rnic_info.pbl_top - pbl_start);
+ return 0;
+ }
+ pbl_chunk >>= 1;
+ } else {
+ PDBG("%s added PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
+ pbl_start += pbl_chunk;
+ }
+ }
+
+ return 0;
}
void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p)
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index 969d4d92845..f1a25a821a4 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -278,6 +278,17 @@ enum t3_qp_caps {
uP_RI_QP_STAG0_ENABLE = 0x10
} __attribute__ ((packed));
+enum rdma_init_rtr_types {
+ RTR_READ = 1,
+ RTR_WRITE = 2,
+ RTR_SEND = 3,
+};
+
+#define S_RTR_TYPE 2
+#define M_RTR_TYPE 0x3
+#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE)
+#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
+
struct t3_rdma_init_attr {
u32 tid;
u32 qpid;
@@ -293,7 +304,9 @@ struct t3_rdma_init_attr {
u32 ird;
u64 qp_dma_addr;
u32 qp_dma_size;
- u32 flags;
+ enum rdma_init_rtr_types rtr_type;
+ u16 flags;
+ u16 rqe_count;
u32 irs;
};
@@ -309,8 +322,8 @@ struct t3_rdma_init_wr {
u8 mpaattrs; /* 5 */
u8 qpcaps;
__be16 ulpdu_size;
- __be32 flags; /* bits 31-1 - reservered */
- /* bit 0 - set if RECV posted */
+ __be16 flags_rtr_type;
+ __be16 rqe_count;
__be32 ord; /* 6 */
__be32 ird;
__be64 qp_dma_addr; /* 7 */
@@ -324,7 +337,7 @@ struct t3_genbit {
};
enum rdma_init_wr_flags {
- RECVS_POSTED = (1<<0),
+ MPA_INITIATOR = (1<<0),
PRIV_QP = (1<<1),
};
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index 6ba4138c8ec..71554eacb13 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -83,6 +83,7 @@ static void rnic_init(struct iwch_dev *rnicp)
rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE;
rnicp->attr.max_pds = T3_MAX_NUM_PD - 1;
rnicp->attr.mem_pgsizes_bitmask = 0x7FFF; /* 4KB-128MB */
+ rnicp->attr.max_mr_size = T3_MAX_MR_SIZE;
rnicp->attr.can_resize_wq = 0;
rnicp->attr.max_rdma_reads_per_qp = 8;
rnicp->attr.max_rdma_read_resources =
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
index 9ad9b1e7c8c..d2409a505e8 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -66,6 +66,7 @@ struct iwch_rnic_attributes {
* size (4k)^i. Phys block list mode unsupported.
*/
u32 mem_pgsizes_bitmask;
+ u64 max_mr_size;
u8 can_resize_wq;
/*
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 72ca360c3db..c325c44807e 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -63,10 +63,14 @@ static char *states[] = {
NULL,
};
-static int ep_timeout_secs = 10;
+int peer2peer = 0;
+module_param(peer2peer, int, 0644);
+MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
+
+static int ep_timeout_secs = 60;
module_param(ep_timeout_secs, int, 0644);
MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
- "in seconds (default=10)");
+ "in seconds (default=60)");
static int mpa_rev = 1;
module_param(mpa_rev, int, 0644);
@@ -125,6 +129,12 @@ static void start_ep_timer(struct iwch_ep *ep)
static void stop_ep_timer(struct iwch_ep *ep)
{
PDBG("%s ep %p\n", __func__, ep);
+ if (!timer_pending(&ep->timer)) {
+ printk(KERN_ERR "%s timer stopped when its not running! ep %p state %u\n",
+ __func__, ep, ep->com.state);
+ WARN_ON(1);
+ return;
+ }
del_timer_sync(&ep->timer);
put_ep(&ep->com);
}
@@ -508,7 +518,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
skb_reset_transport_header(skb);
len = skb->len;
req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
- req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+ req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
req->wr_lo = htonl(V_WR_TID(ep->hwtid));
req->len = htonl(len);
req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
@@ -559,7 +569,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
set_arp_failure_handler(skb, arp_failure_discard);
skb_reset_transport_header(skb);
req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
- req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+ req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
req->wr_lo = htonl(V_WR_TID(ep->hwtid));
req->len = htonl(mpalen);
req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
@@ -611,7 +621,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
skb_reset_transport_header(skb);
len = skb->len;
req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
- req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+ req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
req->wr_lo = htonl(V_WR_TID(ep->hwtid));
req->len = htonl(len);
req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
@@ -879,6 +889,7 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
* the MPA header is valid.
*/
state_set(&ep->com, FPDU_MODE);
+ ep->mpa_attr.initiator = 1;
ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
@@ -901,8 +912,14 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
/* bind QP and TID with INIT_WR */
err = iwch_modify_qp(ep->com.qp->rhp,
ep->com.qp, mask, &attrs, 1);
- if (!err)
- goto out;
+ if (err)
+ goto err;
+
+ if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) {
+ iwch_post_zb_read(ep->com.qp);
+ }
+
+ goto out;
err:
abort_connection(ep, skb, GFP_KERNEL);
out:
@@ -995,6 +1012,7 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
* If we get here we have accumulated the entire mpa
* start reply message including private data.
*/
+ ep->mpa_attr.initiator = 0;
ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
@@ -1065,17 +1083,33 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
PDBG("%s ep %p credits %u\n", __func__, ep, credits);
- if (credits == 0)
+ if (credits == 0) {
+ PDBG(KERN_ERR "%s 0 credit ack ep %p state %u\n",
+ __func__, ep, state_read(&ep->com));
return CPL_RET_BUF_DONE;
+ }
+
BUG_ON(credits != 1);
- BUG_ON(ep->mpa_skb == NULL);
- kfree_skb(ep->mpa_skb);
- ep->mpa_skb = NULL;
dst_confirm(ep->dst);
- if (state_read(&ep->com) == MPA_REP_SENT) {
- ep->com.rpl_done = 1;
- PDBG("waking up ep %p\n", ep);
- wake_up(&ep->com.waitq);
+ if (!ep->mpa_skb) {
+ PDBG("%s rdma_init wr_ack ep %p state %u\n",
+ __func__, ep, state_read(&ep->com));
+ if (ep->mpa_attr.initiator) {
+ PDBG("%s initiator ep %p state %u\n",
+ __func__, ep, state_read(&ep->com));
+ if (peer2peer)
+ iwch_post_zb_read(ep->com.qp);
+ } else {
+ PDBG("%s responder ep %p state %u\n",
+ __func__, ep, state_read(&ep->com));
+ ep->com.rpl_done = 1;
+ wake_up(&ep->com.waitq);
+ }
+ } else {
+ PDBG("%s lsm ack ep %p state %u freeing skb\n",
+ __func__, ep, state_read(&ep->com));
+ kfree_skb(ep->mpa_skb);
+ ep->mpa_skb = NULL;
}
return CPL_RET_BUF_DONE;
}
@@ -1083,8 +1117,11 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
{
struct iwch_ep *ep = ctx;
+ unsigned long flags;
+ int release = 0;
PDBG("%s ep %p\n", __func__, ep);
+ BUG_ON(!ep);
/*
* We get 2 abort replies from the HW. The first one must
@@ -1095,9 +1132,22 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
return CPL_RET_BUF_DONE;
}
- close_complete_upcall(ep);
- state_set(&ep->com, DEAD);
- release_ep_resources(ep);
+ spin_lock_irqsave(&ep->com.lock, flags);
+ switch (ep->com.state) {
+ case ABORTING:
+ close_complete_upcall(ep);
+ __state_set(&ep->com, DEAD);
+ release = 1;
+ break;
+ default:
+ printk(KERN_ERR "%s ep %p state %d\n",
+ __func__, ep, ep->com.state);
+ break;
+ }
+ spin_unlock_irqrestore(&ep->com.lock, flags);
+
+ if (release)
+ release_ep_resources(ep);
return CPL_RET_BUF_DONE;
}
@@ -1470,7 +1520,8 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct sk_buff *rpl_skb;
struct iwch_qp_attributes attrs;
int ret;
- int state;
+ int release = 0;
+ unsigned long flags;
if (is_neg_adv_abort(req->status)) {
PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
@@ -1488,9 +1539,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
return CPL_RET_BUF_DONE;
}
- state = state_read(&ep->com);
- PDBG("%s ep %p state %u\n", __func__, ep, state);
- switch (state) {
+ spin_lock_irqsave(&ep->com.lock, flags);
+ PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state);
+ switch (ep->com.state) {
case CONNECTING:
break;
case MPA_REQ_WAIT:
@@ -1536,21 +1587,25 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
break;
case DEAD:
PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
+ spin_unlock_irqrestore(&ep->com.lock, flags);
return CPL_RET_BUF_DONE;
default:
BUG_ON(1);
break;
}
dst_confirm(ep->dst);
+ if (ep->com.state != ABORTING) {
+ __state_set(&ep->com, DEAD);
+ release = 1;
+ }
+ spin_unlock_irqrestore(&ep->com.lock, flags);
rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
if (!rpl_skb) {
printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
__func__);
- dst_release(ep->dst);
- l2t_release(L2DATA(ep->com.tdev), ep->l2t);
- put_ep(&ep->com);
- return CPL_RET_BUF_DONE;
+ release = 1;
+ goto out;
}
rpl_skb->priority = CPL_PRIORITY_DATA;
rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
@@ -1559,10 +1614,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
rpl->cmd = CPL_ABORT_NO_RST;
cxgb3_ofld_send(ep->com.tdev, rpl_skb);
- if (state != ABORTING) {
- state_set(&ep->com, DEAD);
+out:
+ if (release)
release_ep_resources(ep);
- }
return CPL_RET_BUF_DONE;
}
@@ -1596,8 +1650,8 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
release = 1;
break;
case ABORTING:
- break;
case DEAD:
+ break;
default:
BUG_ON(1);
break;
@@ -1661,15 +1715,18 @@ static void ep_timeout(unsigned long arg)
struct iwch_ep *ep = (struct iwch_ep *)arg;
struct iwch_qp_attributes attrs;
unsigned long flags;
+ int abort = 1;
spin_lock_irqsave(&ep->com.lock, flags);
PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
ep->com.state);
switch (ep->com.state) {
case MPA_REQ_SENT:
+ __state_set(&ep->com, ABORTING);
connect_reply_upcall(ep, -ETIMEDOUT);
break;
case MPA_REQ_WAIT:
+ __state_set(&ep->com, ABORTING);
break;
case CLOSING:
case MORIBUND:
@@ -1679,13 +1736,17 @@ static void ep_timeout(unsigned long arg)
ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
&attrs, 1);
}
+ __state_set(&ep->com, ABORTING);
break;
default:
- BUG();
+ printk(KERN_ERR "%s unexpected state ep %p state %u\n",
+ __func__, ep, ep->com.state);
+ WARN_ON(1);
+ abort = 0;
}
- __state_set(&ep->com, CLOSING);
spin_unlock_irqrestore(&ep->com.lock, flags);
- abort_connection(ep, NULL, GFP_ATOMIC);
+ if (abort)
+ abort_connection(ep, NULL, GFP_ATOMIC);
put_ep(&ep->com);
}
@@ -1762,16 +1823,19 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (err)
goto err;
+ /* if needed, wait for wr_ack */
+ if (iwch_rqes_posted(qp)) {
+ wait_event(ep->com.waitq, ep->com.rpl_done);
+ err = ep->com.rpl_err;
+ if (err)
+ goto err;
+ }
+
err = send_mpa_reply(ep, conn_param->private_data,
conn_param->private_data_len);
if (err)
goto err;
- /* wait for wr_ack */
- wait_event(ep->com.waitq, ep->com.rpl_done);
- err = ep->com.rpl_err;
- if (err)
- goto err;
state_set(&ep->com, FPDU_MODE);
established_upcall(ep);
@@ -1968,40 +2032,39 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
states[ep->com.state], abrupt);
- if (ep->com.state == DEAD) {
- PDBG("%s already dead ep %p\n", __func__, ep);
- goto out;
- }
-
- if (abrupt) {
- if (ep->com.state != ABORTING) {
- ep->com.state = ABORTING;
- close = 1;
- }
- goto out;
- }
-
switch (ep->com.state) {
case MPA_REQ_WAIT:
case MPA_REQ_SENT:
case MPA_REQ_RCVD:
case MPA_REP_SENT:
case FPDU_MODE:
- start_ep_timer(ep);
- ep->com.state = CLOSING;
close = 1;
+ if (abrupt)
+ ep->com.state = ABORTING;
+ else {
+ ep->com.state = CLOSING;
+ start_ep_timer(ep);
+ }
break;
case CLOSING:
- ep->com.state = MORIBUND;
close = 1;
+ if (abrupt) {
+ stop_ep_timer(ep);
+ ep->com.state = ABORTING;
+ } else
+ ep->com.state = MORIBUND;
break;
case MORIBUND:
+ case ABORTING:
+ case DEAD:
+ PDBG("%s ignoring disconnect ep %p state %u\n",
+ __func__, ep, ep->com.state);
break;
default:
BUG();
break;
}
-out:
+
spin_unlock_irqrestore(&ep->com.lock, flags);
if (close) {
if (abrupt)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index 2bb7fbdb3ff..d7c7e09f099 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -56,6 +56,7 @@
#define put_ep(ep) { \
PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
ep, atomic_read(&((ep)->kref.refcount))); \
+ WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \
kref_put(&((ep)->kref), __free_ep); \
}
@@ -225,5 +226,6 @@ int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, st
int __init iwch_cm_init(void);
void __exit iwch_cm_term(void);
+extern int peer2peer;
#endif /* _IWCH_CM_H_ */
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index 58c3d61bcd1..ec49a5cbdeb 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -35,17 +35,26 @@
#include <rdma/ib_verbs.h>
#include "cxio_hal.h"
+#include "cxio_resource.h"
#include "iwch.h"
#include "iwch_provider.h"
-int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
- struct iwch_mr *mhp,
- int shift,
- __be64 *page_list)
+static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
{
- u32 stag;
u32 mmid;
+ mhp->attr.state = 1;
+ mhp->attr.stag = stag;
+ mmid = stag >> 8;
+ mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+ insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
+ PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+}
+
+int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+ struct iwch_mr *mhp, int shift)
+{
+ u32 stag;
if (cxio_register_phys_mem(&rhp->rdev,
&stag, mhp->attr.pdid,
@@ -53,28 +62,21 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
mhp->attr.zbva,
mhp->attr.va_fbo,
mhp->attr.len,
- shift-12,
- page_list,
- &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
+ shift - 12,
+ mhp->attr.pbl_size, mhp->attr.pbl_addr))
return -ENOMEM;
- mhp->attr.state = 1;
- mhp->attr.stag = stag;
- mmid = stag >> 8;
- mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- insert_handle(rhp, &rhp->mmidr, mhp, mmid);
- PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+
+ iwch_finish_mem_reg(mhp, stag);
+
return 0;
}
int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp,
int shift,
- __be64 *page_list,
int npages)
{
u32 stag;
- u32 mmid;
-
/* We could support this... */
if (npages > mhp->attr.pbl_size)
@@ -87,19 +89,40 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
mhp->attr.zbva,
mhp->attr.va_fbo,
mhp->attr.len,
- shift-12,
- page_list,
- &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
+ shift - 12,
+ mhp->attr.pbl_size, mhp->attr.pbl_addr))
return -ENOMEM;
- mhp->attr.state = 1;
- mhp->attr.stag = stag;
- mmid = stag >> 8;
- mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- insert_handle(rhp, &rhp->mmidr, mhp, mmid);
- PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+
+ iwch_finish_mem_reg(mhp, stag);
+
+ return 0;
+}
+
+int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
+{
+ mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
+ npages << 3);
+
+ if (!mhp->attr.pbl_addr)
+ return -ENOMEM;
+
+ mhp->attr.pbl_size = npages;
+
return 0;
}
+void iwch_free_pbl(struct iwch_mr *mhp)
+{
+ cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
+ mhp->attr.pbl_size << 3);
+}
+
+int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
+{
+ return cxio_write_pbl(&mhp->rhp->rdev, pages,
+ mhp->attr.pbl_addr + (offset << 3), npages);
+}
+
int build_phys_page_list(struct ib_phys_buf *buffer_list,
int num_phys_buf,
u64 *iova_start,
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index ab4695c1dd5..8934178a23e 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -442,6 +442,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
mmid = mhp->attr.stag >> 8;
cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
mhp->attr.pbl_addr);
+ iwch_free_pbl(mhp);
remove_handle(rhp, &rhp->mmidr, mmid);
if (mhp->kva)
kfree((void *) (unsigned long) mhp->kva);
@@ -475,6 +476,8 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
if (!mhp)
return ERR_PTR(-ENOMEM);
+ mhp->rhp = rhp;
+
/* First check that we have enough alignment */
if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
ret = -EINVAL;
@@ -492,7 +495,17 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
if (ret)
goto err;
- mhp->rhp = rhp;
+ ret = iwch_alloc_pbl(mhp, npages);
+ if (ret) {
+ kfree(page_list);
+ goto err_pbl;
+ }
+
+ ret = iwch_write_pbl(mhp, page_list, npages, 0);
+ kfree(page_list);
+ if (ret)
+ goto err_pbl;
+
mhp->attr.pdid = php->pdid;
mhp->attr.zbva = 0;
@@ -502,12 +515,15 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
mhp->attr.len = (u32) total_size;
mhp->attr.pbl_size = npages;
- ret = iwch_register_mem(rhp, php, mhp, shift, page_list);
- kfree(page_list);
- if (ret) {
- goto err;
- }
+ ret = iwch_register_mem(rhp, php, mhp, shift);
+ if (ret)
+ goto err_pbl;
+
return &mhp->ibmr;
+
+err_pbl:
+ iwch_free_pbl(mhp);
+
err:
kfree(mhp);
return ERR_PTR(ret);
@@ -560,7 +576,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
return ret;
}
- ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages);
+ ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
kfree(page_list);
if (ret) {
return ret;
@@ -602,7 +618,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mhp)
return ERR_PTR(-ENOMEM);
- mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+ mhp->rhp = rhp;
+
+ mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(mhp->umem)) {
err = PTR_ERR(mhp->umem);
kfree(mhp);
@@ -615,10 +633,14 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
n += chunk->nents;
- pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
+ err = iwch_alloc_pbl(mhp, n);
+ if (err)
+ goto err;
+
+ pages = (__be64 *) __get_free_page(GFP_KERNEL);
if (!pages) {
err = -ENOMEM;
- goto err;
+ goto err_pbl;
}
i = n = 0;
@@ -630,25 +652,38 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
pages[i++] = cpu_to_be64(sg_dma_address(
&chunk->page_list[j]) +
mhp->umem->page_size * k);
+ if (i == PAGE_SIZE / sizeof *pages) {
+ err = iwch_write_pbl(mhp, pages, i, n);
+ if (err)
+ goto pbl_done;
+ n += i;
+ i = 0;
+ }
}
}
- mhp->rhp = rhp;
+ if (i)
+ err = iwch_write_pbl(mhp, pages, i, n);
+
+pbl_done:
+ free_page((unsigned long) pages);
+ if (err)
+ goto err_pbl;
+
mhp->attr.pdid = php->pdid;
mhp->attr.zbva = 0;
mhp->attr.perms = iwch_ib_to_tpt_access(acc);
mhp->attr.va_fbo = virt;
mhp->attr.page_size = shift - 12;
mhp->attr.len = (u32) length;
- mhp->attr.pbl_size = i;
- err = iwch_register_mem(rhp, php, mhp, shift, pages);
- kfree(pages);
+
+ err = iwch_register_mem(rhp, php, mhp, shift);
if (err)
- goto err;
+ goto err_pbl;
if (udata && !t3a_device(rhp)) {
uresp.pbl_addr = (mhp->attr.pbl_addr -
- rhp->rdev.rnic_info.pbl_base) >> 3;
+ rhp->rdev.rnic_info.pbl_base) >> 3;
PDBG("%s user resp pbl_addr 0x%x\n", __func__,
uresp.pbl_addr);
@@ -661,6 +696,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return &mhp->ibmr;
+err_pbl:
+ iwch_free_pbl(mhp);
+
err:
ib_umem_release(mhp->umem);
kfree(mhp);
@@ -998,7 +1036,7 @@ static int iwch_query_device(struct ib_device *ibdev,
props->device_cap_flags = dev->device_cap_flags;
props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
- props->max_mr_size = ~0ull;
+ props->max_mr_size = dev->attr.max_mr_size;
props->max_qp = dev->attr.max_qps;
props->max_qp_wr = dev->attr.max_wrs;
props->max_sge = dev->attr.max_sge_per_wr;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 61356f91109..836163fc542 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -118,6 +118,7 @@ enum IWCH_QP_FLAGS {
};
struct iwch_mpa_attributes {
+ u8 initiator;
u8 recv_marker_enabled;
u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. */
u8 crc_enabled;
@@ -322,6 +323,7 @@ enum iwch_qp_query_flags {
IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */
};
+u16 iwch_rqes_posted(struct iwch_qp *qhp);
int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
@@ -331,20 +333,21 @@ int iwch_bind_mw(struct ib_qp *qp,
struct ib_mw_bind *mw_bind);
int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
+int iwch_post_zb_read(struct iwch_qp *qhp);
int iwch_register_device(struct iwch_dev *dev);
void iwch_unregister_device(struct iwch_dev *dev);
int iwch_quiesce_qps(struct iwch_cq *chp);
int iwch_resume_qps(struct iwch_cq *chp);
void stop_read_rep_timer(struct iwch_qp *qhp);
int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
- struct iwch_mr *mhp,
- int shift,
- __be64 *page_list);
+ struct iwch_mr *mhp, int shift);
int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp,
int shift,
- __be64 *page_list,
int npages);
+int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
+void iwch_free_pbl(struct iwch_mr *mhp);
+int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
int build_phys_page_list(struct ib_phys_buf *buffer_list,
int num_phys_buf,
u64 *iova_start,
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 8891c3b0a3d..79dbe5beae5 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -586,6 +586,36 @@ static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
}
}
+int iwch_post_zb_read(struct iwch_qp *qhp)
+{
+ union t3_wr *wqe;
+ struct sk_buff *skb;
+ u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
+
+ PDBG("%s enter\n", __func__);
+ skb = alloc_skb(40, GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR "%s cannot send zb_read!!\n", __func__);
+ return -ENOMEM;
+ }
+ wqe = (union t3_wr *)skb_put(skb, sizeof(struct t3_rdma_read_wr));
+ memset(wqe, 0, sizeof(struct t3_rdma_read_wr));
+ wqe->read.rdmaop = T3_READ_REQ;
+ wqe->read.reserved[0] = 0;
+ wqe->read.reserved[1] = 0;
+ wqe->read.reserved[2] = 0;
+ wqe->read.rem_stag = cpu_to_be32(1);
+ wqe->read.rem_to = cpu_to_be64(1);
+ wqe->read.local_stag = cpu_to_be32(1);
+ wqe->read.local_len = cpu_to_be32(0);
+ wqe->read.local_to = cpu_to_be64(1);
+ wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ));
+ wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)|
+ V_FW_RIWR_LEN(flit_cnt));
+ skb->priority = CPL_PRIORITY_DATA;
+ return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
+}
+
/*
* This posts a TERMINATE with layer=RDMA, type=catastrophic.
*/
@@ -625,6 +655,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
{
struct iwch_cq *rchp, *schp;
int count;
+ int flushed;
rchp = get_chp(qhp->rhp, qhp->attr.rcq);
schp = get_chp(qhp->rhp, qhp->attr.scq);
@@ -639,20 +670,22 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
spin_lock(&qhp->lock);
cxio_flush_hw_cq(&rchp->cq);
cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
- cxio_flush_rq(&qhp->wq, &rchp->cq, count);
+ flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
spin_unlock(&qhp->lock);
spin_unlock_irqrestore(&rchp->lock, *flag);
- (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+ if (flushed)
+ (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
/* locking heirarchy: cq lock first, then qp lock. */
spin_lock_irqsave(&schp->lock, *flag);
spin_lock(&qhp->lock);
cxio_flush_hw_cq(&schp->cq);
cxio_count_scqes(&schp->cq, &qhp->wq, &count);
- cxio_flush_sq(&qhp->wq, &schp->cq, count);
+ flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
spin_unlock(&qhp->lock);
spin_unlock_irqrestore(&schp->lock, *flag);
- (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
+ if (flushed)
+ (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
/* deref */
if (atomic_dec_and_test(&qhp->refcnt))
@@ -671,11 +704,18 @@ static void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
/*
- * Return non zero if at least one RECV was pre-posted.
+ * Return count of RECV WRs posted
*/
-static int rqes_posted(struct iwch_qp *qhp)
+u16 iwch_rqes_posted(struct iwch_qp *qhp)
{
- return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV;
+ union t3_wr *wqe = qhp->wq.queue;
+ u16 count = 0;
+ while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
+ count++;
+ wqe++;
+ }
+ PDBG("%s qhp %p count %u\n", __func__, qhp, count);
+ return count;
}
static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
@@ -716,8 +756,17 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
init_attr.ird = qhp->attr.max_ird;
init_attr.qp_dma_addr = qhp->wq.dma_addr;
init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
- init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
+ init_attr.rqe_count = iwch_rqes_posted(qhp);
+ init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0;
+ if (peer2peer) {
+ init_attr.rtr_type = RTR_READ;
+ if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
+ init_attr.ord = 1;
+ if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator)
+ init_attr.ird = 1;
+ } else
+ init_attr.rtr_type = 0;
init_attr.irs = qhp->ep->rcv_seq;
PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
"flags 0x%x qpcaps 0x%x\n", __func__,
@@ -832,8 +881,8 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
abort=0;
disconnect = 1;
ep = qhp->ep;
+ get_ep(&ep->com);
}
- flush_qp(qhp, &flag);
break;
case IWCH_QP_STATE_TERMINATE:
qhp->attr.state = IWCH_QP_STATE_TERMINATE;
@@ -848,6 +897,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
abort=1;
disconnect = 1;
ep = qhp->ep;
+ get_ep(&ep->com);
}
goto err;
break;
@@ -863,6 +913,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
}
switch (attrs->next_state) {
case IWCH_QP_STATE_IDLE:
+ flush_qp(qhp, &flag);
qhp->attr.state = IWCH_QP_STATE_IDLE;
qhp->attr.llp_stream_handle = NULL;
put_ep(&qhp->ep->com);
@@ -929,8 +980,10 @@ out:
* on the EP. This can be a normal close (RTS->CLOSING) or
* an abnormal close (RTS/CLOSING->ERROR).
*/
- if (disconnect)
+ if (disconnect) {
iwch_ep_disconnect(ep, abort, GFP_KERNEL);
+ put_ep(&ep->com);
+ }
/*
* If free is 1, then we've disassociated the EP from the QP
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 3d6d9461c31..1e9e99a1393 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -66,6 +66,7 @@ struct ehca_av;
#include "ehca_irq.h"
#define EHCA_EQE_CACHE_SIZE 20
+#define EHCA_MAX_NUM_QUEUES 0xffff
struct ehca_eqe_cache_entry {
struct ehca_eqe *eqe;
@@ -127,6 +128,8 @@ struct ehca_shca {
/* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */
u32 hca_cap_mr_pgsize;
int max_mtu;
+ atomic_t num_cqs;
+ atomic_t num_qps;
};
struct ehca_pd {
@@ -189,6 +192,8 @@ struct ehca_qp {
int mtu_shift;
u32 message_count;
u32 packet_count;
+ atomic_t nr_events; /* events seen */
+ wait_queue_head_t wait_completion;
};
#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
@@ -344,6 +349,8 @@ extern int ehca_use_hp_mr;
extern int ehca_scaling_code;
extern int ehca_lock_hcalls;
extern int ehca_nr_ports;
+extern int ehca_max_cq;
+extern int ehca_max_qp;
struct ipzu_queue_resp {
u32 qe_size; /* queue entry size */
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index ec0cfcf3073..5540b276a33 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -132,10 +132,19 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
return ERR_PTR(-EINVAL);
+ if (!atomic_add_unless(&shca->num_cqs, 1, ehca_max_cq)) {
+ ehca_err(device, "Unable to create CQ, max number of %i "
+ "CQs reached.", ehca_max_cq);
+ ehca_err(device, "To increase the maximum number of CQs "
+ "use the number_of_cqs module parameter.\n");
+ return ERR_PTR(-ENOSPC);
+ }
+
my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL);
if (!my_cq) {
ehca_err(device, "Out of memory for ehca_cq struct device=%p",
device);
+ atomic_dec(&shca->num_cqs);
return ERR_PTR(-ENOMEM);
}
@@ -305,6 +314,7 @@ create_cq_exit2:
create_cq_exit1:
kmem_cache_free(cq_cache, my_cq);
+ atomic_dec(&shca->num_cqs);
return cq;
}
@@ -359,6 +369,7 @@ int ehca_destroy_cq(struct ib_cq *cq)
ipz_queue_dtor(NULL, &my_cq->ipz_queue);
kmem_cache_free(cq_cache, my_cq);
+ atomic_dec(&shca->num_cqs);
return 0;
}
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index b4ac617a70e..49660dfa186 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -54,7 +54,8 @@ int ehca_create_eq(struct ehca_shca *shca,
struct ehca_eq *eq,
const enum ehca_eq_type type, const u32 length)
{
- u64 ret;
+ int ret;
+ u64 h_ret;
u32 nr_pages;
u32 i;
void *vpage;
@@ -73,15 +74,15 @@ int ehca_create_eq(struct ehca_shca *shca,
return -EINVAL;
}
- ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
- &eq->pf,
- type,
- length,
- &eq->ipz_eq_handle,
- &eq->length,
- &nr_pages, &eq->ist);
+ h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
+ &eq->pf,
+ type,
+ length,
+ &eq->ipz_eq_handle,
+ &eq->length,
+ &nr_pages, &eq->ist);
- if (ret != H_SUCCESS) {
+ if (h_ret != H_SUCCESS) {
ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq);
return -EINVAL;
}
@@ -97,24 +98,22 @@ int ehca_create_eq(struct ehca_shca *shca,
u64 rpage;
vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
- if (!vpage) {
- ret = H_RESOURCE;
+ if (!vpage)
goto create_eq_exit2;
- }
rpage = virt_to_abs(vpage);
- ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
- eq->ipz_eq_handle,
- &eq->pf,
- 0, 0, rpage, 1);
+ h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
+ eq->ipz_eq_handle,
+ &eq->pf,
+ 0, 0, rpage, 1);
if (i == (nr_pages - 1)) {
/* last page */
vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
- if (ret != H_SUCCESS || vpage)
+ if (h_ret != H_SUCCESS || vpage)
goto create_eq_exit2;
} else {
- if (ret != H_PAGE_REGISTERED || !vpage)
+ if (h_ret != H_PAGE_REGISTERED || !vpage)
goto create_eq_exit2;
}
}
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index 2515cbde7e6..bc3b37d2070 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -101,7 +101,6 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
props->max_ee = limit_uint(rblock->max_rd_ee_context);
props->max_rdd = limit_uint(rblock->max_rd_domain);
props->max_fmr = limit_uint(rblock->max_mr);
- props->local_ca_ack_delay = limit_uint(rblock->local_ca_ack_delay);
props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp);
props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context);
props->max_res_rd_atom = limit_uint(rblock->max_rr_hca);
@@ -115,7 +114,7 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
}
props->max_pkeys = 16;
- props->local_ca_ack_delay = limit_uint(rblock->local_ca_ack_delay);
+ props->local_ca_ack_delay = min_t(u8, rblock->local_ca_ack_delay, 255);
props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp);
props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp);
props->max_mcast_grp = limit_uint(rblock->max_mcast_grp);
@@ -136,7 +135,7 @@ query_device1:
return ret;
}
-static int map_mtu(struct ehca_shca *shca, u32 fw_mtu)
+static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu)
{
switch (fw_mtu) {
case 0x1:
@@ -156,7 +155,7 @@ static int map_mtu(struct ehca_shca *shca, u32 fw_mtu)
}
}
-static int map_number_of_vls(struct ehca_shca *shca, u32 vl_cap)
+static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap)
{
switch (vl_cap) {
case 0x1:
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index ca5eb0cb628..ce1ab0571be 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -204,6 +204,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
read_lock(&ehca_qp_idr_lock);
qp = idr_find(&ehca_qp_idr, token);
+ if (qp)
+ atomic_inc(&qp->nr_events);
read_unlock(&ehca_qp_idr_lock);
if (!qp)
@@ -223,6 +225,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
if (fatal && qp->ext_type == EQPT_SRQBASE)
dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);
+ if (atomic_dec_and_test(&qp->nr_events))
+ wake_up(&qp->wait_completion);
return;
}
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 65048976198..482103eb6ea 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -68,6 +68,8 @@ int ehca_port_act_time = 30;
int ehca_static_rate = -1;
int ehca_scaling_code = 0;
int ehca_lock_hcalls = -1;
+int ehca_max_cq = -1;
+int ehca_max_qp = -1;
module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO);
module_param_named(debug_level, ehca_debug_level, int, S_IRUGO);
@@ -79,6 +81,8 @@ module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO);
module_param_named(static_rate, ehca_static_rate, int, S_IRUGO);
module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO);
module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO);
+module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO);
+module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO);
MODULE_PARM_DESC(open_aqp1,
"Open AQP1 on startup (default: no)");
@@ -104,6 +108,12 @@ MODULE_PARM_DESC(scaling_code,
MODULE_PARM_DESC(lock_hcalls,
"Serialize all hCalls made by the driver "
"(default: autodetect)");
+MODULE_PARM_DESC(number_of_cqs,
+ "Max number of CQs which can be allocated "
+ "(default: autodetect)");
+MODULE_PARM_DESC(number_of_qps,
+ "Max number of QPs which can be allocated "
+ "(default: autodetect)");
DEFINE_RWLOCK(ehca_qp_idr_lock);
DEFINE_RWLOCK(ehca_cq_idr_lock);
@@ -355,6 +365,25 @@ static int ehca_sense_attributes(struct ehca_shca *shca)
if (rblock->memory_page_size_supported & pgsize_map[i])
shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
+ /* Set maximum number of CQs and QPs to calculate EQ size */
+ if (ehca_max_qp == -1)
+ ehca_max_qp = min_t(int, rblock->max_qp, EHCA_MAX_NUM_QUEUES);
+ else if (ehca_max_qp < 1 || ehca_max_qp > rblock->max_qp) {
+ ehca_gen_err("Requested number of QPs is out of range (1 - %i) "
+ "specified by HW", rblock->max_qp);
+ ret = -EINVAL;
+ goto sense_attributes1;
+ }
+
+ if (ehca_max_cq == -1)
+ ehca_max_cq = min_t(int, rblock->max_cq, EHCA_MAX_NUM_QUEUES);
+ else if (ehca_max_cq < 1 || ehca_max_cq > rblock->max_cq) {
+ ehca_gen_err("Requested number of CQs is out of range (1 - %i) "
+ "specified by HW", rblock->max_cq);
+ ret = -EINVAL;
+ goto sense_attributes1;
+ }
+
/* query max MTU from first port -- it's the same for all ports */
port = (struct hipz_query_port *)rblock;
h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
@@ -684,7 +713,7 @@ static int __devinit ehca_probe(struct of_device *dev,
struct ehca_shca *shca;
const u64 *handle;
struct ib_pd *ibpd;
- int ret, i;
+ int ret, i, eq_size;
handle = of_get_property(dev->node, "ibm,hca-handle", NULL);
if (!handle) {
@@ -705,6 +734,8 @@ static int __devinit ehca_probe(struct of_device *dev,
return -ENOMEM;
}
mutex_init(&shca->modify_mutex);
+ atomic_set(&shca->num_cqs, 0);
+ atomic_set(&shca->num_qps, 0);
for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
spin_lock_init(&shca->sport[i].mod_sqp_lock);
@@ -724,8 +755,9 @@ static int __devinit ehca_probe(struct of_device *dev,
goto probe1;
}
+ eq_size = 2 * ehca_max_cq + 4 * ehca_max_qp;
/* create event queues */
- ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048);
+ ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size);
if (ret) {
ehca_err(&shca->ib_device, "Cannot create EQ.");
goto probe1;
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index 46ae4eb2c4e..f974367cad4 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -323,7 +323,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
- mr_access_flags);
+ mr_access_flags, 0);
if (IS_ERR(e_mr->umem)) {
ib_mr = (void *)e_mr->umem;
goto reg_user_mr_exit1;
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 57bef1152cc..3f59587338e 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -421,8 +421,18 @@ static struct ehca_qp *internal_create_qp(
u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
unsigned long flags;
- if (init_attr->create_flags)
+ if (!atomic_add_unless(&shca->num_qps, 1, ehca_max_qp)) {
+ ehca_err(pd->device, "Unable to create QP, max number of %i "
+ "QPs reached.", ehca_max_qp);
+ ehca_err(pd->device, "To increase the maximum number of QPs "
+ "use the number_of_qps module parameter.\n");
+ return ERR_PTR(-ENOSPC);
+ }
+
+ if (init_attr->create_flags) {
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
+ }
memset(&parms, 0, sizeof(parms));
qp_type = init_attr->qp_type;
@@ -431,6 +441,7 @@ static struct ehca_qp *internal_create_qp(
init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed",
init_attr->sq_sig_type);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
}
@@ -455,6 +466,7 @@ static struct ehca_qp *internal_create_qp(
if (is_llqp && has_srq) {
ehca_err(pd->device, "LLQPs can't have an SRQ");
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
}
@@ -466,6 +478,7 @@ static struct ehca_qp *internal_create_qp(
ehca_err(pd->device, "no more than three SGEs "
"supported for SRQ pd=%p max_sge=%x",
pd, init_attr->cap.max_recv_sge);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
}
}
@@ -477,6 +490,7 @@ static struct ehca_qp *internal_create_qp(
qp_type != IB_QPT_SMI &&
qp_type != IB_QPT_GSI) {
ehca_err(pd->device, "wrong QP Type=%x", qp_type);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
}
@@ -490,6 +504,7 @@ static struct ehca_qp *internal_create_qp(
"or max_rq_wr=%x for RC LLQP",
init_attr->cap.max_send_wr,
init_attr->cap.max_recv_wr);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
}
break;
@@ -497,6 +512,7 @@ static struct ehca_qp *internal_create_qp(
if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) {
ehca_err(pd->device, "UD LLQP not supported "
"by this adapter");
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-ENOSYS);
}
if (!(init_attr->cap.max_send_sge <= 5
@@ -508,20 +524,22 @@ static struct ehca_qp *internal_create_qp(
"or max_recv_sge=%x for UD LLQP",
init_attr->cap.max_send_sge,
init_attr->cap.max_recv_sge);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
} else if (init_attr->cap.max_send_wr > 255) {
ehca_err(pd->device,
"Invalid Number of "
"max_send_wr=%x for UD QP_TYPE=%x",
init_attr->cap.max_send_wr, qp_type);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
}
break;
default:
ehca_err(pd->device, "unsupported LL QP Type=%x",
qp_type);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
- break;
}
} else {
int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI
@@ -533,6 +551,7 @@ static struct ehca_qp *internal_create_qp(
"send_sge=%x recv_sge=%x max_sge=%x",
init_attr->cap.max_send_sge,
init_attr->cap.max_recv_sge, max_sge);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
}
}
@@ -543,9 +562,12 @@ static struct ehca_qp *internal_create_qp(
my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
if (!my_qp) {
ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-ENOMEM);
}
+ atomic_set(&my_qp->nr_events, 0);
+ init_waitqueue_head(&my_qp->wait_completion);
spin_lock_init(&my_qp->spinlock_s);
spin_lock_init(&my_qp->spinlock_r);
my_qp->qp_type = qp_type;
@@ -823,6 +845,7 @@ create_qp_exit1:
create_qp_exit0:
kmem_cache_free(qp_cache, my_qp);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(ret);
}
@@ -1913,6 +1936,9 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
idr_remove(&ehca_qp_idr, my_qp->token);
write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+ /* now wait until all pending events have completed */
+ wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
+
h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
if (h_ret != H_SUCCESS) {
ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%li "
@@ -1948,6 +1974,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
if (HAS_SQ(my_qp))
ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
kmem_cache_free(qp_cache, my_qp);
+ atomic_dec(&shca->num_qps);
return 0;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index acf30c06a0c..daad09a4591 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -1197,7 +1197,7 @@ void ipath_kreceive(struct ipath_portdata *pd)
}
reloop:
- for (last = 0, i = 1; !last; i++) {
+ for (last = 0, i = 1; !last; i += !last) {
hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);
eflags = ipath_hdrget_err_flags(rhf_addr);
etype = ipath_hdrget_rcv_type(rhf_addr);
@@ -1428,6 +1428,40 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd)
spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
}
+/*
+ * used to force update of pioavailshadow if we can't get a pio buffer.
+ * Needed primarily due to exitting freeze mode after recovering
+ * from errors. Done lazily, because it's safer (known to not
+ * be writing pio buffers).
+ */
+static void ipath_reset_availshadow(struct ipath_devdata *dd)
+{
+ int i, im;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ipath_pioavail_lock, flags);
+ for (i = 0; i < dd->ipath_pioavregs; i++) {
+ u64 val, oldval;
+ /* deal with 6110 chip bug on high register #s */
+ im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
+ i ^ 1 : i;
+ val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
+ /*
+ * busy out the buffers not in the kernel avail list,
+ * without changing the generation bits.
+ */
+ oldval = dd->ipath_pioavailshadow[i];
+ dd->ipath_pioavailshadow[i] = val |
+ ((~dd->ipath_pioavailkernel[i] <<
+ INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
+ 0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
+ if (oldval != dd->ipath_pioavailshadow[i])
+ ipath_dbg("shadow[%d] was %Lx, now %lx\n",
+ i, oldval, dd->ipath_pioavailshadow[i]);
+ }
+ spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+}
+
/**
* ipath_setrcvhdrsize - set the receive header size
* @dd: the infinipath device
@@ -1482,9 +1516,12 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd)
*/
ipath_stats.sps_nopiobufs++;
if (!(++dd->ipath_consec_nopiobuf % 100000)) {
- ipath_dbg("%u pio sends with no bufavail; dmacopy: "
- "%llx %llx %llx %llx; shadow: %lx %lx %lx %lx\n",
+ ipath_force_pio_avail_update(dd); /* at start */
+ ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
+ "%llx %llx %llx %llx\n"
+ "ipath shadow: %lx %lx %lx %lx\n",
dd->ipath_consec_nopiobuf,
+ (unsigned long)get_cycles(),
(unsigned long long) le64_to_cpu(dma[0]),
(unsigned long long) le64_to_cpu(dma[1]),
(unsigned long long) le64_to_cpu(dma[2]),
@@ -1496,14 +1533,17 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd)
*/
if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
(sizeof(shadow[0]) * 4 * 4))
- ipath_dbg("2nd group: dmacopy: %llx %llx "
- "%llx %llx; shadow: %lx %lx %lx %lx\n",
+ ipath_dbg("2nd group: dmacopy: "
+ "%llx %llx %llx %llx\n"
+ "ipath shadow: %lx %lx %lx %lx\n",
(unsigned long long)le64_to_cpu(dma[4]),
(unsigned long long)le64_to_cpu(dma[5]),
(unsigned long long)le64_to_cpu(dma[6]),
(unsigned long long)le64_to_cpu(dma[7]),
- shadow[4], shadow[5], shadow[6],
- shadow[7]);
+ shadow[4], shadow[5], shadow[6], shadow[7]);
+
+ /* at end, so update likely happened */
+ ipath_reset_availshadow(dd);
}
}
@@ -1652,19 +1692,46 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
unsigned len, int avail)
{
unsigned long flags;
- unsigned end;
+ unsigned end, cnt = 0, next;
/* There are two bits per send buffer (busy and generation) */
start *= 2;
- len *= 2;
- end = start + len;
+ end = start + len * 2;
- /* Set or clear the generation bits. */
spin_lock_irqsave(&ipath_pioavail_lock, flags);
+ /* Set or clear the busy bit in the shadow. */
while (start < end) {
if (avail) {
- __clear_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
- dd->ipath_pioavailshadow);
+ unsigned long dma;
+ int i, im;
+ /*
+ * the BUSY bit will never be set, because we disarm
+ * the user buffers before we hand them back to the
+ * kernel. We do have to make sure the generation
+ * bit is set correctly in shadow, since it could
+ * have changed many times while allocated to user.
+ * We can't use the bitmap functions on the full
+ * dma array because it is always little-endian, so
+ * we have to flip to host-order first.
+ * BITS_PER_LONG is slightly wrong, since it's
+ * always 64 bits per register in chip...
+ * We only work on 64 bit kernels, so that's OK.
+ */
+ /* deal with 6110 chip bug on high register #s */
+ i = start / BITS_PER_LONG;
+ im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
+ i ^ 1 : i;
+ __clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
+ + start, dd->ipath_pioavailshadow);
+ dma = (unsigned long) le64_to_cpu(
+ dd->ipath_pioavailregs_dma[im]);
+ if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+ + start) % BITS_PER_LONG, &dma))
+ __set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+ + start, dd->ipath_pioavailshadow);
+ else
+ __clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+ + start, dd->ipath_pioavailshadow);
__set_bit(start, dd->ipath_pioavailkernel);
} else {
__set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
@@ -1673,7 +1740,44 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
}
start += 2;
}
+
+ if (dd->ipath_pioupd_thresh) {
+ end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
+ next = find_first_bit(dd->ipath_pioavailkernel, end);
+ while (next < end) {
+ cnt++;
+ next = find_next_bit(dd->ipath_pioavailkernel, end,
+ next + 1);
+ }
+ }
spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+
+ /*
+ * When moving buffers from kernel to user, if number assigned to
+ * the user is less than the pio update threshold, and threshold
+ * is supported (cnt was computed > 0), drop the update threshold
+ * so we update at least once per allocated number of buffers.
+ * In any case, if the kernel buffers are less than the threshold,
+ * drop the threshold. We don't bother increasing it, having once
+ * decreased it, since it would typically just cycle back and forth.
+ * If we don't decrease below buffers in use, we can wait a long
+ * time for an update, until some other context uses PIO buffers.
+ */
+ if (!avail && len < cnt)
+ cnt = len;
+ if (cnt < dd->ipath_pioupd_thresh) {
+ dd->ipath_pioupd_thresh = cnt;
+ ipath_dbg("Decreased pio update threshold to %u\n",
+ dd->ipath_pioupd_thresh);
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
+ << INFINIPATH_S_UPDTHRESH_SHIFT);
+ dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
+ << INFINIPATH_S_UPDTHRESH_SHIFT;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+ }
}
/**
@@ -1790,12 +1894,12 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
*/
if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {
int skip_cancel;
- u64 *statp = &dd->ipath_sdma_status;
+ unsigned long *statp = &dd->ipath_sdma_status;
spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
skip_cancel =
- !test_bit(IPATH_SDMA_DISABLED, statp) &&
- test_and_set_bit(IPATH_SDMA_ABORTING, statp);
+ test_and_set_bit(IPATH_SDMA_ABORTING, statp)
+ && !test_bit(IPATH_SDMA_DISABLED, statp);
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
if (skip_cancel)
goto bail;
@@ -1826,6 +1930,9 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
ipath_disarm_piobufs(dd, 0,
dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
+ if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+ set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
+
if (restore_sendctrl) {
/* else done by caller later if needed */
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
@@ -1845,7 +1952,6 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
/* only wait so long for intr */
dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
dd->ipath_sdma_reset_wait = 200;
- __set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
@@ -2510,7 +2616,7 @@ int ipath_reset_device(int unit)
ipath_dbg("unit %u port %d is in use "
"(PID %u cmd %s), can't reset\n",
unit, i,
- dd->ipath_pd[i]->port_pid,
+ pid_nr(dd->ipath_pd[i]->port_pid),
dd->ipath_pd[i]->port_comm);
ret = -EBUSY;
goto bail;
@@ -2548,19 +2654,21 @@ bail:
static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
{
int i, sub, any = 0;
- pid_t pid;
+ struct pid *pid;
if (!dd->ipath_pd)
return 0;
for (i = 1; i < dd->ipath_cfgports; i++) {
- if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt ||
- !dd->ipath_pd[i]->port_pid)
+ if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
continue;
pid = dd->ipath_pd[i]->port_pid;
+ if (!pid)
+ continue;
+
dev_info(&dd->pcidev->dev, "context %d in use "
"(PID %u), sending signal %d\n",
- i, pid, sig);
- kill_proc(pid, sig, 1);
+ i, pid_nr(pid), sig);
+ kill_pid(pid, sig, 1);
any++;
for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
pid = dd->ipath_pd[i]->port_subpid[sub];
@@ -2568,8 +2676,8 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
continue;
dev_info(&dd->pcidev->dev, "sub-context "
"%d:%d in use (PID %u), sending "
- "signal %d\n", i, sub, pid, sig);
- kill_proc(pid, sig, 1);
+ "signal %d\n", i, sub, pid_nr(pid), sig);
+ kill_pid(pid, sig, 1);
any++;
}
}
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 8b1752202e7..b472b15637f 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -173,47 +173,25 @@ static int ipath_get_base_info(struct file *fp,
(void *) dd->ipath_statusp -
(void *) dd->ipath_pioavailregs_dma;
if (!shared) {
- kinfo->spi_piocnt = dd->ipath_pbufsport;
+ kinfo->spi_piocnt = pd->port_piocnt;
kinfo->spi_piobufbase = (u64) pd->port_piobufs;
kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
dd->ipath_ureg_align * pd->port_port;
} else if (master) {
- kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) +
- (dd->ipath_pbufsport % subport_cnt);
+ kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
+ (pd->port_piocnt % subport_cnt);
/* Master's PIO buffers are after all the slave's */
kinfo->spi_piobufbase = (u64) pd->port_piobufs +
dd->ipath_palign *
- (dd->ipath_pbufsport - kinfo->spi_piocnt);
+ (pd->port_piocnt - kinfo->spi_piocnt);
} else {
unsigned slave = subport_fp(fp) - 1;
- kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
+ kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
kinfo->spi_piobufbase = (u64) pd->port_piobufs +
dd->ipath_palign * kinfo->spi_piocnt * slave;
}
- /*
- * Set the PIO avail update threshold to no larger
- * than the number of buffers per process. Note that
- * we decrease it here, but won't ever increase it.
- */
- if (dd->ipath_pioupd_thresh &&
- kinfo->spi_piocnt < dd->ipath_pioupd_thresh) {
- unsigned long flags;
-
- dd->ipath_pioupd_thresh = kinfo->spi_piocnt;
- ipath_dbg("Decreased pio update threshold to %u\n",
- dd->ipath_pioupd_thresh);
- spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
- dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
- << INFINIPATH_S_UPDTHRESH_SHIFT);
- dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
- << INFINIPATH_S_UPDTHRESH_SHIFT;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- dd->ipath_sendctrl);
- spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
- }
-
if (shared) {
kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
dd->ipath_ureg_align * pd->port_port;
@@ -577,7 +555,7 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
p = dd->ipath_pageshadow[porttid + tid];
dd->ipath_pageshadow[porttid + tid] = NULL;
ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
- pd->port_pid, tid);
+ pid_nr(pd->port_pid), tid);
dd->ipath_f_put_tid(dd, &tidbase[tid],
RCVHQ_RCV_TYPE_EXPECTED,
dd->ipath_tidinvalid);
@@ -1309,19 +1287,19 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
if (!pd->port_subport_cnt) {
/* port is not shared */
- piocnt = dd->ipath_pbufsport;
+ piocnt = pd->port_piocnt;
piobufs = pd->port_piobufs;
} else if (!subport_fp(fp)) {
/* caller is the master */
- piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) +
- (dd->ipath_pbufsport % pd->port_subport_cnt);
+ piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
+ (pd->port_piocnt % pd->port_subport_cnt);
piobufs = pd->port_piobufs +
- dd->ipath_palign * (dd->ipath_pbufsport - piocnt);
+ dd->ipath_palign * (pd->port_piocnt - piocnt);
} else {
unsigned slave = subport_fp(fp) - 1;
/* caller is a slave */
- piocnt = dd->ipath_pbufsport / pd->port_subport_cnt;
+ piocnt = pd->port_piocnt / pd->port_subport_cnt;
piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
}
@@ -1631,11 +1609,8 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
port);
pd->port_cnt = 1;
port_fp(fp) = pd;
- pd->port_pid = current->pid;
+ pd->port_pid = get_pid(task_pid(current));
strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
- ipath_chg_pioavailkernel(dd,
- dd->ipath_pbufsport * (pd->port_port - 1),
- dd->ipath_pbufsport, 0);
ipath_stats.sps_ports++;
ret = 0;
} else
@@ -1818,14 +1793,15 @@ static int find_shared_port(struct file *fp,
}
port_fp(fp) = pd;
subport_fp(fp) = pd->port_cnt++;
- pd->port_subpid[subport_fp(fp)] = current->pid;
+ pd->port_subpid[subport_fp(fp)] =
+ get_pid(task_pid(current));
tidcursor_fp(fp) = 0;
pd->active_slaves |= 1 << subport_fp(fp);
ipath_cdbg(PROC,
"%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
current->comm, current->pid,
subport_fp(fp),
- pd->port_comm, pd->port_pid,
+ pd->port_comm, pid_nr(pd->port_pid),
dd->ipath_unit, pd->port_port);
ret = 1;
goto done;
@@ -1938,11 +1914,25 @@ static int ipath_do_user_init(struct file *fp,
/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
+ /* some ports may get extra buffers, calculate that here */
+ if (pd->port_port <= dd->ipath_ports_extrabuf)
+ pd->port_piocnt = dd->ipath_pbufsport + 1;
+ else
+ pd->port_piocnt = dd->ipath_pbufsport;
+
/* for right now, kernel piobufs are at end, so port 1 is at 0 */
+ if (pd->port_port <= dd->ipath_ports_extrabuf)
+ pd->port_pio_base = (dd->ipath_pbufsport + 1)
+ * (pd->port_port - 1);
+ else
+ pd->port_pio_base = dd->ipath_ports_extrabuf +
+ dd->ipath_pbufsport * (pd->port_port - 1);
pd->port_piobufs = dd->ipath_piobufbase +
- dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign;
- ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
- pd->port_port, pd->port_piobufs);
+ pd->port_pio_base * dd->ipath_palign;
+ ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
+ " first pio %u\n", pd->port_port, pd->port_piobufs,
+ pd->port_piocnt, pd->port_pio_base);
+ ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);
/*
* Now allocate the rcvhdr Q and eager TIDs; skip the TID
@@ -2077,7 +2067,8 @@ static int ipath_close(struct inode *in, struct file *fp)
* the slave(s) don't wait for receive data forever.
*/
pd->active_slaves &= ~(1 << fd->subport);
- pd->port_subpid[fd->subport] = 0;
+ put_pid(pd->port_subpid[fd->subport]);
+ pd->port_subpid[fd->subport] = NULL;
mutex_unlock(&ipath_mutex);
goto bail;
}
@@ -2085,7 +2076,7 @@ static int ipath_close(struct inode *in, struct file *fp)
if (pd->port_hdrqfull) {
ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors "
- "during run\n", pd->port_comm, pd->port_pid,
+ "during run\n", pd->port_comm, pid_nr(pd->port_pid),
pd->port_hdrqfull);
pd->port_hdrqfull = 0;
}
@@ -2107,7 +2098,6 @@ static int ipath_close(struct inode *in, struct file *fp)
}
if (dd->ipath_kregbase) {
- int i;
/* atomically clear receive enable port and intr avail. */
clear_bit(dd->ipath_r_portenable_shift + port,
&dd->ipath_rcvctrl);
@@ -2136,9 +2126,9 @@ static int ipath_close(struct inode *in, struct file *fp)
ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
pd->port_port, dd->ipath_dummy_hdrq_phys);
- i = dd->ipath_pbufsport * (port - 1);
- ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport);
- ipath_chg_pioavailkernel(dd, i, dd->ipath_pbufsport, 1);
+ ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
+ ipath_chg_pioavailkernel(dd, pd->port_pio_base,
+ pd->port_piocnt, 1);
dd->ipath_f_clear_tids(dd, pd->port_port);
@@ -2146,11 +2136,12 @@ static int ipath_close(struct inode *in, struct file *fp)
unlock_expected_tids(pd);
ipath_stats.sps_ports--;
ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
- pd->port_comm, pd->port_pid,
+ pd->port_comm, pid_nr(pd->port_pid),
dd->ipath_unit, port);
}
- pd->port_pid = 0;
+ put_pid(pd->port_pid);
+ pd->port_pid = NULL;
dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
mutex_unlock(&ipath_mutex);
ipath_free_pddata(dd, pd); /* after releasing the mutex */
diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c
index e3ec0d1bdf5..8eee7830f04 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba7220.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c
@@ -595,7 +595,7 @@ static void ipath_7220_txe_recover(struct ipath_devdata *dd)
dev_info(&dd->pcidev->dev,
"Recovering from TXE PIO parity error\n");
- ipath_disarm_senderrbufs(dd, 1);
+ ipath_disarm_senderrbufs(dd);
}
@@ -675,10 +675,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
/*
- * Parity errors in send memory are recoverable,
- * just cancel the send (if indicated in * sendbuffererror),
- * count the occurrence, unfreeze (if no other handled
- * hardware error bits are set), and continue.
+ * Parity errors in send memory are recoverable by h/w
+ * just do housekeeping, exit freeze mode and continue.
*/
if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
@@ -687,13 +685,6 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
<< INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
- if (!hwerrs) {
- /* else leave in freeze mode */
- ipath_write_kreg(dd,
- dd->ipath_kregs->kr_control,
- dd->ipath_control);
- goto bail;
- }
}
if (hwerrs) {
/*
@@ -723,8 +714,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
dd->ipath_flags &= ~IPATH_INITTED;
} else {
- ipath_dbg("Clearing freezemode on ignored hardware "
- "error\n");
+ ipath_dbg("Clearing freezemode on ignored or "
+ "recovered hardware error\n");
ipath_clear_freeze(dd);
}
}
@@ -870,8 +861,9 @@ static int ipath_7220_boardname(struct ipath_devdata *dd, char *name,
"revision %u.%u!\n",
dd->ipath_majrev, dd->ipath_minrev);
ret = 1;
- } else if (dd->ipath_minrev == 1) {
- /* Rev1 chips are prototype. Complain, but allow use */
+ } else if (dd->ipath_minrev == 1 &&
+ !(dd->ipath_flags & IPATH_INITTED)) {
+ /* Rev1 chips are prototype. Complain at init, but allow use */
ipath_dev_err(dd, "Unsupported hardware "
"revision %u.%u, Contact support@qlogic.com\n",
dd->ipath_majrev, dd->ipath_minrev);
@@ -1966,7 +1958,7 @@ static void ipath_7220_config_ports(struct ipath_devdata *dd, ushort cfgports)
dd->ipath_rcvctrl);
dd->ipath_p0_rcvegrcnt = 2048; /* always */
if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
- dd->ipath_pioreserved = 1; /* reserve a buffer */
+ dd->ipath_pioreserved = 3; /* kpiobufs used for PIO */
}
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 27dd8947666..3e5baa43fc8 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -41,7 +41,7 @@
/*
* min buffers we want to have per port, after driver
*/
-#define IPATH_MIN_USER_PORT_BUFCNT 8
+#define IPATH_MIN_USER_PORT_BUFCNT 7
/*
* Number of ports we are configured to use (to allow for more pio
@@ -54,13 +54,9 @@ MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
/*
* Number of buffers reserved for driver (verbs and layered drivers.)
- * Reserved at end of buffer list. Initialized based on
- * number of PIO buffers if not set via module interface.
+ * Initialized based on number of PIO buffers if not set via module interface.
* The problem with this is that it's global, but we'll use different
- * numbers for different chip types. So the default value is not
- * very useful. I've redefined it for the 1.3 release so that it's
- * zero unless set by the user to something else, in which case we
- * try to respect it.
+ * numbers for different chip types.
*/
static ushort ipath_kpiobufs;
@@ -546,9 +542,12 @@ static void enable_chip(struct ipath_devdata *dd, int reinit)
pioavail = dd->ipath_pioavailregs_dma[i ^ 1];
else
pioavail = dd->ipath_pioavailregs_dma[i];
- dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail) |
- (~dd->ipath_pioavailkernel[i] <<
- INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
+ /*
+ * don't need to worry about ipath_pioavailkernel here
+ * because we will call ipath_chg_pioavailkernel() later
+ * in initialization, to busy out buffers as needed
+ */
+ dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail);
}
/* can get counters, stats, etc. */
dd->ipath_flags |= IPATH_PRESENT;
@@ -708,12 +707,11 @@ static void verify_interrupt(unsigned long opaque)
int ipath_init_chip(struct ipath_devdata *dd, int reinit)
{
int ret = 0;
- u32 val32, kpiobufs;
+ u32 kpiobufs, defkbufs;
u32 piobufs, uports;
u64 val;
struct ipath_portdata *pd;
gfp_t gfp_flags = GFP_USER | __GFP_COMP;
- unsigned long flags;
ret = init_housekeeping(dd, reinit);
if (ret)
@@ -753,56 +751,46 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
/ (sizeof(u64) * BITS_PER_BYTE / 2);
uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
- if (ipath_kpiobufs == 0) {
- /* not set by user (this is default) */
- if (piobufs > 144)
- kpiobufs = 32;
- else
- kpiobufs = 16;
- }
+ if (piobufs > 144)
+ defkbufs = 32 + dd->ipath_pioreserved;
else
- kpiobufs = ipath_kpiobufs;
+ defkbufs = 16 + dd->ipath_pioreserved;
- if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) {
+ if (ipath_kpiobufs && (ipath_kpiobufs +
+ (uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) {
int i = (int) piobufs -
(int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
if (i < 1)
i = 1;
dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
"%d for kernel leaves too few for %d user ports "
- "(%d each); using %u\n", kpiobufs,
+ "(%d each); using %u\n", ipath_kpiobufs,
piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
/*
* shouldn't change ipath_kpiobufs, because could be
* different for different devices...
*/
kpiobufs = i;
- }
+ } else if (ipath_kpiobufs)
+ kpiobufs = ipath_kpiobufs;
+ else
+ kpiobufs = defkbufs;
dd->ipath_lastport_piobuf = piobufs - kpiobufs;
dd->ipath_pbufsport =
uports ? dd->ipath_lastport_piobuf / uports : 0;
- val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports);
- if (val32 > 0) {
- ipath_dbg("allocating %u pbufs/port leaves %u unused, "
- "add to kernel\n", dd->ipath_pbufsport, val32);
- dd->ipath_lastport_piobuf -= val32;
- kpiobufs += val32;
- ipath_dbg("%u pbufs/port leaves %u unused, add to kernel\n",
- dd->ipath_pbufsport, val32);
- }
+ /* if not an even divisor, some user ports get extra buffers */
+ dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf -
+ (dd->ipath_pbufsport * uports);
+ if (dd->ipath_ports_extrabuf)
+ ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to "
+ "ports <= %u\n", dd->ipath_pbufsport,
+ dd->ipath_ports_extrabuf);
dd->ipath_lastpioindex = 0;
dd->ipath_lastpioindexl = dd->ipath_piobcnt2k;
- ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
+ /* ipath_pioavailshadow initialized earlier */
ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
"each for %u user ports\n", kpiobufs,
piobufs, dd->ipath_pbufsport, uports);
- if (dd->ipath_pioupd_thresh) {
- if (dd->ipath_pbufsport < dd->ipath_pioupd_thresh)
- dd->ipath_pioupd_thresh = dd->ipath_pbufsport;
- if (kpiobufs < dd->ipath_pioupd_thresh)
- dd->ipath_pioupd_thresh = kpiobufs;
- }
-
ret = dd->ipath_f_early_init(dd);
if (ret) {
ipath_dev_err(dd, "Early initialization failure\n");
@@ -810,13 +798,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
}
/*
- * Cancel any possible active sends from early driver load.
- * Follows early_init because some chips have to initialize
- * PIO buffers in early_init to avoid false parity errors.
- */
- ipath_cancel_sends(dd, 0);
-
- /*
* Early_init sets rcvhdrentsize and rcvhdrsize, so this must be
* done after early_init.
*/
@@ -836,6 +817,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
dd->ipath_pioavailregs_phys);
+
/*
* this is to detect s/w errors, which the h/w works around by
* ignoring the low 6 bits of address, if it wasn't aligned.
@@ -862,12 +844,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
- spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
- dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
- ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
- spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
/*
* before error clears, since we expect serdes pll errors during
* this, the first time after reset
@@ -940,6 +916,19 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
else
enable_chip(dd, reinit);
+ /* after enable_chip, so pioavailshadow setup */
+ ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
+
+ /*
+ * Cancel any possible active sends from early driver load.
+ * Follows early_init because some chips have to initialize
+ * PIO buffers in early_init to avoid false parity errors.
+ * After enable and ipath_chg_pioavailkernel so we can safely
+ * enable pioavail updates and PIOENABLE; packets are now
+ * ready to go out.
+ */
+ ipath_cancel_sends(dd, 1);
+
if (!reinit) {
/*
* Used when we close a port, for DMA already in flight
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 1b58f4737c7..26900b3b7a4 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -38,42 +38,12 @@
#include "ipath_verbs.h"
#include "ipath_common.h"
-/*
- * clear (write) a pio buffer, to clear a parity error. This routine
- * should only be called when in freeze mode, and the buffer should be
- * canceled afterwards.
- */
-static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
-{
- u32 __iomem *pbuf;
- u32 dwcnt; /* dword count to write */
- if (pnum < dd->ipath_piobcnt2k) {
- pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum *
- dd->ipath_palign);
- dwcnt = dd->ipath_piosize2k >> 2;
- }
- else {
- pbuf = (u32 __iomem *) (dd->ipath_pio4kbase +
- (pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
- dwcnt = dd->ipath_piosize4k >> 2;
- }
- dev_info(&dd->pcidev->dev,
- "Rewrite PIO buffer %u, to recover from parity error\n",
- pnum);
-
- /* no flush required, since already in freeze */
- writel(dwcnt + 1, pbuf);
- while (--dwcnt)
- writel(0, pbuf++);
-}
/*
* Called when we might have an error that is specific to a particular
* PIO buffer, and may need to cancel that buffer, so it can be re-used.
- * If rewrite is true, and bits are set in the sendbufferror registers,
- * we'll write to the buffer, for error recovery on parity errors.
*/
-void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
+void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
{
u32 piobcnt;
unsigned long sbuf[4];
@@ -109,11 +79,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
}
for (i = 0; i < piobcnt; i++)
- if (test_bit(i, sbuf)) {
- if (rewrite)
- ipath_clrpiobuf(dd, i);
+ if (test_bit(i, sbuf))
ipath_disarm_piobufs(dd, i, 1);
- }
/* ignore armlaunch errs for a bit */
dd->ipath_lastcancel = jiffies+3;
}
@@ -164,7 +131,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
{
u64 ignore_this_time = 0;
- ipath_disarm_senderrbufs(dd, 0);
+ ipath_disarm_senderrbufs(dd);
if ((errs & E_SUM_LINK_PKTERRS) &&
!(dd->ipath_flags & IPATH_LINKACTIVE)) {
/*
@@ -909,8 +876,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
* processes (causing armlaunch), send errors due to going into freeze mode,
* etc., and try to avoid causing extra interrupts while doing so.
* Forcibly update the in-memory pioavail register copies after cleanup
- * because the chip won't do it for anything changing while in freeze mode
- * (we don't want to wait for the next pio buffer state change).
+ * because the chip won't do it while in freeze mode (the register values
+ * themselves are kept correct).
* Make sure that we don't lose any important interrupts by using the chip
* feature that says that writing 0 to a bit in *clear that is set in
* *status will cause an interrupt to be generated again (if allowed by
@@ -918,44 +885,23 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
*/
void ipath_clear_freeze(struct ipath_devdata *dd)
{
- int i, im;
- u64 val;
-
/* disable error interrupts, to avoid confusion */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
/* also disable interrupts; errormask is sometimes overwriten */
ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
- /*
- * clear all sends, because they have may been
- * completed by usercode while in freeze mode, and
- * therefore would not be sent, and eventually
- * might cause the process to run out of bufs
- */
- ipath_cancel_sends(dd, 0);
+ ipath_cancel_sends(dd, 1);
+
+ /* clear the freeze, and be sure chip saw it */
ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
dd->ipath_control);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
- /* ensure pio avail updates continue */
+ /* force in-memory update now we are out of freeze */
ipath_force_pio_avail_update(dd);
/*
- * We just enabled pioavailupdate, so dma copy is almost certainly
- * not yet right, so read the registers directly. Similar to init
- */
- for (i = 0; i < dd->ipath_pioavregs; i++) {
- /* deal with 6110 chip bug */
- im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
- i ^ 1 : i;
- val = ipath_read_kreg64(dd, (0x1000 / sizeof(u64)) + im);
- dd->ipath_pioavailregs_dma[i] = cpu_to_le64(val);
- dd->ipath_pioavailshadow[i] = val |
- (~dd->ipath_pioavailkernel[i] <<
- INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
- }
-
- /*
* force new interrupt if any hwerr, error or interrupt bits are
* still set, and clear "safe" send packet errors related to freeze
* and cancelling sends. Re-enable error interrupts before possible
@@ -1312,10 +1258,8 @@ irqreturn_t ipath_intr(int irq, void *data)
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
- if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
- handle_layer_pioavail(dd);
- else
- ipath_dbg("unexpected BUFAVAIL intr\n");
+ /* always process; sdma verbs uses PIO for acks and VL15 */
+ handle_layer_pioavail(dd);
}
ret = IRQ_HANDLED;
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 202337ae90d..59a8b254b97 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -117,6 +117,10 @@ struct ipath_portdata {
u16 port_subport_cnt;
/* non-zero if port is being shared. */
u16 port_subport_id;
+ /* number of pio bufs for this port (all procs, if shared) */
+ u32 port_piocnt;
+ /* first pio buffer for this port */
+ u32 port_pio_base;
/* chip offset of PIO buffers for this port */
u32 port_piobufs;
/* how many alloc_pages() chunks in port_rcvegrbuf_pages */
@@ -155,8 +159,8 @@ struct ipath_portdata {
/* saved total number of polled urgent packets for poll edge trigger */
u32 port_urgent_poll;
/* pid of process using this port */
- pid_t port_pid;
- pid_t port_subpid[INFINIPATH_MAX_SUBPORT];
+ struct pid *port_pid;
+ struct pid *port_subpid[INFINIPATH_MAX_SUBPORT];
/* same size as task_struct .comm[] */
char port_comm[16];
/* pkeys set by this use of this port */
@@ -384,6 +388,8 @@ struct ipath_devdata {
u32 ipath_lastrpkts;
/* pio bufs allocated per port */
u32 ipath_pbufsport;
+ /* if remainder on bufs/port, ports < extrabuf get 1 extra */
+ u32 ipath_ports_extrabuf;
u32 ipath_pioupd_thresh; /* update threshold, some chips */
/*
* number of ports configured as max; zero is set to number chip
@@ -477,7 +483,7 @@ struct ipath_devdata {
/* SendDMA related entries */
spinlock_t ipath_sdma_lock;
- u64 ipath_sdma_status;
+ unsigned long ipath_sdma_status;
unsigned long ipath_sdma_abort_jiffies;
unsigned long ipath_sdma_abort_intr_timeout;
unsigned long ipath_sdma_buf_jiffies;
@@ -816,8 +822,8 @@ struct ipath_devdata {
#define IPATH_SDMA_DISARMED 1
#define IPATH_SDMA_DISABLED 2
#define IPATH_SDMA_LAYERBUF 3
-#define IPATH_SDMA_RUNNING 62
-#define IPATH_SDMA_SHUTDOWN 63
+#define IPATH_SDMA_RUNNING 30
+#define IPATH_SDMA_SHUTDOWN 31
/* bit combinations that correspond to abort states */
#define IPATH_SDMA_ABORT_NONE 0
@@ -1011,7 +1017,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *);
int ipath_update_eeprom_log(struct ipath_devdata *dd);
void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
-void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
+void ipath_disarm_senderrbufs(struct ipath_devdata *);
void ipath_force_pio_avail_update(struct ipath_devdata *);
void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index db4ba92f79f..9d343b7c2f3 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -195,7 +195,8 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto bail;
}
- umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags);
+ umem = ib_umem_get(pd->uobject->context, start, length,
+ mr_access_flags, 0);
if (IS_ERR(umem))
return (void *) umem;
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index dd5b6e9d57c..4715911101e 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -242,7 +242,6 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
{
struct ipath_qp *q, **qpp;
unsigned long flags;
- int fnd = 0;
spin_lock_irqsave(&qpt->lock, flags);
@@ -253,51 +252,40 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
*qpp = qp->next;
qp->next = NULL;
atomic_dec(&qp->refcount);
- fnd = 1;
break;
}
}
spin_unlock_irqrestore(&qpt->lock, flags);
-
- if (!fnd)
- return;
-
- free_qpn(qpt, qp->ibqp.qp_num);
-
- wait_event(qp->wait, !atomic_read(&qp->refcount));
}
/**
- * ipath_free_all_qps - remove all QPs from the table
+ * ipath_free_all_qps - check for QPs still in use
* @qpt: the QP table to empty
+ *
+ * There should not be any QPs still in use.
+ * Free memory for table.
*/
-void ipath_free_all_qps(struct ipath_qp_table *qpt)
+unsigned ipath_free_all_qps(struct ipath_qp_table *qpt)
{
unsigned long flags;
- struct ipath_qp *qp, *nqp;
- u32 n;
+ struct ipath_qp *qp;
+ u32 n, qp_inuse = 0;
+ spin_lock_irqsave(&qpt->lock, flags);
for (n = 0; n < qpt->max; n++) {
- spin_lock_irqsave(&qpt->lock, flags);
qp = qpt->table[n];
qpt->table[n] = NULL;
- spin_unlock_irqrestore(&qpt->lock, flags);
-
- while (qp) {
- nqp = qp->next;
- free_qpn(qpt, qp->ibqp.qp_num);
- if (!atomic_dec_and_test(&qp->refcount) ||
- !ipath_destroy_qp(&qp->ibqp))
- ipath_dbg("QP memory leak!\n");
- qp = nqp;
- }
+
+ for (; qp; qp = qp->next)
+ qp_inuse++;
}
+ spin_unlock_irqrestore(&qpt->lock, flags);
- for (n = 0; n < ARRAY_SIZE(qpt->map); n++) {
+ for (n = 0; n < ARRAY_SIZE(qpt->map); n++)
if (qpt->map[n].page)
- free_page((unsigned long)qpt->map[n].page);
- }
+ free_page((unsigned long) qpt->map[n].page);
+ return qp_inuse;
}
/**
@@ -336,11 +324,12 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
qp->remote_qpn = 0;
qp->qkey = 0;
qp->qp_access_flags = 0;
- qp->s_busy = 0;
+ atomic_set(&qp->s_dma_busy, 0);
qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
qp->s_hdrwords = 0;
qp->s_wqe = NULL;
qp->s_pkt_delay = 0;
+ qp->s_draining = 0;
qp->s_psn = 0;
qp->r_psn = 0;
qp->r_msn = 0;
@@ -353,7 +342,8 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
}
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
qp->r_nak_state = 0;
- qp->r_wrid_valid = 0;
+ qp->r_aflags = 0;
+ qp->r_flags = 0;
qp->s_rnr_timeout = 0;
qp->s_head = 0;
qp->s_tail = 0;
@@ -361,7 +351,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
qp->s_last = 0;
qp->s_ssn = 1;
qp->s_lsn = 0;
- qp->s_wait_credit = 0;
memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
qp->r_head_ack_queue = 0;
qp->s_tail_ack_queue = 0;
@@ -370,17 +359,17 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
qp->r_rq.wq->head = 0;
qp->r_rq.wq->tail = 0;
}
- qp->r_reuse_sge = 0;
}
/**
- * ipath_error_qp - put a QP into an error state
- * @qp: the QP to put into an error state
+ * ipath_error_qp - put a QP into the error state
+ * @qp: the QP to put into the error state
* @err: the receive completion error to signal if a RWQE is active
*
* Flushes both send and receive work queues.
* Returns true if last WQE event should be generated.
* The QP s_lock should be held and interrupts disabled.
+ * If we are already in error state, just return.
*/
int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
@@ -389,8 +378,10 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
struct ib_wc wc;
int ret = 0;
- ipath_dbg("QP%d/%d in error state (%d)\n",
- qp->ibqp.qp_num, qp->remote_qpn, err);
+ if (qp->state == IB_QPS_ERR)
+ goto bail;
+
+ qp->state = IB_QPS_ERR;
spin_lock(&dev->pending_lock);
if (!list_empty(&qp->timerwait))
@@ -399,39 +390,21 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock);
- wc.vendor_err = 0;
- wc.byte_len = 0;
- wc.imm_data = 0;
+ /* Schedule the sending tasklet to drain the send work queue. */
+ if (qp->s_last != qp->s_head)
+ ipath_schedule_send(qp);
+
+ memset(&wc, 0, sizeof(wc));
wc.qp = &qp->ibqp;
- wc.src_qp = 0;
- wc.wc_flags = 0;
- wc.pkey_index = 0;
- wc.slid = 0;
- wc.sl = 0;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
- if (qp->r_wrid_valid) {
- qp->r_wrid_valid = 0;
+ wc.opcode = IB_WC_RECV;
+
+ if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
wc.wr_id = qp->r_wr_id;
- wc.opcode = IB_WC_RECV;
wc.status = err;
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
}
wc.status = IB_WC_WR_FLUSH_ERR;
- while (qp->s_last != qp->s_head) {
- struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-
- wc.wr_id = wqe->wr.wr_id;
- wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
- ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
- }
- qp->s_cur = qp->s_tail = qp->s_head;
- qp->s_hdrwords = 0;
- qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-
if (qp->r_rq.wq) {
struct ipath_rwq *wq;
u32 head;
@@ -447,7 +420,6 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
tail = wq->tail;
if (tail >= qp->r_rq.size)
tail = 0;
- wc.opcode = IB_WC_RECV;
while (tail != head) {
wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
if (++tail >= qp->r_rq.size)
@@ -460,6 +432,7 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
} else if (qp->ibqp.event_handler)
ret = 1;
+bail:
return ret;
}
@@ -478,11 +451,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct ipath_ibdev *dev = to_idev(ibqp->device);
struct ipath_qp *qp = to_iqp(ibqp);
enum ib_qp_state cur_state, new_state;
- unsigned long flags;
int lastwqe = 0;
int ret;
- spin_lock_irqsave(&qp->s_lock, flags);
+ spin_lock_irq(&qp->s_lock);
cur_state = attr_mask & IB_QP_CUR_STATE ?
attr->cur_qp_state : qp->state;
@@ -535,16 +507,42 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
switch (new_state) {
case IB_QPS_RESET:
+ if (qp->state != IB_QPS_RESET) {
+ qp->state = IB_QPS_RESET;
+ spin_lock(&dev->pending_lock);
+ if (!list_empty(&qp->timerwait))
+ list_del_init(&qp->timerwait);
+ if (!list_empty(&qp->piowait))
+ list_del_init(&qp->piowait);
+ spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~IPATH_S_ANY_WAIT;
+ spin_unlock_irq(&qp->s_lock);
+ /* Stop the sending tasklet */
+ tasklet_kill(&qp->s_task);
+ wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
+ spin_lock_irq(&qp->s_lock);
+ }
ipath_reset_qp(qp, ibqp->qp_type);
break;
+ case IB_QPS_SQD:
+ qp->s_draining = qp->s_last != qp->s_cur;
+ qp->state = new_state;
+ break;
+
+ case IB_QPS_SQE:
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ goto inval;
+ qp->state = new_state;
+ break;
+
case IB_QPS_ERR:
lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
break;
default:
+ qp->state = new_state;
break;
-
}
if (attr_mask & IB_QP_PKEY_INDEX)
@@ -597,8 +595,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
qp->s_max_rd_atomic = attr->max_rd_atomic;
- qp->state = new_state;
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ spin_unlock_irq(&qp->s_lock);
if (lastwqe) {
struct ib_event ev;
@@ -612,7 +609,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto bail;
inval:
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ spin_unlock_irq(&qp->s_lock);
ret = -EINVAL;
bail:
@@ -643,7 +640,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->pkey_index = qp->s_pkey_index;
attr->alt_pkey_index = 0;
attr->en_sqd_async_notify = 0;
- attr->sq_draining = 0;
+ attr->sq_draining = qp->s_draining;
attr->max_rd_atomic = qp->s_max_rd_atomic;
attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
attr->min_rnr_timer = qp->r_min_rnr_timer;
@@ -833,6 +830,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
spin_lock_init(&qp->r_rq.lock);
atomic_set(&qp->refcount, 0);
init_waitqueue_head(&qp->wait);
+ init_waitqueue_head(&qp->wait_dma);
tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
INIT_LIST_HEAD(&qp->piowait);
INIT_LIST_HEAD(&qp->timerwait);
@@ -926,6 +924,7 @@ bail_ip:
else
vfree(qp->r_rq.wq);
ipath_free_qp(&dev->qp_table, qp);
+ free_qpn(&dev->qp_table, qp->ibqp.qp_num);
bail_qp:
kfree(qp);
bail_swq:
@@ -947,41 +946,44 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
{
struct ipath_qp *qp = to_iqp(ibqp);
struct ipath_ibdev *dev = to_idev(ibqp->device);
- unsigned long flags;
- spin_lock_irqsave(&qp->s_lock, flags);
- qp->state = IB_QPS_ERR;
- spin_unlock_irqrestore(&qp->s_lock, flags);
- spin_lock(&dev->n_qps_lock);
- dev->n_qps_allocated--;
- spin_unlock(&dev->n_qps_lock);
+ /* Make sure HW and driver activity is stopped. */
+ spin_lock_irq(&qp->s_lock);
+ if (qp->state != IB_QPS_RESET) {
+ qp->state = IB_QPS_RESET;
+ spin_lock(&dev->pending_lock);
+ if (!list_empty(&qp->timerwait))
+ list_del_init(&qp->timerwait);
+ if (!list_empty(&qp->piowait))
+ list_del_init(&qp->piowait);
+ spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~IPATH_S_ANY_WAIT;
+ spin_unlock_irq(&qp->s_lock);
+ /* Stop the sending tasklet */
+ tasklet_kill(&qp->s_task);
+ wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
+ } else
+ spin_unlock_irq(&qp->s_lock);
- /* Stop the sending tasklet. */
- tasklet_kill(&qp->s_task);
+ ipath_free_qp(&dev->qp_table, qp);
if (qp->s_tx) {
atomic_dec(&qp->refcount);
if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
kfree(qp->s_tx->txreq.map_addr);
+ spin_lock_irq(&dev->pending_lock);
+ list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
+ spin_unlock_irq(&dev->pending_lock);
+ qp->s_tx = NULL;
}
- /* Make sure the QP isn't on the timeout list. */
- spin_lock_irqsave(&dev->pending_lock, flags);
- if (!list_empty(&qp->timerwait))
- list_del_init(&qp->timerwait);
- if (!list_empty(&qp->piowait))
- list_del_init(&qp->piowait);
- if (qp->s_tx)
- list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
- spin_unlock_irqrestore(&dev->pending_lock, flags);
+ wait_event(qp->wait, !atomic_read(&qp->refcount));
- /*
- * Make sure that the QP is not in the QPN table so receive
- * interrupts will discard packets for this QP. XXX Also remove QP
- * from multicast table.
- */
- if (atomic_read(&qp->refcount) != 0)
- ipath_free_qp(&dev->qp_table, qp);
+ /* all user's cleaned up, mark it available */
+ free_qpn(&dev->qp_table, qp->ibqp.qp_num);
+ spin_lock(&dev->n_qps_lock);
+ dev->n_qps_allocated--;
+ spin_unlock(&dev->n_qps_lock);
if (qp->ip)
kref_put(&qp->ip->ref, ipath_release_mmap_info);
@@ -1026,48 +1028,6 @@ bail:
}
/**
- * ipath_sqerror_qp - put a QP's send queue into an error state
- * @qp: QP who's send queue will be put into an error state
- * @wc: the WC responsible for putting the QP in this state
- *
- * Flushes the send work queue.
- * The QP s_lock should be held and interrupts disabled.
- */
-
-void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
-{
- struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
- struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-
- ipath_dbg("Send queue error on QP%d/%d: err: %d\n",
- qp->ibqp.qp_num, qp->remote_qpn, wc->status);
-
- spin_lock(&dev->pending_lock);
- if (!list_empty(&qp->timerwait))
- list_del_init(&qp->timerwait);
- if (!list_empty(&qp->piowait))
- list_del_init(&qp->piowait);
- spin_unlock(&dev->pending_lock);
-
- ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
-
- wc->status = IB_WC_WR_FLUSH_ERR;
-
- while (qp->s_last != qp->s_head) {
- wqe = get_swqe_ptr(qp, qp->s_last);
- wc->wr_id = wqe->wr.wr_id;
- wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
- }
- qp->s_cur = qp->s_tail = qp->s_head;
- qp->state = IB_QPS_SQE;
-}
-
-/**
* ipath_get_credit - flush the send work queue of a QP
* @qp: the qp who's send work queue to flush
* @aeth: the Acknowledge Extended Transport Header
@@ -1093,9 +1053,10 @@ void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
}
/* Restart sending if it was blocked due to lack of credits. */
- if (qp->s_cur != qp->s_head &&
+ if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) &&
+ qp->s_cur != qp->s_head &&
(qp->s_lsn == (u32) -1 ||
ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
qp->s_lsn + 1) <= 0))
- tasklet_hi_schedule(&qp->s_task);
+ ipath_schedule_send(qp);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index c405dfba553..108df667d2e 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -92,6 +92,10 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
u32 bth0;
u32 bth2;
+ /* Don't send an ACK if we aren't supposed to. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto bail;
+
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
@@ -238,14 +242,25 @@ int ipath_make_rc_req(struct ipath_qp *qp)
ipath_make_rc_ack(dev, qp, ohdr, pmtu))
goto done;
- if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
- qp->s_rnr_timeout || qp->s_wait_credit)
- goto bail;
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
+ goto bail;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
+ goto bail;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= IPATH_S_WAIT_DMA;
+ goto bail;
+ }
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ goto done;
+ }
- /* Limit the number of packets sent without an ACK. */
- if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
- qp->s_wait_credit = 1;
- dev->n_rc_stalls++;
+ /* Leave BUSY set until RNR timeout. */
+ if (qp->s_rnr_timeout) {
+ qp->s_flags |= IPATH_S_WAITING;
goto bail;
}
@@ -257,6 +272,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
wqe = get_swqe_ptr(qp, qp->s_cur);
switch (qp->s_state) {
default:
+ if (!(ib_ipath_state_ops[qp->state] &
+ IPATH_PROCESS_NEXT_SEND_OK))
+ goto bail;
/*
* Resend an old request or start a new one.
*
@@ -294,8 +312,10 @@ int ipath_make_rc_req(struct ipath_qp *qp)
case IB_WR_SEND_WITH_IMM:
/* If no credit, return. */
if (qp->s_lsn != (u32) -1 &&
- ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
+ ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+ qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
goto bail;
+ }
wqe->lpsn = wqe->psn;
if (len > pmtu) {
wqe->lpsn += (len - 1) / pmtu;
@@ -325,8 +345,10 @@ int ipath_make_rc_req(struct ipath_qp *qp)
case IB_WR_RDMA_WRITE_WITH_IMM:
/* If no credit, return. */
if (qp->s_lsn != (u32) -1 &&
- ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
+ ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+ qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
goto bail;
+ }
ohdr->u.rc.reth.vaddr =
cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
ohdr->u.rc.reth.rkey =
@@ -570,7 +592,11 @@ int ipath_make_rc_req(struct ipath_qp *qp)
ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
done:
ret = 1;
+ goto unlock;
+
bail:
+ qp->s_flags &= ~IPATH_S_BUSY;
+unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
@@ -606,7 +632,11 @@ static void send_rc_ack(struct ipath_qp *qp)
spin_unlock_irqrestore(&qp->s_lock, flags);
+ /* Don't try to send ACKs if the link isn't ACTIVE */
dd = dev->dd;
+ if (!(dd->ipath_flags & IPATH_LINKACTIVE))
+ goto done;
+
piobuf = ipath_getpiobuf(dd, 0, NULL);
if (!piobuf) {
/*
@@ -668,15 +698,16 @@ static void send_rc_ack(struct ipath_qp *qp)
goto done;
queue_ack:
- dev->n_rc_qacks++;
- qp->s_flags |= IPATH_S_ACK_PENDING;
- qp->s_nak_state = qp->r_nak_state;
- qp->s_ack_psn = qp->r_ack_psn;
+ if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) {
+ dev->n_rc_qacks++;
+ qp->s_flags |= IPATH_S_ACK_PENDING;
+ qp->s_nak_state = qp->r_nak_state;
+ qp->s_ack_psn = qp->r_ack_psn;
+
+ /* Schedule the send tasklet. */
+ ipath_schedule_send(qp);
+ }
spin_unlock_irqrestore(&qp->s_lock, flags);
-
- /* Call ipath_do_rc_send() in another thread. */
- tasklet_hi_schedule(&qp->s_task);
-
done:
return;
}
@@ -735,7 +766,7 @@ static void reset_psn(struct ipath_qp *qp, u32 psn)
/*
* Set the state to restart in the middle of a request.
* Don't change the s_sge, s_cur_sge, or s_cur_size.
- * See ipath_do_rc_send().
+ * See ipath_make_rc_req().
*/
switch (opcode) {
case IB_WR_SEND:
@@ -771,27 +802,14 @@ done:
*
* The QP s_lock should be held and interrupts disabled.
*/
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
+void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
{
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
struct ipath_ibdev *dev;
if (qp->s_retry == 0) {
- wc->wr_id = wqe->wr.wr_id;
- wc->status = IB_WC_RETRY_EXC_ERR;
- wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc->vendor_err = 0;
- wc->byte_len = 0;
- wc->qp = &qp->ibqp;
- wc->imm_data = 0;
- wc->src_qp = qp->remote_qpn;
- wc->wc_flags = 0;
- wc->pkey_index = 0;
- wc->slid = qp->remote_ah_attr.dlid;
- wc->sl = qp->remote_ah_attr.sl;
- wc->dlid_path_bits = 0;
- wc->port_num = 0;
- ipath_sqerror_qp(qp, wc);
+ ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
+ ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
goto bail;
}
qp->s_retry--;
@@ -804,6 +822,8 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
spin_lock(&dev->pending_lock);
if (!list_empty(&qp->timerwait))
list_del_init(&qp->timerwait);
+ if (!list_empty(&qp->piowait))
+ list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock);
if (wqe->wr.opcode == IB_WR_RDMA_READ)
@@ -812,7 +832,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
reset_psn(qp, psn);
- tasklet_hi_schedule(&qp->s_task);
+ ipath_schedule_send(qp);
bail:
return;
@@ -820,13 +840,7 @@ bail:
static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
{
- if (qp->s_last_psn != psn) {
- qp->s_last_psn = psn;
- if (qp->s_wait_credit) {
- qp->s_wait_credit = 0;
- tasklet_hi_schedule(&qp->s_task);
- }
- }
+ qp->s_last_psn = psn;
}
/**
@@ -845,6 +859,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ib_wc wc;
+ enum ib_wc_status status;
struct ipath_swqe *wqe;
int ret = 0;
u32 ack_psn;
@@ -909,7 +924,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
*/
update_last_psn(qp, wqe->psn - 1);
/* Retry this request. */
- ipath_restart_rc(qp, wqe->psn, &wc);
+ ipath_restart_rc(qp, wqe->psn);
/*
* No need to process the ACK/NAK since we are
* restarting an earlier request.
@@ -925,32 +940,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
qp->s_num_rd_atomic--;
/* Restart sending task if fence is complete */
- if ((qp->s_flags & IPATH_S_FENCE_PENDING) &&
- !qp->s_num_rd_atomic) {
- qp->s_flags &= ~IPATH_S_FENCE_PENDING;
- tasklet_hi_schedule(&qp->s_task);
- } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {
- qp->s_flags &= ~IPATH_S_RDMAR_PENDING;
- tasklet_hi_schedule(&qp->s_task);
- }
+ if (((qp->s_flags & IPATH_S_FENCE_PENDING) &&
+ !qp->s_num_rd_atomic) ||
+ qp->s_flags & IPATH_S_RDMAR_PENDING)
+ ipath_schedule_send(qp);
}
/* Post a send completion queue entry if requested. */
if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
+ memset(&wc, 0, sizeof wc);
wc.wr_id = wqe->wr.wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc.vendor_err = 0;
wc.byte_len = wqe->length;
- wc.imm_data = 0;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.wc_flags = 0;
- wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
}
qp->s_retry = qp->s_retry_cnt;
@@ -971,6 +977,8 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
} else {
if (++qp->s_last >= qp->s_size)
qp->s_last = 0;
+ if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur)
+ qp->s_draining = 0;
if (qp->s_last == qp->s_tail)
break;
wqe = get_swqe_ptr(qp, qp->s_last);
@@ -994,7 +1002,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
*/
if (ipath_cmp24(qp->s_psn, psn) <= 0) {
reset_psn(qp, psn + 1);
- tasklet_hi_schedule(&qp->s_task);
+ ipath_schedule_send(qp);
}
} else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
qp->s_state = OP(SEND_LAST);
@@ -1012,7 +1020,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
if (qp->s_last == qp->s_tail)
goto bail;
if (qp->s_rnr_retry == 0) {
- wc.status = IB_WC_RNR_RETRY_EXC_ERR;
+ status = IB_WC_RNR_RETRY_EXC_ERR;
goto class_b;
}
if (qp->s_rnr_retry_cnt < 7)
@@ -1033,6 +1041,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
IPATH_AETH_CREDIT_MASK];
ipath_insert_rnr_queue(qp);
+ ipath_schedule_send(qp);
goto bail;
case 3: /* NAK */
@@ -1050,37 +1059,25 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
* RDMA READ response which terminates the RDMA
* READ.
*/
- ipath_restart_rc(qp, psn, &wc);
+ ipath_restart_rc(qp, psn);
break;
case 1: /* Invalid Request */
- wc.status = IB_WC_REM_INV_REQ_ERR;
+ status = IB_WC_REM_INV_REQ_ERR;
dev->n_other_naks++;
goto class_b;
case 2: /* Remote Access Error */
- wc.status = IB_WC_REM_ACCESS_ERR;
+ status = IB_WC_REM_ACCESS_ERR;
dev->n_other_naks++;
goto class_b;
case 3: /* Remote Operation Error */
- wc.status = IB_WC_REM_OP_ERR;
+ status = IB_WC_REM_OP_ERR;
dev->n_other_naks++;
class_b:
- wc.wr_id = wqe->wr.wr_id;
- wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc.vendor_err = 0;
- wc.byte_len = 0;
- wc.qp = &qp->ibqp;
- wc.imm_data = 0;
- wc.src_qp = qp->remote_qpn;
- wc.wc_flags = 0;
- wc.pkey_index = 0;
- wc.slid = qp->remote_ah_attr.dlid;
- wc.sl = qp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
- ipath_sqerror_qp(qp, &wc);
+ ipath_send_complete(qp, wqe, status);
+ ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
break;
default:
@@ -1126,8 +1123,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
int header_in_data)
{
struct ipath_swqe *wqe;
+ enum ib_wc_status status;
unsigned long flags;
- struct ib_wc wc;
int diff;
u32 pad;
u32 aeth;
@@ -1135,6 +1132,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this now that we hold the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto ack_done;
+
/* Ignore invalid responses. */
if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
goto ack_done;
@@ -1159,6 +1160,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
if (unlikely(qp->s_last == qp->s_tail))
goto ack_done;
wqe = get_swqe_ptr(qp, qp->s_last);
+ status = IB_WC_SUCCESS;
switch (opcode) {
case OP(ACKNOWLEDGE):
@@ -1187,6 +1189,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
wqe = get_swqe_ptr(qp, qp->s_last);
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
goto ack_op_err;
+ qp->r_flags &= ~IPATH_R_RDMAR_SEQ;
/*
* If this is a response to a resent RDMA read, we
* have to be careful to copy the data to the right
@@ -1200,7 +1203,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
/* no AETH, no ACK */
if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
dev->n_rdma_seq++;
- ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+ if (qp->r_flags & IPATH_R_RDMAR_SEQ)
+ goto ack_done;
+ qp->r_flags |= IPATH_R_RDMAR_SEQ;
+ ipath_restart_rc(qp, qp->s_last_psn + 1);
goto ack_done;
}
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
@@ -1261,7 +1267,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
/* ACKs READ req. */
if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
dev->n_rdma_seq++;
- ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+ if (qp->r_flags & IPATH_R_RDMAR_SEQ)
+ goto ack_done;
+ qp->r_flags |= IPATH_R_RDMAR_SEQ;
+ ipath_restart_rc(qp, qp->s_last_psn + 1);
goto ack_done;
}
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
@@ -1291,31 +1300,16 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
goto ack_done;
}
-ack_done:
- spin_unlock_irqrestore(&qp->s_lock, flags);
- goto bail;
-
ack_op_err:
- wc.status = IB_WC_LOC_QP_OP_ERR;
+ status = IB_WC_LOC_QP_OP_ERR;
goto ack_err;
ack_len_err:
- wc.status = IB_WC_LOC_LEN_ERR;
+ status = IB_WC_LOC_LEN_ERR;
ack_err:
- wc.wr_id = wqe->wr.wr_id;
- wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc.vendor_err = 0;
- wc.byte_len = 0;
- wc.imm_data = 0;
- wc.qp = &qp->ibqp;
- wc.src_qp = qp->remote_qpn;
- wc.wc_flags = 0;
- wc.pkey_index = 0;
- wc.slid = qp->remote_ah_attr.dlid;
- wc.sl = qp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
- ipath_sqerror_qp(qp, &wc);
+ ipath_send_complete(qp, wqe, status);
+ ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ack_done:
spin_unlock_irqrestore(&qp->s_lock, flags);
bail:
return;
@@ -1384,7 +1378,12 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
psn &= IPATH_PSN_MASK;
e = NULL;
old_req = 1;
+
spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this now that we hold the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto unlock_done;
+
for (i = qp->r_head_ack_queue; ; i = prev) {
if (i == qp->s_tail_ack_queue)
old_req = 0;
@@ -1512,7 +1511,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
break;
}
qp->r_nak_state = 0;
- tasklet_hi_schedule(&qp->s_task);
+ ipath_schedule_send(qp);
unlock_done:
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1523,13 +1522,12 @@ send_ack:
return 0;
}
-static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
+void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
{
unsigned long flags;
int lastwqe;
spin_lock_irqsave(&qp->s_lock, flags);
- qp->state = IB_QPS_ERR;
lastwqe = ipath_error_qp(qp, err);
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1545,18 +1543,15 @@ static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
{
- unsigned long flags;
unsigned next;
next = n + 1;
if (next > IPATH_MAX_RDMA_ATOMIC)
next = 0;
- spin_lock_irqsave(&qp->s_lock, flags);
if (n == qp->s_tail_ack_queue) {
qp->s_tail_ack_queue = next;
qp->s_ack_state = OP(ACKNOWLEDGE);
}
- spin_unlock_irqrestore(&qp->s_lock, flags);
}
/**
@@ -1585,6 +1580,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int diff;
struct ib_reth *reth;
int header_in_data;
+ unsigned long flags;
/* Validate the SLID. See Ch. 9.6.1.5 */
if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
@@ -1643,11 +1639,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
opcode == OP(SEND_LAST) ||
opcode == OP(SEND_LAST_WITH_IMMEDIATE))
break;
- nack_inv:
- ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
- qp->r_nak_state = IB_NAK_INVALID_REQUEST;
- qp->r_ack_psn = qp->r_psn;
- goto send_ack;
+ goto nack_inv;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_MIDDLE):
@@ -1673,18 +1665,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
break;
}
- wc.imm_data = 0;
- wc.wc_flags = 0;
+ memset(&wc, 0, sizeof wc);
/* OK, process the packet. */
switch (opcode) {
case OP(SEND_FIRST):
- if (!ipath_get_rwqe(qp, 0)) {
- rnr_nak:
- qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
- qp->r_ack_psn = qp->r_psn;
- goto send_ack;
- }
+ if (!ipath_get_rwqe(qp, 0))
+ goto rnr_nak;
qp->r_rcv_len = 0;
/* FALLTHROUGH */
case OP(SEND_MIDDLE):
@@ -1741,20 +1728,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto nack_inv;
ipath_copy_sge(&qp->r_sge, data, tlen);
qp->r_msn++;
- if (!qp->r_wrid_valid)
+ if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
break;
- qp->r_wrid_valid = 0;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
- wc.opcode = IB_WC_RECV;
- wc.vendor_err = 0;
+ if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
+ opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
+ wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ else
+ wc.opcode = IB_WC_RECV;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
@@ -1815,9 +1801,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
next = qp->r_head_ack_queue + 1;
if (next > IPATH_MAX_RDMA_ATOMIC)
next = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this while holding the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto unlock;
if (unlikely(next == qp->s_tail_ack_queue)) {
if (!qp->s_ack_queue[next].sent)
- goto nack_inv;
+ goto nack_inv_unlck;
ipath_update_ack_queue(qp, next);
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
@@ -1838,7 +1828,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
rkey, IB_ACCESS_REMOTE_READ);
if (unlikely(!ok))
- goto nack_acc;
+ goto nack_acc_unlck;
/*
* Update the next expected PSN. We add 1 later
* below, so only add the remainder here.
@@ -1865,13 +1855,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
qp->r_psn++;
qp->r_state = opcode;
qp->r_nak_state = 0;
- barrier();
qp->r_head_ack_queue = next;
- /* Call ipath_do_rc_send() in another thread. */
- tasklet_hi_schedule(&qp->s_task);
+ /* Schedule the send tasklet. */
+ ipath_schedule_send(qp);
- goto done;
+ goto unlock;
}
case OP(COMPARE_SWAP):
@@ -1890,9 +1879,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
next = qp->r_head_ack_queue + 1;
if (next > IPATH_MAX_RDMA_ATOMIC)
next = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this while holding the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto unlock;
if (unlikely(next == qp->s_tail_ack_queue)) {
if (!qp->s_ack_queue[next].sent)
- goto nack_inv;
+ goto nack_inv_unlck;
ipath_update_ack_queue(qp, next);
}
if (!header_in_data)
@@ -1902,13 +1895,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
be32_to_cpu(ateth->vaddr[1]);
if (unlikely(vaddr & (sizeof(u64) - 1)))
- goto nack_inv;
+ goto nack_inv_unlck;
rkey = be32_to_cpu(ateth->rkey);
/* Check rkey & NAK */
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
sizeof(u64), vaddr, rkey,
IB_ACCESS_REMOTE_ATOMIC)))
- goto nack_acc;
+ goto nack_acc_unlck;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
sdata = be64_to_cpu(ateth->swap_data);
@@ -1925,13 +1918,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
qp->r_psn++;
qp->r_state = opcode;
qp->r_nak_state = 0;
- barrier();
qp->r_head_ack_queue = next;
- /* Call ipath_do_rc_send() in another thread. */
- tasklet_hi_schedule(&qp->s_task);
+ /* Schedule the send tasklet. */
+ ipath_schedule_send(qp);
- goto done;
+ goto unlock;
}
default:
@@ -1947,14 +1939,31 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto send_ack;
goto done;
+rnr_nak:
+ qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
+ qp->r_ack_psn = qp->r_psn;
+ goto send_ack;
+
+nack_inv_unlck:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+nack_inv:
+ ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ qp->r_nak_state = IB_NAK_INVALID_REQUEST;
+ qp->r_ack_psn = qp->r_psn;
+ goto send_ack;
+
+nack_acc_unlck:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
nack_acc:
- ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
+ ipath_rc_error(qp, IB_WC_LOC_PROT_ERR);
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
qp->r_ack_psn = qp->r_psn;
-
send_ack:
send_rc_ack(qp);
+ goto done;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
done:
return;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 8ac5c1d82cc..a4b5521567f 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -78,6 +78,7 @@ const u32 ib_ipath_rnr_table[32] = {
* ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
* @qp: the QP
*
+ * Called with the QP s_lock held and interrupts disabled.
* XXX Use a simple list for now. We might need a priority
* queue if we have lots of QPs waiting for RNR timeouts
* but that should be rare.
@@ -85,9 +86,9 @@ const u32 ib_ipath_rnr_table[32] = {
void ipath_insert_rnr_queue(struct ipath_qp *qp)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
- unsigned long flags;
- spin_lock_irqsave(&dev->pending_lock, flags);
+ /* We already did a spin_lock_irqsave(), so just use spin_lock */
+ spin_lock(&dev->pending_lock);
if (list_empty(&dev->rnrwait))
list_add(&qp->timerwait, &dev->rnrwait);
else {
@@ -109,7 +110,7 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
nqp->s_rnr_timeout -= qp->s_rnr_timeout;
list_add(&qp->timerwait, l);
}
- spin_unlock_irqrestore(&dev->pending_lock, flags);
+ spin_unlock(&dev->pending_lock);
}
/**
@@ -140,20 +141,11 @@ int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
goto bail;
bad_lkey:
+ memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr_id;
wc.status = IB_WC_LOC_PROT_ERR;
wc.opcode = IB_WC_RECV;
- wc.vendor_err = 0;
- wc.byte_len = 0;
- wc.imm_data = 0;
wc.qp = &qp->ibqp;
- wc.src_qp = 0;
- wc.wc_flags = 0;
- wc.pkey_index = 0;
- wc.slid = 0;
- wc.sl = 0;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
/* Signal solicited completion event. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
ret = 0;
@@ -194,6 +186,11 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
}
spin_lock_irqsave(&rq->lock, flags);
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
+ ret = 0;
+ goto unlock;
+ }
+
wq = rq->wq;
tail = wq->tail;
/* Validate tail before using it since it is user writable. */
@@ -201,9 +198,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
tail = 0;
do {
if (unlikely(tail == wq->head)) {
- spin_unlock_irqrestore(&rq->lock, flags);
ret = 0;
- goto bail;
+ goto unlock;
}
/* Make sure entry is read after head index is read. */
smp_rmb();
@@ -216,7 +212,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
wq->tail = tail;
ret = 1;
- qp->r_wrid_valid = 1;
+ set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
if (handler) {
u32 n;
@@ -243,8 +239,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
goto bail;
}
}
+unlock:
spin_unlock_irqrestore(&rq->lock, flags);
-
bail:
return ret;
}
@@ -270,38 +266,63 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
struct ib_wc wc;
u64 sdata;
atomic64_t *maddr;
+ enum ib_wc_status send_status;
+ /*
+ * Note that we check the responder QP state after
+ * checking the requester's state.
+ */
qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
- if (!qp) {
- dev->n_pkt_drops++;
- return;
- }
-again:
spin_lock_irqsave(&sqp->s_lock, flags);
- if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) ||
- sqp->s_rnr_timeout) {
- spin_unlock_irqrestore(&sqp->s_lock, flags);
- goto done;
- }
+ /* Return if we are already busy processing a work request. */
+ if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
+ !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
+ goto unlock;
- /* Get the next send request. */
- if (sqp->s_last == sqp->s_head) {
- /* Send work queue is empty. */
- spin_unlock_irqrestore(&sqp->s_lock, flags);
- goto done;
+ sqp->s_flags |= IPATH_S_BUSY;
+
+again:
+ if (sqp->s_last == sqp->s_head)
+ goto clr_busy;
+ wqe = get_swqe_ptr(sqp, sqp->s_last);
+
+ /* Return if it is not OK to start a new work reqeust. */
+ if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
+ goto clr_busy;
+ /* We are in the error state, flush the work request. */
+ send_status = IB_WC_WR_FLUSH_ERR;
+ goto flush_send;
}
/*
* We can rely on the entry not changing without the s_lock
* being held until we update s_last.
+ * We increment s_cur to indicate s_last is in progress.
*/
- wqe = get_swqe_ptr(sqp, sqp->s_last);
+ if (sqp->s_last == sqp->s_cur) {
+ if (++sqp->s_cur >= sqp->s_size)
+ sqp->s_cur = 0;
+ }
spin_unlock_irqrestore(&sqp->s_lock, flags);
- wc.wc_flags = 0;
- wc.imm_data = 0;
+ if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
+ dev->n_pkt_drops++;
+ /*
+ * For RC, the requester would timeout and retry so
+ * shortcut the timeouts and just signal too many retries.
+ */
+ if (sqp->ibqp.qp_type == IB_QPT_RC)
+ send_status = IB_WC_RETRY_EXC_ERR;
+ else
+ send_status = IB_WC_SUCCESS;
+ goto serr;
+ }
+
+ memset(&wc, 0, sizeof wc);
+ send_status = IB_WC_SUCCESS;
sqp->s_sge.sge = wqe->sg_list[0];
sqp->s_sge.sg_list = wqe->sg_list + 1;
@@ -313,75 +334,33 @@ again:
wc.imm_data = wqe->wr.ex.imm_data;
/* FALLTHROUGH */
case IB_WR_SEND:
- if (!ipath_get_rwqe(qp, 0)) {
- rnr_nak:
- /* Handle RNR NAK */
- if (qp->ibqp.qp_type == IB_QPT_UC)
- goto send_comp;
- if (sqp->s_rnr_retry == 0) {
- wc.status = IB_WC_RNR_RETRY_EXC_ERR;
- goto err;
- }
- if (sqp->s_rnr_retry_cnt < 7)
- sqp->s_rnr_retry--;
- dev->n_rnr_naks++;
- sqp->s_rnr_timeout =
- ib_ipath_rnr_table[qp->r_min_rnr_timer];
- ipath_insert_rnr_queue(sqp);
- goto done;
- }
+ if (!ipath_get_rwqe(qp, 0))
+ goto rnr_nak;
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_WRITE))) {
- wc.status = IB_WC_REM_INV_REQ_ERR;
- goto err;
- }
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto inv_err;
wc.wc_flags = IB_WC_WITH_IMM;
wc.imm_data = wqe->wr.ex.imm_data;
if (!ipath_get_rwqe(qp, 1))
goto rnr_nak;
/* FALLTHROUGH */
case IB_WR_RDMA_WRITE:
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_WRITE))) {
- wc.status = IB_WC_REM_INV_REQ_ERR;
- goto err;
- }
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto inv_err;
if (wqe->length == 0)
break;
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
- IB_ACCESS_REMOTE_WRITE))) {
- acc_err:
- wc.status = IB_WC_REM_ACCESS_ERR;
- err:
- wc.wr_id = wqe->wr.wr_id;
- wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc.vendor_err = 0;
- wc.byte_len = 0;
- wc.qp = &sqp->ibqp;
- wc.src_qp = sqp->remote_qpn;
- wc.pkey_index = 0;
- wc.slid = sqp->remote_ah_attr.dlid;
- wc.sl = sqp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
- spin_lock_irqsave(&sqp->s_lock, flags);
- ipath_sqerror_qp(sqp, &wc);
- spin_unlock_irqrestore(&sqp->s_lock, flags);
- goto done;
- }
+ IB_ACCESS_REMOTE_WRITE)))
+ goto acc_err;
break;
case IB_WR_RDMA_READ:
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_READ))) {
- wc.status = IB_WC_REM_INV_REQ_ERR;
- goto err;
- }
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+ goto inv_err;
if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
@@ -394,11 +373,8 @@ again:
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_ATOMIC))) {
- wc.status = IB_WC_REM_INV_REQ_ERR;
- goto err;
- }
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+ goto inv_err;
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
wqe->wr.wr.atomic.remote_addr,
wqe->wr.wr.atomic.rkey,
@@ -415,7 +391,8 @@ again:
goto send_comp;
default:
- goto done;
+ send_status = IB_WC_LOC_QP_OP_ERR;
+ goto serr;
}
sge = &sqp->s_sge.sge;
@@ -448,8 +425,7 @@ again:
sqp->s_len -= len;
}
- if (wqe->wr.opcode == IB_WR_RDMA_WRITE ||
- wqe->wr.opcode == IB_WR_RDMA_READ)
+ if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
goto send_comp;
if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
@@ -458,32 +434,89 @@ again:
wc.opcode = IB_WC_RECV;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
- wc.vendor_err = 0;
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
wqe->wr.send_flags & IB_SEND_SOLICITED);
send_comp:
+ spin_lock_irqsave(&sqp->s_lock, flags);
+flush_send:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
- ipath_send_complete(sqp, wqe, IB_WC_SUCCESS);
+ ipath_send_complete(sqp, wqe, send_status);
goto again;
+rnr_nak:
+ /* Handle RNR NAK */
+ if (qp->ibqp.qp_type == IB_QPT_UC)
+ goto send_comp;
+ /*
+ * Note: we don't need the s_lock held since the BUSY flag
+ * makes this single threaded.
+ */
+ if (sqp->s_rnr_retry == 0) {
+ send_status = IB_WC_RNR_RETRY_EXC_ERR;
+ goto serr;
+ }
+ if (sqp->s_rnr_retry_cnt < 7)
+ sqp->s_rnr_retry--;
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
+ goto clr_busy;
+ sqp->s_flags |= IPATH_S_WAITING;
+ dev->n_rnr_naks++;
+ sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
+ ipath_insert_rnr_queue(sqp);
+ goto clr_busy;
+
+inv_err:
+ send_status = IB_WC_REM_INV_REQ_ERR;
+ wc.status = IB_WC_LOC_QP_OP_ERR;
+ goto err;
+
+acc_err:
+ send_status = IB_WC_REM_ACCESS_ERR;
+ wc.status = IB_WC_LOC_PROT_ERR;
+err:
+ /* responder goes to error state */
+ ipath_rc_error(qp, wc.status);
+
+serr:
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ ipath_send_complete(sqp, wqe, send_status);
+ if (sqp->ibqp.qp_type == IB_QPT_RC) {
+ int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+
+ sqp->s_flags &= ~IPATH_S_BUSY;
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+ if (lastwqe) {
+ struct ib_event ev;
+
+ ev.device = sqp->ibqp.device;
+ ev.element.qp = &sqp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
+ }
+ goto done;
+ }
+clr_busy:
+ sqp->s_flags &= ~IPATH_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
- if (atomic_dec_and_test(&qp->refcount))
+ if (qp && atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
}
-static void want_buffer(struct ipath_devdata *dd)
+static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
{
- if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) {
+ if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
+ qp->ibqp.qp_type == IB_QPT_SMI) {
unsigned long flags;
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
@@ -501,26 +534,36 @@ static void want_buffer(struct ipath_devdata *dd)
* @dev: the device we ran out of buffers on
*
* Called when we run out of PIO buffers.
+ * If we are now in the error state, return zero to flush the
+ * send work request.
*/
-static void ipath_no_bufs_available(struct ipath_qp *qp,
+static int ipath_no_bufs_available(struct ipath_qp *qp,
struct ipath_ibdev *dev)
{
unsigned long flags;
+ int ret = 1;
/*
* Note that as soon as want_buffer() is called and
* possibly before it returns, ipath_ib_piobufavail()
- * could be called. If we are still in the tasklet function,
- * tasklet_hi_schedule() will not call us until the next time
- * tasklet_hi_schedule() is called.
- * We leave the busy flag set so that another post send doesn't
- * try to put the same QP on the piowait list again.
+ * could be called. Therefore, put QP on the piowait list before
+ * enabling the PIO avail interrupt.
*/
- spin_lock_irqsave(&dev->pending_lock, flags);
- list_add_tail(&qp->piowait, &dev->piowait);
- spin_unlock_irqrestore(&dev->pending_lock, flags);
- want_buffer(dev->dd);
- dev->n_piowait++;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
+ dev->n_piowait++;
+ qp->s_flags |= IPATH_S_WAITING;
+ qp->s_flags &= ~IPATH_S_BUSY;
+ spin_lock(&dev->pending_lock);
+ if (list_empty(&qp->piowait))
+ list_add_tail(&qp->piowait, &dev->piowait);
+ spin_unlock(&dev->pending_lock);
+ } else
+ ret = 0;
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ if (ret)
+ want_buffer(dev->dd, qp);
+ return ret;
}
/**
@@ -596,15 +639,13 @@ void ipath_do_send(unsigned long data)
struct ipath_qp *qp = (struct ipath_qp *)data;
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
int (*make_req)(struct ipath_qp *qp);
-
- if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
- goto bail;
+ unsigned long flags;
if ((qp->ibqp.qp_type == IB_QPT_RC ||
qp->ibqp.qp_type == IB_QPT_UC) &&
qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
ipath_ruc_loopback(qp);
- goto clear;
+ goto bail;
}
if (qp->ibqp.qp_type == IB_QPT_RC)
@@ -614,6 +655,19 @@ void ipath_do_send(unsigned long data)
else
make_req = ipath_make_ud_req;
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ /* Return if we are already busy processing a work request. */
+ if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
+ !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ goto bail;
+ }
+
+ qp->s_flags |= IPATH_S_BUSY;
+
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
again:
/* Check for a constructed packet to be sent. */
if (qp->s_hdrwords != 0) {
@@ -623,8 +677,8 @@ again:
*/
if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
qp->s_cur_sge, qp->s_cur_size)) {
- ipath_no_bufs_available(qp, dev);
- goto bail;
+ if (ipath_no_bufs_available(qp, dev))
+ goto bail;
}
dev->n_unicast_xmit++;
/* Record that we sent the packet and s_hdr is empty. */
@@ -633,16 +687,20 @@ again:
if (make_req(qp))
goto again;
-clear:
- clear_bit(IPATH_S_BUSY, &qp->s_busy);
+
bail:;
}
+/*
+ * This should be called with s_lock held.
+ */
void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
enum ib_wc_status status)
{
- unsigned long flags;
- u32 last;
+ u32 old_last, last;
+
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
+ return;
/* See ch. 11.2.4.1 and 10.7.3.1 */
if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
@@ -650,27 +708,25 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
status != IB_WC_SUCCESS) {
struct ib_wc wc;
+ memset(&wc, 0, sizeof wc);
wc.wr_id = wqe->wr.wr_id;
wc.status = status;
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc.vendor_err = 0;
- wc.byte_len = wqe->length;
- wc.imm_data = 0;
wc.qp = &qp->ibqp;
- wc.src_qp = 0;
- wc.wc_flags = 0;
- wc.pkey_index = 0;
- wc.slid = 0;
- wc.sl = 0;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
- ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+ if (status == IB_WC_SUCCESS)
+ wc.byte_len = wqe->length;
+ ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
+ status != IB_WC_SUCCESS);
}
- spin_lock_irqsave(&qp->s_lock, flags);
- last = qp->s_last;
+ old_last = last = qp->s_last;
if (++last >= qp->s_size)
last = 0;
qp->s_last = last;
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ if (qp->s_cur == old_last)
+ qp->s_cur = last;
+ if (qp->s_tail == old_last)
+ qp->s_tail = last;
+ if (qp->state == IB_QPS_SQD && last == qp->s_cur)
+ qp->s_draining = 0;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
index 1974df7a9f7..3697449c1ba 100644
--- a/drivers/infiniband/hw/ipath/ipath_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_sdma.c
@@ -308,13 +308,15 @@ static void sdma_abort_task(unsigned long opaque)
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
/*
- * Don't restart sdma here. Wait until link is up to ACTIVE.
- * VL15 MADs used to bring the link up use PIO, and multiple
- * link transitions otherwise cause the sdma engine to be
+ * Don't restart sdma here (with the exception
+ * below). Wait until link is up to ACTIVE. VL15 MADs
+ * used to bring the link up use PIO, and multiple link
+ * transitions otherwise cause the sdma engine to be
* stopped and started multiple times.
- * The disable is done here, including the shadow, so the
- * state is kept consistent.
- * See ipath_restart_sdma() for the actual starting of sdma.
+ * The disable is done here, including the shadow,
+ * so the state is kept consistent.
+ * See ipath_restart_sdma() for the actual starting
+ * of sdma.
*/
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
@@ -326,6 +328,13 @@ static void sdma_abort_task(unsigned long opaque)
/* make sure I see next message */
dd->ipath_sdma_abort_jiffies = 0;
+ /*
+ * Not everything that takes SDMA offline is a link
+ * status change. If the link was up, restart SDMA.
+ */
+ if (dd->ipath_flags & IPATH_LINKACTIVE)
+ ipath_restart_sdma(dd);
+
goto done;
}
@@ -427,7 +436,12 @@ int setup_sdma(struct ipath_devdata *dd)
goto done;
}
- dd->ipath_sdma_status = 0;
+ /*
+ * Set initial status as if we had been up, then gone down.
+ * This lets initial start on transition to ACTIVE be the
+ * same as restart after link flap.
+ */
+ dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED;
dd->ipath_sdma_abort_jiffies = 0;
dd->ipath_sdma_generation = 0;
dd->ipath_sdma_descq_tail = 0;
@@ -449,16 +463,19 @@ int setup_sdma(struct ipath_devdata *dd)
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr,
dd->ipath_sdma_head_phys);
- /* Reserve all the former "kernel" piobufs */
- n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - dd->ipath_pioreserved;
- for (i = dd->ipath_lastport_piobuf; i < n; ++i) {
+ /*
+ * Reserve all the former "kernel" piobufs, using high number range
+ * so we get as many 4K buffers as possible
+ */
+ n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+ i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved;
+ ipath_chg_pioavailkernel(dd, i, n - i , 0);
+ for (; i < n; ++i) {
unsigned word = i / 64;
unsigned bit = i & 63;
BUG_ON(word >= 3);
senddmabufmask[word] |= 1ULL << bit;
}
- ipath_chg_pioavailkernel(dd, dd->ipath_lastport_piobuf,
- n - dd->ipath_lastport_piobuf, 0);
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0,
senddmabufmask[0]);
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1,
@@ -615,6 +632,9 @@ void ipath_restart_sdma(struct ipath_devdata *dd)
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+ /* notify upper layers */
+ ipath_ib_piobufavail(dd->verbs_dev);
+
bail:
return;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index bfe8926b551..7fd18e83390 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -47,14 +47,30 @@ int ipath_make_uc_req(struct ipath_qp *qp)
{
struct ipath_other_headers *ohdr;
struct ipath_swqe *wqe;
+ unsigned long flags;
u32 hwords;
u32 bth0;
u32 len;
u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
int ret = 0;
- if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
+ goto bail;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
+ goto bail;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= IPATH_S_WAIT_DMA;
+ goto bail;
+ }
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done;
+ }
ohdr = &qp->s_hdr.u.oth;
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
@@ -69,9 +85,12 @@ int ipath_make_uc_req(struct ipath_qp *qp)
qp->s_wqe = NULL;
switch (qp->s_state) {
default:
+ if (!(ib_ipath_state_ops[qp->state] &
+ IPATH_PROCESS_NEXT_SEND_OK))
+ goto bail;
/* Check if send work queue is empty. */
if (qp->s_cur == qp->s_head)
- goto done;
+ goto bail;
/*
* Start a new request.
*/
@@ -134,7 +153,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
break;
default:
- goto done;
+ goto bail;
}
break;
@@ -194,9 +213,14 @@ int ipath_make_uc_req(struct ipath_qp *qp)
ipath_make_ruc_header(to_idev(qp->ibqp.device),
qp, ohdr, bth0 | (qp->s_state << 24),
qp->s_next_psn++ & IPATH_PSN_MASK);
+done:
ret = 1;
+ goto unlock;
-done:
+bail:
+ qp->s_flags &= ~IPATH_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
@@ -258,8 +282,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
*/
opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
- wc.imm_data = 0;
- wc.wc_flags = 0;
+ memset(&wc, 0, sizeof wc);
/* Compare the PSN verses the expected PSN. */
if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
@@ -322,8 +345,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
send_first:
- if (qp->r_reuse_sge) {
- qp->r_reuse_sge = 0;
+ if (qp->r_flags & IPATH_R_REUSE_SGE) {
+ qp->r_flags &= ~IPATH_R_REUSE_SGE;
qp->r_sge = qp->s_rdma_read_sge;
} else if (!ipath_get_rwqe(qp, 0)) {
dev->n_pkt_drops++;
@@ -340,13 +363,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
case OP(SEND_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
if (unlikely(tlen != (hdrsize + pmtu + 4))) {
- qp->r_reuse_sge = 1;
+ qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len)) {
- qp->r_reuse_sge = 1;
+ qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
@@ -372,7 +395,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
/* Check for invalid length. */
/* XXX LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4))) {
- qp->r_reuse_sge = 1;
+ qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
@@ -380,7 +403,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
tlen -= (hdrsize + pad + 4);
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len)) {
- qp->r_reuse_sge = 1;
+ qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
@@ -390,14 +413,10 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
- wc.vendor_err = 0;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
@@ -488,8 +507,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
dev->n_pkt_drops++;
goto done;
}
- if (qp->r_reuse_sge)
- qp->r_reuse_sge = 0;
+ if (qp->r_flags & IPATH_R_REUSE_SGE)
+ qp->r_flags &= ~IPATH_R_REUSE_SGE;
else if (!ipath_get_rwqe(qp, 1)) {
dev->n_pkt_drops++;
goto done;
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 8b6a261c89e..77ca8ca74e7 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -65,9 +65,9 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
u32 length;
qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn);
- if (!qp) {
+ if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
dev->n_pkt_drops++;
- goto send_comp;
+ goto done;
}
rsge.sg_list = NULL;
@@ -91,14 +91,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
* present on the wire.
*/
length = swqe->length;
+ memset(&wc, 0, sizeof wc);
wc.byte_len = length + sizeof(struct ib_grh);
if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
wc.wc_flags = IB_WC_WITH_IMM;
wc.imm_data = swqe->wr.ex.imm_data;
- } else {
- wc.wc_flags = 0;
- wc.imm_data = 0;
}
/*
@@ -229,7 +227,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
}
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
- wc.vendor_err = 0;
wc.qp = &qp->ibqp;
wc.src_qp = sqp->ibqp.qp_num;
/* XXX do we know which pkey matched? Only needed for GSI. */
@@ -248,8 +245,7 @@ drop:
kfree(rsge.sg_list);
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
-send_comp:
- ipath_send_complete(sqp, swqe, IB_WC_SUCCESS);
+done:;
}
/**
@@ -264,6 +260,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
struct ipath_other_headers *ohdr;
struct ib_ah_attr *ah_attr;
struct ipath_swqe *wqe;
+ unsigned long flags;
u32 nwords;
u32 extra_bytes;
u32 bth0;
@@ -271,13 +268,30 @@ int ipath_make_ud_req(struct ipath_qp *qp)
u16 lid;
int ret = 0;
- if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)))
- goto bail;
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
+ goto bail;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
+ goto bail;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= IPATH_S_WAIT_DMA;
+ goto bail;
+ }
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ goto done;
+ }
if (qp->s_cur == qp->s_head)
goto bail;
wqe = get_swqe_ptr(qp, qp->s_cur);
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
/* Construct the header. */
ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
@@ -288,10 +302,23 @@ int ipath_make_ud_req(struct ipath_qp *qp)
dev->n_unicast_xmit++;
} else {
dev->n_unicast_xmit++;
- lid = ah_attr->dlid &
- ~((1 << dev->dd->ipath_lmc) - 1);
+ lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1);
if (unlikely(lid == dev->dd->ipath_lid)) {
+ /*
+ * If DMAs are in progress, we can't generate
+ * a completion for the loopback packet since
+ * it would be out of order.
+ * XXX Instead of waiting, we could queue a
+ * zero length descriptor so we get a callback.
+ */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= IPATH_S_WAIT_DMA;
+ goto bail;
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
ipath_ud_loopback(qp, wqe);
+ spin_lock_irqsave(&qp->s_lock, flags);
+ ipath_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done;
}
}
@@ -368,11 +395,13 @@ int ipath_make_ud_req(struct ipath_qp *qp)
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
done:
- if (++qp->s_cur >= qp->s_size)
- qp->s_cur = 0;
ret = 1;
+ goto unlock;
bail:
+ qp->s_flags &= ~IPATH_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
@@ -506,8 +535,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
/*
* Get the next work request entry to find where to put the data.
*/
- if (qp->r_reuse_sge)
- qp->r_reuse_sge = 0;
+ if (qp->r_flags & IPATH_R_REUSE_SGE)
+ qp->r_flags &= ~IPATH_R_REUSE_SGE;
else if (!ipath_get_rwqe(qp, 0)) {
/*
* Count VL15 packets dropped due to no receive buffer.
@@ -523,7 +552,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
}
/* Silently drop packets which are too big. */
if (wc.byte_len > qp->r_len) {
- qp->r_reuse_sge = 1;
+ qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto bail;
}
@@ -535,7 +564,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
ipath_copy_sge(&qp->r_sge, data,
wc.byte_len - sizeof(struct ib_grh));
- qp->r_wrid_valid = 0;
+ if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
+ goto bail;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.h b/drivers/infiniband/hw/ipath/ipath_user_sdma.h
index e70946c1428..fc76316c4a5 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_sdma.h
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.h
@@ -45,8 +45,6 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd,
int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq);
-int ipath_user_sdma_pkt_sent(const struct ipath_user_sdma_queue *pq,
- u32 counter);
void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index e63927cce5b..e0ec540042b 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -111,16 +111,24 @@ static unsigned int ib_ipath_disable_sma;
module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(disable_sma, "Disable the SMA");
+/*
+ * Note that it is OK to post send work requests in the SQE and ERR
+ * states; ipath_do_send() will process them and generate error
+ * completions as per IB 1.2 C10-96.
+ */
const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = 0,
[IB_QPS_INIT] = IPATH_POST_RECV_OK,
[IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
[IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
- IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
+ IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
+ IPATH_PROCESS_NEXT_SEND_OK,
[IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
- IPATH_POST_SEND_OK,
- [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
- [IB_QPS_ERR] = 0,
+ IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
+ [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
+ IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
+ [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
+ IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
};
struct ipath_ucontext {
@@ -230,18 +238,6 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
}
}
-static void ipath_flush_wqe(struct ipath_qp *qp, struct ib_send_wr *wr)
-{
- struct ib_wc wc;
-
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wr->wr_id;
- wc.status = IB_WC_WR_FLUSH_ERR;
- wc.opcode = ib_ipath_wc_opcode[wr->opcode];
- wc.qp = &qp->ibqp;
- ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
-}
-
/*
* Count the number of DMA descriptors needed to send length bytes of data.
* Don't modify the ipath_sge_state to get the count.
@@ -347,14 +343,8 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
spin_lock_irqsave(&qp->s_lock, flags);
/* Check that state is OK to post send. */
- if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) {
- if (qp->state != IB_QPS_SQE && qp->state != IB_QPS_ERR)
- goto bail_inval;
- /* C10-96 says generate a flushed completion entry. */
- ipath_flush_wqe(qp, wr);
- ret = 0;
- goto bail;
- }
+ if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
+ goto bail_inval;
/* IB spec says that num_sge == 0 is OK. */
if (wr->num_sge > qp->s_max_sge)
@@ -396,7 +386,6 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
wqe = get_swqe_ptr(qp, qp->s_head);
wqe->wr = *wr;
- wqe->ssn = qp->s_ssn++;
wqe->length = 0;
if (wr->num_sge) {
acc = wr->opcode >= IB_WR_RDMA_READ ?
@@ -422,6 +411,7 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
goto bail_inval;
} else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
goto bail_inval;
+ wqe->ssn = qp->s_ssn++;
qp->s_head = next;
ret = 0;
@@ -677,6 +667,7 @@ bail:;
static void ipath_ib_timer(struct ipath_ibdev *dev)
{
struct ipath_qp *resend = NULL;
+ struct ipath_qp *rnr = NULL;
struct list_head *last;
struct ipath_qp *qp;
unsigned long flags;
@@ -703,7 +694,9 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)
if (--qp->s_rnr_timeout == 0) {
do {
list_del_init(&qp->timerwait);
- tasklet_hi_schedule(&qp->s_task);
+ qp->timer_next = rnr;
+ rnr = qp;
+ atomic_inc(&qp->refcount);
if (list_empty(last))
break;
qp = list_entry(last->next, struct ipath_qp,
@@ -743,13 +736,15 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)
spin_unlock_irqrestore(&dev->pending_lock, flags);
/* XXX What if timer fires again while this is running? */
- for (qp = resend; qp != NULL; qp = qp->timer_next) {
- struct ib_wc wc;
+ while (resend != NULL) {
+ qp = resend;
+ resend = qp->timer_next;
spin_lock_irqsave(&qp->s_lock, flags);
- if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) {
+ if (qp->s_last != qp->s_tail &&
+ ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
dev->n_timeouts++;
- ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+ ipath_restart_rc(qp, qp->s_last_psn + 1);
}
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -757,6 +752,19 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
}
+ while (rnr != NULL) {
+ qp = rnr;
+ rnr = qp->timer_next;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
+ ipath_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ /* Notify ipath_destroy_qp() if it is waiting. */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
}
static void update_sge(struct ipath_sge_state *ss, u32 length)
@@ -1012,13 +1020,24 @@ static void sdma_complete(void *cookie, int status)
struct ipath_verbs_txreq *tx = cookie;
struct ipath_qp *qp = tx->qp;
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+ unsigned int flags;
+ enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
+ IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
- /* Generate a completion queue entry if needed */
- if (qp->ibqp.qp_type != IB_QPT_RC && tx->wqe) {
- enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
- IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
-
+ if (atomic_dec_and_test(&qp->s_dma_busy)) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (tx->wqe)
+ ipath_send_complete(qp, tx->wqe, ibs);
+ if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
+ qp->s_last != qp->s_head) ||
+ (qp->s_flags & IPATH_S_WAIT_DMA))
+ ipath_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ wake_up(&qp->wait_dma);
+ } else if (tx->wqe) {
+ spin_lock_irqsave(&qp->s_lock, flags);
ipath_send_complete(qp, tx->wqe, ibs);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
}
if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
@@ -1029,6 +1048,21 @@ static void sdma_complete(void *cookie, int status)
wake_up(&qp->wait);
}
+static void decrement_dma_busy(struct ipath_qp *qp)
+{
+ unsigned int flags;
+
+ if (atomic_dec_and_test(&qp->s_dma_busy)) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
+ qp->s_last != qp->s_head) ||
+ (qp->s_flags & IPATH_S_WAIT_DMA))
+ ipath_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ wake_up(&qp->wait_dma);
+ }
+}
+
/*
* Compute the number of clock cycles of delay before sending the next packet.
* The multipliers reflect the number of clocks for the fastest rate so
@@ -1067,9 +1101,12 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
if (tx) {
qp->s_tx = NULL;
/* resend previously constructed packet */
+ atomic_inc(&qp->s_dma_busy);
ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
- if (ret)
+ if (ret) {
qp->s_tx = tx;
+ decrement_dma_busy(qp);
+ }
goto bail;
}
@@ -1120,12 +1157,14 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
tx->txreq.sg_count = ndesc;
tx->map_len = (hdrwords + 2) << 2;
tx->txreq.map_addr = &tx->hdr;
+ atomic_inc(&qp->s_dma_busy);
ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
if (ret) {
/* save ss and length in dwords */
tx->ss = ss;
tx->len = dwords;
qp->s_tx = tx;
+ decrement_dma_busy(qp);
}
goto bail;
}
@@ -1146,6 +1185,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
memcpy(piobuf, hdr, hdrwords << 2);
ipath_copy_from_sge(piobuf + hdrwords, ss, len);
+ atomic_inc(&qp->s_dma_busy);
ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
/*
* If we couldn't queue the DMA request, save the info
@@ -1156,6 +1196,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
tx->ss = NULL;
tx->len = 0;
qp->s_tx = tx;
+ decrement_dma_busy(qp);
}
dev->n_unaligned++;
goto bail;
@@ -1179,6 +1220,7 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp,
unsigned flush_wc;
u32 control;
int ret;
+ unsigned int flags;
piobuf = ipath_getpiobuf(dd, plen, NULL);
if (unlikely(piobuf == NULL)) {
@@ -1249,8 +1291,11 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp,
}
copy_io(piobuf, ss, len, flush_wc);
done:
- if (qp->s_wqe)
+ if (qp->s_wqe) {
+ spin_lock_irqsave(&qp->s_lock, flags);
ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ }
ret = 0;
bail:
return ret;
@@ -1283,19 +1328,12 @@ int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
* can defer SDMA restart until link goes ACTIVE without
* worrying about just how we got there.
*/
- if (qp->ibqp.qp_type == IB_QPT_SMI)
+ if (qp->ibqp.qp_type == IB_QPT_SMI ||
+ !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
plen, dwords);
- /* All non-VL15 packets are dropped if link is not ACTIVE */
- else if (!(dd->ipath_flags & IPATH_LINKACTIVE)) {
- if (qp->s_wqe)
- ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
- ret = 0;
- } else if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
- ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
- plen, dwords);
else
- ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
+ ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
plen, dwords);
return ret;
@@ -1403,27 +1441,46 @@ bail:
* This is called from ipath_intr() at interrupt level when a PIO buffer is
* available after ipath_verbs_send() returned an error that no buffers were
* available. Return 1 if we consumed all the PIO buffers and we still have
- * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
+ * QPs waiting for buffers (for now, just restart the send tasklet and
* return zero).
*/
int ipath_ib_piobufavail(struct ipath_ibdev *dev)
{
+ struct list_head *list;
+ struct ipath_qp *qplist;
struct ipath_qp *qp;
unsigned long flags;
if (dev == NULL)
goto bail;
+ list = &dev->piowait;
+ qplist = NULL;
+
spin_lock_irqsave(&dev->pending_lock, flags);
- while (!list_empty(&dev->piowait)) {
- qp = list_entry(dev->piowait.next, struct ipath_qp,
- piowait);
+ while (!list_empty(list)) {
+ qp = list_entry(list->next, struct ipath_qp, piowait);
list_del_init(&qp->piowait);
- clear_bit(IPATH_S_BUSY, &qp->s_busy);
- tasklet_hi_schedule(&qp->s_task);
+ qp->pio_next = qplist;
+ qplist = qp;
+ atomic_inc(&qp->refcount);
}
spin_unlock_irqrestore(&dev->pending_lock, flags);
+ while (qplist != NULL) {
+ qp = qplist;
+ qplist = qp->pio_next;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
+ ipath_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ /* Notify ipath_destroy_qp() if it is waiting. */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
+
bail:
return 0;
}
@@ -2145,11 +2202,12 @@ bail:
void ipath_unregister_ib_device(struct ipath_ibdev *dev)
{
struct ib_device *ibdev = &dev->ibdev;
-
- disable_timer(dev->dd);
+ u32 qps_inuse;
ib_unregister_device(ibdev);
+ disable_timer(dev->dd);
+
if (!list_empty(&dev->pending[0]) ||
!list_empty(&dev->pending[1]) ||
!list_empty(&dev->pending[2]))
@@ -2164,7 +2222,10 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev)
* Note that ipath_unregister_ib_device() can be called before all
* the QPs are destroyed!
*/
- ipath_free_all_qps(&dev->qp_table);
+ qps_inuse = ipath_free_all_qps(&dev->qp_table);
+ if (qps_inuse)
+ ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
+ qps_inuse);
kfree(dev->qp_table.table);
kfree(dev->lk_table.table);
kfree(dev->txreq_bufs);
@@ -2215,17 +2276,14 @@ static ssize_t show_stats(struct device *device, struct device_attribute *attr,
"RC OTH NAKs %d\n"
"RC timeouts %d\n"
"RC RDMA dup %d\n"
- "RC stalls %d\n"
"piobuf wait %d\n"
- "no piobuf %d\n"
"unaligned %d\n"
"PKT drops %d\n"
"WQE errs %d\n",
dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
dev->n_other_naks, dev->n_timeouts,
- dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait,
- dev->n_no_piobuf, dev->n_unaligned,
+ dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
dev->n_pkt_drops, dev->n_wqe_errs);
for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
const struct ipath_opcode_stats *si = &dev->opstats[i];
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 6514aa8306c..9d12ae8a778 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -74,6 +74,11 @@
#define IPATH_POST_RECV_OK 0x02
#define IPATH_PROCESS_RECV_OK 0x04
#define IPATH_PROCESS_SEND_OK 0x08
+#define IPATH_PROCESS_NEXT_SEND_OK 0x10
+#define IPATH_FLUSH_SEND 0x20
+#define IPATH_FLUSH_RECV 0x40
+#define IPATH_PROCESS_OR_FLUSH_SEND \
+ (IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND)
/* IB Performance Manager status values */
#define IB_PMA_SAMPLE_STATUS_DONE 0x00
@@ -353,12 +358,14 @@ struct ipath_qp {
struct ib_qp ibqp;
struct ipath_qp *next; /* link list for QPN hash table */
struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */
+ struct ipath_qp *pio_next; /* link for ipath_ib_piobufavail() */
struct list_head piowait; /* link for wait PIO buf */
struct list_head timerwait; /* link for waiting for timeouts */
struct ib_ah_attr remote_ah_attr;
struct ipath_ib_header s_hdr; /* next packet header to send */
atomic_t refcount;
wait_queue_head_t wait;
+ wait_queue_head_t wait_dma;
struct tasklet_struct s_task;
struct ipath_mmap_info *ip;
struct ipath_sge_state *s_cur_sge;
@@ -369,7 +376,7 @@ struct ipath_qp {
struct ipath_sge_state s_rdma_read_sge;
struct ipath_sge_state r_sge; /* current receive data */
spinlock_t s_lock;
- unsigned long s_busy;
+ atomic_t s_dma_busy;
u16 s_pkt_delay;
u16 s_hdrwords; /* size of s_hdr in 32 bit words */
u32 s_cur_size; /* size of send packet in bytes */
@@ -383,6 +390,7 @@ struct ipath_qp {
u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
u64 r_wr_id; /* ID for current receive WQE */
+ unsigned long r_aflags;
u32 r_len; /* total length of r_sge */
u32 r_rcv_len; /* receive data len processed */
u32 r_psn; /* expected rcv packet sequence number */
@@ -394,8 +402,7 @@ struct ipath_qp {
u8 r_state; /* opcode of last packet received */
u8 r_nak_state; /* non-zero if NAK is pending */
u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
- u8 r_reuse_sge; /* for UC receive errors */
- u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */
+ u8 r_flags;
u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
u8 r_head_ack_queue; /* index into s_ack_queue[] */
u8 qp_access_flags;
@@ -404,13 +411,13 @@ struct ipath_qp {
u8 s_rnr_retry_cnt;
u8 s_retry; /* requester retry counter */
u8 s_rnr_retry; /* requester RNR retry counter */
- u8 s_wait_credit; /* limit number of unacked packets sent */
u8 s_pkey_index; /* PKEY index to use */
u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
u8 s_tail_ack_queue; /* index into s_ack_queue[] */
u8 s_flags;
u8 s_dmult;
+ u8 s_draining;
u8 timeout; /* Timeout for this QP */
enum ib_mtu path_mtu;
u32 remote_qpn;
@@ -428,16 +435,40 @@ struct ipath_qp {
struct ipath_sge r_sg_list[0]; /* verified SGEs */
};
-/* Bit definition for s_busy. */
-#define IPATH_S_BUSY 0
+/*
+ * Atomic bit definitions for r_aflags.
+ */
+#define IPATH_R_WRID_VALID 0
+
+/*
+ * Bit definitions for r_flags.
+ */
+#define IPATH_R_REUSE_SGE 0x01
+#define IPATH_R_RDMAR_SEQ 0x02
/*
* Bit definitions for s_flags.
+ *
+ * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs
+ * before processing the next SWQE
+ * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs
+ * before processing the next SWQE
+ * IPATH_S_WAITING - waiting for RNR timeout or send buffer available.
+ * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
+ * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating
+ * next send completion entry not via send DMA.
*/
#define IPATH_S_SIGNAL_REQ_WR 0x01
#define IPATH_S_FENCE_PENDING 0x02
#define IPATH_S_RDMAR_PENDING 0x04
#define IPATH_S_ACK_PENDING 0x08
+#define IPATH_S_BUSY 0x10
+#define IPATH_S_WAITING 0x20
+#define IPATH_S_WAIT_SSN_CREDIT 0x40
+#define IPATH_S_WAIT_DMA 0x80
+
+#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \
+ IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA)
#define IPATH_PSN_CREDIT 512
@@ -573,13 +604,11 @@ struct ipath_ibdev {
u32 n_rnr_naks;
u32 n_other_naks;
u32 n_timeouts;
- u32 n_rc_stalls;
u32 n_pkt_drops;
u32 n_vl15_dropped;
u32 n_wqe_errs;
u32 n_rdma_dup_busy;
u32 n_piowait;
- u32 n_no_piobuf;
u32 n_unaligned;
u32 port_cap_flags;
u32 pma_sample_start;
@@ -657,6 +686,17 @@ static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
return container_of(ibdev, struct ipath_ibdev, ibdev);
}
+/*
+ * This must be called with s_lock held.
+ */
+static inline void ipath_schedule_send(struct ipath_qp *qp)
+{
+ if (qp->s_flags & IPATH_S_ANY_WAIT)
+ qp->s_flags &= ~IPATH_S_ANY_WAIT;
+ if (!(qp->s_flags & IPATH_S_BUSY))
+ tasklet_hi_schedule(&qp->s_task);
+}
+
int ipath_process_mad(struct ib_device *ibdev,
int mad_flags,
u8 port_num,
@@ -706,12 +746,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_qp_init_attr *init_attr);
-void ipath_free_all_qps(struct ipath_qp_table *qpt);
+unsigned ipath_free_all_qps(struct ipath_qp_table *qpt);
int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
-void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc);
-
void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
unsigned ipath_ib_rate_to_mult(enum ib_rate rate);
@@ -729,7 +767,9 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc);
+void ipath_restart_rc(struct ipath_qp *qp, u32 psn);
+
+void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err);
int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr);
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 5e570bb0bb6..4521319b140 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -137,7 +137,7 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
int err;
*umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe),
- IB_ACCESS_LOCAL_WRITE);
+ IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(*umem))
return PTR_ERR(*umem);
@@ -221,7 +221,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
}
err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
- cq->db.dma, &cq->mcq);
+ cq->db.dma, &cq->mcq, 0);
if (err)
goto err_dbmap;
@@ -246,7 +246,7 @@ err_mtt:
if (context)
ib_umem_release(cq->umem);
else
- mlx4_ib_free_cq_buf(dev, &cq->buf, entries);
+ mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
err_db:
if (!context)
@@ -434,7 +434,7 @@ int mlx4_ib_destroy_cq(struct ib_cq *cq)
mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db);
ib_umem_release(mcq->umem);
} else {
- mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe + 1);
+ mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe);
mlx4_db_free(dev->dev, &mcq->db);
}
diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c
index 8e342cc9bae..8aee4233b38 100644
--- a/drivers/infiniband/hw/mlx4/doorbell.c
+++ b/drivers/infiniband/hw/mlx4/doorbell.c
@@ -63,7 +63,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
page->user_virt = (virt & PAGE_MASK);
page->refcnt = 0;
page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
- PAGE_SIZE, 0);
+ PAGE_SIZE, 0, 0);
if (IS_ERR(page->umem)) {
err = PTR_ERR(page->umem);
kfree(page);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index fe2c2e94a5f..68e92485fc7 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -132,7 +132,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mr)
return ERR_PTR(-ENOMEM);
- mr->umem = ib_umem_get(pd->uobject->context, start, length, access_flags);
+ mr->umem = ib_umem_get(pd->uobject->context, start, length,
+ access_flags, 0);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 80ea8b9e776..8e02ecfec18 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -482,7 +482,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err;
qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- qp->buf_size, 0);
+ qp->buf_size, 0, 0);
if (IS_ERR(qp->umem)) {
err = PTR_ERR(qp->umem);
goto err;
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 204619702f9..12d6bc6f800 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -109,7 +109,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
}
srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- buf_size, 0);
+ buf_size, 0, 0);
if (IS_ERR(srq->umem)) {
err = PTR_ERR(srq->umem);
goto err_srq;
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 3538da16e3f..820205dec56 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -818,15 +818,9 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
- u32 key;
-
if (!fmr->maps)
return;
- key = tavor_key_to_hw_index(fmr->ibmr.lkey);
- key &= dev->limits.num_mpts - 1;
- fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);
-
fmr->maps = 0;
writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
@@ -834,16 +828,9 @@ void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
- u32 key;
-
if (!fmr->maps)
return;
- key = arbel_key_to_hw_index(fmr->ibmr.lkey);
- key &= dev->limits.num_mpts - 1;
- key = adjust_key(dev, key);
- fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
-
fmr->maps = 0;
*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 696e1f30233..be34f99ca62 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -39,6 +39,8 @@
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
+
+#include <linux/sched.h>
#include <linux/mm.h>
#include "mthca_dev.h"
@@ -367,6 +369,8 @@ static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
return ERR_PTR(-EFAULT);
}
+ context->reg_mr_warned = 0;
+
return &context->ibucontext;
}
@@ -1006,17 +1010,31 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct mthca_dev *dev = to_mdev(pd->device);
struct ib_umem_chunk *chunk;
struct mthca_mr *mr;
+ struct mthca_reg_mr ucmd;
u64 *pages;
int shift, n, len;
int i, j, k;
int err = 0;
int write_mtt_size;
+ if (udata->inlen - sizeof (struct ib_uverbs_cmd_hdr) < sizeof ucmd) {
+ if (!to_mucontext(pd->uobject->context)->reg_mr_warned) {
+ mthca_warn(dev, "Process '%s' did not pass in MR attrs.\n",
+ current->comm);
+ mthca_warn(dev, " Update libmthca to fix this.\n");
+ }
+ ++to_mucontext(pd->uobject->context)->reg_mr_warned;
+ ucmd.mr_attrs = 0;
+ } else if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
+ return ERR_PTR(-EFAULT);
+
mr = kmalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
- mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+ mr->umem = ib_umem_get(pd->uobject->context, start, length, acc,
+ ucmd.mr_attrs & MTHCA_MR_DMASYNC);
+
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 262616c8ebb..934bf954403 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -67,6 +67,7 @@ struct mthca_ucontext {
struct ib_ucontext ibucontext;
struct mthca_uar uar;
struct mthca_user_db_table *db_tab;
+ int reg_mr_warned;
};
struct mthca_mtt;
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
index 02cc0a766f3..e1262c942db 100644
--- a/drivers/infiniband/hw/mthca/mthca_user.h
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -61,6 +61,16 @@ struct mthca_alloc_pd_resp {
__u32 reserved;
};
+struct mthca_reg_mr {
+/*
+ * Mark the memory region with a DMA attribute that causes
+ * in-flight DMA to be flushed when the region is written to:
+ */
+#define MTHCA_MR_DMASYNC 0x1
+ __u32 mr_attrs;
+ __u32 reserved;
+};
+
struct mthca_create_cq {
__u32 lkey;
__u32 pdn;
diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig
index 2aeb7ac972a..d449eb6ec78 100644
--- a/drivers/infiniband/hw/nes/Kconfig
+++ b/drivers/infiniband/hw/nes/Kconfig
@@ -2,6 +2,7 @@ config INFINIBAND_NES
tristate "NetEffect RNIC Driver"
depends on PCI && INET && INFINIBAND
select LIBCRC32C
+ select INET_LRO
---help---
This is a low-level driver for NetEffect RDMA enabled
Network Interface Cards (RNIC).
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index cdf2e9ad62f..61b46e9c7d2 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -535,8 +535,8 @@ int nes_register_ofa_device(struct nes_ib_device *);
int nes_read_eeprom_values(struct nes_device *, struct nes_adapter *);
void nes_write_1G_phy_reg(struct nes_device *, u8, u8, u16);
void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *);
-void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16);
-void nes_read_10G_phy_reg(struct nes_device *, u16, u8);
+void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16, u16);
+void nes_read_10G_phy_reg(struct nes_device *, u8, u8, u16);
struct nes_cqp_request *nes_get_cqp_request(struct nes_device *);
void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *, int);
int nes_arp_table(struct nes_device *, u32, u8 *, u32);
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index d940fc27129..9a4b40fae40 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -594,7 +594,7 @@ static void nes_cm_timer_tick(unsigned long pass)
continue;
}
/* this seems like the correct place, but leave send entry unprotected */
- // spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ /* spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); */
atomic_inc(&send_entry->skb->users);
cm_packets_retrans++;
nes_debug(NES_DBG_CM, "Retransmitting send_entry %p for node %p,"
@@ -1335,7 +1335,7 @@ static int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
cm_node->loc_addr, cm_node->loc_port,
cm_node->rem_addr, cm_node->rem_port,
cm_node->state, atomic_read(&cm_node->ref_count));
- // create event
+ /* create event */
cm_node->state = NES_CM_STATE_CLOSED;
create_event(cm_node, NES_CM_EVENT_ABORTED);
@@ -1669,7 +1669,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
if (!cm_node)
return NULL;
- // set our node side to client (active) side
+ /* set our node side to client (active) side */
cm_node->tcp_cntxt.client = 1;
cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
@@ -1694,7 +1694,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
loopbackremotenode->mpa_frame_size = mpa_frame_size -
sizeof(struct ietf_mpa_frame);
- // we are done handling this state, set node to a TSA state
+ /* we are done handling this state, set node to a TSA state */
cm_node->state = NES_CM_STATE_TSA;
cm_node->tcp_cntxt.rcv_nxt = loopbackremotenode->tcp_cntxt.loc_seq_num;
loopbackremotenode->tcp_cntxt.rcv_nxt = cm_node->tcp_cntxt.loc_seq_num;
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 08964cc7e98..d3278f111ca 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -38,9 +38,14 @@
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/if_vlan.h>
+#include <linux/inet_lro.h>
#include "nes.h"
+static unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR;
+module_param(nes_lro_max_aggr, uint, 0444);
+MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation");
+
static u32 crit_err_count;
u32 int_mod_timer_init;
u32 int_mod_cq_depth_256;
@@ -832,7 +837,7 @@ static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_cou
nes_write_indexed(nesdev, 0x00000900, 0x20000001);
nes_write_indexed(nesdev, 0x000060C0, 0x0000028e);
nes_write_indexed(nesdev, 0x000060C8, 0x00000020);
- //
+
nes_write_indexed(nesdev, 0x000001EC, 0x7b2625a0);
/* nes_write_indexed(nesdev, 0x000001EC, 0x5f2625a0); */
@@ -1207,11 +1212,16 @@ int nes_init_phy(struct nes_device *nesdev)
{
struct nes_adapter *nesadapter = nesdev->nesadapter;
u32 counter = 0;
+ u32 sds_common_control0;
u32 mac_index = nesdev->mac_index;
- u32 tx_config;
+ u32 tx_config = 0;
u16 phy_data;
+ u32 temp_phy_data = 0;
+ u32 temp_phy_data2 = 0;
+ u32 i = 0;
- if (nesadapter->OneG_Mode) {
+ if ((nesadapter->OneG_Mode) &&
+ (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) {
nes_debug(NES_DBG_PHY, "1G PHY, mac_index = %d.\n", mac_index);
if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_1G) {
printk(PFX "%s: Programming mdc config for 1G\n", __func__);
@@ -1223,7 +1233,7 @@ int nes_init_phy(struct nes_device *nesdev)
nes_read_1G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index], &phy_data);
nes_debug(NES_DBG_PHY, "Phy data from register 1 phy address %u = 0x%X.\n",
nesadapter->phy_index[mac_index], phy_data);
- nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index], 0xb000);
+ nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index], 0xb000);
/* Reset the PHY */
nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], 0x8000);
@@ -1277,12 +1287,126 @@ int nes_init_phy(struct nes_device *nesdev)
nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data);
nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data | 0x0300);
} else {
- if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) {
+ if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) ||
+ (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) {
/* setup 10G MDIO operation */
tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
tx_config |= 0x14;
nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
}
+ if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) {
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
+
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ mdelay(10);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
+ temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+
+ /*
+ * if firmware is already running (like from a
+ * driver un-load/load, don't do anything.
+ */
+ if (temp_phy_data == temp_phy_data2) {
+ /* configure QT2505 AMCC PHY */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0x0000, 0x8000);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0000);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc302, 0x0044);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc318, 0x0052);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc319, 0x0008);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc31a, 0x0098);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0026, 0x0E00);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0027, 0x0000);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0028, 0xA528);
+
+ /*
+ * remove micro from reset; chip boots from ROM,
+ * uploads EEPROM f/w image, uC executes f/w
+ */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0002);
+
+ /*
+ * wait for heart beat to start to
+ * know loading is done
+ */
+ counter = 0;
+ do {
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ if (counter++ > 1000) {
+ nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from heartbeat check <this is bad!!!> \n");
+ break;
+ }
+ mdelay(100);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
+ temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ } while ((temp_phy_data2 == temp_phy_data));
+
+ /*
+ * wait for tracking to start to know
+ * f/w is good to go
+ */
+ counter = 0;
+ do {
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7fd);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ if (counter++ > 1000) {
+ nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from status check <this is bad!!!> \n");
+ break;
+ }
+ mdelay(1000);
+ /*
+ * nes_debug(NES_DBG_PHY, "AMCC PHY- phy_status not ready yet = 0x%02X\n",
+ * temp_phy_data);
+ */
+ } while (((temp_phy_data & 0xff) != 0x50) && ((temp_phy_data & 0xff) != 0x70));
+
+ /* set LOS Control invert RXLOSB_I_PADINV */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd003, 0x0000);
+ /* set LOS Control to mask of RXLOSB_I */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc314, 0x0042);
+ /* set LED1 to input mode (LED1 and LED2 share same LED) */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd006, 0x0007);
+ /* set LED2 to RX link_status and activity */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd007, 0x000A);
+ /* set LED3 to RX link_status */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd008, 0x0009);
+
+ /*
+ * reset the res-calibration on t2
+ * serdes; ensures it is stable after
+ * the amcc phy is stable
+ */
+
+ sds_common_control0 = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0);
+ sds_common_control0 |= 0x1;
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0);
+
+ /* release the res-calibration reset */
+ sds_common_control0 &= 0xfffffffe;
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0);
+
+ i = 0;
+ while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040)
+ && (i++ < 5000)) {
+ /* mdelay(1); */
+ }
+
+ /*
+ * wait for link train done before moving on,
+ * or will get an interupt storm
+ */
+ counter = 0;
+ do {
+ temp_phy_data = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
+ (0x200 * (nesdev->mac_index & 1)));
+ if (counter++ > 1000) {
+ nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from link train wait <this is bad, link didnt train!!!>\n");
+ break;
+ }
+ mdelay(1);
+ } while (((temp_phy_data & 0x0f1f0000) != 0x0f0f0000));
+ }
+ }
}
return 0;
}
@@ -1375,6 +1499,25 @@ static void nes_rq_wqes_timeout(unsigned long parm)
}
+static int nes_lro_get_skb_hdr(struct sk_buff *skb, void **iphdr,
+ void **tcph, u64 *hdr_flags, void *priv)
+{
+ unsigned int ip_len;
+ struct iphdr *iph;
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ if (iph->protocol != IPPROTO_TCP)
+ return -1;
+ ip_len = ip_hdrlen(skb);
+ skb_set_transport_header(skb, ip_len);
+ *tcph = tcp_hdr(skb);
+
+ *hdr_flags = LRO_IPV4 | LRO_TCP;
+ *iphdr = iph;
+ return 0;
+}
+
+
/**
* nes_init_nic_qp
*/
@@ -1520,10 +1663,10 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
}
u64temp = (u64)nesvnic->nic.sq_pbase;
- nic_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
+ nic_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
nic_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
u64temp = (u64)nesvnic->nic.rq_pbase;
- nic_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
+ nic_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
nic_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP |
@@ -1575,7 +1718,7 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
nic_rqe = &nesvnic->nic.rq_vbase[counter];
nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32(nesvnic->max_frame_size);
nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem);
+ nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem);
nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32));
nesvnic->nic.rx_skb[counter] = skb;
}
@@ -1592,15 +1735,21 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
nesvnic->rq_wqes_timer.function = nes_rq_wqes_timeout;
nesvnic->rq_wqes_timer.data = (unsigned long)nesvnic;
nes_debug(NES_DBG_INIT, "NAPI support Enabled\n");
-
if (nesdev->nesadapter->et_use_adaptive_rx_coalesce)
{
nes_nic_init_timer(nesdev);
if (netdev->mtu > 1500)
jumbomode = 1;
- nes_nic_init_timer_defaults(nesdev, jumbomode);
- }
-
+ nes_nic_init_timer_defaults(nesdev, jumbomode);
+ }
+ nesvnic->lro_mgr.max_aggr = nes_lro_max_aggr;
+ nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS;
+ nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc;
+ nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr;
+ nesvnic->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID;
+ nesvnic->lro_mgr.dev = netdev;
+ nesvnic->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY;
+ nesvnic->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
return 0;
}
@@ -1620,8 +1769,8 @@ void nes_destroy_nic_qp(struct nes_vnic *nesvnic)
/* Free remaining NIC receive buffers */
while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) {
- nic_rqe = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail];
- wqe_frag = (u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]);
+ nic_rqe = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail];
+ wqe_frag = (u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]);
wqe_frag |= ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32;
pci_unmap_single(nesdev->pcidev, (dma_addr_t)wqe_frag,
nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
@@ -1704,17 +1853,17 @@ int nes_napi_isr(struct nes_device *nesdev)
/* iff NIC, process here, else wait for DPC */
if ((int_stat) && ((int_stat & 0x0000ff00) == int_stat)) {
nesdev->napi_isr_ran = 0;
- nes_write32(nesdev->regs+NES_INT_STAT,
- (int_stat &
- ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3)));
+ nes_write32(nesdev->regs + NES_INT_STAT,
+ (int_stat &
+ ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 | NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3)));
/* Process the CEQs */
nes_process_ceq(nesdev, &nesdev->nesadapter->ceq[nesdev->nic_ceq_index]);
if (unlikely((((nesadapter->et_rx_coalesce_usecs_irq) &&
- (!nesadapter->et_use_adaptive_rx_coalesce)) ||
- ((nesadapter->et_use_adaptive_rx_coalesce) &&
- (nesdev->deepcq_count > nesadapter->et_pkt_rate_low)))) ) {
+ (!nesadapter->et_use_adaptive_rx_coalesce)) ||
+ ((nesadapter->et_use_adaptive_rx_coalesce) &&
+ (nesdev->deepcq_count > nesadapter->et_pkt_rate_low))))) {
if ((nesdev->int_req & NES_INT_TIMER) == 0) {
/* Enable Periodic timer interrupts */
nesdev->int_req |= NES_INT_TIMER;
@@ -1792,12 +1941,12 @@ void nes_dpc(unsigned long param)
}
if (int_stat) {
- if (int_stat & ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|
- NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3)) {
+ if (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0|
+ NES_INT_MAC1|NES_INT_MAC2 | NES_INT_MAC3)) {
/* Ack the interrupts */
nes_write32(nesdev->regs+NES_INT_STAT,
- (int_stat & ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|
- NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3)));
+ (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0|
+ NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3)));
}
temp_int_stat = int_stat;
@@ -1862,8 +2011,8 @@ void nes_dpc(unsigned long param)
}
}
/* Don't use the interface interrupt bit stay in loop */
- int_stat &= ~NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|
- NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3;
+ int_stat &= ~NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 |
+ NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3;
} while ((int_stat != 0) && (loop_counter++ < MAX_DPC_ITERATIONS));
if (timer_ints == 1) {
@@ -1874,9 +2023,9 @@ void nes_dpc(unsigned long param)
nesdev->timer_only_int_count = 0;
nesdev->int_req &= ~NES_INT_TIMER;
nes_write32(nesdev->regs + NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
- nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
+ nes_write32(nesdev->regs + NES_INT_MASK, ~nesdev->int_req);
} else {
- nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff|(~nesdev->int_req));
+ nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
}
} else {
if (unlikely(nesadapter->et_use_adaptive_rx_coalesce))
@@ -1884,7 +2033,7 @@ void nes_dpc(unsigned long param)
nes_nic_init_timer(nesdev);
}
nesdev->timer_only_int_count = 0;
- nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff|(~nesdev->int_req));
+ nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
}
} else {
nesdev->timer_only_int_count = 0;
@@ -1933,7 +2082,7 @@ static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq)
do {
if (le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]) &
NES_CEQE_VALID) {
- u64temp = (((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX])))<<32) |
+ u64temp = (((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]))) << 32) |
((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_LOW_IDX])));
u64temp <<= 1;
cq = *((struct nes_hw_cq **)&u64temp);
@@ -1961,7 +2110,7 @@ static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq)
*/
static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq)
{
-// u64 u64temp;
+ /* u64 u64temp; */
u32 head;
u32 aeq_size;
u32 aeqe_misc;
@@ -1980,8 +2129,10 @@ static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq)
if (aeqe_misc & (NES_AEQE_QP|NES_AEQE_CQ)) {
if (aeqe_cq_id >= NES_FIRST_QPN) {
/* dealing with an accelerated QP related AE */
-// u64temp = (((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])))<<32) |
-// ((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX])));
+ /*
+ * u64temp = (((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX]))) << 32) |
+ * ((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX])));
+ */
nes_process_iwarp_aeqe(nesdev, (struct nes_hw_aeqe *)aeqe);
} else {
/* TODO: dealing with a CQP related AE */
@@ -2081,6 +2232,8 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
u32 u32temp;
u16 phy_data;
u16 temp_phy_data;
+ u32 pcs_val = 0x0f0f0000;
+ u32 pcs_mask = 0x0f1f0000;
spin_lock_irqsave(&nesadapter->phy_lock, flags);
if (nesadapter->mac_sw_state[mac_number] != NES_MAC_SW_IDLE) {
@@ -2144,13 +2297,30 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
nes_debug(NES_DBG_PHY, "Eth SERDES Common Status: 0=0x%08X, 1=0x%08X\n",
nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0),
nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0+0x200));
- pcs_control_status = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index&1)*0x200));
- pcs_control_status = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index&1)*0x200));
+
+ if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_PUMA_1G) {
+ switch (mac_index) {
+ case 1:
+ case 3:
+ pcs_control_status = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
+ break;
+ default:
+ pcs_control_status = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0);
+ break;
+ }
+ } else {
+ pcs_control_status = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200));
+ pcs_control_status = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200));
+ }
+
nes_debug(NES_DBG_PHY, "PCS PHY Control/Status%u: 0x%08X\n",
mac_index, pcs_control_status);
- if (nesadapter->OneG_Mode) {
+ if ((nesadapter->OneG_Mode) &&
+ (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) {
u32temp = 0x01010000;
if (nesadapter->port_count > 2) {
u32temp |= 0x02020000;
@@ -2159,24 +2329,59 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
phy_data = 0;
nes_debug(NES_DBG_PHY, "PCS says the link is down\n");
}
- } else if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) {
- nes_read_10G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index]);
- temp_phy_data = (u16)nes_read_indexed(nesdev,
- NES_IDX_MAC_MDIO_CONTROL);
- u32temp = 20;
- do {
- nes_read_10G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index]);
- phy_data = (u16)nes_read_indexed(nesdev,
- NES_IDX_MAC_MDIO_CONTROL);
- if ((phy_data == temp_phy_data) || (!(--u32temp)))
- break;
- temp_phy_data = phy_data;
- } while (1);
- nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
- __func__, phy_data, nesadapter->mac_link_down ? "DOWN" : "UP");
-
} else {
- phy_data = (0x0f0f0000 == (pcs_control_status & 0x0f1f0000)) ? 4 : 0;
+ switch (nesadapter->phy_type[mac_index]) {
+ case NES_PHY_TYPE_IRIS:
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ u32temp = 20;
+ do {
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
+ phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ if ((phy_data == temp_phy_data) || (!(--u32temp)))
+ break;
+ temp_phy_data = phy_data;
+ } while (1);
+ nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
+ __func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP");
+ break;
+
+ case NES_PHY_TYPE_ARGUS:
+ /* clear the alarms */
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc002);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc005);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc006);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9004);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9005);
+ /* check link status */
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ u32temp = 100;
+ do {
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
+
+ phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ if ((phy_data == temp_phy_data) || (!(--u32temp)))
+ break;
+ temp_phy_data = phy_data;
+ } while (1);
+ nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
+ __func__, phy_data, nesadapter->mac_link_down ? "DOWN" : "UP");
+ break;
+
+ case NES_PHY_TYPE_PUMA_1G:
+ if (mac_index < 2)
+ pcs_val = pcs_mask = 0x01010000;
+ else
+ pcs_val = pcs_mask = 0x02020000;
+ /* fall through */
+ default:
+ phy_data = (pcs_val == (pcs_control_status & pcs_mask)) ? 0x4 : 0x0;
+ break;
+ }
}
if (phy_data & 0x0004) {
@@ -2185,8 +2390,8 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
nes_debug(NES_DBG_PHY, "The Link is UP!!. linkup was %d\n",
nesvnic->linkup);
if (nesvnic->linkup == 0) {
- printk(PFX "The Link is now up for port %u, netdev %p.\n",
- mac_index, nesvnic->netdev);
+ printk(PFX "The Link is now up for port %s, netdev %p.\n",
+ nesvnic->netdev->name, nesvnic->netdev);
if (netif_queue_stopped(nesvnic->netdev))
netif_start_queue(nesvnic->netdev);
nesvnic->linkup = 1;
@@ -2199,8 +2404,8 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
nes_debug(NES_DBG_PHY, "The Link is Down!!. linkup was %d\n",
nesvnic->linkup);
if (nesvnic->linkup == 1) {
- printk(PFX "The Link is now down for port %u, netdev %p.\n",
- mac_index, nesvnic->netdev);
+ printk(PFX "The Link is now down for port %s, netdev %p.\n",
+ nesvnic->netdev->name, nesvnic->netdev);
if (!(netif_queue_stopped(nesvnic->netdev)))
netif_stop_queue(nesvnic->netdev);
nesvnic->linkup = 0;
@@ -2254,10 +2459,13 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
u16 pkt_type;
u16 rqes_processed = 0;
u8 sq_cqes = 0;
+ u8 nes_use_lro = 0;
head = cq->cq_head;
cq_size = cq->cq_size;
cq->cqes_pending = 1;
+ if (nesvnic->netdev->features & NETIF_F_LRO)
+ nes_use_lro = 1;
do {
if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) &
NES_NIC_CQE_VALID) {
@@ -2272,8 +2480,10 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
/* bump past the vlan tag */
wqe_fragment_length++;
if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) {
- u64temp = (u64) le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+wqe_fragment_index*2]);
- u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+wqe_fragment_index*2]))<<32;
+ u64temp = (u64) le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX +
+ wqe_fragment_index * 2]);
+ u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX +
+ wqe_fragment_index * 2])) << 32;
bus_address = (dma_addr_t)u64temp;
if (test_and_clear_bit(nesnic->sq_tail, nesnic->first_frag_overflow)) {
pci_unmap_single(nesdev->pcidev,
@@ -2283,8 +2493,10 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
}
for (; wqe_fragment_index < 5; wqe_fragment_index++) {
if (wqe_fragment_length[wqe_fragment_index]) {
- u64temp = le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+wqe_fragment_index*2]);
- u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+wqe_fragment_index*2]))<<32;
+ u64temp = le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX +
+ wqe_fragment_index * 2]);
+ u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX
+ + wqe_fragment_index * 2])) <<32;
bus_address = (dma_addr_t)u64temp;
pci_unmap_page(nesdev->pcidev,
bus_address,
@@ -2331,7 +2543,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
if (atomic_read(&nesvnic->rx_skbs_needed) > (nesvnic->nic.rq_size>>1)) {
nes_write32(nesdev->regs+NES_CQE_ALLOC,
cq->cq_number | (cqe_count << 16));
-// nesadapter->tune_timer.cq_count += cqe_count;
+ /* nesadapter->tune_timer.cq_count += cqe_count; */
nesdev->currcq_count += cqe_count;
cqe_count = 0;
nes_replenish_nic_rq(nesvnic);
@@ -2379,9 +2591,16 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
>> 16);
nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n",
nesvnic->netdev->name, vlan_tag);
- nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag);
+ if (nes_use_lro)
+ lro_vlan_hwaccel_receive_skb(&nesvnic->lro_mgr, rx_skb,
+ nesvnic->vlan_grp, vlan_tag, NULL);
+ else
+ nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag);
} else {
- nes_netif_rx(rx_skb);
+ if (nes_use_lro)
+ lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL);
+ else
+ nes_netif_rx(rx_skb);
}
}
@@ -2399,7 +2618,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
/* Replenish Nic CQ */
nes_write32(nesdev->regs+NES_CQE_ALLOC,
cq->cq_number | (cqe_count << 16));
-// nesdev->nesadapter->tune_timer.cq_count += cqe_count;
+ /* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */
nesdev->currcq_count += cqe_count;
cqe_count = 0;
}
@@ -2413,26 +2632,27 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
} while (1);
+ if (nes_use_lro)
+ lro_flush_all(&nesvnic->lro_mgr);
if (sq_cqes) {
barrier();
/* restart the queue if it had been stopped */
if (netif_queue_stopped(nesvnic->netdev))
netif_wake_queue(nesvnic->netdev);
}
-
cq->cq_head = head;
/* nes_debug(NES_DBG_CQ, "CQ%u Processed = %u cqes, new head = %u.\n",
cq->cq_number, cqe_count, cq->cq_head); */
cq->cqe_allocs_pending = cqe_count;
if (unlikely(nesadapter->et_use_adaptive_rx_coalesce))
{
-// nesdev->nesadapter->tune_timer.cq_count += cqe_count;
+ /* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */
nesdev->currcq_count += cqe_count;
nes_nic_tune_timer(nesdev);
}
if (atomic_read(&nesvnic->rx_skbs_needed))
nes_replenish_nic_rq(nesvnic);
- }
+}
/**
@@ -2461,7 +2681,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) {
u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head].
- cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) |
+ cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) |
((u64)(le32_to_cpu(cq->cq_vbase[head].
cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX])));
cqp = *((struct nes_hw_cqp **)&u64temp);
@@ -2478,7 +2698,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
}
u64temp = (((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail].
- wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX])))<<32) |
+ wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) |
((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail].
wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX])));
cqp_request = *((struct nes_cqp_request **)&u64temp);
@@ -2515,7 +2735,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
} else {
nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n",
cqp_request,
- le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f);
+ le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f);
if (cqp_request->dynamic) {
kfree(cqp_request);
} else {
@@ -2529,7 +2749,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
}
cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
- nes_write32(nesdev->regs+NES_CQE_ALLOC, cq->cq_number | (1 << 16));
+ nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (1 << 16));
if (++cqp->sq_tail >= cqp->sq_size)
cqp->sq_tail = 0;
@@ -2598,13 +2818,13 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
nes_debug(NES_DBG_AEQ, "\n");
aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
if ((NES_AEQE_INBOUND_RDMA&aeq_info) || (!(NES_AEQE_QP&aeq_info))) {
- context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
+ context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
} else {
aeqe_context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
aeqe_context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
- aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
+ aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN];
BUG_ON(!context);
}
@@ -2617,7 +2837,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe,
nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]);
-
switch (async_event_id) {
case NES_AEQE_AEID_LLP_FIN_RECEIVED:
nesqp = *((struct nes_qp **)&context);
@@ -3021,7 +3240,7 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr,
cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_ARP_VALID);
cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = cpu_to_le32(
(((u32)mac_addr[2]) << 24) | (((u32)mac_addr[3]) << 16) |
- (((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]);
+ (((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]);
cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = cpu_to_le32(
(((u32)mac_addr[0]) << 16) | (u32)mac_addr[1]);
} else {
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index 8f36e231bdf..745bf94f3f0 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -33,8 +33,12 @@
#ifndef __NES_HW_H
#define __NES_HW_H
-#define NES_PHY_TYPE_1G 2
-#define NES_PHY_TYPE_IRIS 3
+#include <linux/inet_lro.h>
+
+#define NES_PHY_TYPE_1G 2
+#define NES_PHY_TYPE_IRIS 3
+#define NES_PHY_TYPE_ARGUS 4
+#define NES_PHY_TYPE_PUMA_1G 5
#define NES_PHY_TYPE_PUMA_10G 6
#define NES_MULTICAST_PF_MAX 8
@@ -965,7 +969,7 @@ struct nes_arp_entry {
#define NES_NIC_CQ_DOWNWARD_TREND 16
struct nes_hw_tune_timer {
- //u16 cq_count;
+ /* u16 cq_count; */
u16 threshold_low;
u16 threshold_target;
u16 threshold_high;
@@ -982,8 +986,10 @@ struct nes_hw_tune_timer {
#define NES_TIMER_INT_LIMIT 2
#define NES_TIMER_INT_LIMIT_DYNAMIC 10
#define NES_TIMER_ENABLE_LIMIT 4
-#define NES_MAX_LINK_INTERRUPTS 128
-#define NES_MAX_LINK_CHECK 200
+#define NES_MAX_LINK_INTERRUPTS 128
+#define NES_MAX_LINK_CHECK 200
+#define NES_MAX_LRO_DESCRIPTORS 32
+#define NES_LRO_MAX_AGGR 64
struct nes_adapter {
u64 fw_ver;
@@ -1183,6 +1189,9 @@ struct nes_vnic {
u8 of_device_registered;
u8 rdma_enabled;
u8 rx_checksum_disabled;
+ u32 lro_max_aggr;
+ struct net_lro_mgr lro_mgr;
+ struct net_lro_desc lro_desc[NES_MAX_LRO_DESCRIPTORS];
};
struct nes_ib_device {
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index e5366b013c1..1b0938c8777 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -185,12 +185,13 @@ static int nes_netdev_open(struct net_device *netdev)
nic_active |= nic_active_bit;
nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active);
- macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
+ macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
macaddr_high += (u16)netdev->dev_addr[1];
- macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
- macaddr_low += ((u32)netdev->dev_addr[3]) << 16;
- macaddr_low += ((u32)netdev->dev_addr[4]) << 8;
- macaddr_low += (u32)netdev->dev_addr[5];
+
+ macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
+ macaddr_low += ((u32)netdev->dev_addr[3]) << 16;
+ macaddr_low += ((u32)netdev->dev_addr[4]) << 8;
+ macaddr_low += (u32)netdev->dev_addr[5];
/* Program the various MAC regs */
for (i = 0; i < NES_MAX_PORT_COUNT; i++) {
@@ -451,7 +452,7 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
__le16 *wqe_fragment_length;
u32 nr_frags;
u32 original_first_length;
-// u64 *wqe_fragment_address;
+ /* u64 *wqe_fragment_address; */
/* first fragment (0) is used by copy buffer */
u16 wqe_fragment_index=1;
u16 hoffset;
@@ -461,11 +462,12 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
u32 old_head;
u32 wqe_misc;
- /* nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u,"
- " (%u frags), tso_size=%u\n",
- netdev->name, skb->len, skb_headlen(skb),
- skb_shinfo(skb)->nr_frags, skb_is_gso(skb));
- */
+ /*
+ * nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u,"
+ * " (%u frags), tso_size=%u\n",
+ * netdev->name, skb->len, skb_headlen(skb),
+ * skb_shinfo(skb)->nr_frags, skb_is_gso(skb));
+ */
if (!netif_carrier_ok(netdev))
return NETDEV_TX_OK;
@@ -795,12 +797,12 @@ static int nes_netdev_set_mac_address(struct net_device *netdev, void *p)
memcpy(netdev->dev_addr, mac_addr->sa_data, netdev->addr_len);
printk(PFX "%s: Address length = %d, Address = %s\n",
__func__, netdev->addr_len, print_mac(mac, mac_addr->sa_data));
- macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
+ macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
macaddr_high += (u16)netdev->dev_addr[1];
- macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
- macaddr_low += ((u32)netdev->dev_addr[3]) << 16;
- macaddr_low += ((u32)netdev->dev_addr[4]) << 8;
- macaddr_low += (u32)netdev->dev_addr[5];
+ macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
+ macaddr_low += ((u32)netdev->dev_addr[3]) << 16;
+ macaddr_low += ((u32)netdev->dev_addr[4]) << 8;
+ macaddr_low += (u32)netdev->dev_addr[5];
for (i = 0; i < NES_MAX_PORT_COUNT; i++) {
if (nesvnic->qp_nic_index[i] == 0xf) {
@@ -881,12 +883,12 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
print_mac(mac, multicast_addr->dmi_addr),
perfect_filter_register_address+(mc_index * 8),
mc_nic_index);
- macaddr_high = ((u16)multicast_addr->dmi_addr[0]) << 8;
+ macaddr_high = ((u16)multicast_addr->dmi_addr[0]) << 8;
macaddr_high += (u16)multicast_addr->dmi_addr[1];
- macaddr_low = ((u32)multicast_addr->dmi_addr[2]) << 24;
- macaddr_low += ((u32)multicast_addr->dmi_addr[3]) << 16;
- macaddr_low += ((u32)multicast_addr->dmi_addr[4]) << 8;
- macaddr_low += (u32)multicast_addr->dmi_addr[5];
+ macaddr_low = ((u32)multicast_addr->dmi_addr[2]) << 24;
+ macaddr_low += ((u32)multicast_addr->dmi_addr[3]) << 16;
+ macaddr_low += ((u32)multicast_addr->dmi_addr[4]) << 8;
+ macaddr_low += (u32)multicast_addr->dmi_addr[5];
nes_write_indexed(nesdev,
perfect_filter_register_address+(mc_index * 8),
macaddr_low);
@@ -910,23 +912,23 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
/**
* nes_netdev_change_mtu
*/
-static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
+static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
{
struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- int ret = 0;
- u8 jumbomode=0;
+ struct nes_device *nesdev = nesvnic->nesdev;
+ int ret = 0;
+ u8 jumbomode = 0;
- if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu))
+ if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu))
return -EINVAL;
- netdev->mtu = new_mtu;
+ netdev->mtu = new_mtu;
nesvnic->max_frame_size = new_mtu + VLAN_ETH_HLEN;
if (netdev->mtu > 1500) {
jumbomode=1;
}
- nes_nic_init_timer_defaults(nesdev, jumbomode);
+ nes_nic_init_timer_defaults(nesdev, jumbomode);
if (netif_running(netdev)) {
nes_netdev_stop(netdev);
@@ -936,8 +938,7 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
return ret;
}
-#define NES_ETHTOOL_STAT_COUNT 55
-static const char nes_ethtool_stringset[NES_ETHTOOL_STAT_COUNT][ETH_GSTRING_LEN] = {
+static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = {
"Link Change Interrupts",
"Linearized SKBs",
"T/GSO Requests",
@@ -993,8 +994,12 @@ static const char nes_ethtool_stringset[NES_ETHTOOL_STAT_COUNT][ETH_GSTRING_LEN]
"CQ Depth 32",
"CQ Depth 128",
"CQ Depth 256",
+ "LRO aggregated",
+ "LRO flushed",
+ "LRO no_desc",
};
+#define NES_ETHTOOL_STAT_COUNT ARRAY_SIZE(nes_ethtool_stringset)
/**
* nes_netdev_get_rx_csum
@@ -1189,6 +1194,9 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev,
target_stat_values[52] = int_mod_cq_depth_32;
target_stat_values[53] = int_mod_cq_depth_128;
target_stat_values[54] = int_mod_cq_depth_256;
+ target_stat_values[55] = nesvnic->lro_mgr.stats.aggregated;
+ target_stat_values[56] = nesvnic->lro_mgr.stats.flushed;
+ target_stat_values[57] = nesvnic->lro_mgr.stats.no_desc;
}
@@ -1219,14 +1227,14 @@ static int nes_netdev_set_coalesce(struct net_device *netdev,
struct ethtool_coalesce *et_coalesce)
{
struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_device *nesdev = nesvnic->nesdev;
struct nes_adapter *nesadapter = nesdev->nesadapter;
struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
unsigned long flags;
- spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
+ spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
if (et_coalesce->rx_max_coalesced_frames_low) {
- shared_timer->threshold_low = et_coalesce->rx_max_coalesced_frames_low;
+ shared_timer->threshold_low = et_coalesce->rx_max_coalesced_frames_low;
}
if (et_coalesce->rx_max_coalesced_frames_irq) {
shared_timer->threshold_target = et_coalesce->rx_max_coalesced_frames_irq;
@@ -1246,14 +1254,14 @@ static int nes_netdev_set_coalesce(struct net_device *netdev,
nesadapter->et_rx_coalesce_usecs_irq = et_coalesce->rx_coalesce_usecs_irq;
if (et_coalesce->use_adaptive_rx_coalesce) {
nesadapter->et_use_adaptive_rx_coalesce = 1;
- nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC;
+ nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC;
nesadapter->et_rx_coalesce_usecs_irq = 0;
if (et_coalesce->pkt_rate_low) {
- nesadapter->et_pkt_rate_low = et_coalesce->pkt_rate_low;
+ nesadapter->et_pkt_rate_low = et_coalesce->pkt_rate_low;
}
} else {
nesadapter->et_use_adaptive_rx_coalesce = 0;
- nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT;
+ nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT;
if (nesadapter->et_rx_coalesce_usecs_irq) {
nes_write32(nesdev->regs+NES_PERIODIC_CONTROL,
0x80000000 | ((u32)(nesadapter->et_rx_coalesce_usecs_irq*8)));
@@ -1270,28 +1278,28 @@ static int nes_netdev_get_coalesce(struct net_device *netdev,
struct ethtool_coalesce *et_coalesce)
{
struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_device *nesdev = nesvnic->nesdev;
struct nes_adapter *nesadapter = nesdev->nesadapter;
struct ethtool_coalesce temp_et_coalesce;
struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
unsigned long flags;
memset(&temp_et_coalesce, 0, sizeof(temp_et_coalesce));
- temp_et_coalesce.rx_coalesce_usecs_irq = nesadapter->et_rx_coalesce_usecs_irq;
- temp_et_coalesce.use_adaptive_rx_coalesce = nesadapter->et_use_adaptive_rx_coalesce;
- temp_et_coalesce.rate_sample_interval = nesadapter->et_rate_sample_interval;
+ temp_et_coalesce.rx_coalesce_usecs_irq = nesadapter->et_rx_coalesce_usecs_irq;
+ temp_et_coalesce.use_adaptive_rx_coalesce = nesadapter->et_use_adaptive_rx_coalesce;
+ temp_et_coalesce.rate_sample_interval = nesadapter->et_rate_sample_interval;
temp_et_coalesce.pkt_rate_low = nesadapter->et_pkt_rate_low;
spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
- temp_et_coalesce.rx_max_coalesced_frames_low = shared_timer->threshold_low;
- temp_et_coalesce.rx_max_coalesced_frames_irq = shared_timer->threshold_target;
+ temp_et_coalesce.rx_max_coalesced_frames_low = shared_timer->threshold_low;
+ temp_et_coalesce.rx_max_coalesced_frames_irq = shared_timer->threshold_target;
temp_et_coalesce.rx_max_coalesced_frames_high = shared_timer->threshold_high;
- temp_et_coalesce.rx_coalesce_usecs_low = shared_timer->timer_in_use_min;
+ temp_et_coalesce.rx_coalesce_usecs_low = shared_timer->timer_in_use_min;
temp_et_coalesce.rx_coalesce_usecs_high = shared_timer->timer_in_use_max;
if (nesadapter->et_use_adaptive_rx_coalesce) {
temp_et_coalesce.rx_coalesce_usecs_irq = shared_timer->timer_in_use;
}
spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
- memcpy(et_coalesce, &temp_et_coalesce, sizeof(*et_coalesce));
+ memcpy(et_coalesce, &temp_et_coalesce, sizeof(*et_coalesce));
return 0;
}
@@ -1370,30 +1378,38 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd
u16 phy_data;
et_cmd->duplex = DUPLEX_FULL;
- et_cmd->port = PORT_MII;
+ et_cmd->port = PORT_MII;
+
if (nesadapter->OneG_Mode) {
- et_cmd->supported = SUPPORTED_1000baseT_Full|SUPPORTED_Autoneg;
- et_cmd->advertising = ADVERTISED_1000baseT_Full|ADVERTISED_Autoneg;
et_cmd->speed = SPEED_1000;
- nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index],
- &phy_data);
- if (phy_data&0x1000) {
- et_cmd->autoneg = AUTONEG_ENABLE;
+ if (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) {
+ et_cmd->supported = SUPPORTED_1000baseT_Full;
+ et_cmd->advertising = ADVERTISED_1000baseT_Full;
+ et_cmd->autoneg = AUTONEG_DISABLE;
+ et_cmd->transceiver = XCVR_INTERNAL;
+ et_cmd->phy_address = nesdev->mac_index;
} else {
- et_cmd->autoneg = AUTONEG_DISABLE;
+ et_cmd->supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg;
+ et_cmd->advertising = ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg;
+ nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], &phy_data);
+ if (phy_data & 0x1000)
+ et_cmd->autoneg = AUTONEG_ENABLE;
+ else
+ et_cmd->autoneg = AUTONEG_DISABLE;
+ et_cmd->transceiver = XCVR_EXTERNAL;
+ et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index];
}
- et_cmd->transceiver = XCVR_EXTERNAL;
- et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index];
} else {
- if (nesadapter->phy_type[nesvnic->logical_port] == NES_PHY_TYPE_IRIS) {
+ if ((nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) ||
+ (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_ARGUS)) {
et_cmd->transceiver = XCVR_EXTERNAL;
- et_cmd->port = PORT_FIBRE;
- et_cmd->supported = SUPPORTED_FIBRE;
+ et_cmd->port = PORT_FIBRE;
+ et_cmd->supported = SUPPORTED_FIBRE;
et_cmd->advertising = ADVERTISED_FIBRE;
et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index];
} else {
et_cmd->transceiver = XCVR_INTERNAL;
- et_cmd->supported = SUPPORTED_10000baseT_Full;
+ et_cmd->supported = SUPPORTED_10000baseT_Full;
et_cmd->advertising = ADVERTISED_10000baseT_Full;
et_cmd->phy_address = nesdev->mac_index;
}
@@ -1416,14 +1432,15 @@ static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd
struct nes_adapter *nesadapter = nesdev->nesadapter;
u16 phy_data;
- if (nesadapter->OneG_Mode) {
+ if ((nesadapter->OneG_Mode) &&
+ (nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G)) {
nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index],
&phy_data);
if (et_cmd->autoneg) {
/* Turn on Full duplex, Autoneg, and restart autonegotiation */
phy_data |= 0x1300;
} else {
- // Turn off autoneg
+ /* Turn off autoneg */
phy_data &= ~0x1000;
}
nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index],
@@ -1454,6 +1471,8 @@ static struct ethtool_ops nes_ethtool_ops = {
.set_sg = ethtool_op_set_sg,
.get_tso = ethtool_op_get_tso,
.set_tso = ethtool_op_set_tso,
+ .get_flags = ethtool_op_get_flags,
+ .set_flags = ethtool_op_set_flags,
};
@@ -1607,27 +1626,34 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
list_add_tail(&nesvnic->list, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]);
if ((nesdev->netdev_count == 0) &&
- (PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index)) {
- nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n",
- NES_IDX_PHY_PCS_CONTROL_STATUS0+(0x200*(nesvnic->logical_port&1)));
+ ((PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index) ||
+ ((nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) &&
+ (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) ||
+ ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) {
+ /*
+ * nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n",
+ * NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesvnic->logical_port & 1)));
+ */
u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
- (0x200*(nesvnic->logical_port&1)));
- u32temp |= 0x00200000;
- nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
- (0x200*(nesvnic->logical_port&1)), u32temp);
+ (0x200 * (nesdev->mac_index & 1)));
+ if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G) {
+ u32temp |= 0x00200000;
+ nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
+ (0x200 * (nesdev->mac_index & 1)), u32temp);
+ }
+
u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
- (0x200*(nesvnic->logical_port&1)) );
+ (0x200 * (nesdev->mac_index & 1)));
+
if ((u32temp&0x0f1f0000) == 0x0f0f0000) {
- if (nesdev->nesadapter->phy_type[nesvnic->logical_port] == NES_PHY_TYPE_IRIS) {
+ if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) {
nes_init_phy(nesdev);
- nes_read_10G_phy_reg(nesdev, 1,
- nesdev->nesadapter->phy_index[nesvnic->logical_port]);
+ nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1);
temp_phy_data = (u16)nes_read_indexed(nesdev,
NES_IDX_MAC_MDIO_CONTROL);
u32temp = 20;
do {
- nes_read_10G_phy_reg(nesdev, 1,
- nesdev->nesadapter->phy_index[nesvnic->logical_port]);
+ nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1);
phy_data = (u16)nes_read_indexed(nesdev,
NES_IDX_MAC_MDIO_CONTROL);
if ((phy_data == temp_phy_data) || (!(--u32temp)))
@@ -1644,6 +1670,14 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
nesvnic->linkup = 1;
}
+ } else if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) {
+ nes_debug(NES_DBG_INIT, "mac_index=%d, logical_port=%d, u32temp=0x%04X, PCI_FUNC=%d\n",
+ nesdev->mac_index, nesvnic->logical_port, u32temp, PCI_FUNC(nesdev->pcidev->devfn));
+ if (((nesdev->mac_index < 2) && ((u32temp&0x01010000) == 0x01010000)) ||
+ ((nesdev->mac_index > 1) && ((u32temp&0x02020000) == 0x02020000))) {
+ nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
+ nesvnic->linkup = 1;
+ }
}
/* clear the MAC interrupt status, assumes direct logical to physical mapping */
u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index));
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index c6d5631a699..fe83d1b2b17 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -444,15 +444,13 @@ void nes_read_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16
/**
* nes_write_10G_phy_reg
*/
-void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg,
- u8 phy_addr, u16 data)
+void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_addr, u8 dev_addr, u16 phy_reg,
+ u16 data)
{
- u32 dev_addr;
u32 port_addr;
u32 u32temp;
u32 counter;
- dev_addr = 1;
port_addr = phy_addr;
/* set address */
@@ -492,14 +490,12 @@ void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg,
* This routine only issues the read, the data must be read
* separately.
*/
-void nes_read_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg, u8 phy_addr)
+void nes_read_10G_phy_reg(struct nes_device *nesdev, u8 phy_addr, u8 dev_addr, u16 phy_reg)
{
- u32 dev_addr;
u32 port_addr;
u32 u32temp;
u32 counter;
- dev_addr = 1;
port_addr = phy_addr;
/* set address */
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index ee74f7c7a6d..99b3c4ae86e 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1266,7 +1266,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
sq_size = init_attr->cap.max_send_wr;
rq_size = init_attr->cap.max_recv_wr;
- // check if the encoded sizes are OK or not...
+ /* check if the encoded sizes are OK or not... */
sq_encoded_size = nes_get_encoded_size(&sq_size);
rq_encoded_size = nes_get_encoded_size(&rq_size);
@@ -2377,7 +2377,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u8 single_page = 1;
u8 stag_key;
- region = ib_umem_get(pd->uobject->context, start, length, acc);
+ region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(region)) {
return (struct ib_mr *)region;
}