From 82d416fffb5e8e39e899be7075dbeeac5fb8f0c2 Mon Sep 17 00:00:00 2001 From: Bryan Rosenburg Date: Wed, 20 Feb 2008 17:31:48 -0600 Subject: RDMA/cxgb3: Fix shift calc in build_phys_page_list() for 1-entry page lists A single entry (addr 0x10001000, size 0x2000) will get converted to page address 0x10000000 with a page size of 0x4000. The code as it stands doesn't address the single buffer case, but in fact it allows the subsequent single-buffer special case to be eliminated entirely. Because the mask now includes the (page adjusted) starting and ending addresses, the general case works for the single buffer case as well. Signed-off-by: Bryan Rosenburg Acked-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_mem.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c index 73bfd1656f8..b8797c66676 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c +++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c @@ -136,14 +136,8 @@ int build_phys_page_list(struct ib_phys_buf *buffer_list, /* Find largest page shift we can use to cover buffers */ for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift)) - if (num_phys_buf > 1) { - if ((1ULL << *shift) & mask) - break; - } else - if (1ULL << *shift >= - buffer_list[0].size + - (buffer_list[0].addr & ((1ULL << *shift) - 1))) - break; + if ((1ULL << *shift) & mask) + break; buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1); buffer_list[0].addr &= ~0ull << *shift; -- cgit v1.2.3 From 9300c0c06788a409a97d54bbe3360d2f385fc621 Mon Sep 17 00:00:00 2001 From: Chien Tung Date: Thu, 21 Feb 2008 07:51:17 -0600 Subject: RDMA/nes: Resurrect error path dead code Adrian Bunk pointed out that a Coverity scan found some apparently dead code in nes_verbs.c that really shouldn't have been dead. 
The function nes_create_cq() was missing the assignment err = 1; just prior to an iteration that conditionally set err = 0 if a PBL was found for a given virtual CQ. I also noticed we should have been returning -EFAULT on a couple related error paths. Signed-off-by: Chien Tung Signed-off-by: Glenn Streiff Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_verbs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 4dafbe16e82..201b95ee23c 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1327,7 +1327,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, (long long unsigned int)req.user_wqe_buffers); nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); kfree(nesqp->allocated_buffer); - return ERR_PTR(-ENOMEM); + return ERR_PTR(-EFAULT); } } @@ -1674,6 +1674,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, } nes_debug(NES_DBG_CQ, "CQ Virtual Address = %08lX, size = %u.\n", (unsigned long)req.user_cq_buffer, entries); + err = 1; list_for_each_entry(nespbl, &nes_ucontext->cq_reg_mem_list, list) { if (nespbl->user_base == (unsigned long )req.user_cq_buffer) { list_del(&nespbl->list); @@ -1686,7 +1687,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, if (err) { nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); kfree(nescq); - return ERR_PTR(err); + return ERR_PTR(-EFAULT); } pbl_entries = nespbl->pbl_size >> 3; -- cgit v1.2.3 From 65b07ec29354b345ff93914d064c2467aef4c862 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 21 Feb 2008 08:01:18 -0600 Subject: RDMA/nes: Fix off-by-one Fix an off-by-one spotted by the Coverity checker. 
Signed-off-by: Adrian Bunk Signed-off-by: Glenn Streiff Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 201b95ee23c..692f0d82130 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -929,7 +929,7 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev, NES_MAX_USER_DB_REGIONS, nesucontext->first_free_db); nes_debug(NES_DBG_PD, "find_first_zero_biton doorbells returned %u, mapping pd_id %u.\n", nespd->mmap_db_index, nespd->pd_id); - if (nespd->mmap_db_index > NES_MAX_USER_DB_REGIONS) { + if (nespd->mmap_db_index >= NES_MAX_USER_DB_REGIONS) { nes_debug(NES_DBG_PD, "mmap_db_index > MAX\n"); nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); kfree(nespd); -- cgit v1.2.3 From ed0ba33d64fb933f5fd985aa8f641984efd9d658 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 21 Feb 2008 08:12:06 -0600 Subject: RDMA/nes: Fix a memory leak in schedule_nes_timer() Fix a memory leak spotted by the Coverity checker. 
Signed-off-by: Adrian Bunk Signed-off-by: Glenn Streiff Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index bd5cfeaac20..78e845c9457 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -370,11 +370,11 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, int ret = 0; u32 was_timer_set; + if (!cm_node) + return -EINVAL; new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); if (!new_send) return -1; - if (!cm_node) - return -EINVAL; /* new_send->timetosend = currenttime */ new_send->retrycount = NES_DEFAULT_RETRYS; -- cgit v1.2.3 From a4435febd4c0f14b25159dca249ecf91301c7c76 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 21 Feb 2008 08:13:47 -0600 Subject: RDMA/nes: Fix a check-after-use in nes_probe() Fix a check-after-use spotted by the Coverity checker. 
Signed-off-by: Adrian Bunk Signed-off-by: Glenn Streiff Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 7f8853b44ee..b2112f5a422 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -567,12 +567,12 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i /* Init the adapter */ nesdev->nesadapter = nes_init_adapter(nesdev, hw_rev); - nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval; if (!nesdev->nesadapter) { printk(KERN_ERR PFX "Unable to initialize adapter.\n"); ret = -ENOMEM; goto bail5; } + nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval; /* nesdev->base_doorbell_index = nesdev->nesadapter->pd_config_base[PCI_FUNC(nesdev->pcidev->devfn)]; */ -- cgit v1.2.3 From f84fba6f969065c6622669bbaa955c26fc1461ae Mon Sep 17 00:00:00 2001 From: Glenn Streiff Date: Thu, 21 Feb 2008 08:17:54 -0600 Subject: RDMA/nes: Fix use-after-free in nes_create_cq() Just delete the debugging statement so we don't use cqp_request after freeing it. Adrian Bunk flagged this use-after-free issue spotted by the Coverity checker. 
Signed-off-by: Glenn Streiff Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_verbs.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 692f0d82130..a651e9d9f0e 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1832,9 +1832,6 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, spin_unlock_irqrestore(&nesdev->cqp.lock, flags); } } - nes_debug(NES_DBG_CQ, "iWARP CQ%u create timeout expired, major code = 0x%04X," - " minor code = 0x%04X\n", - nescq->hw_cq.cq_number, cqp_request->major_code, cqp_request->minor_code); if (!context) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); -- cgit v1.2.3 From a2e9c384ce76993cd68d6de57eaa81985b4618e3 Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Thu, 21 Feb 2008 08:27:32 -0600 Subject: RDMA/nes: Fix use-after-free in mini_cm_dec_refcnt_listen() Fix use-after-free spotted by Coverity checker flagged by Adrian Bunk. 
Signed-off-by: Faisal Latif Signed-off-by: Glenn Streiff Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_cm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 78e845c9457..6c298aa9ab0 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -947,6 +947,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener); kfree(listener); + listener = NULL; ret = 0; cm_listens_destroyed++; } else { -- cgit v1.2.3 From 30da7cff87f0ffa169fe07b766c3d6a5f6d1f6ab Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Thu, 21 Feb 2008 08:31:22 -0600 Subject: RDMA/nes: Fix CRC endianness for RDMA connection establishment on big-endian With commit ef19454b ("[LIB] crc32c: Keep intermediate crc state in cpu order"), the behavior of crc32c changes on big-endian platforms. Our algorithm expects the previous behavior; otherwise we have RDMA connection establishment failure on big-endian platforms like powerpc. Apply cpu_to_le32() to value returned by crc32c() to get the previous behavior. 
Signed-off-by: Faisal Latif Signed-off-by: Glenn Streiff Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes.h | 15 +++++++++++++++ drivers/infiniband/hw/nes/nes_cm.c | 10 ++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index fd57e8a1582..a48b288618e 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -285,6 +285,21 @@ struct nes_device { }; +static inline __le32 get_crc_value(struct nes_v4_quad *nes_quad) +{ + u32 crc_value; + crc_value = crc32c(~0, (void *)nes_quad, sizeof (struct nes_v4_quad)); + + /* + * With commit ef19454b ("[LIB] crc32c: Keep intermediate crc + * state in cpu order"), behavior of crc32c changes on + * big-endian platforms. Our algorithm expects the previous + * behavior; otherwise we have RDMA connection establishment + * issue on big-endian. + */ + return cpu_to_le32(crc_value); +} + static inline void set_wqe_64bit_value(__le32 *wqe_words, u32 index, u64 value) { diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 6c298aa9ab0..39adb267fb1 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -2320,6 +2320,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct iw_cm_event cm_event; struct nes_hw_qp_wqe *wqe; struct nes_v4_quad nes_quad; + u32 crc_value; int ret; ibqp = nes_get_qp(cm_id->device, conn_param->qpn); @@ -2436,8 +2437,8 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; /* Produce hash key */ - nesqp->hte_index = cpu_to_be32( - crc32c(~0, (void *)&nes_quad, sizeof(nes_quad)) ^ 0xffffffff); + crc_value = get_crc_value(&nes_quad); + nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n", nesqp->hte_index, 
nesqp->hte_index & adapter->hte_index_mask); @@ -2751,6 +2752,7 @@ void cm_event_connected(struct nes_cm_event *event) struct iw_cm_event cm_event; struct nes_hw_qp_wqe *wqe; struct nes_v4_quad nes_quad; + u32 crc_value; int ret; /* get all our handles */ @@ -2828,8 +2830,8 @@ void cm_event_connected(struct nes_cm_event *event) nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; /* Produce hash key */ - nesqp->hte_index = cpu_to_be32( - crc32c(~0, (void *)&nes_quad, sizeof(nes_quad)) ^ 0xffffffff); + crc_value = get_crc_value(&nes_quad); + nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, After CRC = 0x%08X\n", nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask); -- cgit v1.2.3 From 4b1cc7e7ca5715907d17619dcb49144db6efe1c9 Mon Sep 17 00:00:00 2001 From: John Lacombe Date: Thu, 21 Feb 2008 08:34:58 -0600 Subject: RDMA/nes: Fix interrupt moderation low threshold Interrupt moderation low threshold value was incorrectly triggering, indicating that the threshold should be lowered. The impact was the timer was likely to become 40usecs and get stuck there. The biggest side effect was too many interrupts and nonoptimal performance. 
Signed-off-by: John Lacombe Signed-off-by: Glenn Streiff Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_hw.c | 13 +++++-------- drivers/infiniband/hw/nes/nes_hw.h | 2 +- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 7c4c0fbf0ab..49e53e4c1eb 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -156,15 +156,14 @@ static void nes_nic_tune_timer(struct nes_device *nesdev) spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); - if (shared_timer->cq_count_old < cq_count) { - if (cq_count > shared_timer->threshold_low) - shared_timer->cq_direction_downward=0; - } - if (shared_timer->cq_count_old >= cq_count) + if (shared_timer->cq_count_old <= cq_count) + shared_timer->cq_direction_downward = 0; + else shared_timer->cq_direction_downward++; shared_timer->cq_count_old = cq_count; if (shared_timer->cq_direction_downward > NES_NIC_CQ_DOWNWARD_TREND) { - if (cq_count <= shared_timer->threshold_low) { + if (cq_count <= shared_timer->threshold_low && + shared_timer->threshold_low > 4) { shared_timer->threshold_low = shared_timer->threshold_low/2; shared_timer->cq_direction_downward=0; nesdev->currcq_count = 0; @@ -1728,7 +1727,6 @@ int nes_napi_isr(struct nes_device *nesdev) nesdev->int_req &= ~NES_INT_TIMER; nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); - nesadapter->tune_timer.timer_in_use_old = 0; } nesdev->deepcq_count = 0; return 1; @@ -1867,7 +1865,6 @@ void nes_dpc(unsigned long param) nesdev->int_req &= ~NES_INT_TIMER; nes_write32(nesdev->regs + NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); - nesdev->nesadapter->tune_timer.timer_in_use_old = 0; } else { nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff|(~nesdev->int_req)); } diff --git 
a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 1e10df550c9..b7e2844f096 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -962,7 +962,7 @@ struct nes_arp_entry { #define DEFAULT_JUMBO_NES_QL_LOW 12 #define DEFAULT_JUMBO_NES_QL_TARGET 40 #define DEFAULT_JUMBO_NES_QL_HIGH 128 -#define NES_NIC_CQ_DOWNWARD_TREND 8 +#define NES_NIC_CQ_DOWNWARD_TREND 16 struct nes_hw_tune_timer { //u16 cq_count; -- cgit v1.2.3 From 84ba284cd78c130818e2de53150f39b92504593b Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Fri, 22 Feb 2008 10:40:45 -0800 Subject: IB/cm: Flush workqueue when removing device When a CM MAD is received, it is queued to a CM workqueue for processing. The queued work item references the port and device on which the MAD was received. If that device is removed from the system before the work item can execute, the work item will reference freed memory. To fix this, flush the workqueue after unregistering to receive MAD, and before the device is freed.
Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index b10ade92efe..4df40515708 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3759,6 +3759,7 @@ static void cm_remove_one(struct ib_device *device) port = cm_dev->port[i-1]; ib_modify_port(device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); + flush_workqueue(cm.wq); cm_remove_port_fs(port); } kobject_put(&cm_dev->dev_obj); @@ -3813,6 +3814,7 @@ static void __exit ib_cm_cleanup(void) cancel_delayed_work(&timewait_info->work.work); spin_unlock_irq(&cm.lock); + ib_unregister_client(&cm_client); destroy_workqueue(cm.wq); list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) { @@ -3820,7 +3822,6 @@ static void __exit ib_cm_cleanup(void) kfree(timewait_info); } - ib_unregister_client(&cm_client); class_unregister(&cm_class); idr_destroy(&cm.local_id_table); } -- cgit v1.2.3 From 35fb5340e3de5dff86923eb0cded748c3a6e05e7 Mon Sep 17 00:00:00 2001 From: Pete Wyckoff Date: Tue, 26 Feb 2008 13:27:31 -0500 Subject: Revert "IB/fmr_pool: ib_fmr_pool_flush() should flush all dirty FMRs" This reverts commit a3cd7d9070be417a21905c997ee32d756d999b38. The original commit breaks iSER reliably, making it complain: iser: iser_reg_page_vec:ib_fmr_pool_map_phys failed: -11 The FMR cleanup thread runs ib_fmr_batch_release() as dirty entries build up. This commit causes clean but used FMR entries also to be purged. During that process, another thread can see that there are no free FMRs and fail, even though there should always have been enough available. 
Signed-off-by: Pete Wyckoff Signed-off-by: Roland Dreier --- drivers/infiniband/core/fmr_pool.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 7f00347364f..4044fdf62cc 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -139,7 +139,7 @@ static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool, static void ib_fmr_batch_release(struct ib_fmr_pool *pool) { int ret; - struct ib_pool_fmr *fmr, *next; + struct ib_pool_fmr *fmr; LIST_HEAD(unmap_list); LIST_HEAD(fmr_list); @@ -158,20 +158,6 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool) #endif } - /* - * The free_list may hold FMRs that have been put there - * because they haven't reached the max_remap count. - * Invalidate their mapping as well. - */ - list_for_each_entry_safe(fmr, next, &pool->free_list, list) { - if (fmr->remap_count == 0) - continue; - hlist_del_init(&fmr->cache_node); - fmr->remap_count = 0; - list_add_tail(&fmr->fmr->list, &fmr_list); - list_move(&fmr->list, &unmap_list); - } - list_splice(&pool->dirty_list, &unmap_list); INIT_LIST_HEAD(&pool->dirty_list); pool->dirty_len = 0; @@ -384,6 +370,11 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) i = 0; list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) { + if (fmr->remap_count) { + INIT_LIST_HEAD(&fmr_list); + list_add_tail(&fmr->fmr->list, &fmr_list); + ib_unmap_fmr(&fmr_list); + } ib_dealloc_fmr(fmr->fmr); list_del(&fmr->list); kfree(fmr); -- cgit v1.2.3 From 331552925d17ffa2f5c676e282d4fd37c852d9e3 Mon Sep 17 00:00:00 2001 From: Pete Wyckoff Date: Tue, 26 Feb 2008 13:27:53 -0500 Subject: IB/fmr_pool: Flush all dirty FMRs from ib_fmr_pool_flush() Commit a3cd7d90 ("IB/fmr_pool: ib_fmr_pool_flush() should flush all dirty FMRs") caused a regression for iSER and was reverted in e5507736. 
This change attempts to redo the original patch so that all used FMR entries are flushed when ib_flush_fmr_pool() is called without affecting the normal FMR pool cleaning thread. Simply move used entries from the clean list onto the dirty list in ib_flush_fmr_pool() before letting the cleanup thread do its job. Signed-off-by: Pete Wyckoff Signed-off-by: Roland Dreier --- drivers/infiniband/core/fmr_pool.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 4044fdf62cc..06d502c06a4 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -398,8 +398,23 @@ EXPORT_SYMBOL(ib_destroy_fmr_pool); */ int ib_flush_fmr_pool(struct ib_fmr_pool *pool) { - int serial = atomic_inc_return(&pool->req_ser); + int serial; + struct ib_pool_fmr *fmr, *next; + + /* + * The free_list holds FMRs that may have been used + * but have not been remapped enough times to be dirty. + * Put them on the dirty list now so that the cleanup + * thread will reap them too. + */ + spin_lock_irq(&pool->pool_lock); + list_for_each_entry_safe(fmr, next, &pool->free_list, list) { + if (fmr->remap_count > 0) + list_move(&fmr->list, &pool->dirty_list); + } + spin_unlock_irq(&pool->pool_lock); + serial = atomic_inc_return(&pool->req_ser); wake_up_process(pool->thread); if (wait_event_interruptible(pool->force_wait, -- cgit v1.2.3 From 1bab74e691d3c7845df2342d202c0f1c2344c834 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Fri, 29 Feb 2008 13:53:18 -0800 Subject: RDMA/cxgb3: Return correct max_inline_data when creating a QP Set cap.max_inline_data to the actual max inline data that the adapter support, so that userspace apps see the right value returned. 
Signed-off-by: Jon Mason Acked-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_provider.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index df1838f8f94..ee3d63cd1f9 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -819,8 +819,11 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, kfree(qhp); return ERR_PTR(-ENOMEM); } + attrs->cap.max_recv_wr = rqsize - 1; attrs->cap.max_send_wr = sqsize; + attrs->cap.max_inline_data = T3_MAX_INLINE; + qhp->rhp = rhp; qhp->attr.pd = php->pdid; qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid; -- cgit v1.2.3 From 4fa45725df0f00c2bf86a0fc2670e88bfe0ceee7 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Sun, 9 Mar 2008 13:54:12 -0700 Subject: RDMA/cxgb3: Fix iwch_create_cq() off-by-one error The cxgb3 driver is unnecessarily decreasing the number of CQ entries by one when creating a CQ. This will cause the CQ not to have as many entries as requested by the user if the user requests a power of 2 size.
Signed-off-by: Jon Mason Acked-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_provider.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index ee3d63cd1f9..b2ea9210467 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -189,7 +189,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve return ERR_PTR(-ENOMEM); } chp->rhp = rhp; - chp->ibcq.cqe = (1 << chp->cq.size_log2) - 1; + chp->ibcq.cqe = 1 << chp->cq.size_log2; spin_lock_init(&chp->lock); atomic_set(&chp->refcnt, 1); init_waitqueue_head(&chp->wait); -- cgit v1.2.3 From 9a378270c085080b2f38dee6308de4d8413b5141 Mon Sep 17 00:00:00 2001 From: Arne Redlich Date: Tue, 4 Mar 2008 14:07:22 +0200 Subject: IB/iser: Fix list iteration bug The iteration through the list of "iser_device"s during device lookup/creation is broken -- it might result in an infinite loop if more than one HCA is used with iSER. Fix this by using list_for_each_entry() instead of the open-coded flawed list iteration code. 
Signed-off-by: Arne Redlich Signed-off-by: Erez Zilber Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_verbs.c | 36 ++++++++++++++------------------ 1 file changed, 16 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 714b8db02b2..768ba69f2fd 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -237,33 +237,29 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn) static struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id) { - struct list_head *p_list; - struct iser_device *device = NULL; + struct iser_device *device; mutex_lock(&ig.device_list_mutex); - p_list = ig.device_list.next; - while (p_list != &ig.device_list) { - device = list_entry(p_list, struct iser_device, ig_list); + list_for_each_entry(device, &ig.device_list, ig_list) /* find if there's a match using the node GUID */ if (device->ib_device->node_guid == cma_id->device->node_guid) - break; - } - - if (device == NULL) { - device = kzalloc(sizeof *device, GFP_KERNEL); - if (device == NULL) goto out; - /* assign this device to the device */ - device->ib_device = cma_id->device; - /* init the device and link it into ig device list */ - if (iser_create_device_ib_res(device)) { - kfree(device); - device = NULL; - goto out; - } - list_add(&device->ig_list, &ig.device_list); + + device = kzalloc(sizeof *device, GFP_KERNEL); + if (device == NULL) + goto out; + + /* assign this device to the device */ + device->ib_device = cma_id->device; + /* init the device and link it into ig device list */ + if (iser_create_device_ib_res(device)) { + kfree(device); + device = NULL; + goto out; } + list_add(&device->ig_list, &ig.device_list); + out: BUG_ON(device == NULL); device->refcount++; -- cgit v1.2.3 From d33ed425c6cc14370d8c418b504328d2c3db58b4 Mon Sep 17 00:00:00 2001 From: Arne Redlich Date: Tue, 4 
Mar 2008 14:11:54 +0200 Subject: IB/iser: Handle iser_device allocation error gracefully "iser_device" allocation failure is "handled" with a BUG_ON() right before dereferencing the NULL-pointer - fix this! Signed-off-by: Arne Redlich Signed-off-by: Erez Zilber --- drivers/infiniband/ulp/iser/iser_verbs.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 768ba69f2fd..993f0a8ff28 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -244,7 +244,7 @@ struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id) list_for_each_entry(device, &ig.device_list, ig_list) /* find if there's a match using the node GUID */ if (device->ib_device->node_guid == cma_id->device->node_guid) - goto out; + goto inc_refcnt; device = kzalloc(sizeof *device, GFP_KERNEL); if (device == NULL) @@ -260,9 +260,9 @@ struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id) } list_add(&device->ig_list, &ig.device_list); -out: - BUG_ON(device == NULL); +inc_refcnt: device->refcount++; +out: mutex_unlock(&ig.device_list_mutex); return device; } @@ -368,6 +368,12 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) int ret; device = iser_device_find_by_ib_device(cma_id); + if (!device) { + iser_err("device lookup/creation failed\n"); + iser_connect_error(cma_id); + return; + } + ib_conn = (struct iser_conn *)cma_id->context; ib_conn->device = device; @@ -376,7 +382,6 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) iser_err("resolve route failed: %d\n", ret); iser_connect_error(cma_id); } - return; } static void iser_route_handler(struct rdma_cm_id *cma_id) -- cgit v1.2.3 From d7c1fbd6606085dbf95e47068d6bd2db8a180e38 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 4 Mar 2008 16:44:52 -0600 Subject: RDMA/iwcm: Don't access a cm_id after 
dropping reference cm_work_handler() can access cm_id_priv after it drops its reference by calling iwcm_deref_id(), which might cause it to be freed. The fix is to look at whether IWCM_F_CALLBACK_DESTROY is set _before_ dropping the reference. Then if it was set, free the cm_id on this thread. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/core/iwcm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 223b1aa7d92..81c9195b512 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -839,6 +839,7 @@ static void cm_work_handler(struct work_struct *_work) unsigned long flags; int empty; int ret = 0; + int destroy_id; spin_lock_irqsave(&cm_id_priv->lock, flags); empty = list_empty(&cm_id_priv->work_list); @@ -857,9 +858,9 @@ static void cm_work_handler(struct work_struct *_work) destroy_cm_id(&cm_id_priv->id); } BUG_ON(atomic_read(&cm_id_priv->refcount)==0); + destroy_id = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); if (iwcm_deref_id(cm_id_priv)) { - if (test_bit(IWCM_F_CALLBACK_DESTROY, - &cm_id_priv->flags)) { + if (destroy_id) { BUG_ON(!list_empty(&cm_id_priv->work_list)); free_cm_id(cm_id_priv); } -- cgit v1.2.3 From 140277e9a710202608914b5b731948d2769399bc Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 14 Dec 2007 01:53:56 -0800 Subject: IB/ipath: Fix IB compliance problems with link state vs physical state Subnet manager SetPortinfo messages distinguish between changing the link state (DOWN, ARM, ACTIVE) and the link physical state (POLL, SLEEP, DISABLED). These are somewhat independent commands and affect when link width and speed changes take effect.
Without this patch, a link DOWN physical state NOP command was causing the link width and speed settings to take effect which should only happen when the link physical state goes down (either by a SMP or some link physical error like link errors exceeding the threshold). Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_common.h | 2 +- drivers/infiniband/hw/ipath/ipath_driver.c | 28 ++++++++++++--------------- drivers/infiniband/hw/ipath/ipath_kernel.h | 1 + drivers/infiniband/hw/ipath/ipath_mad.c | 7 +++---- drivers/infiniband/hw/ipath/ipath_registers.h | 2 +- 5 files changed, 18 insertions(+), 22 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index 41462109554..591901aab6b 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -75,7 +75,7 @@ #define IPATH_IB_LINKDOWN 0 #define IPATH_IB_LINKARM 1 #define IPATH_IB_LINKACTIVE 2 -#define IPATH_IB_LINKINIT 3 +#define IPATH_IB_LINKDOWN_ONLY 3 #define IPATH_IB_LINKDOWN_SLEEP 4 #define IPATH_IB_LINKDOWN_DISABLE 5 #define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */ diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index d5ff6ca2db3..ca4d0acc678 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -851,8 +851,7 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, * -ETIMEDOUT state can have multiple states set, for any of several * transitions.
*/ -static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, - int msecs) +int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs) { dd->ipath_state_wanted = state; wait_event_interruptible_timeout(ipath_state_wait, @@ -1656,8 +1655,8 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) { static const char *what[4] = { - [0] = "DOWN", - [INFINIPATH_IBCC_LINKCMD_INIT] = "INIT", + [0] = "NOP", + [INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN", [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED", [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE" }; @@ -1672,9 +1671,9 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) (dd, dd->ipath_kregs->kr_ibcstatus) >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]); - /* flush all queued sends when going to DOWN or INIT, to be sure that + /* flush all queued sends when going to DOWN to be sure that * they don't block MAD packets */ - if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) + if (linkcmd == INFINIPATH_IBCC_LINKCMD_DOWN) ipath_cancel_sends(dd, 1); ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, @@ -1687,6 +1686,13 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) int ret; switch (newstate) { + case IPATH_IB_LINKDOWN_ONLY: + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN << + INFINIPATH_IBCC_LINKCMD_SHIFT); + /* don't wait */ + ret = 0; + goto bail; + case IPATH_IB_LINKDOWN: ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL << INFINIPATH_IBCC_LINKINITCMD_SHIFT); @@ -1709,16 +1715,6 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) ret = 0; goto bail; - case IPATH_IB_LINKINIT: - if (dd->ipath_flags & IPATH_LINKINIT) { - ret = 0; - goto bail; - } - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT << - INFINIPATH_IBCC_LINKCMD_SHIFT); - lstate = IPATH_LINKINIT; - break; - case IPATH_IB_LINKARM: if (dd->ipath_flags & 
IPATH_LINKARMED) { ret = 0; diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 4cc0f95ea87..ecf3f7ff771 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -767,6 +767,7 @@ void ipath_kreceive(struct ipath_portdata *); int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned); int ipath_reset_device(int); void ipath_get_faststats(unsigned long); +int ipath_wait_linkstate(struct ipath_devdata *, u32, int); int ipath_set_linkstate(struct ipath_devdata *, u8); int ipath_set_mtu(struct ipath_devdata *, u16); int ipath_set_lid(struct ipath_devdata *, u32, u8); diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index d98d5f10370..b34b91d3723 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -555,10 +555,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, /* FALLTHROUGH */ case IB_PORT_DOWN: if (lstate == 0) - if (get_linkdowndefaultstate(dd)) - lstate = IPATH_IB_LINKDOWN_SLEEP; - else - lstate = IPATH_IB_LINKDOWN; + lstate = IPATH_IB_LINKDOWN_ONLY; else if (lstate == 1) lstate = IPATH_IB_LINKDOWN_SLEEP; else if (lstate == 2) @@ -568,6 +565,8 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, else goto err; ipath_set_linkstate(dd, lstate); + ipath_wait_linkstate(dd, IPATH_LINKINIT | IPATH_LINKARMED | + IPATH_LINKACTIVE, 1000); break; case IB_PORT_ARMED: ipath_set_linkstate(dd, IPATH_IB_LINKARM); diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h index 6d2a17f9c1d..92ad73a7fff 100644 --- a/drivers/infiniband/hw/ipath/ipath_registers.h +++ b/drivers/infiniband/hw/ipath/ipath_registers.h @@ -185,7 +185,7 @@ #define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3 #define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16 #define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL -#define INFINIPATH_IBCC_LINKCMD_INIT 1 /* move to 0x11 */ 
+#define INFINIPATH_IBCC_LINKCMD_DOWN 1 /* move to 0x11 */ #define INFINIPATH_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */ #define INFINIPATH_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */ #define INFINIPATH_IBCC_LINKCMD_SHIFT 18 -- cgit v1.2.3 From 87d5aed85b2d79e4075ad2ca1449e9b98f657a09 Mon Sep 17 00:00:00 2001 From: Patrick Marchand Latifi Date: Mon, 7 Jan 2008 23:43:04 -0800 Subject: IB/ipath: Fix potentially wrong RNR retry counter returned in ipath_query_qp() There can be a case where the requester's rnr retry counter (s_rnr_retry) is less than the number of rnr retries allowed per QP (s_rnr_retry_cnt). This can happen if the s_rnr_retry counter is being decremented and an ipath_query_qp call is issued during that time frame. The fix is to always return the number of rnr retries allowed per QP instead of the requester's rnr counter. Found by code review. Signed-off-by: Patrick Marchand Latifi Acked-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 80dc623cee4..8214c0905c7 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -647,7 +647,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->port_num = 1; attr->timeout = qp->timeout; attr->retry_cnt = qp->s_retry_cnt; - attr->rnr_retry = qp->s_rnr_retry; + attr->rnr_retry = qp->s_rnr_retry_cnt; attr->alt_port_num = 0; attr->alt_timeout = 0; -- cgit v1.2.3 From 4cd5060cf7c2207c31e2e368f8a6343355362e51 Mon Sep 17 00:00:00 2001 From: Patrick Marchand Latifi Date: Fri, 18 Jan 2008 20:10:48 -0800 Subject: IB/ipath: Fix RC QP initialization This patch fixes the initialization of RC QPs, since we would rely on the queue pair type (ibqp->qp_type) being set, but this field is only initialized when we return from ipath_create_qp (it is 
initialized by the user-level verbs library). The fix is to not depend on this field to initialize the send and the receive state of the RC QP. Signed-off-by: Patrick Marchand Latifi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_qp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 8214c0905c7..553d9007cf0 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -329,8 +329,9 @@ struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn) /** * ipath_reset_qp - initialize the QP state to the reset state * @qp: the QP to reset + * @type: the QP type */ -static void ipath_reset_qp(struct ipath_qp *qp) +static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) { qp->remote_qpn = 0; qp->qkey = 0; @@ -342,7 +343,7 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->s_psn = 0; qp->r_psn = 0; qp->r_msn = 0; - if (qp->ibqp.qp_type == IB_QPT_RC) { + if (type == IB_QPT_RC) { qp->s_state = IB_OPCODE_RC_SEND_LAST; qp->r_state = IB_OPCODE_RC_SEND_LAST; } else { @@ -534,7 +535,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, switch (new_state) { case IB_QPS_RESET: - ipath_reset_qp(qp); + ipath_reset_qp(qp, ibqp->qp_type); break; case IB_QPS_ERR: @@ -839,7 +840,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, goto bail_qp; } qp->ip = NULL; - ipath_reset_qp(qp); + ipath_reset_qp(qp, init_attr->qp_type); break; default: -- cgit v1.2.3 From 2a049e514b890c8b70b965bbd9f4e3c963af69c9 Mon Sep 17 00:00:00 2001 From: Patrick Marchand Latifi Date: Thu, 31 Jan 2008 00:24:37 -0800 Subject: IB/ipath: Fix error completion put on send CQ instead of recv CQ A work completion entry could be placed on the wrong completion queue when an RC QP is placed in the error state. 
Signed-off-by: Patrick Marchand Latifi Acked-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 553d9007cf0..087ed316647 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -415,7 +415,7 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) wc.wr_id = qp->r_wr_id; wc.opcode = IB_WC_RECV; wc.status = err; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); } wc.status = IB_WC_WR_FLUSH_ERR; -- cgit v1.2.3 From 450bb3875f5f5ab3679823c941d6045d16967370 Mon Sep 17 00:00:00 2001 From: Patrick Marchand Latifi Date: Wed, 20 Feb 2008 19:08:10 -0800 Subject: IB/ipath: Reset the retry counter for RDMA_READ_RESPONSE_MIDDLE packets Reset the retry counter when we get a good RDMA_READ_RESPONSE_MIDDLE packet. This fix will prevent the requester from reporting a retry exceeded error too early. Signed-off-by: Patrick Marchand Latifi --- drivers/infiniband/hw/ipath/ipath_rc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 459e46e2c01..40f3e37d7ad 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -1196,6 +1196,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, list_move_tail(&qp->timerwait, &dev->pending[dev->pending_index]); spin_unlock(&dev->pending_lock); + + if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE)) + qp->s_retry = qp->s_retry_cnt; + /* * Update the RDMA receive state but do the copy w/o * holding the locks and blocking interrupts. 
-- cgit v1.2.3 From b3e2749bf32f61e7beb259eb7cfb066d2ec6ad65 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 11 Mar 2008 16:10:02 +0200 Subject: IPoIB: Don't drop multicast sends when they can be queued When set_multicast_list() is called the multicast task is restarted and the IPOIB_MCAST_STARTED bit is cleared. As a result, for some window of time, multicast packets are neither transmitted nor queued but rather dropped by ipoib_mcast_send(). These dropped packets are painful in two cases: - bonding fail-over which both calls set_multicast_list() on the new active slave and sends Gratuitous ARP through that slave. - IP_DROP_MEMBERSHIP code which both calls set_multicast_list() on the device and issues IGMP leave. In both these cases, depending on the scheduling of the IPoIB multicast task, the packets would be dropped. As a result, in the bonding case, the failover would not be detected by the peers until their neighbour entry is renewed (which takes a few tens of seconds). In the IGMP case, the IP router doesn't get an IGMP leave and would only learn of that from further probes on the group (also a delay of at least a few tens of seconds). Fix this by allowing transmission (or queuing) depending on the IPOIB_FLAG_OPER_UP flag instead of the IPOIB_MCAST_STARTED flag.
Signed-off-by: Olga Shern Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 2628339e3a9..31a53c5bcb1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -650,7 +650,7 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) */ spin_lock(&priv->lock); - if (!test_bit(IPOIB_MCAST_STARTED, &priv->flags) || + if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) || !priv->broadcast || !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { ++dev->stats.tx_dropped; -- cgit v1.2.3 From 4200406b8fbbf309f4fffb339bd16c4553ae0c30 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 11 Mar 2008 18:35:20 -0700 Subject: IPoIB/cm: Set tx_wr.num_sge in connected mode post_send() Commit 7143740d ("IPoIB: Add send gather support") made it possible for tx_wr.num_sge to be != 1 -- this happens if send gather support is enabled. However, the code in the connected mode post_send() function assumes the old invariant, namely that tx_wr.num_sge is always 1. Fix this by explicitly setting tx_wr.num_sge to 1 in the CM post_send(). 
Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 52b1bebfa74..4e8d0281f8b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -637,6 +637,7 @@ static inline int post_send(struct ipoib_dev_priv *priv, priv->tx_sge[0].addr = addr; priv->tx_sge[0].length = len; + priv->tx_wr.num_sge = 1; priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM; return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr); -- cgit v1.2.3 From 10313cbb92206450b450e14f2b3f6ccde42d9a34 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 12 Mar 2008 07:51:03 -0700 Subject: IPoIB: Allocate priv->tx_ring with vmalloc() Commit 7143740d ("IPoIB: Add send gather support") made struct ipoib_tx_buf significantly larger, since the mapping member changed from a single u64 to an array with MAX_SKB_FRAGS + 1 entries. This means that allocating tx_rings with kzalloc() may fail because there is not enough contiguous memory for the new, much bigger size. Fix this regression by allocating the rings with vmalloc() instead. 
Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 8 +++++--- drivers/infiniband/ulp/ipoib/ipoib_main.c | 9 +++++---- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 4e8d0281f8b..2490b2d79db 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "ipoib.h" @@ -1031,13 +1032,13 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, struct ipoib_dev_priv *priv = netdev_priv(p->dev); int ret; - p->tx_ring = kzalloc(ipoib_sendq_size * sizeof *p->tx_ring, - GFP_KERNEL); + p->tx_ring = vmalloc(ipoib_sendq_size * sizeof *p->tx_ring); if (!p->tx_ring) { ipoib_warn(priv, "failed to allocate tx ring\n"); ret = -ENOMEM; goto err_tx; } + memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring); p->qp = ipoib_cm_create_tx_qp(p->dev, p); if (IS_ERR(p->qp)) { @@ -1078,6 +1079,7 @@ err_id: ib_destroy_qp(p->qp); err_qp: p->qp = NULL; + vfree(p->tx_ring); err_tx: return ret; } @@ -1128,7 +1130,7 @@ timeout: if (p->qp) ib_destroy_qp(p->qp); - kfree(p->tx_ring); + vfree(p->tx_ring); kfree(p); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index f96477a8ca5..57282048865 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -41,6 +41,7 @@ #include #include #include +#include #include /* For ARPHRD_xxx */ @@ -887,13 +888,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) goto out; } - priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, - GFP_KERNEL); + priv->tx_ring = vmalloc(ipoib_sendq_size * sizeof *priv->tx_ring); if (!priv->tx_ring) { printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", ca->name, ipoib_sendq_size); goto out_rx_ring_cleanup; } + 
memset(priv->tx_ring, 0, ipoib_sendq_size * sizeof *priv->tx_ring); /* priv->tx_head, tx_tail & tx_outstanding are already 0 */ @@ -903,7 +904,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) return 0; out_tx_ring_cleanup: - kfree(priv->tx_ring); + vfree(priv->tx_ring); out_rx_ring_cleanup: kfree(priv->rx_ring); @@ -928,7 +929,7 @@ void ipoib_dev_cleanup(struct net_device *dev) ipoib_ib_dev_cleanup(dev); kfree(priv->rx_ring); - kfree(priv->tx_ring); + vfree(priv->tx_ring); priv->rx_ring = NULL; priv->tx_ring = NULL; -- cgit v1.2.3