diff options
42 files changed, 2097 insertions, 814 deletions
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 34b724afd28..ecd1a3057c6 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -78,25 +78,6 @@ ib_get_agent_port(struct ib_device *device, int port_num) return entry; } -int smi_check_local_dr_smp(struct ib_smp *smp, - struct ib_device *device, - int port_num) -{ - struct ib_agent_port_private *port_priv; - - if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - return 1; - - port_priv = ib_get_agent_port(device, port_num); - if (!port_priv) { - printk(KERN_DEBUG SPFX "smi_check_local_dr_smp %s port %d " - "not open\n", device->name, port_num); - return 1; - } - - return smi_check_local_smp(port_priv->agent[0], smp); -} - int agent_send_response(struct ib_mad *mad, struct ib_grh *grh, struct ib_wc *wc, struct ib_device *device, int port_num, int qpn) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 2514de3480d..7cfedb8d9bc 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -121,7 +121,7 @@ struct cm_id_private { struct rb_node service_node; struct rb_node sidr_id_node; - spinlock_t lock; + spinlock_t lock; /* Do not acquire inside cm.lock */ wait_queue_head_t wait; atomic_t refcount; @@ -1547,40 +1547,46 @@ static int cm_rep_handler(struct cm_work *work) return -EINVAL; } + cm_format_rep_event(work); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + break; + default: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = -EINVAL; + goto error; + } + cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id; cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid; cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg); - spin_lock_irqsave(&cm.lock, flags); + spin_lock(&cm.lock); /* Check for duplicate REP. */ if (cm_insert_remote_id(cm_id_priv->timewait_info)) { - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock(&cm.lock); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = -EINVAL; goto error; } /* Check for a stale connection. */ if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) { - spin_unlock_irqrestore(&cm.lock, flags); + rb_erase(&cm_id_priv->timewait_info->remote_id_node, + &cm.remote_id_table); + cm_id_priv->timewait_info->inserted_remote_id = 0; + spin_unlock(&cm.lock); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, NULL, 0); ret = -EINVAL; goto error; } - spin_unlock_irqrestore(&cm.lock, flags); - - cm_format_rep_event(work); + spin_unlock(&cm.lock); - spin_lock_irqsave(&cm_id_priv->lock, flags); - switch (cm_id_priv->id.state) { - case IB_CM_REQ_SENT: - case IB_CM_MRA_REQ_RCVD: - break; - default: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = -EINVAL; - goto error; - } cm_id_priv->id.state = IB_CM_REP_RCVD; cm_id_priv->id.remote_id = rep_msg->local_comm_id; cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg); @@ -1603,7 +1609,7 @@ static int cm_rep_handler(struct cm_work *work) cm_deref_id(cm_id_priv); return 0; -error: cm_cleanup_timewait(cm_id_priv->timewait_info); +error: cm_deref_id(cm_id_priv); return ret; } diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index d34a6f1c4f4..838bf54458d 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -278,9 +278,9 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, { struct ib_pool_fmr *fmr; struct ib_fmr_attr attr = { - .max_pages = params->max_pages_per_fmr, - .max_maps = IB_FMR_MAX_REMAPS, - .page_size = PAGE_SHIFT + .max_pages = params->max_pages_per_fmr, + .max_maps = IB_FMR_MAX_REMAPS, + .page_shift = params->page_shift }; for (i = 0; i < params->pool_size; ++i) { diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index c82f47a66e4..f7854b65fd5 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -31,7 +31,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: mad.c 2817 2005-07-07 11:29:26Z halr $ + * $Id: mad.c 5596 2006-03-03 01:00:07Z sean.hefty $ */ #include <linux/dma-mapping.h> @@ -679,8 +679,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, goto out; } /* Check to post send on QP or process locally */ - ret = smi_check_local_dr_smp(smp, device, port_num); - if (!ret || !device->process_mad) + ret = smi_check_local_smp(smp, device); + if (!ret) goto out; local = kmalloc(sizeof *local, GFP_ATOMIC); @@ -765,18 +765,67 @@ out: return ret; } -static int get_buf_length(int hdr_len, int data_len) +static int get_pad_size(int hdr_len, int data_len) { int seg_size, pad; seg_size = sizeof(struct ib_mad) - hdr_len; if (data_len && seg_size) { pad = seg_size - data_len % seg_size; - if (pad == seg_size) - pad = 0; + return pad == seg_size ? 0 : pad; } else - pad = seg_size; - return hdr_len + data_len + pad; + return seg_size; +} + +static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_rmpp_segment *s, *t; + + list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) { + list_del(&s->list); + kfree(s); + } +} + +static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, + gfp_t gfp_mask) +{ + struct ib_mad_send_buf *send_buf = &send_wr->send_buf; + struct ib_rmpp_mad *rmpp_mad = send_buf->mad; + struct ib_rmpp_segment *seg = NULL; + int left, seg_size, pad; + + send_buf->seg_size = sizeof (struct ib_mad) - send_buf->hdr_len; + seg_size = send_buf->seg_size; + pad = send_wr->pad; + + /* Allocate data segments. */ + for (left = send_buf->data_len + pad; left > 0; left -= seg_size) { + seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask); + if (!seg) { + printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem " + "alloc failed for len %zd, gfp %#x\n", + sizeof (*seg) + seg_size, gfp_mask); + free_send_rmpp_list(send_wr); + return -ENOMEM; + } + seg->num = ++send_buf->seg_count; + list_add_tail(&seg->list, &send_wr->rmpp_list); + } + + /* Zero any padding */ + if (pad) + memset(seg->data + seg_size - pad, 0, pad); + + rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv-> + agent.rmpp_version; + rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; + ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + + send_wr->cur_seg = container_of(send_wr->rmpp_list.next, + struct ib_rmpp_segment, list); + send_wr->last_ack_seg = send_wr->cur_seg; + return 0; } struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, @@ -787,32 +836,40 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; - int buf_size; + int pad, message_size, ret, size; void *buf; mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); - buf_size = get_buf_length(hdr_len, data_len); + pad = get_pad_size(hdr_len, data_len); + message_size = hdr_len + data_len + pad; if ((!mad_agent->rmpp_version && - (rmpp_active || buf_size > sizeof(struct ib_mad))) || - (!rmpp_active && buf_size > sizeof(struct ib_mad))) + (rmpp_active || message_size > sizeof(struct ib_mad))) || + (!rmpp_active && message_size > sizeof(struct ib_mad))) return ERR_PTR(-EINVAL); - buf = kzalloc(sizeof *mad_send_wr + buf_size, gfp_mask); + size = rmpp_active ? hdr_len : sizeof(struct ib_mad); + buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask); if (!buf) return ERR_PTR(-ENOMEM); - mad_send_wr = buf + buf_size; + mad_send_wr = buf + size; + INIT_LIST_HEAD(&mad_send_wr->rmpp_list); mad_send_wr->send_buf.mad = buf; + mad_send_wr->send_buf.hdr_len = hdr_len; + mad_send_wr->send_buf.data_len = data_len; + mad_send_wr->pad = pad; mad_send_wr->mad_agent_priv = mad_agent_priv; - mad_send_wr->sg_list[0].length = buf_size; + mad_send_wr->sg_list[0].length = hdr_len; mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey; + mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len; + mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey; mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr; mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; - mad_send_wr->send_wr.num_sge = 1; + mad_send_wr->send_wr.num_sge = 2; mad_send_wr->send_wr.opcode = IB_WR_SEND; mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED; mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn; @@ -820,13 +877,11 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index; if (rmpp_active) { - struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad; - rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len - - IB_MGMT_RMPP_HDR + data_len); - rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version; - rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; - ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, - IB_MGMT_RMPP_FLAG_ACTIVE); + ret = alloc_send_rmpp_list(mad_send_wr, gfp_mask); + if (ret) { + kfree(buf); + return ERR_PTR(ret); + } } mad_send_wr->send_buf.mad_agent = mad_agent; @@ -835,14 +890,50 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, } EXPORT_SYMBOL(ib_create_send_mad); +void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct list_head *list; + + mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, + send_buf); + list = &mad_send_wr->cur_seg->list; + + if (mad_send_wr->cur_seg->num < seg_num) { + list_for_each_entry(mad_send_wr->cur_seg, list, list) + if (mad_send_wr->cur_seg->num == seg_num) + break; + } else if (mad_send_wr->cur_seg->num > seg_num) { + list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list) + if (mad_send_wr->cur_seg->num == seg_num) + break; + } + return mad_send_wr->cur_seg->data; +} +EXPORT_SYMBOL(ib_get_rmpp_segment); + +static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr) +{ + if (mad_send_wr->send_buf.seg_count) + return ib_get_rmpp_segment(&mad_send_wr->send_buf, + mad_send_wr->seg_num); + else + return mad_send_wr->send_buf.mad + + mad_send_wr->send_buf.hdr_len; +} + void ib_free_send_mad(struct ib_mad_send_buf *send_buf) { struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_wr_private *mad_send_wr; mad_agent_priv = container_of(send_buf->mad_agent, struct ib_mad_agent_private, agent); - kfree(send_buf->mad); + mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, + send_buf); + free_send_rmpp_list(mad_send_wr); + kfree(send_buf->mad); if (atomic_dec_and_test(&mad_agent_priv->refcount)) wake_up(&mad_agent_priv->wait); } @@ -865,10 +956,17 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) mad_agent = mad_send_wr->send_buf.mad_agent; sge = mad_send_wr->sg_list; - sge->addr = dma_map_single(mad_agent->device->dma_device, - mad_send_wr->send_buf.mad, sge->length, - DMA_TO_DEVICE); - pci_unmap_addr_set(mad_send_wr, mapping, sge->addr); + sge[0].addr = dma_map_single(mad_agent->device->dma_device, + mad_send_wr->send_buf.mad, + sge[0].length, + DMA_TO_DEVICE); + pci_unmap_addr_set(mad_send_wr, header_mapping, sge[0].addr); + + sge[1].addr = dma_map_single(mad_agent->device->dma_device, + ib_get_payload(mad_send_wr), + sge[1].length, + DMA_TO_DEVICE); + pci_unmap_addr_set(mad_send_wr, payload_mapping, sge[1].addr); spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { @@ -885,11 +983,14 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) list_add_tail(&mad_send_wr->mad_list.list, list); } spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); - if (ret) + if (ret) { dma_unmap_single(mad_agent->device->dma_device, - pci_unmap_addr(mad_send_wr, mapping), - sge->length, DMA_TO_DEVICE); - + pci_unmap_addr(mad_send_wr, header_mapping), + sge[0].length, DMA_TO_DEVICE); + dma_unmap_single(mad_agent->device->dma_device, + pci_unmap_addr(mad_send_wr, payload_mapping), + sge[1].length, DMA_TO_DEVICE); + } return ret; } @@ -1661,9 +1762,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, port_priv->device->node_type, port_priv->port_num)) goto out; - if (!smi_check_local_dr_smp(&recv->mad.smp, - port_priv->device, - port_priv->port_num)) + if (!smi_check_local_smp(&recv->mad.smp, port_priv->device)) goto out; } @@ -1862,8 +1961,11 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, retry: dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device, - pci_unmap_addr(mad_send_wr, mapping), + pci_unmap_addr(mad_send_wr, header_mapping), mad_send_wr->sg_list[0].length, DMA_TO_DEVICE); + dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device, + pci_unmap_addr(mad_send_wr, payload_mapping), + mad_send_wr->sg_list[1].length, DMA_TO_DEVICE); queued_send_wr = NULL; spin_lock_irqsave(&send_queue->lock, flags); list_del(&mad_list->list); @@ -2262,8 +2364,12 @@ static void timeout_sends(void *data) static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg) { struct ib_mad_port_private *port_priv = cq->cq_context; + unsigned long flags; - queue_work(port_priv->wq, &port_priv->work); + spin_lock_irqsave(&ib_mad_port_list_lock, flags); + if (!list_empty(&port_priv->port_list)) + queue_work(port_priv->wq, &port_priv->work); + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); } /* @@ -2575,18 +2681,23 @@ static int ib_mad_port_open(struct ib_device *device, } INIT_WORK(&port_priv->work, ib_mad_completion_handler, port_priv); + spin_lock_irqsave(&ib_mad_port_list_lock, flags); + list_add_tail(&port_priv->port_list, &ib_mad_port_list); + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); + ret = ib_mad_port_start(port_priv); if (ret) { printk(KERN_ERR PFX "Couldn't start port\n"); goto error9; } - spin_lock_irqsave(&ib_mad_port_list_lock, flags); - list_add_tail(&port_priv->port_list, &ib_mad_port_list); - spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); return 0; error9: + spin_lock_irqsave(&ib_mad_port_list_lock, flags); + list_del_init(&port_priv->port_list); + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); + destroy_workqueue(port_priv->wq); error8: destroy_mad_qp(&port_priv->qp_info[1]); @@ -2623,11 +2734,9 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) printk(KERN_ERR PFX "Port %d not found\n", port_num); return -ENODEV; } - list_del(&port_priv->port_list); + list_del_init(&port_priv->port_list); spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); - /* Stop processing completions. */ - flush_workqueue(port_priv->wq); destroy_workqueue(port_priv->wq); destroy_mad_qp(&port_priv->qp_info[1]); destroy_mad_qp(&port_priv->qp_info[0]); diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 570f78682af..a7125d4b5cc 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -31,7 +31,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: mad_priv.h 2730 2005-06-28 16:43:03Z sean.hefty $ + * $Id: mad_priv.h 5596 2006-03-03 01:00:07Z sean.hefty $ */ #ifndef __IB_MAD_PRIV_H__ @@ -85,6 +85,12 @@ struct ib_mad_private { } mad; } __attribute__ ((packed)); +struct ib_rmpp_segment { + struct list_head list; + u32 num; + u8 data[0]; +}; + struct ib_mad_agent_private { struct list_head agent_list; struct ib_mad_agent agent; @@ -119,7 +125,8 @@ struct ib_mad_send_wr_private { struct list_head agent_list; struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_buf send_buf; - DECLARE_PCI_UNMAP_ADDR(mapping) + DECLARE_PCI_UNMAP_ADDR(header_mapping) + DECLARE_PCI_UNMAP_ADDR(payload_mapping) struct ib_send_wr send_wr; struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; __be64 tid; @@ -130,11 +137,12 @@ struct ib_mad_send_wr_private { enum ib_wc_status status; /* RMPP control */ + struct list_head rmpp_list; + struct ib_rmpp_segment *last_ack_seg; + struct ib_rmpp_segment *cur_seg; int last_ack; int seg_num; int newwin; - int total_seg; - int data_offset; int pad; }; diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 3249e1d8c07..bacfdd5bdda 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -111,14 +111,14 @@ static int data_offset(u8 mgmt_class) return IB_MGMT_RMPP_HDR; } -static void format_ack(struct ib_rmpp_mad *ack, +static void format_ack(struct ib_mad_send_buf *msg, struct ib_rmpp_mad *data, struct mad_rmpp_recv *rmpp_recv) { + struct ib_rmpp_mad *ack = msg->mad; unsigned long flags; - memcpy(&ack->mad_hdr, &data->mad_hdr, - data_offset(data->mad_hdr.mgmt_class)); + memcpy(ack, &data->mad_hdr, msg->hdr_len); ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP; ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK; @@ -135,16 +135,16 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv, struct ib_mad_recv_wc *recv_wc) { struct ib_mad_send_buf *msg; - int ret; + int ret, hdr_len; + hdr_len = data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, - recv_wc->wc->pkey_index, 1, IB_MGMT_RMPP_HDR, - IB_MGMT_RMPP_DATA, GFP_KERNEL); + recv_wc->wc->pkey_index, 1, hdr_len, + 0, GFP_KERNEL); if (!msg) return; - format_ack(msg->mad, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, - rmpp_recv); + format_ack(msg, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv); msg->ah = rmpp_recv->ah; ret = ib_post_send_mad(msg, NULL); if (ret) @@ -156,16 +156,17 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, { struct ib_mad_send_buf *msg; struct ib_ah *ah; + int hdr_len; ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc, recv_wc->recv_buf.grh, agent->port_num); if (IS_ERR(ah)) return (void *) ah; + hdr_len = data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); msg = ib_create_send_mad(agent, recv_wc->wc->src_qp, recv_wc->wc->pkey_index, 1, - IB_MGMT_RMPP_HDR, IB_MGMT_RMPP_DATA, - GFP_KERNEL); + hdr_len, 0, GFP_KERNEL); if (IS_ERR(msg)) ib_destroy_ah(ah); else @@ -195,8 +196,7 @@ static void nack_recv(struct ib_mad_agent_private *agent, return; rmpp_mad = msg->mad; - memcpy(rmpp_mad, recv_wc->recv_buf.mad, - data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class)); + memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len); rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP; rmpp_mad->rmpp_hdr.rmpp_version = IB_MGMT_RMPP_VERSION; @@ -433,44 +433,6 @@ static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv) return rmpp_wc; } -void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf) -{ - struct ib_mad_recv_buf *seg_buf; - struct ib_rmpp_mad *rmpp_mad; - void *data; - int size, len, offset; - u8 flags; - - len = mad_recv_wc->mad_len; - if (len <= sizeof(struct ib_mad)) { - memcpy(buf, mad_recv_wc->recv_buf.mad, len); - return; - } - - offset = data_offset(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class); - - list_for_each_entry(seg_buf, &mad_recv_wc->rmpp_list, list) { - rmpp_mad = (struct ib_rmpp_mad *)seg_buf->mad; - flags = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr); - - if (flags & IB_MGMT_RMPP_FLAG_FIRST) { - data = rmpp_mad; - size = sizeof(*rmpp_mad); - } else { - data = (void *) rmpp_mad + offset; - if (flags & IB_MGMT_RMPP_FLAG_LAST) - size = len; - else - size = sizeof(*rmpp_mad) - offset; - } - - memcpy(buf, data, size); - len -= size; - buf += size; - } -} -EXPORT_SYMBOL(ib_coalesce_recv_mad); - static struct ib_mad_recv_wc * continue_rmpp(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) @@ -570,50 +532,33 @@ start_rmpp(struct ib_mad_agent_private *agent, return mad_recv_wc; } -static inline u64 get_seg_addr(struct ib_mad_send_wr_private *mad_send_wr) -{ - return mad_send_wr->sg_list[0].addr + mad_send_wr->data_offset + - (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset) * - (mad_send_wr->seg_num - 1); -} - static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_rmpp_mad *rmpp_mad; int timeout; - u32 paylen; + u32 paylen = 0; rmpp_mad = mad_send_wr->send_buf.mad; ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); - rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num); + rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(++mad_send_wr->seg_num); if (mad_send_wr->seg_num == 1) { rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST; - paylen = mad_send_wr->total_seg * IB_MGMT_RMPP_DATA - + paylen = mad_send_wr->send_buf.seg_count * IB_MGMT_RMPP_DATA - mad_send_wr->pad; - rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen); - mad_send_wr->sg_list[0].length = sizeof(struct ib_rmpp_mad); - } else { - mad_send_wr->send_wr.num_sge = 2; - mad_send_wr->sg_list[0].length = mad_send_wr->data_offset; - mad_send_wr->sg_list[1].addr = get_seg_addr(mad_send_wr); - mad_send_wr->sg_list[1].length = sizeof(struct ib_rmpp_mad) - - mad_send_wr->data_offset; - mad_send_wr->sg_list[1].lkey = mad_send_wr->sg_list[0].lkey; - rmpp_mad->rmpp_hdr.paylen_newwin = 0; } - if (mad_send_wr->seg_num == mad_send_wr->total_seg) { + if (mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) { rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST; paylen = IB_MGMT_RMPP_DATA - mad_send_wr->pad; - rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen); } + rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen); /* 2 seconds for an ACK until we can find the packet lifetime */ timeout = mad_send_wr->send_buf.timeout_ms; if (!timeout || timeout > 2000) mad_send_wr->timeout = msecs_to_jiffies(2000); - mad_send_wr->seg_num++; + return ib_send_mad(mad_send_wr); } @@ -629,7 +574,7 @@ static void abort_send(struct ib_mad_agent_private *agent, __be64 tid, if (!mad_send_wr) goto out; /* Unmatched send */ - if ((mad_send_wr->last_ack == mad_send_wr->total_seg) || + if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) || (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) goto out; /* Send is already done */ @@ -645,6 +590,18 @@ out: spin_unlock_irqrestore(&agent->lock, flags); } +static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr, + int seg_num) +{ + struct list_head *list; + + wr->last_ack = seg_num; + list = &wr->last_ack_seg->list; + list_for_each_entry(wr->last_ack_seg, list, list) + if (wr->last_ack_seg->num == seg_num) + break; +} + static void process_rmpp_ack(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) { @@ -675,11 +632,12 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, if (!mad_send_wr) goto out; /* Unmatched ACK */ - if ((mad_send_wr->last_ack == mad_send_wr->total_seg) || + if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) || (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) goto out; /* Send is already done */ - if (seg_num > mad_send_wr->total_seg || seg_num > mad_send_wr->newwin) { + if (seg_num > mad_send_wr->send_buf.seg_count || + seg_num > mad_send_wr->newwin) { spin_unlock_irqrestore(&agent->lock, flags); abort_send(agent, rmpp_mad->mad_hdr.tid, IB_MGMT_RMPP_STATUS_S2B); @@ -691,11 +649,11 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, goto out; /* Old ACK */ if (seg_num > mad_send_wr->last_ack) { - mad_send_wr->last_ack = seg_num; + adjust_last_ack(mad_send_wr, seg_num); mad_send_wr->retries = mad_send_wr->send_buf.retries; } mad_send_wr->newwin = newwin; - if (mad_send_wr->last_ack == mad_send_wr->total_seg) { + if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) { /* If no response is expected, the ACK completes the send */ if (!mad_send_wr->send_buf.timeout_ms) { struct ib_mad_send_wc wc; @@ -714,7 +672,7 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, mad_send_wr->send_buf.timeout_ms); } else if (mad_send_wr->refcount == 1 && mad_send_wr->seg_num < mad_send_wr->newwin && - mad_send_wr->seg_num <= mad_send_wr->total_seg) { + mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) { /* Send failure will just result in a timeout/retry */ ret = send_next_seg(mad_send_wr); if (ret) @@ -838,31 +796,19 @@ out: int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_rmpp_mad *rmpp_mad; - int i, total_len, ret; + int ret; rmpp_mad = mad_send_wr->send_buf.mad; if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; - if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) + if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) { + mad_send_wr->seg_num = 1; return IB_RMPP_RESULT_INTERNAL; + } - if (mad_send_wr->send_wr.num_sge > 1) - return -EINVAL; /* TODO: support num_sge > 1 */ - - mad_send_wr->seg_num = 1; mad_send_wr->newwin = 1; - mad_send_wr->data_offset = data_offset(rmpp_mad->mad_hdr.mgmt_class); - - total_len = 0; - for (i = 0; i < mad_send_wr->send_wr.num_sge; i++) - total_len += mad_send_wr->send_wr.sg_list[i].length; - - mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) / - (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset); - mad_send_wr->pad = total_len - IB_MGMT_RMPP_HDR - - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); /* We need to wait for the final ACK even if there isn't a response */ mad_send_wr->refcount += (mad_send_wr->timeout == 0); @@ -893,14 +839,14 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, if (!mad_send_wr->timeout) return IB_RMPP_RESULT_PROCESSED; /* Response received */ - if (mad_send_wr->last_ack == mad_send_wr->total_seg) { + if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) { mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); return IB_RMPP_RESULT_PROCESSED; /* Send done */ } - if (mad_send_wr->seg_num > mad_send_wr->newwin || - mad_send_wr->seg_num > mad_send_wr->total_seg) + if (mad_send_wr->seg_num == mad_send_wr->newwin || + mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) return IB_RMPP_RESULT_PROCESSED; /* Wait for ACK */ ret = send_next_seg(mad_send_wr); @@ -921,10 +867,12 @@ int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr) IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ - if (mad_send_wr->last_ack == mad_send_wr->total_seg) + if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) return IB_RMPP_RESULT_PROCESSED; - mad_send_wr->seg_num = mad_send_wr->last_ack + 1; + mad_send_wr->seg_num = mad_send_wr->last_ack; + mad_send_wr->cur_seg = mad_send_wr->last_ack_seg; + ret = send_next_seg(mad_send_wr); if (ret) return IB_RMPP_RESULT_PROCESSED; diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h index 2b3c40198f8..3011bfd86dc 100644 --- a/drivers/infiniband/core/smi.h +++ b/drivers/infiniband/core/smi.h @@ -49,19 +49,16 @@ extern int smi_check_forward_dr_smp(struct ib_smp *smp); extern int smi_handle_dr_smp_send(struct ib_smp *smp, u8 node_type, int port_num); -extern int smi_check_local_dr_smp(struct ib_smp *smp, - struct ib_device *device, - int port_num); /* * Return 1 if the SMP should be handled by the local SMA/SM via process_mad */ -static inline int smi_check_local_smp(struct ib_mad_agent *mad_agent, - struct ib_smp *smp) +static inline int smi_check_local_smp(struct ib_smp *smp, + struct ib_device *device) { /* C14-9:3 -- We're at the end of the DR segment of path */ /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */ - return ((mad_agent->device->process_mad && + return ((device->process_mad && !ib_get_smp_direction(smp) && (smp->hop_ptr == smp->hop_cnt + 1))); } diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 5982d687a00..15121cb5a1f 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -112,7 +112,7 @@ static ssize_t state_show(struct ib_port *p, struct port_attribute *unused, return ret; return sprintf(buf, "%d: %s\n", attr.state, - attr.state >= 0 && attr.state <= ARRAY_SIZE(state_name) ? + attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ? state_name[attr.state] : "UNKNOWN"); } @@ -472,8 +472,10 @@ alloc_group_attrs(ssize_t (*show)(struct ib_port *, goto err; if (snprintf(element->name, sizeof(element->name), - "%d", i) >= sizeof(element->name)) + "%d", i) >= sizeof(element->name)) { + kfree(element); goto err; + } element->attr.attr.name = element->name; element->attr.attr.mode = S_IRUGO; @@ -628,14 +630,42 @@ static ssize_t show_node_guid(struct class_device *cdev, char *buf) be16_to_cpu(((__be16 *) &dev->node_guid)[3])); } +static ssize_t show_node_desc(struct class_device *cdev, char *buf) +{ + struct ib_device *dev = container_of(cdev, struct ib_device, class_dev); + + return sprintf(buf, "%.64s\n", dev->node_desc); +} + +static ssize_t set_node_desc(struct class_device *cdev, const char *buf, + size_t count) +{ + struct ib_device *dev = container_of(cdev, struct ib_device, class_dev); + struct ib_device_modify desc = {}; + int ret; + + if (!dev->modify_device) + return -EIO; + + memcpy(desc.node_desc, buf, min_t(int, count, 64)); + ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc); + if (ret) + return ret; + + return count; +} + static CLASS_DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL); static CLASS_DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL); static CLASS_DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL); +static CLASS_DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, + set_node_desc); static struct class_device_attribute *ib_class_attributes[] = { &class_device_attr_node_type, &class_device_attr_sys_image_guid, - &class_device_attr_node_guid + &class_device_attr_node_guid, + &class_device_attr_node_desc }; static struct class ib_class = { diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index c908de8db5a..fb6cd42601f 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -31,7 +31,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: user_mad.c 4010 2005-11-09 23:11:56Z roland $ + * $Id: user_mad.c 5596 2006-03-03 01:00:07Z sean.hefty $ */ #include <linux/module.h> @@ -121,6 +121,7 @@ struct ib_umad_file { struct ib_umad_packet { struct ib_mad_send_buf *msg; + struct ib_mad_recv_wc *recv_wc; struct list_head list; int length; struct ib_user_mad mad; @@ -176,31 +177,32 @@ static int queue_packet(struct ib_umad_file *file, return ret; } +static int data_offset(u8 mgmt_class) +{ + if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) + return IB_MGMT_SA_HDR; + else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && + (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) + return IB_MGMT_VENDOR_HDR; + else + return IB_MGMT_RMPP_HDR; +} + static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *send_wc) { struct ib_umad_file *file = agent->context; - struct ib_umad_packet *timeout; struct ib_umad_packet *packet = send_wc->send_buf->context[0]; ib_destroy_ah(packet->msg->ah); ib_free_send_mad(packet->msg); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { - timeout = kzalloc(sizeof *timeout + IB_MGMT_MAD_HDR, GFP_KERNEL); - if (!timeout) - goto out; - - timeout->length = IB_MGMT_MAD_HDR; - timeout->mad.hdr.id = packet->mad.hdr.id; - timeout->mad.hdr.status = ETIMEDOUT; - memcpy(timeout->mad.data, packet->mad.data, - sizeof (struct ib_mad_hdr)); - - if (queue_packet(file, agent, timeout)) - kfree(timeout); + packet->length = IB_MGMT_MAD_HDR; + packet->mad.hdr.status = ETIMEDOUT; + if (!queue_packet(file, agent, packet)) + return; } -out: kfree(packet); } @@ -209,22 +211,20 @@ static void recv_handler(struct ib_mad_agent *agent, { struct ib_umad_file *file = agent->context; struct ib_umad_packet *packet; - int length; if (mad_recv_wc->wc->status != IB_WC_SUCCESS) - goto out; + goto err1; - length = mad_recv_wc->mad_len; - packet = kzalloc(sizeof *packet + length, GFP_KERNEL); + packet = kzalloc(sizeof *packet, GFP_KERNEL); if (!packet) - goto out; + goto err1; - packet->length = length; - - ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data); + packet->length = mad_recv_wc->mad_len; + packet->recv_wc = mad_recv_wc; packet->mad.hdr.status = 0; - packet->mad.hdr.length = length + sizeof (struct ib_user_mad); + packet->mad.hdr.length = sizeof (struct ib_user_mad) + + mad_recv_wc->mad_len; packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); packet->mad.hdr.sl = mad_recv_wc->wc->sl; @@ -240,12 +240,79 @@ static void recv_handler(struct ib_mad_agent *agent, } if (queue_packet(file, agent, packet)) - kfree(packet); + goto err2; + return; -out: +err2: + kfree(packet); +err1: ib_free_recv_mad(mad_recv_wc); } +static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet, + size_t count) +{ + struct ib_mad_recv_buf *recv_buf; + int left, seg_payload, offset, max_seg_payload; + + /* We need enough room to copy the first (or only) MAD segment. */ + recv_buf = &packet->recv_wc->recv_buf; + if ((packet->length <= sizeof (*recv_buf->mad) && + count < sizeof (packet->mad) + packet->length) || + (packet->length > sizeof (*recv_buf->mad) && + count < sizeof (packet->mad) + sizeof (*recv_buf->mad))) + return -EINVAL; + + if (copy_to_user(buf, &packet->mad, sizeof (packet->mad))) + return -EFAULT; + + buf += sizeof (packet->mad); + seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad)); + if (copy_to_user(buf, recv_buf->mad, seg_payload)) + return -EFAULT; + + if (seg_payload < packet->length) { + /* + * Multipacket RMPP MAD message. Copy remainder of message. + * Note that last segment may have a shorter payload. + */ + if (count < sizeof (packet->mad) + packet->length) { + /* + * The buffer is too small, return the first RMPP segment, + * which includes the RMPP message length. + */ + return -ENOSPC; + } + offset = data_offset(recv_buf->mad->mad_hdr.mgmt_class); + max_seg_payload = sizeof (struct ib_mad) - offset; + + for (left = packet->length - seg_payload, buf += seg_payload; + left; left -= seg_payload, buf += seg_payload) { + recv_buf = container_of(recv_buf->list.next, + struct ib_mad_recv_buf, list); + seg_payload = min(left, max_seg_payload); + if (copy_to_user(buf, ((void *) recv_buf->mad) + offset, + seg_payload)) + return -EFAULT; + } + } + return sizeof (packet->mad) + packet->length; +} + +static ssize_t copy_send_mad(char __user *buf, struct ib_umad_packet *packet, + size_t count) +{ + ssize_t size = sizeof (packet->mad) + packet->length; + + if (count < size) + return -EINVAL; + + if (copy_to_user(buf, &packet->mad, size)) + return -EFAULT; + + return size; +} + static ssize_t ib_umad_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { @@ -253,7 +320,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, struct ib_umad_packet *packet; ssize_t ret; - if (count < sizeof (struct ib_user_mad) + sizeof (struct ib_mad)) + if (count < sizeof (struct ib_user_mad)) return -EINVAL; spin_lock_irq(&file->recv_lock); @@ -276,28 +343,44 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, spin_unlock_irq(&file->recv_lock); - if (count < packet->length + sizeof (struct ib_user_mad)) { - /* Return length needed (and first RMPP segment) if too small */ - if (copy_to_user(buf, &packet->mad, - sizeof (struct ib_user_mad) + sizeof (struct ib_mad))) - ret = -EFAULT; - else - ret = -ENOSPC; - } else if (copy_to_user(buf, &packet->mad, - packet->length + sizeof (struct ib_user_mad))) - ret = -EFAULT; + if (packet->recv_wc) + ret = copy_recv_mad(buf, packet, count); else - ret = packet->length + sizeof (struct ib_user_mad); + ret = copy_send_mad(buf, packet, count); + if (ret < 0) { /* Requeue packet */ spin_lock_irq(&file->recv_lock); list_add(&packet->list, &file->recv_list); spin_unlock_irq(&file->recv_lock); - } else + } else { + if (packet->recv_wc) + ib_free_recv_mad(packet->recv_wc); kfree(packet); + } return ret; } +static int copy_rmpp_mad(struct ib_mad_send_buf *msg, const char __user *buf) +{ + int left, seg; + + /* Copy class specific header */ + if ((msg->hdr_len > IB_MGMT_RMPP_HDR) && + copy_from_user(msg->mad + IB_MGMT_RMPP_HDR, buf + IB_MGMT_RMPP_HDR, + msg->hdr_len - IB_MGMT_RMPP_HDR)) + return -EFAULT; + + /* All headers are in place. Copy data segments. */ + for (seg = 1, left = msg->data_len, buf += msg->hdr_len; left > 0; + seg++, left -= msg->seg_size, buf += msg->seg_size) { + if (copy_from_user(ib_get_rmpp_segment(msg, seg), buf, + min(left, msg->seg_size))) + return -EFAULT; + } + return 0; +} + static ssize_t ib_umad_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { @@ -309,14 +392,12 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, struct ib_rmpp_mad *rmpp_mad; u8 method; __be64 *tid; - int ret, length, hdr_len, copy_offset; - int rmpp_active, has_rmpp_header; + int ret, data_len, hdr_len, copy_offset, rmpp_active; if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR) return -EINVAL; - length = count - sizeof (struct ib_user_mad); - packet = kmalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); + packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); if (!packet) return -ENOMEM; @@ -363,35 +444,25 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) { hdr_len = IB_MGMT_SA_HDR; copy_offset = IB_MGMT_RMPP_HDR; - has_rmpp_header = 1; + rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE; } else if (rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START && rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END) { - hdr_len = IB_MGMT_VENDOR_HDR; - copy_offset = IB_MGMT_RMPP_HDR; - has_rmpp_header = 1; + hdr_len = IB_MGMT_VENDOR_HDR; + copy_offset = IB_MGMT_RMPP_HDR; + rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE; } else { hdr_len = IB_MGMT_MAD_HDR; copy_offset = IB_MGMT_MAD_HDR; - has_rmpp_header = 0; - } - - if (has_rmpp_header) - rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & - IB_MGMT_RMPP_FLAG_ACTIVE; - else rmpp_active = 0; - - /* Validate that the management class can support RMPP */ - if (rmpp_active && !agent->rmpp_version) { - ret = -EINVAL; - goto err_ah; } + data_len = count - sizeof (struct ib_user_mad) - hdr_len; packet->msg = ib_create_send_mad(agent, be32_to_cpu(packet->mad.hdr.qpn), - 0, rmpp_active, - hdr_len, length - hdr_len, - GFP_KERNEL); + 0, rmpp_active, hdr_len, + data_len, GFP_KERNEL); if (IS_ERR(packet->msg)) { ret = PTR_ERR(packet->msg); goto err_ah; @@ -402,14 +473,21 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, packet->msg->retries = packet->mad.hdr.retries; packet->msg->context[0] = packet; - /* Copy MAD headers (RMPP header in place) */ + /* Copy MAD header. Any RMPP header is already in place. */ memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); - /* Now, copy rest of message from user into send buffer */ - if (copy_from_user(packet->msg->mad + copy_offset, - buf + sizeof (struct ib_user_mad) + copy_offset, - length - copy_offset)) { - ret = -EFAULT; - goto err_msg; + buf += sizeof (struct ib_user_mad); + + if (!rmpp_active) { + if (copy_from_user(packet->msg->mad + copy_offset, + buf + copy_offset, + hdr_len + data_len - copy_offset)) { + ret = -EFAULT; + goto err_msg; + } + } else { + ret = copy_rmpp_mad(packet->msg, buf); + if (ret) + goto err_msg; } /* @@ -433,18 +511,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err_msg; up_read(&file->port->mutex); - return count; err_msg: ib_free_send_mad(packet->msg); - err_ah: ib_destroy_ah(ah); - err_up: up_read(&file->port->mutex); - err: kfree(packet); return ret; @@ -627,8 +701,11 @@ static int ib_umad_close(struct inode *inode, struct file *filp) already_dead = file->agents_dead; file->agents_dead = 1; - list_for_each_entry_safe(packet, tmp, &file->recv_list, list) + list_for_each_entry_safe(packet, tmp, &file->recv_list, list) { + if (packet->recv_wc) + ib_free_recv_mad(packet->recv_wc); kfree(packet); + } list_del(&file->port_list); diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index f7eecbc6af6..3372d67ff13 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. @@ -178,10 +178,12 @@ IB_UVERBS_DECLARE_CMD(reg_mr); IB_UVERBS_DECLARE_CMD(dereg_mr); IB_UVERBS_DECLARE_CMD(create_comp_channel); IB_UVERBS_DECLARE_CMD(create_cq); +IB_UVERBS_DECLARE_CMD(resize_cq); IB_UVERBS_DECLARE_CMD(poll_cq); IB_UVERBS_DECLARE_CMD(req_notify_cq); IB_UVERBS_DECLARE_CMD(destroy_cq); IB_UVERBS_DECLARE_CMD(create_qp); +IB_UVERBS_DECLARE_CMD(query_qp); IB_UVERBS_DECLARE_CMD(modify_qp); IB_UVERBS_DECLARE_CMD(destroy_qp); IB_UVERBS_DECLARE_CMD(post_send); @@ -193,6 +195,7 @@ IB_UVERBS_DECLARE_CMD(attach_mcast); IB_UVERBS_DECLARE_CMD(detach_mcast); IB_UVERBS_DECLARE_CMD(create_srq); IB_UVERBS_DECLARE_CMD(modify_srq); +IB_UVERBS_DECLARE_CMD(query_srq); IB_UVERBS_DECLARE_CMD(destroy_srq); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 407b6284d7d..9f69bd48eb1 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1,7 +1,8 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * Copyright (c) 2006 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -675,6 +676,46 @@ err: return ret; } +ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_resize_cq cmd; + struct ib_uverbs_resize_cq_resp resp; + struct ib_udata udata; + struct ib_cq *cq; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + mutex_lock(&ib_uverbs_idr_mutex); + + cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); + if (!cq || cq->uobject->context != file->ucontext || !cq->device->resize_cq) + goto out; + + ret = cq->device->resize_cq(cq, cmd.cqe, &udata); + if (ret) + goto out; + + memset(&resp, 0, sizeof resp); + resp.cqe = cq->cqe; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + mutex_unlock(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -956,6 +997,106 @@ err_up: return ret; } +ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_query_qp cmd; + struct ib_uverbs_query_qp_resp resp; + struct ib_qp *qp; + struct ib_qp_attr *attr; + struct ib_qp_init_attr *init_attr; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + attr = kmalloc(sizeof *attr, GFP_KERNEL); + init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL); + if (!attr || !init_attr) { + ret = -ENOMEM; + goto out; + } + + mutex_lock(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (qp && qp->uobject->context == file->ucontext) + ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr); + else + ret = -EINVAL; + + mutex_unlock(&ib_uverbs_idr_mutex); + + if (ret) + goto out; + + memset(&resp, 0, sizeof resp); + + resp.qp_state = attr->qp_state; + resp.cur_qp_state = attr->cur_qp_state; + resp.path_mtu = attr->path_mtu; + resp.path_mig_state = attr->path_mig_state; + resp.qkey = attr->qkey; + resp.rq_psn = attr->rq_psn; + resp.sq_psn = attr->sq_psn; + resp.dest_qp_num = attr->dest_qp_num; + resp.qp_access_flags = attr->qp_access_flags; + resp.pkey_index = attr->pkey_index; + resp.alt_pkey_index = attr->alt_pkey_index; + resp.en_sqd_async_notify = attr->en_sqd_async_notify; + resp.max_rd_atomic = attr->max_rd_atomic; + resp.max_dest_rd_atomic = attr->max_dest_rd_atomic; + resp.min_rnr_timer = attr->min_rnr_timer; + resp.port_num = attr->port_num; + resp.timeout = attr->timeout; + resp.retry_cnt = attr->retry_cnt; + resp.rnr_retry = attr->rnr_retry; + resp.alt_port_num = attr->alt_port_num; + resp.alt_timeout = attr->alt_timeout; + + memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16); + resp.dest.flow_label = attr->ah_attr.grh.flow_label; + resp.dest.sgid_index = attr->ah_attr.grh.sgid_index; + resp.dest.hop_limit = attr->ah_attr.grh.hop_limit; + resp.dest.traffic_class = attr->ah_attr.grh.traffic_class; + resp.dest.dlid = attr->ah_attr.dlid; + resp.dest.sl = attr->ah_attr.sl; + resp.dest.src_path_bits = attr->ah_attr.src_path_bits; + resp.dest.static_rate = attr->ah_attr.static_rate; + resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH); + resp.dest.port_num = attr->ah_attr.port_num; + + memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16); + resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label; + resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index; + resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit; + resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class; + resp.alt_dest.dlid = attr->alt_ah_attr.dlid; + resp.alt_dest.sl = attr->alt_ah_attr.sl; + resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits; + resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate; + resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH); + resp.alt_dest.port_num = attr->alt_ah_attr.port_num; + + resp.max_send_wr = init_attr->cap.max_send_wr; + resp.max_recv_wr = init_attr->cap.max_recv_wr; + resp.max_send_sge = init_attr->cap.max_send_sge; + resp.max_recv_sge = init_attr->cap.max_recv_sge; + resp.max_inline_data = init_attr->cap.max_inline_data; + resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + kfree(attr); + kfree(init_attr); + + return ret ? ret : in_len; +} + ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -990,7 +1131,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, attr->dest_qp_num = cmd.dest_qp_num; attr->qp_access_flags = cmd.qp_access_flags; attr->pkey_index = cmd.pkey_index; - attr->alt_pkey_index = cmd.pkey_index; + attr->alt_pkey_index = cmd.alt_pkey_index; attr->en_sqd_async_notify = cmd.en_sqd_async_notify; attr->max_rd_atomic = cmd.max_rd_atomic; attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; @@ -1094,8 +1235,8 @@ out: } ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) + const char __user *buf, int in_len, + int out_len) { struct ib_uverbs_post_send cmd; struct ib_uverbs_post_send_resp resp; @@ -1323,8 +1464,8 @@ err: } ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) + const char __user *buf, int in_len, + int out_len) { struct ib_uverbs_post_recv cmd; struct ib_uverbs_post_recv_resp resp; @@ -1374,8 +1515,8 @@ out: } ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) + const char __user *buf, int in_len, + int out_len) { struct ib_uverbs_post_srq_recv cmd; struct ib_uverbs_post_srq_recv_resp resp; @@ -1723,6 +1864,8 @@ retry: goto err_destroy; resp.srq_handle = uobj->uobject.id; + resp.max_wr = attr.attr.max_wr; + resp.max_sge = attr.attr.max_sge; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { @@ -1783,6 +1926,49 @@ out: return ret ? ret : in_len; } +ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_srq cmd; + struct ib_uverbs_query_srq_resp resp; + struct ib_srq_attr attr; + struct ib_srq *srq; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + mutex_lock(&ib_uverbs_idr_mutex); + + srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); + if (srq && srq->uobject->context == file->ucontext) + ret = ib_query_srq(srq, &attr); + else + ret = -EINVAL; + + mutex_unlock(&ib_uverbs_idr_mutex); + + if (ret) + goto out; + + memset(&resp, 0, sizeof resp); + + resp.max_wr = attr.max_wr; + resp.max_sge = attr.max_sge; + resp.srq_limit = attr.srq_limit; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + return ret ? ret : in_len; +} + ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 903f85a4bc0..ff092a0a94d 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. @@ -91,10 +91,12 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, + [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, + [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp, [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, @@ -106,6 +108,7 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, + [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, }; @@ -461,7 +464,6 @@ void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle, event->event, &uobj->async_list, &uobj->async_events_reported); - } void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index c857361be44..cae0845f472 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -5,7 +5,7 @@ * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -245,6 +245,258 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, } EXPORT_SYMBOL(ib_create_qp); +static const struct { + int valid; + enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETY + 1]; + enum ib_qp_attr_mask opt_param[IB_QPT_RAW_ETY + 1]; +} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_INIT] = { + .valid = 1, + .req_param = { + [IB_QPT_UD] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_RC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + } + }, + }, + [IB_QPS_INIT] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_INIT] = { + .valid = 1, + .opt_param = { + [IB_QPT_UD] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_RC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + } + }, + [IB_QPS_RTR] = { + .valid = 1, + .req_param = { + [IB_QPT_UC] = (IB_QP_AV | + IB_QP_PATH_MTU | + IB_QP_DEST_QPN | + IB_QP_RQ_PSN), + [IB_QPT_RC] = (IB_QP_AV | + IB_QP_PATH_MTU | + IB_QP_DEST_QPN | + IB_QP_RQ_PSN | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_MIN_RNR_TIMER), + }, + .opt_param = { + [IB_QPT_UD] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), + [IB_QPT_RC] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), + [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + } + } + }, + [IB_QPS_RTR] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .req_param = { + [IB_QPT_UD] = IB_QP_SQ_PSN, + [IB_QPT_UC] = IB_QP_SQ_PSN, + [IB_QPT_RC] = (IB_QP_TIMEOUT | + IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_SQ_PSN | + IB_QP_MAX_QP_RD_ATOMIC), + [IB_QPT_SMI] = IB_QP_SQ_PSN, + [IB_QPT_GSI] = IB_QP_SQ_PSN, + }, + .opt_param = { + [IB_QPT_UD] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE), + [IB_QPT_RC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), + [IB_QPT_SMI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + } + } + }, + [IB_QPS_RTS] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = { + [IB_QPT_UD] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_CUR_STATE | + IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE), + [IB_QPT_RC] = (IB_QP_CUR_STATE | + IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE | + IB_QP_MIN_RNR_TIMER), + [IB_QPT_SMI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + } + }, + [IB_QPS_SQD] = { + .valid = 1, + .opt_param = { + [IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY + } + }, + }, + [IB_QPS_SQD] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = { + [IB_QPT_UD] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE), + [IB_QPT_RC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), + [IB_QPT_SMI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + } + }, + [IB_QPS_SQD] = { + .valid = 1, + .opt_param = { + [IB_QPT_UD] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_AV | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_PATH_MIG_STATE), + [IB_QPT_RC] = (IB_QP_PORT | + IB_QP_AV | + IB_QP_TIMEOUT | + IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_MAX_QP_RD_ATOMIC | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), + [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + } + } + }, + [IB_QPS_SQE] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = { + [IB_QPT_UD] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_CUR_STATE | + IB_QP_ACCESS_FLAGS), + [IB_QPT_SMI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + } + } + }, + [IB_QPS_ERR] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 } + } +}; + +int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, + enum ib_qp_type type, enum ib_qp_attr_mask mask) +{ + enum ib_qp_attr_mask req_param, opt_param; + + if (cur_state < 0 || cur_state > IB_QPS_ERR || + next_state < 0 || next_state > IB_QPS_ERR) + return 0; + + if (mask & IB_QP_CUR_STATE && + cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS && + cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) + return 0; + + if (!qp_state_table[cur_state][next_state].valid) + return 0; + + req_param = qp_state_table[cur_state][next_state].req_param[type]; + opt_param = qp_state_table[cur_state][next_state].opt_param[type]; + + if ((mask & req_param) != req_param) + return 0; + + if (mask & ~(req_param | opt_param | IB_QP_STATE)) + return 0; + + return 1; +} +EXPORT_SYMBOL(ib_modify_qp_is_ok); + int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) @@ -322,11 +574,10 @@ int ib_destroy_cq(struct ib_cq *cq) } EXPORT_SYMBOL(ib_destroy_cq); -int ib_resize_cq(struct ib_cq *cq, - int cqe) +int ib_resize_cq(struct ib_cq *cq, int cqe) { return cq->device->resize_cq ? - cq->device->resize_cq(cq, cqe) : -ENOSYS; + cq->device->resize_cq(cq, cqe, NULL) : -ENOSYS; } EXPORT_SYMBOL(ib_resize_cq); diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index a19e0ed03d7..f023d393651 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -147,7 +147,7 @@ int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah) switch (ah->type) { case MTHCA_AH_ON_HCA: mthca_free(&dev->av_table.alloc, - (ah->avdma - dev->av_table.ddr_av_base) / + (ah->avdma - dev->av_table.ddr_av_base) / MTHCA_AV_SIZE); break; @@ -193,6 +193,37 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, return 0; } +int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr) +{ + struct mthca_ah *ah = to_mah(ibah); + struct mthca_dev *dev = to_mdev(ibah->device); + + /* Only implement for MAD and memfree ah for now. */ + if (ah->type == MTHCA_AH_ON_HCA) + return -ENOSYS; + + memset(attr, 0, sizeof *attr); + attr->dlid = be16_to_cpu(ah->av->dlid); + attr->sl = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28; + attr->static_rate = ah->av->msg_sr & 0x7; + attr->src_path_bits = ah->av->g_slid & 0x7F; + attr->port_num = be32_to_cpu(ah->av->port_pd) >> 24; + attr->ah_flags = mthca_ah_grh_present(ah) ? IB_AH_GRH : 0; + + if (attr->ah_flags) { + attr->grh.traffic_class = + be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20; + attr->grh.flow_label = + be32_to_cpu(ah->av->sl_tclass_flowlabel) & 0xfffff; + attr->grh.hop_limit = ah->av->hop_limit; + attr->grh.sgid_index = ah->av->gid_index & + (dev->limits.gid_table_len - 1); + memcpy(attr->grh.dgid.raw, ah->av->dgid, 16); + } + + return 0; +} + int __devinit mthca_init_av_table(struct mthca_dev *dev) { int err; diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 2825615ce81..343eca50787 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -182,25 +182,58 @@ struct mthca_cmd_context { u8 status; }; +static int fw_cmd_doorbell = 1; +module_param(fw_cmd_doorbell, int, 0644); +MODULE_PARM_DESC(fw_cmd_doorbell, "post FW commands through doorbell page if nonzero " + "(and supported by FW)"); + static inline int go_bit(struct mthca_dev *dev) { return readl(dev->hcr + HCR_STATUS_OFFSET) & swab32(1 << HCR_GO_BIT); } -static int mthca_cmd_post(struct mthca_dev *dev, - u64 in_param, - u64 out_param, - u32 in_modifier, - u8 op_modifier, - u16 op, - u16 token, - int event) +static void mthca_cmd_post_dbell(struct mthca_dev *dev, + u64 in_param, + u64 out_param, + u32 in_modifier, + u8 op_modifier, + u16 op, + u16 token) { - int err = 0; + void __iomem *ptr = dev->cmd.dbell_map; + u16 *offs = dev->cmd.dbell_offsets; - mutex_lock(&dev->cmd.hcr_mutex); + __raw_writel((__force u32) cpu_to_be32(in_param >> 32), ptr + offs[0]); + wmb(); + __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), ptr + offs[1]); + wmb(); + __raw_writel((__force u32) cpu_to_be32(in_modifier), ptr + offs[2]); + wmb(); + __raw_writel((__force u32) cpu_to_be32(out_param >> 32), ptr + offs[3]); + wmb(); + __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), ptr + offs[4]); + wmb(); + __raw_writel((__force u32) cpu_to_be32(token << 16), ptr + offs[5]); + wmb(); + __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) | + (1 << HCA_E_BIT) | + (op_modifier << HCR_OPMOD_SHIFT) | + op), ptr + offs[6]); + wmb(); + __raw_writel((__force u32) 0, ptr + offs[7]); + wmb(); +} +static int mthca_cmd_post_hcr(struct mthca_dev *dev, + u64 in_param, + u64 out_param, + u32 in_modifier, + u8 op_modifier, + u16 op, + u16 token, + int event) +{ if (event) { unsigned long end = jiffies + GO_BIT_TIMEOUT; @@ -210,10 +243,8 @@ static int mthca_cmd_post(struct mthca_dev *dev, } } - if (go_bit(dev)) { - err = -EAGAIN; - goto out; - } + if (go_bit(dev)) + return -EAGAIN; /* * We use writel (instead of something like memcpy_toio) @@ -236,7 +267,29 @@ static int mthca_cmd_post(struct mthca_dev *dev, (op_modifier << HCR_OPMOD_SHIFT) | op), dev->hcr + 6 * 4); -out: + return 0; +} + +static int mthca_cmd_post(struct mthca_dev *dev, + u64 in_param, + u64 out_param, + u32 in_modifier, + u8 op_modifier, + u16 op, + u16 token, + int event) +{ + int err = 0; + + mutex_lock(&dev->cmd.hcr_mutex); + + if (event && dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS && fw_cmd_doorbell) + mthca_cmd_post_dbell(dev, in_param, out_param, in_modifier, + op_modifier, op, token); + else + err = mthca_cmd_post_hcr(dev, in_param, out_param, in_modifier, + op_modifier, op, token, event); + mutex_unlock(&dev->cmd.hcr_mutex); return err; } @@ -275,7 +328,7 @@ static int mthca_cmd_poll(struct mthca_dev *dev, } if (out_is_imm) - *out_param = + *out_param = (u64) be32_to_cpu((__force __be32) __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET)) << 32 | (u64) be32_to_cpu((__force __be32) @@ -386,7 +439,7 @@ static int mthca_cmd_box(struct mthca_dev *dev, unsigned long timeout, u8 *status) { - if (dev->cmd.use_events) + if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS) return mthca_cmd_wait(dev, in_param, &out_param, 0, in_modifier, op_modifier, op, timeout, status); @@ -423,7 +476,7 @@ static int mthca_cmd_imm(struct mthca_dev *dev, unsigned long timeout, u8 *status) { - if (dev->cmd.use_events) + if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS) return mthca_cmd_wait(dev, in_param, out_param, 1, in_modifier, op_modifier, op, timeout, status); @@ -437,7 +490,7 @@ int mthca_cmd_init(struct mthca_dev *dev) { mutex_init(&dev->cmd.hcr_mutex); sema_init(&dev->cmd.poll_sem, 1); - dev->cmd.use_events = 0; + dev->cmd.flags = 0; dev->hcr = ioremap(pci_resource_start(dev->pdev, 0) + MTHCA_HCR_BASE, MTHCA_HCR_SIZE); @@ -461,6 +514,8 @@ void mthca_cmd_cleanup(struct mthca_dev *dev) { pci_pool_destroy(dev->cmd.pool); iounmap(dev->hcr); + if (dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS) + iounmap(dev->cmd.dbell_map); } /* @@ -498,7 +553,8 @@ int mthca_cmd_use_events(struct mthca_dev *dev) ; /* nothing */ --dev->cmd.token_mask; - dev->cmd.use_events = 1; + dev->cmd.flags |= MTHCA_CMD_USE_EVENTS; + down(&dev->cmd.poll_sem); return 0; @@ -511,7 +567,7 @@ void mthca_cmd_use_polling(struct mthca_dev *dev) { int i; - dev->cmd.use_events = 0; + dev->cmd.flags &= ~MTHCA_CMD_USE_EVENTS; for (i = 0; i < dev->cmd.max_cmds; ++i) down(&dev->cmd.event_sem); @@ -596,8 +652,9 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, * address or size and use that as our log2 size. */ lg = ffs(mthca_icm_addr(&iter) | mthca_icm_size(&iter)) - 1; - if (lg < 12) { - mthca_warn(dev, "Got FW area not aligned to 4K (%llx/%lx).\n", + if (lg < MTHCA_ICM_PAGE_SHIFT) { + mthca_warn(dev, "Got FW area not aligned to %d (%llx/%lx).\n", + MTHCA_ICM_PAGE_SIZE, (unsigned long long) mthca_icm_addr(&iter), mthca_icm_size(&iter)); err = -EINVAL; @@ -609,8 +666,9 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, virt += 1 << lg; } - pages[nent * 2 + 1] = cpu_to_be64((mthca_icm_addr(&iter) + - (i << lg)) | (lg - 12)); + pages[nent * 2 + 1] = + cpu_to_be64((mthca_icm_addr(&iter) + (i << lg)) | + (lg - MTHCA_ICM_PAGE_SHIFT)); ts += 1 << (lg - 10); ++tc; @@ -661,12 +719,41 @@ int mthca_RUN_FW(struct mthca_dev *dev, u8 *status) return mthca_cmd(dev, 0, 0, 0, CMD_RUN_FW, CMD_TIME_CLASS_A, status); } +static void mthca_setup_cmd_doorbells(struct mthca_dev *dev, u64 base) +{ + unsigned long addr; + u16 max_off = 0; + int i; + + for (i = 0; i < 8; ++i) + max_off = max(max_off, dev->cmd.dbell_offsets[i]); + + if ((base & PAGE_MASK) != ((base + max_off) & PAGE_MASK)) { + mthca_warn(dev, "Firmware doorbell region at 0x%016llx, " + "length 0x%x crosses a page boundary\n", + (unsigned long long) base, max_off); + return; + } + + addr = pci_resource_start(dev->pdev, 2) + + ((pci_resource_len(dev->pdev, 2) - 1) & base); + dev->cmd.dbell_map = ioremap(addr, max_off + sizeof(u32)); + if (!dev->cmd.dbell_map) + return; + + dev->cmd.flags |= MTHCA_CMD_POST_DOORBELLS; + mthca_dbg(dev, "Mapped doorbell page for posting FW commands\n"); +} + int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) { struct mthca_mailbox *mailbox; u32 *outbox; + u64 base; + u32 tmp; int err = 0; u8 lg; + int i; #define QUERY_FW_OUT_SIZE 0x100 #define QUERY_FW_VER_OFFSET 0x00 @@ -674,6 +761,10 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) #define QUERY_FW_ERR_START_OFFSET 0x30 #define QUERY_FW_ERR_SIZE_OFFSET 0x38 +#define QUERY_FW_CMD_DB_EN_OFFSET 0x10 +#define QUERY_FW_CMD_DB_OFFSET 0x50 +#define QUERY_FW_CMD_DB_BASE 0x60 + #define QUERY_FW_START_OFFSET 0x20 #define QUERY_FW_END_OFFSET 0x28 @@ -702,16 +793,29 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) ((dev->fw_ver & 0xffff0000ull) >> 16) | ((dev->fw_ver & 0x0000ffffull) << 16); + mthca_dbg(dev, "FW version %012llx, max commands %d\n", + (unsigned long long) dev->fw_ver, dev->cmd.max_cmds); + MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); dev->cmd.max_cmds = 1 << lg; MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET); MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET); - mthca_dbg(dev, "FW version %012llx, max commands %d\n", - (unsigned long long) dev->fw_ver, dev->cmd.max_cmds); mthca_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x\n", (unsigned long long) dev->catas_err.addr, dev->catas_err.size); + MTHCA_GET(tmp, outbox, QUERY_FW_CMD_DB_EN_OFFSET); + if (tmp & 0x1) { + mthca_dbg(dev, "FW supports commands through doorbells\n"); + + MTHCA_GET(base, outbox, QUERY_FW_CMD_DB_BASE); + for (i = 0; i < MTHCA_CMD_NUM_DBELL_DWORDS; ++i) + MTHCA_GET(dev->cmd.dbell_offsets[i], outbox, + QUERY_FW_CMD_DB_OFFSET + (i << 1)); + + mthca_setup_cmd_doorbells(dev, base); + } + if (mthca_is_memfree(dev)) { MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET); MTHCA_GET(dev->fw.arbel.clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET); @@ -720,12 +824,12 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) mthca_dbg(dev, "FW size %d KB\n", dev->fw.arbel.fw_pages << 2); /* - * Arbel page size is always 4 KB; round up number of - * system pages needed. + * Round up number of system pages needed in case + * MTHCA_ICM_PAGE_SIZE < PAGE_SIZE. */ dev->fw.arbel.fw_pages = - ALIGN(dev->fw.arbel.fw_pages, PAGE_SIZE >> 12) >> - (PAGE_SHIFT - 12); + ALIGN(dev->fw.arbel.fw_pages, PAGE_SIZE / MTHCA_ICM_PAGE_SIZE) >> + (PAGE_SHIFT - MTHCA_ICM_PAGE_SHIFT); mthca_dbg(dev, "Clear int @ %llx, EQ arm @ %llx, EQ set CI @ %llx\n", (unsigned long long) dev->fw.arbel.clr_int_base, @@ -1173,7 +1277,8 @@ int mthca_INIT_HCA(struct mthca_dev *dev, int err; #define INIT_HCA_IN_SIZE 0x200 -#define INIT_HCA_FLAGS_OFFSET 0x014 +#define INIT_HCA_FLAGS1_OFFSET 0x00c +#define INIT_HCA_FLAGS2_OFFSET 0x014 #define INIT_HCA_QPC_OFFSET 0x020 #define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10) #define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17) @@ -1216,15 +1321,18 @@ int mthca_INIT_HCA(struct mthca_dev *dev, memset(inbox, 0, INIT_HCA_IN_SIZE); + if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) + MTHCA_PUT(inbox, 0x1, INIT_HCA_FLAGS1_OFFSET); + #if defined(__LITTLE_ENDIAN) - *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1); + *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) &= ~cpu_to_be32(1 << 1); #elif defined(__BIG_ENDIAN) - *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1); + *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1 << 1); #else #error Host endianness not defined #endif /* Check port for UD address vector: */ - *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1); + *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1); /* We leave wqe_quota, responder_exu, etc as 0 (default) */ @@ -1438,11 +1546,11 @@ int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages, return ret; /* - * Arbel page size is always 4 KB; round up number of system - * pages needed. + * Round up number of system pages needed in case + * MTHCA_ICM_PAGE_SIZE < PAGE_SIZE. */ - *aux_pages = (*aux_pages + (1 << (PAGE_SHIFT - 12)) - 1) >> (PAGE_SHIFT - 12); - *aux_pages = ALIGN(*aux_pages, PAGE_SIZE >> 12) >> (PAGE_SHIFT - 12); + *aux_pages = ALIGN(*aux_pages, PAGE_SIZE / MTHCA_ICM_PAGE_SIZE) >> + (PAGE_SHIFT - MTHCA_ICM_PAGE_SHIFT); return 0; } @@ -1514,6 +1622,37 @@ int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, CMD_TIME_CLASS_A, status); } +int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size, + u8 *status) +{ + struct mthca_mailbox *mailbox; + __be32 *inbox; + int err; + +#define RESIZE_CQ_IN_SIZE 0x40 +#define RESIZE_CQ_LOG_SIZE_OFFSET 0x0c +#define RESIZE_CQ_LKEY_OFFSET 0x1c + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + memset(inbox, 0, RESIZE_CQ_IN_SIZE); + /* + * Leave start address fields zeroed out -- mthca assumes that + * MRs for CQs always start at virtual address 0. + */ + MTHCA_PUT(inbox, log_size, RESIZE_CQ_LOG_SIZE_OFFSET); + MTHCA_PUT(inbox, lkey, RESIZE_CQ_LKEY_OFFSET); + + err = mthca_cmd(dev, mailbox->dma, cq_num, 1, CMD_RESIZE_CQ, + CMD_TIME_CLASS_B, status); + + mthca_free_mailbox(dev, mailbox); + return err; +} + int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int srq_num, u8 *status) { @@ -1529,37 +1668,69 @@ int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, CMD_TIME_CLASS_A, status); } +int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num, + struct mthca_mailbox *mailbox, u8 *status) +{ + return mthca_cmd_box(dev, 0, mailbox->dma, num, 0, + CMD_QUERY_SRQ, CMD_TIME_CLASS_A, status); +} + int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status) { return mthca_cmd(dev, limit, srq_num, 0, CMD_ARM_SRQ, CMD_TIME_CLASS_B, status); } -int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, - int is_ee, struct mthca_mailbox *mailbox, u32 optmask, +int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur, + enum ib_qp_state next, u32 num, int is_ee, + struct mthca_mailbox *mailbox, u32 optmask, u8 *status) { - static const u16 op[] = { - [MTHCA_TRANS_RST2INIT] = CMD_RST2INIT_QPEE, - [MTHCA_TRANS_INIT2INIT] = CMD_INIT2INIT_QPEE, - [MTHCA_TRANS_INIT2RTR] = CMD_INIT2RTR_QPEE, - [MTHCA_TRANS_RTR2RTS] = CMD_RTR2RTS_QPEE, - [MTHCA_TRANS_RTS2RTS] = CMD_RTS2RTS_QPEE, - [MTHCA_TRANS_SQERR2RTS] = CMD_SQERR2RTS_QPEE, - [MTHCA_TRANS_ANY2ERR] = CMD_2ERR_QPEE, - [MTHCA_TRANS_RTS2SQD] = CMD_RTS2SQD_QPEE, - [MTHCA_TRANS_SQD2SQD] = CMD_SQD2SQD_QPEE, - [MTHCA_TRANS_SQD2RTS] = CMD_SQD2RTS_QPEE, - [MTHCA_TRANS_ANY2RST] = CMD_ERR2RST_QPEE + static const u16 op[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = { + [IB_QPS_RESET] = CMD_ERR2RST_QPEE, + [IB_QPS_ERR] = CMD_2ERR_QPEE, + [IB_QPS_INIT] = CMD_RST2INIT_QPEE, + }, + [IB_QPS_INIT] = { + [IB_QPS_RESET] = CMD_ERR2RST_QPEE, + [IB_QPS_ERR] = CMD_2ERR_QPEE, + [IB_QPS_INIT] = CMD_INIT2INIT_QPEE, + [IB_QPS_RTR] = CMD_INIT2RTR_QPEE, + }, + [IB_QPS_RTR] = { + [IB_QPS_RESET] = CMD_ERR2RST_QPEE, + [IB_QPS_ERR] = CMD_2ERR_QPEE, + [IB_QPS_RTS] = CMD_RTR2RTS_QPEE, + }, + [IB_QPS_RTS] = { + [IB_QPS_RESET] = CMD_ERR2RST_QPEE, + [IB_QPS_ERR] = CMD_2ERR_QPEE, + [IB_QPS_RTS] = CMD_RTS2RTS_QPEE, + [IB_QPS_SQD] = CMD_RTS2SQD_QPEE, + }, + [IB_QPS_SQD] = { + [IB_QPS_RESET] = CMD_ERR2RST_QPEE, + [IB_QPS_ERR] = CMD_2ERR_QPEE, + [IB_QPS_RTS] = CMD_SQD2RTS_QPEE, + [IB_QPS_SQD] = CMD_SQD2SQD_QPEE, + }, + [IB_QPS_SQE] = { + [IB_QPS_RESET] = CMD_ERR2RST_QPEE, + [IB_QPS_ERR] = CMD_2ERR_QPEE, + [IB_QPS_RTS] = CMD_SQERR2RTS_QPEE, + }, + [IB_QPS_ERR] = { + [IB_QPS_RESET] = CMD_ERR2RST_QPEE, + [IB_QPS_ERR] = CMD_2ERR_QPEE, + } }; + u8 op_mod = 0; int my_mailbox = 0; int err; - if (trans < 0 || trans >= ARRAY_SIZE(op)) - return -EINVAL; - - if (trans == MTHCA_TRANS_ANY2RST) { + if (op[cur][next] == CMD_ERR2RST_QPEE) { op_mod = 3; /* don't write outbox, any->reset */ /* For debugging */ @@ -1571,34 +1742,35 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, } else mailbox = NULL; } - } else { - if (0) { + + err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, + (!!is_ee << 24) | num, op_mod, + op[cur][next], CMD_TIME_CLASS_C, status); + + if (0 && mailbox) { int i; mthca_dbg(dev, "Dumping QP context:\n"); - printk(" opt param mask: %08x\n", be32_to_cpup(mailbox->buf)); + printk(" %08x\n", be32_to_cpup(mailbox->buf)); for (i = 0; i < 0x100 / 4; ++i) { if (i % 8 == 0) - printk(" [%02x] ", i * 4); + printk("[%02x] ", i * 4); printk(" %08x", be32_to_cpu(((__be32 *) mailbox->buf)[i + 2])); if ((i + 1) % 8 == 0) printk("\n"); } } - } - - if (trans == MTHCA_TRANS_ANY2RST) { - err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, - (!!is_ee << 24) | num, op_mod, - op[trans], CMD_TIME_CLASS_C, status); - if (0 && mailbox) { + if (my_mailbox) + mthca_free_mailbox(dev, mailbox); + } else { + if (0) { int i; mthca_dbg(dev, "Dumping QP context:\n"); - printk(" %08x\n", be32_to_cpup(mailbox->buf)); + printk(" opt param mask: %08x\n", be32_to_cpup(mailbox->buf)); for (i = 0; i < 0x100 / 4; ++i) { if (i % 8 == 0) - printk("[%02x] ", i * 4); + printk(" [%02x] ", i * 4); printk(" %08x", be32_to_cpu(((__be32 *) mailbox->buf)[i + 2])); if ((i + 1) % 8 == 0) @@ -1606,12 +1778,9 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, } } - } else - err = mthca_cmd(dev, mailbox->dma, (!!is_ee << 24) | num, - op_mod, op[trans], CMD_TIME_CLASS_C, status); - - if (my_mailbox) - mthca_free_mailbox(dev, mailbox); + err = mthca_cmd(dev, mailbox->dma, optmask | (!!is_ee << 24) | num, + op_mod, op[cur][next], CMD_TIME_CLASS_C, status); + } return err; } diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h index 18175bec84c..e4ec35c40dd 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.h +++ b/drivers/infiniband/hw/mthca/mthca_cmd.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -73,9 +74,9 @@ enum { MTHCA_CMD_STAT_REG_BOUND = 0x21, /* HCA local attached memory not present: */ MTHCA_CMD_STAT_LAM_NOT_PRE = 0x22, - /* Bad management packet (silently discarded): */ + /* Bad management packet (silently discarded): */ MTHCA_CMD_STAT_BAD_PKT = 0x30, - /* More outstanding CQEs in CQ than new CQ size: */ + /* More outstanding CQEs in CQ than new CQ size: */ MTHCA_CMD_STAT_BAD_SIZE = 0x40 }; @@ -298,13 +299,18 @@ int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int cq_num, u8 *status); int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int cq_num, u8 *status); +int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size, + u8 *status); int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int srq_num, u8 *status); int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int srq_num, u8 *status); +int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num, + struct mthca_mailbox *mailbox, u8 *status); int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status); -int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, - int is_ee, struct mthca_mailbox *mailbox, u32 optmask, +int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur, + enum ib_qp_state next, u32 num, int is_ee, + struct mthca_mailbox *mailbox, u32 optmask, u8 *status); int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, struct mthca_mailbox *mailbox, u8 *status); diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 96f1a86bf04..76aabc5bf37 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * @@ -150,24 +150,29 @@ struct mthca_err_cqe { #define MTHCA_ARBEL_CQ_DB_REQ_NOT (2 << 24) #define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT (3 << 24) -static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry) +static inline struct mthca_cqe *get_cqe_from_buf(struct mthca_cq_buf *buf, + int entry) { - if (cq->is_direct) - return cq->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE); + if (buf->is_direct) + return buf->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE); else - return cq->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf + return buf->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf + (entry * MTHCA_CQ_ENTRY_SIZE) % PAGE_SIZE; } -static inline struct mthca_cqe *cqe_sw(struct mthca_cq *cq, int i) +static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry) +{ + return get_cqe_from_buf(&cq->buf, entry); +} + +static inline struct mthca_cqe *cqe_sw(struct mthca_cqe *cqe) { - struct mthca_cqe *cqe = get_cqe(cq, i); return MTHCA_CQ_ENTRY_OWNER_HW & cqe->owner ? NULL : cqe; } static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq) { - return cqe_sw(cq, cq->cons_index & cq->ibcq.cqe); + return cqe_sw(get_cqe(cq, cq->cons_index & cq->ibcq.cqe)); } static inline void set_cqe_hw(struct mthca_cqe *cqe) @@ -289,7 +294,7 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, * from our QP and therefore don't need to be checked. */ for (prod_index = cq->cons_index; - cqe_sw(cq, prod_index & cq->ibcq.cqe); + cqe_sw(get_cqe(cq, prod_index & cq->ibcq.cqe)); ++prod_index) if (prod_index == cq->cons_index + cq->ibcq.cqe) break; @@ -324,12 +329,58 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, wake_up(&cq->wait); } -static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, - struct mthca_qp *qp, int wqe_index, int is_send, - struct mthca_err_cqe *cqe, - struct ib_wc *entry, int *free_cqe) +void mthca_cq_resize_copy_cqes(struct mthca_cq *cq) +{ + int i; + + /* + * In Tavor mode, the hardware keeps the consumer and producer + * indices mod the CQ size. Since we might be making the CQ + * bigger, we need to deal with the case where the producer + * index wrapped around before the CQ was resized. + */ + if (!mthca_is_memfree(to_mdev(cq->ibcq.device)) && + cq->ibcq.cqe < cq->resize_buf->cqe) { + cq->cons_index &= cq->ibcq.cqe; + if (cqe_sw(get_cqe(cq, cq->ibcq.cqe))) + cq->cons_index -= cq->ibcq.cqe + 1; + } + + for (i = cq->cons_index; cqe_sw(get_cqe(cq, i & cq->ibcq.cqe)); ++i) + memcpy(get_cqe_from_buf(&cq->resize_buf->buf, + i & cq->resize_buf->cqe), + get_cqe(cq, i & cq->ibcq.cqe), MTHCA_CQ_ENTRY_SIZE); +} + +int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent) +{ + int ret; + int i; + + ret = mthca_buf_alloc(dev, nent * MTHCA_CQ_ENTRY_SIZE, + MTHCA_MAX_DIRECT_CQ_SIZE, + &buf->queue, &buf->is_direct, + &dev->driver_pd, 1, &buf->mr); + if (ret) + return ret; + + for (i = 0; i < nent; ++i) + set_cqe_hw(get_cqe_from_buf(buf, i)); + + return 0; +} + +void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe) +{ + mthca_buf_free(dev, (cqe + 1) * MTHCA_CQ_ENTRY_SIZE, &buf->queue, + buf->is_direct, &buf->mr); +} + +static void handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, + struct mthca_qp *qp, int wqe_index, int is_send, + struct mthca_err_cqe *cqe, + struct ib_wc *entry, int *free_cqe) { - int err; int dbd; __be32 new_wqe; @@ -412,11 +463,9 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, * error case, so we don't have to check the doorbell count, etc. */ if (mthca_is_memfree(dev)) - return 0; + return; - err = mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe); - if (err) - return err; + mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe); /* * If we're at the end of the WQE chain, or we've used up our @@ -424,15 +473,13 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, * the next poll operation. */ if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd)) - return 0; + return; cqe->db_cnt = cpu_to_be16(be16_to_cpu(cqe->db_cnt) - dbd); cqe->wqe = new_wqe; cqe->syndrome = SYNDROME_WR_FLUSH_ERR; *free_cqe = 0; - - return 0; } static inline int mthca_poll_one(struct mthca_dev *dev, @@ -518,9 +565,9 @@ static inline int mthca_poll_one(struct mthca_dev *dev, } if (is_error) { - err = handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send, - (struct mthca_err_cqe *) cqe, - entry, &free_cqe); + handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send, + (struct mthca_err_cqe *) cqe, + entry, &free_cqe); goto out; } @@ -614,11 +661,14 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, spin_lock_irqsave(&cq->lock, flags); - for (npolled = 0; npolled < num_entries; ++npolled) { + npolled = 0; +repoll: + while (npolled < num_entries) { err = mthca_poll_one(dev, cq, &qp, &freed, entry + npolled); if (err) break; + ++npolled; } if (freed) { @@ -626,6 +676,42 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, update_cons_index(dev, cq, freed); } + /* + * If a CQ resize is in progress and we discovered that the + * old buffer is empty, then peek in the new buffer, and if + * it's not empty, switch to the new buffer and continue + * polling there. + */ + if (unlikely(err == -EAGAIN && cq->resize_buf && + cq->resize_buf->state == CQ_RESIZE_READY)) { + /* + * In Tavor mode, the hardware keeps the producer + * index modulo the CQ size. Since we might be making + * the CQ bigger, we need to mask our consumer index + * using the size of the old CQ buffer before looking + * in the new CQ buffer. + */ + if (!mthca_is_memfree(dev)) + cq->cons_index &= cq->ibcq.cqe; + + if (cqe_sw(get_cqe_from_buf(&cq->resize_buf->buf, + cq->cons_index & cq->resize_buf->cqe))) { + struct mthca_cq_buf tbuf; + int tcqe; + + tbuf = cq->buf; + tcqe = cq->ibcq.cqe; + cq->buf = cq->resize_buf->buf; + cq->ibcq.cqe = cq->resize_buf->cqe; + + cq->resize_buf->buf = tbuf; + cq->resize_buf->cqe = tcqe; + cq->resize_buf->state = CQ_RESIZE_SWAPPED; + + goto repoll; + } + } + spin_unlock_irqrestore(&cq->lock, flags); return err == 0 || err == -EAGAIN ? npolled : err; @@ -684,24 +770,14 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) return 0; } -static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq) -{ - mthca_buf_free(dev, (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE, - &cq->queue, cq->is_direct, &cq->mr); -} - int mthca_init_cq(struct mthca_dev *dev, int nent, struct mthca_ucontext *ctx, u32 pdn, struct mthca_cq *cq) { - int size = nent * MTHCA_CQ_ENTRY_SIZE; struct mthca_mailbox *mailbox; struct mthca_cq_context *cq_context; int err = -ENOMEM; u8 status; - int i; - - might_sleep(); cq->ibcq.cqe = nent - 1; cq->is_kernel = !ctx; @@ -739,14 +815,9 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, cq_context = mailbox->buf; if (cq->is_kernel) { - err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_CQ_SIZE, - &cq->queue, &cq->is_direct, - &dev->driver_pd, 1, &cq->mr); + err = mthca_alloc_cq_buf(dev, &cq->buf, nent); if (err) goto err_out_mailbox; - - for (i = 0; i < nent; ++i) - set_cqe_hw(get_cqe(cq, i)); } spin_lock_init(&cq->lock); @@ -765,7 +836,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn); cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn); cq_context->pd = cpu_to_be32(pdn); - cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey); + cq_context->lkey = cpu_to_be32(cq->buf.mr.ibmr.lkey); cq_context->cqn = cpu_to_be32(cq->cqn); if (mthca_is_memfree(dev)) { @@ -803,7 +874,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, err_out_free_mr: if (cq->is_kernel) - mthca_free_cq_buf(dev, cq); + mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); err_out_mailbox: mthca_free_mailbox(dev, mailbox); @@ -832,8 +903,6 @@ void mthca_free_cq(struct mthca_dev *dev, int err; u8 status; - might_sleep(); - mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) { mthca_warn(dev, "No memory for mailbox to free CQ.\n"); @@ -871,7 +940,7 @@ void mthca_free_cq(struct mthca_dev *dev, wait_event(cq->wait, !atomic_read(&cq->refcount)); if (cq->is_kernel) { - mthca_free_cq_buf(dev, cq); + mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); if (mthca_is_memfree(dev)) { mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index e481037288d..ad52edbefe9 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -1,7 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * @@ -53,8 +53,8 @@ #define DRV_NAME "ib_mthca" #define PFX DRV_NAME ": " -#define DRV_VERSION "0.07" -#define DRV_RELDATE "February 13, 2006" +#define DRV_VERSION "0.08" +#define DRV_RELDATE "February 14, 2006" enum { MTHCA_FLAG_DDR_HIDDEN = 1 << 1, @@ -64,7 +64,8 @@ enum { MTHCA_FLAG_NO_LAM = 1 << 5, MTHCA_FLAG_FMR = 1 << 6, MTHCA_FLAG_MEMFREE = 1 << 7, - MTHCA_FLAG_PCIE = 1 << 8 + MTHCA_FLAG_PCIE = 1 << 8, + MTHCA_FLAG_SINAI_OPT = 1 << 9 }; enum { @@ -110,9 +111,17 @@ enum { MTHCA_OPCODE_INVALID = 0xff }; +enum { + MTHCA_CMD_USE_EVENTS = 1 << 0, + MTHCA_CMD_POST_DOORBELLS = 1 << 1 +}; + +enum { + MTHCA_CMD_NUM_DBELL_DWORDS = 8 +}; + struct mthca_cmd { struct pci_pool *pool; - int use_events; struct mutex hcr_mutex; struct semaphore poll_sem; struct semaphore event_sem; @@ -121,6 +130,9 @@ struct mthca_cmd { int free_head; struct mthca_cmd_context *context; u16 token_mask; + u32 flags; + void __iomem *dbell_map; + u16 dbell_offsets[MTHCA_CMD_NUM_DBELL_DWORDS]; }; struct mthca_limits { @@ -470,12 +482,16 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn, enum ib_event_type event_type); void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, struct mthca_srq *srq); +void mthca_cq_resize_copy_cqes(struct mthca_cq *cq); +int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent); +void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe); int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, struct ib_srq_attr *attr, struct mthca_srq *srq); void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask); +int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); void mthca_srq_event(struct mthca_dev *dev, u32 srqn, enum ib_event_type event_type); void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); @@ -486,6 +502,8 @@ int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, void mthca_qp_event(struct mthca_dev *dev, u32 qpn, enum ib_event_type event_type); +int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask); int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); @@ -495,8 +513,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); -int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, - int index, int *dbd, __be32 *new_wqe); +void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, + int index, int *dbd, __be32 *new_wqe); int mthca_alloc_qp(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_cq *send_cq, @@ -522,6 +540,7 @@ int mthca_create_ah(struct mthca_dev *dev, int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah); int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, struct ib_ud_header *header); +int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr); int mthca_ah_grh_present(struct mthca_ah *ah); int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 2eabb27804c..cbdc348fb68 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -497,7 +497,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, eq->dev = dev; eq->nent = roundup_pow_of_two(max(nent, 2)); - npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE; + npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE; eq->page_list = kmalloc(npages * sizeof *eq->page_list, GFP_KERNEL); @@ -825,7 +825,7 @@ void __devexit mthca_unmap_eq_icm(struct mthca_dev *dev) { u8 status; - mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, PAGE_SIZE / 4096, &status); + mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, 1, &status); pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); __free_page(dev->eq_table.icm_page); @@ -928,7 +928,7 @@ int __devinit mthca_init_eq_table(struct mthca_dev *dev) mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n", dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status); - for (i = 0; i < MTHCA_EQ_CMD; ++i) + for (i = 0; i < MTHCA_NUM_EQ; ++i) if (mthca_is_memfree(dev)) arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask); else diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 1229c604c6e..4ace6a392f4 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -109,6 +109,19 @@ static void smp_snoop(struct ib_device *ibdev, } } +static void node_desc_override(struct ib_device *dev, + struct ib_mad *mad) +{ + if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP && + mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) { + mutex_lock(&to_mdev(dev)->cap_mask_mutex); + memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64); + mutex_unlock(&to_mdev(dev)->cap_mask_mutex); + } +} + static void forward_trap(struct mthca_dev *dev, u8 port_num, struct ib_mad *mad) @@ -207,8 +220,10 @@ int mthca_process_mad(struct ib_device *ibdev, return IB_MAD_RESULT_FAILURE; } - if (!out_mad->mad_hdr.status) + if (!out_mad->mad_hdr.status) { smp_snoop(ibdev, port_num, in_mad); + node_desc_override(ibdev, out_mad); + } /* set return bit in status of directed route responses */ if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 9c849d27b06..266f347c670 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -935,13 +935,19 @@ enum { static struct { u64 latest_fw; - int is_memfree; - int is_pcie; + u32 flags; } mthca_hca_table[] = { - [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 3, 3), .is_memfree = 0, .is_pcie = 0 }, - [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 0), .is_memfree = 0, .is_pcie = 1 }, - [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0), .is_memfree = 1, .is_pcie = 1 }, - [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 1), .is_memfree = 1, .is_pcie = 1 } + [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 4, 0), + .flags = 0 }, + [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 400), + .flags = MTHCA_FLAG_PCIE }, + [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0), + .flags = MTHCA_FLAG_MEMFREE | + MTHCA_FLAG_PCIE }, + [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 800), + .flags = MTHCA_FLAG_MEMFREE | + MTHCA_FLAG_PCIE | + MTHCA_FLAG_SINAI_OPT } }; static int __devinit mthca_init_one(struct pci_dev *pdev, @@ -1031,12 +1037,9 @@ static int __devinit mthca_init_one(struct pci_dev *pdev, mdev->pdev = pdev; + mdev->mthca_flags = mthca_hca_table[id->driver_data].flags; if (ddr_hidden) mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN; - if (mthca_hca_table[id->driver_data].is_memfree) - mdev->mthca_flags |= MTHCA_FLAG_MEMFREE; - if (mthca_hca_table[id->driver_data].is_pcie) - mdev->mthca_flags |= MTHCA_FLAG_PCIE; /* * Now reset the HCA before we touch the PCI capabilities or diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index 321f11e707f..9965bda9afe 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -187,7 +187,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) for (i = 0; i < MTHCA_QP_PER_MGM; ++i) if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) { - mthca_dbg(dev, "QP %06x already a member of MGM\n", + mthca_dbg(dev, "QP %06x already a member of MGM\n", ibqp->qp_num); err = 0; goto out; diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index d709cb162a7..15cc2f6eb47 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -202,7 +202,8 @@ void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int o if (--table->icm[i]->refcount == 0) { mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE, - MTHCA_TABLE_CHUNK_SIZE >> 12, &status); + MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE, + &status); mthca_free_icm(dev, table->icm[i]); table->icm[i] = NULL; } @@ -336,7 +337,8 @@ err: for (i = 0; i < num_icm; ++i) if (table->icm[i]) { mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE, - MTHCA_TABLE_CHUNK_SIZE >> 12, &status); + MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE, + &status); mthca_free_icm(dev, table->icm[i]); } @@ -353,7 +355,8 @@ void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table) for (i = 0; i < table->num_icm; ++i) if (table->icm[i]) { mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE, - MTHCA_TABLE_CHUNK_SIZE >> 12, &status); + MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE, + &status); mthca_free_icm(dev, table->icm[i]); } @@ -364,7 +367,7 @@ static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int pag { return dev->uar_table.uarc_base + uar->index * dev->uar_table.uarc_size + - page * 4096; + page * MTHCA_ICM_PAGE_SIZE; } int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, @@ -401,7 +404,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, if (ret < 0) goto out; - db_tab->page[i].mem.length = 4096; + db_tab->page[i].mem.length = MTHCA_ICM_PAGE_SIZE; db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK; ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); @@ -455,7 +458,7 @@ struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev) if (!mthca_is_memfree(dev)) return NULL; - npages = dev->uar_table.uarc_size / 4096; + npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL); if (!db_tab) return ERR_PTR(-ENOMEM); @@ -478,7 +481,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, if (!mthca_is_memfree(dev)) return; - for (i = 0; i < dev->uar_table.uarc_size / 4096; ++i) { + for (i = 0; i < dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; ++i) { if (db_tab->page[i].uvirt) { mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status); pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); @@ -551,20 +554,20 @@ int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, page = dev->db_tab->page + end; alloc: - page->db_rec = dma_alloc_coherent(&dev->pdev->dev, 4096, + page->db_rec = dma_alloc_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE, &page->mapping, GFP_KERNEL); if (!page->db_rec) { ret = -ENOMEM; goto out; } - memset(page->db_rec, 0, 4096); + memset(page->db_rec, 0, MTHCA_ICM_PAGE_SIZE); ret = mthca_MAP_ICM_page(dev, page->mapping, mthca_uarc_virt(dev, &dev->driver_uar, i), &status); if (!ret && status) ret = -EINVAL; if (ret) { - dma_free_coherent(&dev->pdev->dev, 4096, + dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE, page->db_rec, page->mapping); goto out; } @@ -612,7 +615,7 @@ void mthca_free_db(struct mthca_dev *dev, int type, int db_index) i >= dev->db_tab->max_group1 - 1) { mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); - dma_free_coherent(&dev->pdev->dev, 4096, + dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE, page->db_rec, page->mapping); page->db_rec = NULL; @@ -640,7 +643,7 @@ int mthca_init_db_tab(struct mthca_dev *dev) mutex_init(&dev->db_tab->mutex); - dev->db_tab->npages = dev->uar_table.uarc_size / 4096; + dev->db_tab->npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; dev->db_tab->max_group1 = 0; dev->db_tab->min_group2 = dev->db_tab->npages - 1; @@ -681,7 +684,7 @@ void mthca_cleanup_db_tab(struct mthca_dev *dev) mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); - dma_free_coherent(&dev->pdev->dev, 4096, + dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE, dev->db_tab->page[i].db_rec, dev->db_tab->page[i].mapping); } diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h index 36f1141a08a..6d42947e1dc 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.h +++ b/drivers/infiniband/hw/mthca/mthca_memfree.h @@ -45,6 +45,12 @@ ((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \ (sizeof (struct scatterlist))) +enum { + MTHCA_ICM_PAGE_SHIFT = 12, + MTHCA_ICM_PAGE_SIZE = 1 << MTHCA_ICM_PAGE_SHIFT, + MTHCA_DB_REC_PER_PAGE = MTHCA_ICM_PAGE_SIZE / 8 +}; + struct mthca_icm_chunk { struct list_head list; int npages; @@ -131,10 +137,6 @@ static inline unsigned long mthca_icm_size(struct mthca_icm_iter *iter) return sg_dma_len(&iter->chunk->mem[iter->page_idx]); } -enum { - MTHCA_DB_REC_PER_PAGE = 4096 / 8 -}; - struct mthca_db_page { DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE); __be64 *db_rec; diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index e995e2aa016..698b6212576 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -76,6 +76,8 @@ struct mthca_mpt_entry { #define MTHCA_MPT_STATUS_SW 0xF0 #define MTHCA_MPT_STATUS_HW 0x00 +#define SINAI_FMR_KEY_INC 0x1000000 + /* * Buddy allocator for MTT segments (currently not very efficient * since it doesn't keep a free list and just searches linearly @@ -330,6 +332,14 @@ static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key) return tavor_key_to_hw_index(key); } +static inline u32 adjust_key(struct mthca_dev *dev, u32 key) +{ + if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) + return ((key << 20) & 0x800000) | (key & 0x7fffff); + else + return key; +} + int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, u64 iova, u64 total_size, u32 access, struct mthca_mr *mr) { @@ -340,13 +350,12 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, int err; u8 status; - might_sleep(); - WARN_ON(buffer_size_shift >= 32); key = mthca_alloc(&dev->mr_table.mpt_alloc); if (key == -1) return -ENOMEM; + key = adjust_key(dev, key); mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key); if (mthca_is_memfree(dev)) { @@ -467,8 +476,6 @@ void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr) int err; u8 status; - might_sleep(); - err = mthca_HW2SW_MPT(dev, NULL, key_to_hw_index(dev, mr->ibmr.lkey) & (dev->limits.num_mpts - 1), @@ -495,9 +502,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, int err = -ENOMEM; int i; - might_sleep(); - - if (mr->attr.page_size < 12 || mr->attr.page_size >= 32) + if (mr->attr.page_shift < 12 || mr->attr.page_shift >= 32) return -EINVAL; /* For Arbel, all MTTs must fit in the same page. */ @@ -510,6 +515,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, key = mthca_alloc(&dev->mr_table.mpt_alloc); if (key == -1) return -ENOMEM; + key = adjust_key(dev, key); idx = key & (dev->limits.num_mpts - 1); mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key); @@ -523,7 +529,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, BUG_ON(!mr->mem.arbel.mpt); } else mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base + - sizeof *(mr->mem.tavor.mpt) * idx; + sizeof *(mr->mem.tavor.mpt) * idx; mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy); if (IS_ERR(mr->mtt)) @@ -549,7 +555,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, MTHCA_MPT_FLAG_REGION | access); - mpt_entry->page_size = cpu_to_be32(mr->attr.page_size - 12); + mpt_entry->page_size = cpu_to_be32(mr->attr.page_shift - 12); mpt_entry->key = cpu_to_be32(key); mpt_entry->pd = cpu_to_be32(pd); memset(&mpt_entry->start, 0, @@ -617,7 +623,7 @@ static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list, if (list_len > fmr->attr.max_pages) return -EINVAL; - page_mask = (1 << fmr->attr.page_size) - 1; + page_mask = (1 << fmr->attr.page_shift) - 1; /* We are getting page lists, so va must be page aligned. */ if (iova & page_mask) @@ -665,7 +671,7 @@ int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, } mpt_entry.lkey = cpu_to_be32(key); - mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size)); + mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift)); mpt_entry.start = cpu_to_be64(iova); __raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key); @@ -693,7 +699,10 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, ++fmr->maps; key = arbel_key_to_hw_index(fmr->ibmr.lkey); - key += dev->limits.num_mpts; + if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) + key += SINAI_FMR_KEY_INC; + else + key += dev->limits.num_mpts; fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key); *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW; @@ -706,7 +715,7 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, fmr->mem.arbel.mpt->key = cpu_to_be32(key); fmr->mem.arbel.mpt->lkey = cpu_to_be32(key); - fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size)); + fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift)); fmr->mem.arbel.mpt->start = cpu_to_be64(iova); wmb(); @@ -766,6 +775,9 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev) else dev->mthca_flags |= MTHCA_FLAG_FMR; + if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) + mthca_dbg(dev, "Memory key throughput optimization activated.\n"); + err = mthca_buddy_init(&dev->mr_table.mtt_buddy, fls(dev->limits.num_mtt_segs - 1)); @@ -785,7 +797,7 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev) } dev->mr_table.tavor_fmr.mpt_base = - ioremap(dev->mr_table.mpt_base, + ioremap(dev->mr_table.mpt_base, (1 << i) * sizeof (struct mthca_mpt_entry)); if (!dev->mr_table.tavor_fmr.mpt_base) { @@ -813,7 +825,7 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev) goto err_reserve_fmr; dev->mr_table.fmr_mtt_buddy = - &dev->mr_table.tavor_fmr.mtt_buddy; + &dev->mr_table.tavor_fmr.mtt_buddy; } else dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy; diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c index 3dbf06a6e6f..105fc5faadd 100644 --- a/drivers/infiniband/hw/mthca/mthca_pd.c +++ b/drivers/infiniband/hw/mthca/mthca_pd.c @@ -43,8 +43,6 @@ int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd) { int err = 0; - might_sleep(); - pd->privileged = privileged; atomic_set(&pd->sqp_count, 0); @@ -66,7 +64,6 @@ int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd) void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd) { - might_sleep(); if (pd->privileged) mthca_free_mr(dev, &pd->ntmr); mthca_free(&dev->pd_table.alloc, pd->pd_num); diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c index 08a909371b0..58d44aa3c30 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -152,7 +152,7 @@ u64 mthca_make_profile(struct mthca_dev *dev, } if (total_size > mem_avail) { mthca_err(dev, "Profile requires 0x%llx bytes; " - "won't in 0x%llx bytes of context memory.\n", + "won't fit in 0x%llx bytes of context memory.\n", (unsigned long long) total_size, (unsigned long long) mem_avail); kfree(profile); @@ -262,6 +262,14 @@ u64 mthca_make_profile(struct mthca_dev *dev, */ dev->limits.num_pds = MTHCA_NUM_PDS; + if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT && + init_hca->log_mpt_sz > 23) { + mthca_warn(dev, "MPT table too large (requested size 2^%d >= 2^24)\n", + init_hca->log_mpt_sz); + mthca_warn(dev, "Disabling memory key throughput optimization.\n"); + dev->mthca_flags &= ~MTHCA_FLAG_SINAI_OPT; + } + /* * For Tavor, FMRs use ioremapped PCI memory. For 32 bit * systems it may use too much vmalloc space to map all MTT diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index e88e39aef85..2c250bc11c3 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * @@ -108,12 +108,12 @@ static int mthca_query_device(struct ib_device *ibdev, props->max_srq_wr = mdev->limits.max_srq_wqes; props->max_srq_sge = mdev->limits.max_sg; props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay; - props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? + props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? IB_ATOMIC_HCA : IB_ATOMIC_NONE; props->max_pkeys = mdev->limits.pkey_table_len; props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms; props->max_mcast_qp_attach = MTHCA_QP_PER_MGM; - props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * + props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; err = 0; @@ -176,6 +176,23 @@ static int mthca_query_port(struct ib_device *ibdev, return err; } +static int mthca_modify_device(struct ib_device *ibdev, + int mask, + struct ib_device_modify *props) +{ + if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) + return -EOPNOTSUPP; + + if (mask & IB_DEVICE_MODIFY_NODE_DESC) { + if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex)) + return -ERESTARTSYS; + memcpy(ibdev->node_desc, props->node_desc, 64); + mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); + } + + return 0; +} + static int mthca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, struct ib_port_modify *props) @@ -669,9 +686,9 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, } if (context) { - cq->mr.ibmr.lkey = ucmd.lkey; - cq->set_ci_db_index = ucmd.set_db_index; - cq->arm_db_index = ucmd.arm_db_index; + cq->buf.mr.ibmr.lkey = ucmd.lkey; + cq->set_ci_db_index = ucmd.set_db_index; + cq->arm_db_index = ucmd.arm_db_index; } for (nent = 1; nent <= entries; nent <<= 1) @@ -689,6 +706,8 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, goto err_free; } + cq->resize_buf = NULL; + return &cq->ibcq; err_free: @@ -707,6 +726,121 @@ err_unmap_set: return ERR_PTR(err); } +static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq, + int entries) +{ + int ret; + + spin_lock_irq(&cq->lock); + if (cq->resize_buf) { + ret = -EBUSY; + goto unlock; + } + + cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC); + if (!cq->resize_buf) { + ret = -ENOMEM; + goto unlock; + } + + cq->resize_buf->state = CQ_RESIZE_ALLOC; + + ret = 0; + +unlock: + spin_unlock_irq(&cq->lock); + + if (ret) + return ret; + + ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries); + if (ret) { + spin_lock_irq(&cq->lock); + kfree(cq->resize_buf); + cq->resize_buf = NULL; + spin_unlock_irq(&cq->lock); + return ret; + } + + cq->resize_buf->cqe = entries - 1; + + spin_lock_irq(&cq->lock); + cq->resize_buf->state = CQ_RESIZE_READY; + spin_unlock_irq(&cq->lock); + + return 0; +} + +static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) +{ + struct mthca_dev *dev = to_mdev(ibcq->device); + struct mthca_cq *cq = to_mcq(ibcq); + struct mthca_resize_cq ucmd; + u32 lkey; + u8 status; + int ret; + + if (entries < 1 || entries > dev->limits.max_cqes) + return -EINVAL; + + entries = roundup_pow_of_two(entries + 1); + if (entries == ibcq->cqe + 1) + return 0; + + if (cq->is_kernel) { + ret = mthca_alloc_resize_buf(dev, cq, entries); + if (ret) + return ret; + lkey = cq->resize_buf->buf.mr.ibmr.lkey; + } else { + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + return -EFAULT; + lkey = ucmd.lkey; + } + + ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, long_log2(entries), &status); + if (status) + ret = -EINVAL; + + if (ret) { + if (cq->resize_buf) { + mthca_free_cq_buf(dev, &cq->resize_buf->buf, + cq->resize_buf->cqe); + kfree(cq->resize_buf); + spin_lock_irq(&cq->lock); + cq->resize_buf = NULL; + spin_unlock_irq(&cq->lock); + } + return ret; + } + + if (cq->is_kernel) { + struct mthca_cq_buf tbuf; + int tcqe; + + spin_lock_irq(&cq->lock); + if (cq->resize_buf->state == CQ_RESIZE_READY) { + mthca_cq_resize_copy_cqes(cq); + tbuf = cq->buf; + tcqe = cq->ibcq.cqe; + cq->buf = cq->resize_buf->buf; + cq->ibcq.cqe = cq->resize_buf->cqe; + } else { + tbuf = cq->resize_buf->buf; + tcqe = cq->resize_buf->cqe; + } + + kfree(cq->resize_buf); + cq->resize_buf = NULL; + spin_unlock_irq(&cq->lock); + + mthca_free_cq_buf(dev, &tbuf, tcqe); + } else + ibcq->cqe = entries - 1; + + return 0; +} + static int mthca_destroy_cq(struct ib_cq *cq) { if (cq->uobject) { @@ -1070,6 +1204,20 @@ static int mthca_init_node_data(struct mthca_dev *dev) goto out; init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; + + err = mthca_MAD_IFC(dev, 1, 1, + 1, NULL, NULL, in_mad, out_mad, + &status); + if (err) + goto out; + if (status) { + err = -EINVAL; + goto out; + } + + memcpy(dev->ib_dev.node_desc, out_mad->data, 64); + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; err = mthca_MAD_IFC(dev, 1, 1, @@ -1113,14 +1261,17 @@ int mthca_register_device(struct mthca_dev *dev) (1ull << IB_USER_VERBS_CMD_DEREG_MR) | (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); dev->ib_dev.node_type = IB_NODE_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; @@ -1128,6 +1279,7 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.class_dev.dev = &dev->pdev->dev; dev->ib_dev.query_device = mthca_query_device; dev->ib_dev.query_port = mthca_query_port; + dev->ib_dev.modify_device = mthca_modify_device; dev->ib_dev.modify_port = mthca_modify_port; dev->ib_dev.query_pkey = mthca_query_pkey; dev->ib_dev.query_gid = mthca_query_gid; @@ -1137,11 +1289,13 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.alloc_pd = mthca_alloc_pd; dev->ib_dev.dealloc_pd = mthca_dealloc_pd; dev->ib_dev.create_ah = mthca_ah_create; + dev->ib_dev.query_ah = mthca_ah_query; dev->ib_dev.destroy_ah = mthca_ah_destroy; if (dev->mthca_flags & MTHCA_FLAG_SRQ) { dev->ib_dev.create_srq = mthca_create_srq; - dev->ib_dev.modify_srq = mthca_modify_srq; + dev->ib_dev.modify_srq = mthca_modify_srq; + dev->ib_dev.query_srq = mthca_query_srq; dev->ib_dev.destroy_srq = mthca_destroy_srq; if (mthca_is_memfree(dev)) @@ -1152,8 +1306,10 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.create_qp = mthca_create_qp; dev->ib_dev.modify_qp = mthca_modify_qp; + dev->ib_dev.query_qp = mthca_query_qp; dev->ib_dev.destroy_qp = mthca_destroy_qp; dev->ib_dev.create_cq = mthca_create_cq; + dev->ib_dev.resize_cq = mthca_resize_cq; dev->ib_dev.destroy_cq = mthca_destroy_cq; dev->ib_dev.poll_cq = mthca_poll_cq; dev->ib_dev.get_dma_mr = mthca_get_dma_mr; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 1e73947b470..2e7f5213696 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two @@ -164,9 +164,11 @@ struct mthca_ah { * - wait_event until ref count is zero * * It is the consumer's responsibilty to make sure that no QP - * operations (WQE posting or state modification) are pending when the + * operations (WQE posting or state modification) are pending when a * QP is destroyed. Also, the consumer must make sure that calls to - * qp_modify are serialized. + * qp_modify are serialized. Similarly, the consumer is responsible + * for ensuring that no CQ resize operations are pending when a CQ + * is destroyed. * * Possible optimizations (wait for profile data to see if/where we * have locks bouncing between CPUs): @@ -176,25 +178,40 @@ struct mthca_ah { * send queue and one for the receive queue) */ +struct mthca_cq_buf { + union mthca_buf queue; + struct mthca_mr mr; + int is_direct; +}; + +struct mthca_cq_resize { + struct mthca_cq_buf buf; + int cqe; + enum { + CQ_RESIZE_ALLOC, + CQ_RESIZE_READY, + CQ_RESIZE_SWAPPED + } state; +}; + struct mthca_cq { - struct ib_cq ibcq; - spinlock_t lock; - atomic_t refcount; - int cqn; - u32 cons_index; - int is_direct; - int is_kernel; + struct ib_cq ibcq; + spinlock_t lock; + atomic_t refcount; + int cqn; + u32 cons_index; + struct mthca_cq_buf buf; + struct mthca_cq_resize *resize_buf; + int is_kernel; /* Next fields are Arbel only */ - int set_ci_db_index; - __be32 *set_ci_db; - int arm_db_index; - __be32 *arm_db; - int arm_sn; + int set_ci_db_index; + __be32 *set_ci_db; + int arm_db_index; + __be32 *arm_db; + int arm_sn; - union mthca_buf queue; - struct mthca_mr mr; - wait_queue_head_t wait; + wait_queue_head_t wait; }; struct mthca_srq { diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index fba608ed7df..f673c461e30 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -2,7 +2,7 @@ * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. - * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -286,207 +286,6 @@ static int to_mthca_st(int transport) } } -static const struct { - int trans; - u32 req_param[NUM_TRANS]; - u32 opt_param[NUM_TRANS]; -} state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { - [IB_QPS_RESET] = { - [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST }, - [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }, - [IB_QPS_INIT] = { - .trans = MTHCA_TRANS_RST2INIT, - .req_param = { - [UD] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_QKEY), - [UC] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_ACCESS_FLAGS), - [RC] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_ACCESS_FLAGS), - [MLX] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), - }, - /* bug-for-bug compatibility with VAPI: */ - .opt_param = { - [MLX] = IB_QP_PORT - } - }, - }, - [IB_QPS_INIT] = { - [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST }, - [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }, - [IB_QPS_INIT] = { - .trans = MTHCA_TRANS_INIT2INIT, - .opt_param = { - [UD] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_QKEY), - [UC] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_ACCESS_FLAGS), - [RC] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_ACCESS_FLAGS), - [MLX] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), - } - }, - [IB_QPS_RTR] = { - .trans = MTHCA_TRANS_INIT2RTR, - .req_param = { - [UC] = (IB_QP_AV | - IB_QP_PATH_MTU | - IB_QP_DEST_QPN | - IB_QP_RQ_PSN), - [RC] = (IB_QP_AV | - IB_QP_PATH_MTU | - IB_QP_DEST_QPN | - IB_QP_RQ_PSN | - IB_QP_MAX_DEST_RD_ATOMIC | - IB_QP_MIN_RNR_TIMER), - }, - .opt_param = { - [UD] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), - [UC] = (IB_QP_ALT_PATH | - IB_QP_ACCESS_FLAGS | - IB_QP_PKEY_INDEX), - [RC] = (IB_QP_ALT_PATH | - IB_QP_ACCESS_FLAGS | - IB_QP_PKEY_INDEX), - [MLX] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), - } - } - }, - [IB_QPS_RTR] = { - [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST }, - [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }, - [IB_QPS_RTS] = { - .trans = MTHCA_TRANS_RTR2RTS, - .req_param = { - [UD] = IB_QP_SQ_PSN, - [UC] = IB_QP_SQ_PSN, - [RC] = (IB_QP_TIMEOUT | - IB_QP_RETRY_CNT | - IB_QP_RNR_RETRY | - IB_QP_SQ_PSN | - IB_QP_MAX_QP_RD_ATOMIC), - [MLX] = IB_QP_SQ_PSN, - }, - .opt_param = { - [UD] = (IB_QP_CUR_STATE | - IB_QP_QKEY), - [UC] = (IB_QP_CUR_STATE | - IB_QP_ALT_PATH | - IB_QP_ACCESS_FLAGS | - IB_QP_PATH_MIG_STATE), - [RC] = (IB_QP_CUR_STATE | - IB_QP_ALT_PATH | - IB_QP_ACCESS_FLAGS | - IB_QP_MIN_RNR_TIMER | - IB_QP_PATH_MIG_STATE), - [MLX] = (IB_QP_CUR_STATE | - IB_QP_QKEY), - } - } - }, - [IB_QPS_RTS] = { - [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST }, - [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }, - [IB_QPS_RTS] = { - .trans = MTHCA_TRANS_RTS2RTS, - .opt_param = { - [UD] = (IB_QP_CUR_STATE | - IB_QP_QKEY), - [UC] = (IB_QP_ACCESS_FLAGS | - IB_QP_ALT_PATH | - IB_QP_PATH_MIG_STATE), - [RC] = (IB_QP_ACCESS_FLAGS | - IB_QP_ALT_PATH | - IB_QP_PATH_MIG_STATE | - IB_QP_MIN_RNR_TIMER), - [MLX] = (IB_QP_CUR_STATE | - IB_QP_QKEY), - } - }, - [IB_QPS_SQD] = { - .trans = MTHCA_TRANS_RTS2SQD, - }, - }, - [IB_QPS_SQD] = { - [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST }, - [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }, - [IB_QPS_RTS] = { - .trans = MTHCA_TRANS_SQD2RTS, - .opt_param = { - [UD] = (IB_QP_CUR_STATE | - IB_QP_QKEY), - [UC] = (IB_QP_CUR_STATE | - IB_QP_ALT_PATH | - IB_QP_ACCESS_FLAGS | - IB_QP_PATH_MIG_STATE), - [RC] = (IB_QP_CUR_STATE | - IB_QP_ALT_PATH | - IB_QP_ACCESS_FLAGS | - IB_QP_MIN_RNR_TIMER | - IB_QP_PATH_MIG_STATE), - [MLX] = (IB_QP_CUR_STATE | - IB_QP_QKEY), - } - }, - [IB_QPS_SQD] = { - .trans = MTHCA_TRANS_SQD2SQD, - .opt_param = { - [UD] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), - [UC] = (IB_QP_AV | - IB_QP_CUR_STATE | - IB_QP_ALT_PATH | - IB_QP_ACCESS_FLAGS | - IB_QP_PKEY_INDEX | - IB_QP_PATH_MIG_STATE), - [RC] = (IB_QP_AV | - IB_QP_TIMEOUT | - IB_QP_RETRY_CNT | - IB_QP_RNR_RETRY | - IB_QP_MAX_QP_RD_ATOMIC | - IB_QP_MAX_DEST_RD_ATOMIC | - IB_QP_CUR_STATE | - IB_QP_ALT_PATH | - IB_QP_ACCESS_FLAGS | - IB_QP_PKEY_INDEX | - IB_QP_MIN_RNR_TIMER | - IB_QP_PATH_MIG_STATE), - [MLX] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), - } - } - }, - [IB_QPS_SQE] = { - [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST }, - [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }, - [IB_QPS_RTS] = { - .trans = MTHCA_TRANS_SQERR2RTS, - .opt_param = { - [UD] = (IB_QP_CUR_STATE | - IB_QP_QKEY), - [UC] = (IB_QP_CUR_STATE | - IB_QP_ACCESS_FLAGS), - [MLX] = (IB_QP_CUR_STATE | - IB_QP_QKEY), - } - } - }, - [IB_QPS_ERR] = { - [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST }, - [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR } - } -}; - static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr, int attr_mask) { @@ -549,6 +348,141 @@ static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr, return cpu_to_be32(hw_access_flags); } +static inline enum ib_qp_state to_ib_qp_state(int mthca_state) +{ + switch (mthca_state) { + case MTHCA_QP_STATE_RST: return IB_QPS_RESET; + case MTHCA_QP_STATE_INIT: return IB_QPS_INIT; + case MTHCA_QP_STATE_RTR: return IB_QPS_RTR; + case MTHCA_QP_STATE_RTS: return IB_QPS_RTS; + case MTHCA_QP_STATE_DRAINING: + case MTHCA_QP_STATE_SQD: return IB_QPS_SQD; + case MTHCA_QP_STATE_SQE: return IB_QPS_SQE; + case MTHCA_QP_STATE_ERR: return IB_QPS_ERR; + default: return -1; + } +} + +static inline enum ib_mig_state to_ib_mig_state(int mthca_mig_state) +{ + switch (mthca_mig_state) { + case 0: return IB_MIG_ARMED; + case 1: return IB_MIG_REARM; + case 3: return IB_MIG_MIGRATED; + default: return -1; + } +} + +static int to_ib_qp_access_flags(int mthca_flags) +{ + int ib_flags = 0; + + if (mthca_flags & MTHCA_QP_BIT_RRE) + ib_flags |= IB_ACCESS_REMOTE_READ; + if (mthca_flags & MTHCA_QP_BIT_RWE) + ib_flags |= IB_ACCESS_REMOTE_WRITE; + if (mthca_flags & MTHCA_QP_BIT_RAE) + ib_flags |= IB_ACCESS_REMOTE_ATOMIC; + + return ib_flags; +} + +static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr, + struct mthca_qp_path *path) +{ + memset(ib_ah_attr, 0, sizeof *path); + ib_ah_attr->port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3; + ib_ah_attr->dlid = be16_to_cpu(path->rlid); + ib_ah_attr->sl = be32_to_cpu(path->sl_tclass_flowlabel) >> 28; + ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f; + ib_ah_attr->static_rate = path->static_rate & 0x7; + ib_ah_attr->ah_flags = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0; + if (ib_ah_attr->ah_flags) { + ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1); + ib_ah_attr->grh.hop_limit = path->hop_limit; + ib_ah_attr->grh.traffic_class = + (be32_to_cpu(path->sl_tclass_flowlabel) >> 20) & 0xff; + ib_ah_attr->grh.flow_label = + be32_to_cpu(path->sl_tclass_flowlabel) & 0xfffff; + memcpy(ib_ah_attr->grh.dgid.raw, + path->rgid, sizeof ib_ah_attr->grh.dgid.raw); + } +} + +int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_qp *qp = to_mqp(ibqp); + int err; + struct mthca_mailbox *mailbox; + struct mthca_qp_param *qp_param; + struct mthca_qp_context *context; + int mthca_state; + u8 status; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mthca_QUERY_QP(dev, qp->qpn, 0, mailbox, &status); + if (err) + goto out; + if (status) { + mthca_warn(dev, "QUERY_QP returned status %02x\n", status); + err = -EINVAL; + goto out; + } + + qp_param = mailbox->buf; + context = &qp_param->context; + mthca_state = be32_to_cpu(context->flags) >> 28; + + qp_attr->qp_state = to_ib_qp_state(mthca_state); + qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->path_mtu = context->mtu_msgmax >> 5; + qp_attr->path_mig_state = + to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3); + qp_attr->qkey = be32_to_cpu(context->qkey); + qp_attr->rq_psn = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff; + qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff; + qp_attr->dest_qp_num = be32_to_cpu(context->remote_qpn) & 0xffffff; + qp_attr->qp_access_flags = + to_ib_qp_access_flags(be32_to_cpu(context->params2)); + qp_attr->cap.max_send_wr = qp->sq.max; + qp_attr->cap.max_recv_wr = qp->rq.max; + qp_attr->cap.max_send_sge = qp->sq.max_gs; + qp_attr->cap.max_recv_sge = qp->rq.max_gs; + qp_attr->cap.max_inline_data = qp->max_inline_data; + + to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); + to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path); + + qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f; + qp_attr->alt_pkey_index = be32_to_cpu(context->alt_path.port_pkey) & 0x7f; + + /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ + qp_attr->sq_draining = mthca_state == MTHCA_QP_STATE_DRAINING; + + qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7); + + qp_attr->max_dest_rd_atomic = + 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7); + qp_attr->min_rnr_timer = + (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f; + qp_attr->port_num = qp_attr->ah_attr.port_num; + qp_attr->timeout = context->pri_path.ackto >> 3; + qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7; + qp_attr->rnr_retry = context->pri_path.rnr_retry >> 5; + qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; + qp_attr->alt_timeout = context->alt_path.ackto >> 3; + qp_init_attr->cap = qp_attr->cap; + +out: + mthca_free_mailbox(dev, mailbox); + return err; +} + static void mthca_path_set(struct ib_ah_attr *ah, struct mthca_qp_path *path) { path->g_mylmc = ah->src_path_bits & 0x7f; @@ -559,9 +493,9 @@ static void mthca_path_set(struct ib_ah_attr *ah, struct mthca_qp_path *path) path->g_mylmc |= 1 << 7; path->mgid_index = ah->grh.sgid_index; path->hop_limit = ah->grh.hop_limit; - path->sl_tclass_flowlabel = + path->sl_tclass_flowlabel = cpu_to_be32((ah->sl << 28) | - (ah->grh.traffic_class << 20) | + (ah->grh.traffic_class << 20) | (ah->grh.flow_label)); memcpy(path->rgid, ah->grh.dgid.raw, 16); } else @@ -576,18 +510,12 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) struct mthca_mailbox *mailbox; struct mthca_qp_param *qp_param; struct mthca_qp_context *qp_context; - u32 req_param, opt_param; + u32 sqd_event = 0; u8 status; int err; if (attr_mask & IB_QP_CUR_STATE) { - if (attr->cur_qp_state != IB_QPS_RTR && - attr->cur_qp_state != IB_QPS_RTS && - attr->cur_qp_state != IB_QPS_SQD && - attr->cur_qp_state != IB_QPS_SQE) - return -EINVAL; - else - cur_state = attr->cur_qp_state; + cur_state = attr->cur_qp_state; } else { spin_lock_irq(&qp->sq.lock); spin_lock(&qp->rq.lock); @@ -596,44 +524,20 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) spin_unlock_irq(&qp->sq.lock); } - if (attr_mask & IB_QP_STATE) { - if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR) - return -EINVAL; - new_state = attr->qp_state; - } else - new_state = cur_state; - - if (state_table[cur_state][new_state].trans == MTHCA_TRANS_INVALID) { - mthca_dbg(dev, "Illegal QP transition " - "%d->%d\n", cur_state, new_state); - return -EINVAL; - } - - req_param = state_table[cur_state][new_state].req_param[qp->transport]; - opt_param = state_table[cur_state][new_state].opt_param[qp->transport]; + new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; - if ((req_param & attr_mask) != req_param) { - mthca_dbg(dev, "QP transition " - "%d->%d missing req attr 0x%08x\n", - cur_state, new_state, - req_param & ~attr_mask); + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) { + mthca_dbg(dev, "Bad QP transition (transport %d) " + "%d->%d with attr 0x%08x\n", + qp->transport, cur_state, new_state, + attr_mask); return -EINVAL; } - if (attr_mask & ~(req_param | opt_param | IB_QP_STATE)) { - mthca_dbg(dev, "QP transition (transport %d) " - "%d->%d has extra attr 0x%08x\n", - qp->transport, - cur_state, new_state, - attr_mask & ~(req_param | opt_param | - IB_QP_STATE)); - return -EINVAL; - } - - if ((attr_mask & IB_QP_PKEY_INDEX) && + if ((attr_mask & IB_QP_PKEY_INDEX) && attr->pkey_index >= dev->limits.pkey_table_len) { - mthca_dbg(dev, "PKey index (%u) too large. max is %d\n", - attr->pkey_index,dev->limits.pkey_table_len-1); + mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n", + attr->pkey_index, dev->limits.pkey_table_len-1); return -EINVAL; } @@ -733,7 +637,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) if (attr_mask & IB_QP_RNR_RETRY) { qp_context->alt_path.rnr_retry = qp_context->pri_path.rnr_retry = attr->rnr_retry << 5; - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY | + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY | MTHCA_QP_OPTPAR_ALT_RNR_RETRY); } @@ -748,14 +652,20 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) } if (attr_mask & IB_QP_ALT_PATH) { + if (attr->alt_pkey_index >= dev->limits.pkey_table_len) { + mthca_dbg(dev, "Alternate P_Key index (%u) too large. max is %d\n", + attr->alt_pkey_index, dev->limits.pkey_table_len-1); + return -EINVAL; + } + if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) { - mthca_dbg(dev, "Alternate port number (%u) is invalid\n", + mthca_dbg(dev, "Alternate port number (%u) is invalid\n", attr->alt_port_num); return -EINVAL; } mthca_path_set(&attr->alt_ah_attr, &qp_context->alt_path); - qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index | + qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index | attr->alt_port_num << 24); qp_context->alt_path.ackto = attr->alt_timeout << 3; qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ALT_ADDR_PATH); @@ -841,11 +751,16 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->srqn); - err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans, - qp->qpn, 0, mailbox, 0, &status); + if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && + attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && + attr->en_sqd_async_notify) + sqd_event = 1 << 31; + + err = mthca_MODIFY_QP(dev, cur_state, new_state, qp->qpn, 0, + mailbox, sqd_event, &status); if (status) { - mthca_warn(dev, "modify QP %d returned status %02x.\n", - state_table[cur_state][new_state].trans, status); + mthca_warn(dev, "modify QP %d->%d returned status %02x.\n", + cur_state, new_state, status); err = -EINVAL; } @@ -1078,10 +993,10 @@ static int mthca_map_memfree(struct mthca_dev *dev, if (ret) goto err_qpc; - ret = mthca_table_get(dev, dev->qp_table.rdb_table, - qp->qpn << dev->qp_table.rdb_shift); - if (ret) - goto err_eqpc; + ret = mthca_table_get(dev, dev->qp_table.rdb_table, + qp->qpn << dev->qp_table.rdb_shift); + if (ret) + goto err_eqpc; } @@ -1393,7 +1308,8 @@ void mthca_free_qp(struct mthca_dev *dev, wait_event(qp->wait, !atomic_read(&qp->refcount)); if (qp->state != IB_QPS_RESET) - mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status); + mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0, + NULL, 0, &status); /* * If this is a userspace QP, the buffers, MR, CQs and so on @@ -1699,7 +1615,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, mthca_opcode[wr->opcode]); wmb(); ((struct mthca_next_seg *) prev_wqe)->ee_nds = - cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size); + cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size | + ((wr->send_flags & IB_SEND_FENCE) ? + MTHCA_NEXT_FENCE : 0)); if (!size0) { size0 = size; @@ -2061,7 +1979,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, mthca_opcode[wr->opcode]); wmb(); ((struct mthca_next_seg *) prev_wqe)->ee_nds = - cpu_to_be32(MTHCA_NEXT_DBD | size); + cpu_to_be32(MTHCA_NEXT_DBD | size | + ((wr->send_flags & IB_SEND_FENCE) ? + MTHCA_NEXT_FENCE : 0)); if (!size0) { size0 = size; @@ -2115,7 +2035,7 @@ int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, int i; void *wqe; - spin_lock_irqsave(&qp->rq.lock, flags); + spin_lock_irqsave(&qp->rq.lock, flags); /* XXX check that state is OK to post receive */ @@ -2182,8 +2102,8 @@ out: return err; } -int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, - int index, int *dbd, __be32 *new_wqe) +void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, + int index, int *dbd, __be32 *new_wqe) { struct mthca_next_seg *next; @@ -2193,7 +2113,7 @@ int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, */ if (qp->ibqp.srq) { *new_wqe = 0; - return 0; + return; } if (is_send) @@ -2207,8 +2127,6 @@ int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, (next->ee_nds & cpu_to_be32(0x3f)); else *new_wqe = 0; - - return 0; } int __devinit mthca_init_qp_table(struct mthca_dev *dev) diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index e7e153d9c4c..47a6a754a59 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -49,7 +49,8 @@ struct mthca_tavor_srq_context { __be32 state_pd; __be32 lkey; __be32 uar; - __be32 wqe_cnt; + __be16 limit_watermark; + __be16 wqe_cnt; u32 reserved[2]; }; @@ -271,6 +272,9 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, srq->first_free = 0; srq->last_free = srq->max - 1; + attr->max_wr = (mthca_is_memfree(dev)) ? srq->max - 1 : srq->max; + attr->max_sge = srq->max_gs; + return 0; err_out_free_srq: @@ -339,7 +343,7 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask) -{ +{ struct mthca_dev *dev = to_mdev(ibsrq->device); struct mthca_srq *srq = to_msrq(ibsrq); int ret; @@ -360,6 +364,41 @@ int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, return 0; } +int mthca_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + struct mthca_dev *dev = to_mdev(ibsrq->device); + struct mthca_srq *srq = to_msrq(ibsrq); + struct mthca_mailbox *mailbox; + struct mthca_arbel_srq_context *arbel_ctx; + struct mthca_tavor_srq_context *tavor_ctx; + u8 status; + int err; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mthca_QUERY_SRQ(dev, srq->srqn, mailbox, &status); + if (err) + goto out; + + if (mthca_is_memfree(dev)) { + arbel_ctx = mailbox->buf; + srq_attr->srq_limit = be16_to_cpu(arbel_ctx->limit_watermark); + } else { + tavor_ctx = mailbox->buf; + srq_attr->srq_limit = be16_to_cpu(tavor_ctx->limit_watermark); + } + + srq_attr->max_wr = (mthca_is_memfree(dev)) ? srq->max - 1 : srq->max; + srq_attr->max_sge = srq->max_gs; + +out: + mthca_free_mailbox(dev, mailbox); + + return err; +} + void mthca_srq_event(struct mthca_dev *dev, u32 srqn, enum ib_event_type event_type) { diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h index bb015c6494c..02cc0a766f3 100644 --- a/drivers/infiniband/hw/mthca/mthca_user.h +++ b/drivers/infiniband/hw/mthca/mthca_user.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -75,6 +75,11 @@ struct mthca_create_cq_resp { __u32 reserved; }; +struct mthca_resize_cq { + __u32 lkey; + __u32 reserved; +}; + struct mthca_create_srq { __u32 lkey; __u32 db_index; diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 2f85a9a831b..1251f86ec85 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -217,10 +217,16 @@ struct ipoib_neigh { struct list_head list; }; +/* + * We stash a pointer to our private neighbour information after our + * hardware address in neigh->ha. The ALIGN() expression here makes + * sure that this pointer is stored aligned so that an unaligned + * load is not needed to dereference it. + */ static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh) { - return (struct ipoib_neigh **) (neigh->ha + 24 - - (offsetof(struct neighbour, ha) & 4)); + return (void*) neigh + ALIGN(offsetof(struct neighbour, ha) + + INFINIBAND_ALEN, sizeof(void *)); } extern struct workqueue_struct *ipoib_workqueue; @@ -253,7 +259,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev); int ipoib_ib_dev_open(struct net_device *dev); int ipoib_ib_dev_up(struct net_device *dev); -int ipoib_ib_dev_down(struct net_device *dev); +int ipoib_ib_dev_down(struct net_device *dev, int flush); int ipoib_ib_dev_stop(struct net_device *dev); int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 86bcdd72a10..a1f5a05f2f3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -416,6 +416,7 @@ int ipoib_ib_dev_open(struct net_device *dev) ret = ipoib_ib_post_receives(dev); if (ret) { ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); + ipoib_ib_dev_stop(dev); return -1; } @@ -434,7 +435,7 @@ int ipoib_ib_dev_up(struct net_device *dev) return ipoib_mcast_start_thread(dev); } -int ipoib_ib_dev_down(struct net_device *dev) +int ipoib_ib_dev_down(struct net_device *dev, int flush) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -449,10 +450,11 @@ int ipoib_ib_dev_down(struct net_device *dev) set_bit(IPOIB_PKEY_STOP, &priv->flags); cancel_delayed_work(&priv->pkey_task); mutex_unlock(&pkey_mutex); - flush_workqueue(ipoib_workqueue); + if (flush) + flush_workqueue(ipoib_workqueue); } - ipoib_mcast_stop_thread(dev, 1); + ipoib_mcast_stop_thread(dev, flush); ipoib_mcast_dev_flush(dev); ipoib_flush_paths(dev); @@ -590,7 +592,7 @@ void ipoib_ib_dev_flush(void *_dev) ipoib_dbg(priv, "flushing\n"); - ipoib_ib_dev_down(dev); + ipoib_ib_dev_down(dev, 0); /* * The device could have been brought down between the start and when diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index c3b5f79d116..0ebacd558ff 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -133,7 +133,13 @@ static int ipoib_stop(struct net_device *dev) netif_stop_queue(dev); - ipoib_ib_dev_down(dev); + /* + * Now flush workqueue to make sure a scheduled task doesn't + * bring our internal state back up. + */ + flush_workqueue(ipoib_workqueue); + + ipoib_ib_dev_down(dev, 1); ipoib_ib_dev_stop(dev); if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { @@ -513,12 +519,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) be32_to_cpup((__be32 *) skb->dst->neighbour->ha)); } else { neigh->ah = NULL; - if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { - __skb_queue_tail(&neigh->queue, skb); - } else { - ++priv->stats.tx_dropped; - dev_kfree_skb_any(skb); - } + __skb_queue_tail(&neigh->queue, skb); if (!path->query && path_rec_start(dev, path)) goto err; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index a2408d7ec59..93c462eaf4f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -115,7 +115,6 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) if (neigh->ah) ipoib_put_ah(neigh->ah); *to_ipoib_neigh(neigh->neighbour) = NULL; - neigh->neighbour->ops->destructor = NULL; kfree(neigh); } @@ -213,6 +212,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, { struct net_device *dev = mcast->dev; struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_ah *ah; int ret; mcast->mcmember = *mcmember; @@ -269,8 +269,8 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, av.static_rate, priv->local_rate, ib_sa_rate_enum_to_int(mcast->mcmember.rate)); - mcast->ah = ipoib_create_ah(dev, priv->pd, &av); - if (!mcast->ah) { + ah = ipoib_create_ah(dev, priv->pd, &av); + if (!ah) { ipoib_warn(priv, "ib_address_create failed\n"); } else { ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT @@ -280,6 +280,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, be16_to_cpu(mcast->mcmember.mlid), mcast->mcmember.sl); } + + spin_lock_irq(&priv->lock); + mcast->ah = ah; + spin_unlock_irq(&priv->lock); } /* actually send any queued packets */ @@ -432,9 +436,11 @@ static void ipoib_mcast_join_complete(int status, if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; + mutex_lock(&mcast_mutex); + + spin_lock_irq(&priv->lock); mcast->query = NULL; - mutex_lock(&mcast_mutex); if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) { if (status == -ETIMEDOUT) queue_work(ipoib_workqueue, &priv->mcast_task); @@ -443,6 +449,7 @@ static void ipoib_mcast_join_complete(int status, mcast->backoff * HZ); } else complete(&mcast->done); + spin_unlock_irq(&priv->lock); mutex_unlock(&mcast_mutex); return; @@ -630,21 +637,27 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush) if (flush) flush_workqueue(ipoib_workqueue); + spin_lock_irq(&priv->lock); if (priv->broadcast && priv->broadcast->query) { ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query); priv->broadcast->query = NULL; + spin_unlock_irq(&priv->lock); ipoib_dbg_mcast(priv, "waiting for bcast\n"); wait_for_completion(&priv->broadcast->done); - } + } else + spin_unlock_irq(&priv->lock); list_for_each_entry(mcast, &priv->multicast_list, list) { + spin_lock_irq(&priv->lock); if (mcast->query) { ib_sa_cancel_query(mcast->query_id, mcast->query); mcast->query = NULL; + spin_unlock_irq(&priv->lock); ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mcast->mcmember.mgid)); wait_for_completion(&mcast->done); - } + } else + spin_unlock_irq(&priv->lock); } return 0; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index faaf10e5fc7..18d2f53ec34 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -255,6 +255,6 @@ void ipoib_event(struct ib_event_handler *handler, record->event == IB_EVENT_LID_CHANGE || record->event == IB_EVENT_SM_CHANGE) { ipoib_dbg(priv, "Port active event\n"); - schedule_work(&priv->flush_task); + queue_work(ipoib_workqueue, &priv->flush_task); } } diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 960dae5c87d..a13dcdf90a4 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1237,6 +1237,87 @@ static int srp_reset_host(struct scsi_cmnd *scmnd) return ret; } +static ssize_t show_id_ext(struct class_device *cdev, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(cdev)); + + if (target->state == SRP_TARGET_DEAD || + target->state == SRP_TARGET_REMOVED) + return -ENODEV; + + return sprintf(buf, "0x%016llx\n", + (unsigned long long) be64_to_cpu(target->id_ext)); +} + +static ssize_t show_ioc_guid(struct class_device *cdev, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(cdev)); + + if (target->state == SRP_TARGET_DEAD || + target->state == SRP_TARGET_REMOVED) + return -ENODEV; + + return sprintf(buf, "0x%016llx\n", + (unsigned long long) be64_to_cpu(target->ioc_guid)); +} + +static ssize_t show_service_id(struct class_device *cdev, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(cdev)); + + if (target->state == SRP_TARGET_DEAD || + target->state == SRP_TARGET_REMOVED) + return -ENODEV; + + return sprintf(buf, "0x%016llx\n", + (unsigned long long) be64_to_cpu(target->service_id)); +} + +static ssize_t show_pkey(struct class_device *cdev, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(cdev)); + + if (target->state == SRP_TARGET_DEAD || + target->state == SRP_TARGET_REMOVED) + return -ENODEV; + + return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey)); +} + +static ssize_t show_dgid(struct class_device *cdev, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(cdev)); + + if (target->state == SRP_TARGET_DEAD || + target->state == SRP_TARGET_REMOVED) + return -ENODEV; + + return sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + be16_to_cpu(((__be16 *) target->path.dgid.raw)[0]), + be16_to_cpu(((__be16 *) target->path.dgid.raw)[1]), + be16_to_cpu(((__be16 *) target->path.dgid.raw)[2]), + be16_to_cpu(((__be16 *) target->path.dgid.raw)[3]), + be16_to_cpu(((__be16 *) target->path.dgid.raw)[4]), + be16_to_cpu(((__be16 *) target->path.dgid.raw)[5]), + be16_to_cpu(((__be16 *) target->path.dgid.raw)[6]), + be16_to_cpu(((__be16 *) target->path.dgid.raw)[7])); +} + +static CLASS_DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); +static CLASS_DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); +static CLASS_DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); +static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); +static CLASS_DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); + +static struct class_device_attribute *srp_host_attrs[] = { + &class_device_attr_id_ext, + &class_device_attr_ioc_guid, + &class_device_attr_service_id, + &class_device_attr_pkey, + &class_device_attr_dgid, + NULL +}; + static struct scsi_host_template srp_template = { .module = THIS_MODULE, .name = DRV_NAME, @@ -1249,7 +1330,8 @@ static struct scsi_host_template srp_template = { .this_id = -1, .sg_tablesize = SRP_MAX_INDIRECT, .cmd_per_lun = SRP_SQ_SIZE, - .use_clustering = ENABLE_CLUSTERING + .use_clustering = ENABLE_CLUSTERING, + .shost_attrs = srp_host_attrs }; static int srp_add_target(struct srp_host *host, struct srp_target_port *target) @@ -1366,6 +1448,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) strlcpy(dgid, p + i * 2, 3); target->path.dgid.raw[i] = simple_strtoul(dgid, NULL, 16); } + kfree(p); break; case SRP_OPT_PKEY: diff --git a/include/rdma/ib_fmr_pool.h b/include/rdma/ib_fmr_pool.h index 86b7e93f198..4ace54cd0cc 100644 --- a/include/rdma/ib_fmr_pool.h +++ b/include/rdma/ib_fmr_pool.h @@ -43,6 +43,7 @@ struct ib_fmr_pool; /** * struct ib_fmr_pool_param - Parameters for creating FMR pool * @max_pages_per_fmr:Maximum number of pages per map request. + * @page_shift: Log2 of sizeof "pages" mapped by this fmr * @access:Access flags for FMRs in pool. * @pool_size:Number of FMRs to allocate for pool. * @dirty_watermark:Flush is triggered when @dirty_watermark dirty @@ -55,6 +56,7 @@ struct ib_fmr_pool; */ struct ib_fmr_pool_param { int max_pages_per_fmr; + int page_shift; enum ib_access_flags access; int pool_size; int dirty_watermark; diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 2c133506742..51ab8eddb29 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -33,7 +33,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: ib_mad.h 2775 2005-07-02 13:42:12Z halr $ + * $Id: ib_mad.h 5596 2006-03-03 01:00:07Z sean.hefty $ */ #if !defined( IB_MAD_H ) @@ -208,15 +208,23 @@ struct ib_class_port_info /** * ib_mad_send_buf - MAD data buffer and work request for sends. * @next: A pointer used to chain together MADs for posting. - * @mad: References an allocated MAD data buffer. + * @mad: References an allocated MAD data buffer for MADs that do not have + * RMPP active. For MADs using RMPP, references the common and management + * class specific headers. * @mad_agent: MAD agent that allocated the buffer. * @ah: The address handle to use when sending the MAD. * @context: User-controlled context fields. + * @hdr_len: Indicates the size of the data header of the MAD. This length + * includes the common MAD, RMPP, and class specific headers. + * @data_len: Indicates the total size of user-transferred data. + * @seg_count: The number of RMPP segments allocated for this send. + * @seg_size: Size of each RMPP segment. * @timeout_ms: Time to wait for a response. * @retries: Number of times to retry a request for a response. * * Users are responsible for initializing the MAD buffer itself, with the - * exception of specifying the payload length field in any RMPP MAD. + * exception of any RMPP header. Additional segment buffer space allocated + * beyond data_len is padding. */ struct ib_mad_send_buf { struct ib_mad_send_buf *next; @@ -224,6 +232,10 @@ struct ib_mad_send_buf { struct ib_mad_agent *mad_agent; struct ib_ah *ah; void *context[2]; + int hdr_len; + int data_len; + int seg_count; + int seg_size; int timeout_ms; int retries; }; @@ -299,7 +311,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, * @mad_recv_wc: Received work completion information on the received MAD. * * MADs received in response to a send request operation will be handed to - * the user after the send operation completes. All data buffers given + * the user before the send operation completes. All data buffers given * to registered agents through this routine are owned by the receiving * client, except for snooping agents. Clients snooping MADs should not * modify the data referenced by @mad_recv_wc. @@ -485,17 +497,6 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); int ib_post_send_mad(struct ib_mad_send_buf *send_buf, struct ib_mad_send_buf **bad_send_buf); -/** - * ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer. - * @mad_recv_wc: Work completion information for a received MAD. - * @buf: User-provided data buffer to receive the coalesced buffers. The - * referenced buffer should be at least the size of the mad_len specified - * by @mad_recv_wc. - * - * This call copies a chain of received MAD segments into a single data buffer, - * removing duplicated headers. - */ -void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf); /** * ib_free_recv_mad - Returns data buffers used to receive a MAD. @@ -590,9 +591,10 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, * with an initialized work request structure. Users may modify the returned * MAD data buffer before posting the send. * - * The returned data buffer will be cleared. Users are responsible for - * initializing the common MAD and any class specific headers. If @rmpp_active - * is set, the RMPP header will be initialized for sending. + * The returned MAD header, class specific headers, and any padding will be + * cleared. Users are responsible for initializing the common MAD header, + * any class specific header, and MAD data area. + * If @rmpp_active is set, the RMPP header will be initialized for sending. */ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, @@ -601,6 +603,16 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, gfp_t gfp_mask); /** + * ib_get_rmpp_segment - returns the data buffer for a given RMPP segment. + * @send_buf: Previously allocated send data buffer. + * @seg_num: number of segment to return + * + * This routine returns a pointer to the data buffer of an RMPP MAD. + * Users must provide synchronization to @send_buf around this call. + */ +void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num); + +/** * ib_free_send_mad - Returns data buffers used to send a MAD. * @send_buf: Previously allocated send data buffer. */ diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index 5ff1490c08d..338ed433306 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -1,7 +1,8 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * Copyright (c) 2006 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -43,7 +44,7 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define IB_USER_VERBS_ABI_VERSION 4 +#define IB_USER_VERBS_ABI_VERSION 6 enum { IB_USER_VERBS_CMD_GET_CONTEXT, @@ -265,6 +266,17 @@ struct ib_uverbs_create_cq_resp { __u32 cqe; }; +struct ib_uverbs_resize_cq { + __u64 response; + __u32 cq_handle; + __u32 cqe; + __u64 driver_data[0]; +}; + +struct ib_uverbs_resize_cq_resp { + __u32 cqe; +}; + struct ib_uverbs_poll_cq { __u64 response; __u32 cq_handle; @@ -338,6 +350,7 @@ struct ib_uverbs_create_qp_resp { __u32 max_send_sge; __u32 max_recv_sge; __u32 max_inline_data; + __u32 reserved; }; /* @@ -359,6 +372,47 @@ struct ib_uverbs_qp_dest { __u8 port_num; }; +struct ib_uverbs_query_qp { + __u64 response; + __u32 qp_handle; + __u32 attr_mask; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_qp_resp { + struct ib_uverbs_qp_dest dest; + struct ib_uverbs_qp_dest alt_dest; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 qp_state; + __u8 cur_qp_state; + __u8 path_mtu; + __u8 path_mig_state; + __u8 en_sqd_async_notify; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 sq_sig_all; + __u8 reserved[5]; + __u64 driver_data[0]; +}; + struct ib_uverbs_modify_qp { struct ib_uverbs_qp_dest dest; struct ib_uverbs_qp_dest alt_dest; @@ -415,7 +469,7 @@ struct ib_uverbs_sge { }; struct ib_uverbs_send_wr { - __u64 wr_id; + __u64 wr_id; __u32 num_sge; __u32 opcode; __u32 send_flags; @@ -489,7 +543,7 @@ struct ib_uverbs_post_srq_recv_resp { struct ib_uverbs_global_route { __u8 dgid[16]; - __u32 flow_label; + __u32 flow_label; __u8 sgid_index; __u8 hop_limit; __u8 traffic_class; @@ -551,6 +605,9 @@ struct ib_uverbs_create_srq { struct ib_uverbs_create_srq_resp { __u32 srq_handle; + __u32 max_wr; + __u32 max_sge; + __u32 reserved; }; struct ib_uverbs_modify_srq { @@ -561,6 +618,20 @@ struct ib_uverbs_modify_srq { __u64 driver_data[0]; }; +struct ib_uverbs_query_srq { + __u64 response; + __u32 srq_handle; + __u32 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_srq_resp { + __u32 max_wr; + __u32 max_sge; + __u32 srq_limit; + __u32 reserved; +}; + struct ib_uverbs_destroy_srq { __u64 response; __u32 srq_handle; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 22fc886b969..c1ad6273ac6 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -5,7 +5,7 @@ * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -222,11 +222,13 @@ struct ib_port_attr { }; enum ib_device_modify_flags { - IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 + IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 << 0, + IB_DEVICE_MODIFY_NODE_DESC = 1 << 1 }; struct ib_device_modify { u64 sys_image_guid; + char node_desc[64]; }; enum ib_port_modify_flags { @@ -649,7 +651,7 @@ struct ib_mw_bind { struct ib_fmr_attr { int max_pages; int max_maps; - u8 page_size; + u8 page_shift; }; struct ib_ucontext { @@ -880,7 +882,8 @@ struct ib_device { struct ib_ucontext *context, struct ib_udata *udata); int (*destroy_cq)(struct ib_cq *cq); - int (*resize_cq)(struct ib_cq *cq, int cqe); + int (*resize_cq)(struct ib_cq *cq, int cqe, + struct ib_udata *udata); int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc); int (*peek_cq)(struct ib_cq *cq, int wc_cnt); @@ -950,6 +953,7 @@ struct ib_device { u64 uverbs_cmd_mask; int uverbs_abi_ver; + char node_desc[64]; __be64 node_guid; u8 node_type; u8 phys_port_cnt; @@ -986,6 +990,24 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0; } +/** + * ib_modify_qp_is_ok - Check that the supplied attribute mask + * contains all required attributes and no attributes not allowed for + * the given QP state transition. + * @cur_state: Current QP state + * @next_state: Next QP state + * @type: QP type + * @mask: Mask of supplied QP attributes + * + * This function is a helper function that a low-level driver's + * modify_qp method can use to validate the consumer's input. It + * checks that cur_state and next_state are valid QP states, that a + * transition from cur_state to next_state is allowed by the IB spec, + * and that the attribute mask supplied is allowed for the transition. + */ +int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, + enum ib_qp_type type, enum ib_qp_attr_mask mask); + int ib_register_event_handler (struct ib_event_handler *event_handler); int ib_unregister_event_handler(struct ib_event_handler *event_handler); void ib_dispatch_event(struct ib_event *event); @@ -1078,7 +1100,9 @@ int ib_destroy_ah(struct ib_ah *ah); * ib_create_srq - Creates a SRQ associated with the specified protection * domain. * @pd: The protection domain associated with the SRQ. - * @srq_init_attr: A list of initial attributes required to create the SRQ. + * @srq_init_attr: A list of initial attributes required to create the + * SRQ. If SRQ creation succeeds, then the attributes are updated to + * the actual capabilities of the created SRQ. * * srq_attr->max_wr and srq_attr->max_sge are read the determine the * requested size of the SRQ, and set to the actual values allocated @@ -1137,7 +1161,9 @@ static inline int ib_post_srq_recv(struct ib_srq *srq, * ib_create_qp - Creates a QP associated with the specified protection * domain. * @pd: The protection domain associated with the QP. - * @qp_init_attr: A list of initial attributes required to create the QP. + * @qp_init_attr: A list of initial attributes required to create the + * QP. If QP creation succeeds, then the attributes are updated to + * the actual capabilities of the created QP. */ struct ib_qp *ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr); |