diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-10-13 11:05:51 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-10-13 11:05:51 +0200 |
commit | accba5f3965d6a9d1bf7c1e1a7995d17e9d521b6 (patch) | |
tree | 8fb40782e79472ed882ff2098d4dd295557278ee /drivers/infiniband | |
parent | 6852fd9b86d05063c6ef49d2e12e061cc7f6a105 (diff) | |
parent | 4480f15b3306f43bbb0310d461142b4e897ca45b (diff) |
Merge branch 'linus' into oprofile-v2
Conflicts:
arch/x86/kernel/apic_32.c
arch/x86/oprofile/nmi_int.c
include/linux/pci_ids.h
Diffstat (limited to 'drivers/infiniband')
53 files changed, 2355 insertions, 1433 deletions
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 922d35f4fc0..3cab0cedfca 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3748,6 +3748,7 @@ error1: cm_remove_port_fs(port); } device_unregister(cm_dev->device); + kfree(cm_dev); } static void cm_remove_one(struct ib_device *ib_device) @@ -3776,6 +3777,7 @@ static void cm_remove_one(struct ib_device *ib_device) cm_remove_port_fs(port); } device_unregister(cm_dev->device); + kfree(cm_dev); } static int __init ib_cm_init(void) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e980ff3335d..d951896ff7f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -155,9 +155,7 @@ struct cma_multicast { } multicast; struct list_head list; void *context; - struct sockaddr addr; - u8 pad[sizeof(struct sockaddr_in6) - - sizeof(struct sockaddr)]; + struct sockaddr_storage addr; }; struct cma_work { @@ -786,8 +784,8 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv, cma_cancel_route(id_priv); break; case CMA_LISTEN: - if (cma_any_addr(&id_priv->id.route.addr.src_addr) && - !id_priv->cma_dev) + if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr) + && !id_priv->cma_dev) cma_cancel_listens(id_priv); break; default: @@ -1026,7 +1024,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); - ret = rdma_translate_ip(&id->route.addr.src_addr, + ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, &id->route.addr.dev_addr); if (ret) goto destroy_id; @@ -1064,7 +1062,7 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, cma_save_net_info(&id->route.addr, &listen_id->route.addr, ip_ver, port, src, dst); - ret = rdma_translate_ip(&id->route.addr.src_addr, + ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, &id->route.addr.dev_addr); if (ret) goto err; @@ -1377,7 +1375,7 @@ static int cma_ib_listen(struct rdma_id_private *id_priv) if (IS_ERR(id_priv->cm_id.ib)) return PTR_ERR(id_priv->cm_id.ib); - addr = &id_priv->id.route.addr.src_addr; + addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; svc_id = cma_get_service_id(id_priv->id.ps, addr); if (cma_any_addr(addr)) ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL); @@ -1443,7 +1441,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, dev_id_priv->state = CMA_ADDR_BOUND; memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr, - ip_addr_size(&id_priv->id.route.addr.src_addr)); + ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr)); cma_attach_to_dev(dev_id_priv, cma_dev); list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); @@ -1563,13 +1561,14 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr)); path_rec.numb_path = 1; path_rec.reversible = 1; - path_rec.service_id = cma_get_service_id(id_priv->id.ps, &addr->dst_addr); + path_rec.service_id = cma_get_service_id(id_priv->id.ps, + (struct sockaddr *) &addr->dst_addr); comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; - if (addr->src_addr.sa_family == AF_INET) { + if (addr->src_addr.ss_family == AF_INET) { path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); comp_mask |= IB_SA_PATH_REC_QOS_CLASS; } else { @@ -1848,7 +1847,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); - if (cma_zero_addr(&id_priv->id.route.addr.src_addr)) { + if (cma_zero_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)) { src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr; dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr; src_in->sin_family = dst_in->sin_family; @@ -1897,7 +1896,7 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, if (cma_any_addr(dst_addr)) ret = cma_resolve_loopback(id_priv); else - ret = rdma_resolve_ip(&addr_client, &id->route.addr.src_addr, + ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr, dst_addr, &id->route.addr.dev_addr, timeout_ms, addr_handler, id_priv); if (ret) @@ -2021,11 +2020,11 @@ static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv) * We don't support binding to any address if anyone is bound to * a specific address on the same port. */ - if (cma_any_addr(&id_priv->id.route.addr.src_addr)) + if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)) return -EADDRNOTAVAIL; hlist_for_each_entry(cur_id, node, &bind_list->owners, node) { - if (cma_any_addr(&cur_id->id.route.addr.src_addr)) + if (cma_any_addr((struct sockaddr *) &cur_id->id.route.addr.src_addr)) return -EADDRNOTAVAIL; cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr; @@ -2060,7 +2059,7 @@ static int cma_get_port(struct rdma_id_private *id_priv) } mutex_lock(&lock); - if (cma_any_port(&id_priv->id.route.addr.src_addr)) + if (cma_any_port((struct sockaddr *) &id_priv->id.route.addr.src_addr)) ret = cma_alloc_any_port(ps, id_priv); else ret = cma_use_port(ps, id_priv); @@ -2232,7 +2231,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, req.path = route->path_rec; req.service_id = cma_get_service_id(id_priv->id.ps, - &route->addr.dst_addr); + (struct sockaddr *) &route->addr.dst_addr); req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); req.max_cm_retries = CMA_MAX_CM_RETRIES; @@ -2283,7 +2282,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, req.alternate_path = &route->path_rec[1]; req.service_id = cma_get_service_id(id_priv->id.ps, - &route->addr.dst_addr); + (struct sockaddr *) &route->addr.dst_addr); req.qp_num = id_priv->qp_num; req.qp_type = IB_QPT_RC; req.starting_psn = id_priv->seq_num; @@ -2667,7 +2666,7 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, if (ret) return ret; - cma_set_mgid(id_priv, &mc->addr, &rec.mgid); + cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); if (id_priv->id.ps == RDMA_PS_UDP) rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); ib_addr_get_sgid(dev_addr, &rec.port_gid); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 1adf2efd3cb..49c45feccd5 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1697,9 +1697,8 @@ static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv, u8 port_num = mad_agent_priv->agent.port_num; u8 lmc; - send_resp = ((struct ib_mad *)(wr->send_buf.mad))-> - mad_hdr.method & IB_MGMT_METHOD_RESP; - rcv_resp = rwc->recv_buf.mad->mad_hdr.method & IB_MGMT_METHOD_RESP; + send_resp = ib_response_mad((struct ib_mad *)wr->send_buf.mad); + rcv_resp = ib_response_mad(rwc->recv_buf.mad); if (send_resp == rcv_resp) /* both requests, or both responses. GIDs different */ diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index d0ef7d61c03..3af2b84cd83 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -133,7 +133,7 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv, msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, recv_wc->wc->pkey_index, 1, hdr_len, 0, GFP_KERNEL); - if (!msg) + if (IS_ERR(msg)) return; format_ack(msg, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv); diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 9494005d1c9..e603736682b 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -43,7 +43,6 @@ #include <linux/cdev.h> #include <linux/idr.h> #include <linux/mutex.h> -#include <linux/smp_lock.h> #include <asm/uaccess.h> @@ -1154,11 +1153,18 @@ static unsigned int ib_ucm_poll(struct file *filp, return mask; } +/* + * ib_ucm_open() does not need the BKL: + * + * - no global state is referred to; + * - there is no ioctl method to race against; + * - no further module initialization is required for open to work + * after the device is registered. + */ static int ib_ucm_open(struct inode *inode, struct file *filp) { struct ib_ucm_file *file; - cycle_kernel_lock(); file = kmalloc(sizeof(*file), GFP_KERNEL); if (!file) return -ENOMEM; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 195f97302fe..3ddacf39b7b 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -38,7 +38,6 @@ #include <linux/in.h> #include <linux/in6.h> #include <linux/miscdevice.h> -#include <linux/smp_lock.h> #include <rdma/rdma_user_cm.h> #include <rdma/ib_marshall.h> @@ -82,9 +81,7 @@ struct ucma_multicast { u64 uid; struct list_head list; - struct sockaddr addr; - u8 pad[sizeof(struct sockaddr_in6) - - sizeof(struct sockaddr)]; + struct sockaddr_storage addr; }; struct ucma_event { @@ -604,11 +601,11 @@ static ssize_t ucma_query_route(struct ucma_file *file, return PTR_ERR(ctx); memset(&resp, 0, sizeof resp); - addr = &ctx->cm_id->route.addr.src_addr; + addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)); - addr = &ctx->cm_id->route.addr.dst_addr; + addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)); @@ -914,7 +911,7 @@ static ssize_t ucma_join_multicast(struct ucma_file *file, mc->uid = cmd.uid; memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr); - ret = rdma_join_multicast(ctx->cm_id, &mc->addr, mc); + ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc); if (ret) goto err2; @@ -930,7 +927,7 @@ static ssize_t ucma_join_multicast(struct ucma_file *file, return 0; err3: - rdma_leave_multicast(ctx->cm_id, &mc->addr); + rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr); ucma_cleanup_mc_events(mc); err2: mutex_lock(&mut); @@ -976,7 +973,7 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file, goto out; } - rdma_leave_multicast(mc->ctx->cm_id, &mc->addr); + rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr); mutex_lock(&mc->ctx->file->mut); ucma_cleanup_mc_events(mc); list_del(&mc->list); @@ -1149,6 +1146,14 @@ static unsigned int ucma_poll(struct file *filp, struct poll_table_struct *wait) return mask; } +/* + * ucma_open() does not need the BKL: + * + * - no global state is referred to; + * - there is no ioctl method to race against; + * - no further module initialization is required for open to work + * after the device is registered. + */ static int ucma_open(struct inode *inode, struct file *filp) { struct ucma_file *file; @@ -1157,7 +1162,6 @@ static int ucma_open(struct inode *inode, struct file *filp) if (!file) return -ENOMEM; - lock_kernel(); INIT_LIST_HEAD(&file->event_list); INIT_LIST_HEAD(&file->ctx_list); init_waitqueue_head(&file->poll_wait); @@ -1165,7 +1169,6 @@ static int ucma_open(struct inode *inode, struct file *filp) filp->private_data = file; file->filp = filp; - unlock_kernel(); return 0; } diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index 2acf9b62cf9..69580e282af 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -272,7 +272,6 @@ static struct ib_qp *c2_create_qp(struct ib_pd *pd, pr_debug("%s: Invalid QP type: %d\n", __func__, init_attr->qp_type); return ERR_PTR(-EINVAL); - break; } if (err) { diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index f6d5747153a..4dcf08b3fd8 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -725,9 +725,9 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid)); BUG_ON(page_size >= 28); tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) | - F_TPT_MW_BIND_ENABLE | - V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) | - V_TPT_PAGE_SIZE(page_size)); + ((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) | + V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) | + V_TPT_PAGE_SIZE(page_size)); tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 : cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3)); tpt.len = cpu_to_be32(len); diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index b89640aa6e1..ecff9804358 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1155,13 +1155,11 @@ static int iwch_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { PDBG("%s ibdev %p\n", __func__, ibdev); + + memset(props, 0, sizeof(struct ib_port_attr)); props->max_mtu = IB_MTU_4096; - props->lid = 0; - props->lmc = 0; - props->sm_lid = 0; - props->sm_sl = 0; + props->active_mtu = IB_MTU_2048; props->state = IB_PORT_ACTIVE; - props->phys_state = 0; props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_SNMP_TUNNEL_SUP | @@ -1170,7 +1168,6 @@ static int iwch_query_port(struct ib_device *ibdev, IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; props->pkey_tbl_len = 1; - props->qkey_viol_cntr = 0; props->active_width = 2; props->active_speed = 2; props->max_msg_sz = -1; @@ -1187,28 +1184,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); } -static int fw_supports_fastreg(struct iwch_dev *iwch_dev) -{ - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - char *cp, *next; - unsigned fw_maj, fw_min; - - rtnl_lock(); - lldev->ethtool_ops->get_drvinfo(lldev, &info); - rtnl_unlock(); - - next = info.fw_version+1; - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_maj); - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_min); - - PDBG("%s maj %u min %u\n", __func__, fw_maj, fw_min); - - return fw_maj > 6 || (fw_maj == 6 && fw_min > 0); -} - static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf) { struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, @@ -1325,12 +1300,12 @@ int iwch_register_device(struct iwch_dev *dev) memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); dev->ibdev.owner = THIS_MODULE; - dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW; + dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | + IB_DEVICE_MEM_WINDOW | + IB_DEVICE_MEM_MGT_EXTENSIONS; /* cxgb3 supports STag 0. */ dev->ibdev.local_dma_lkey = 0; - if (fw_supports_fastreg(dev)) - dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; dev->ibdev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index f5ceca05c43..a237d49bdcc 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -293,9 +293,16 @@ static inline u32 iwch_ib_to_tpt_access(int acc) return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) | + (acc & IB_ACCESS_MW_BIND ? TPT_MW_BIND : 0) | TPT_LOCAL_READ; } +static inline u32 iwch_ib_to_tpt_bind_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0); +} + enum iwch_mmid_state { IWCH_STAG_STATE_VALID, IWCH_STAG_STATE_INVALID diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 9a3be3a9d5d..3e4585c2318 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -565,7 +565,7 @@ int iwch_bind_mw(struct ib_qp *qp, wqe->bind.type = TPT_VATO; /* TBD: check perms */ - wqe->bind.perms = iwch_ib_to_tpt_access(mw_bind->mw_access_flags); + wqe->bind.perms = iwch_ib_to_tpt_bind_access(mw_bind->mw_access_flags); wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey); wqe->bind.mw_stag = cpu_to_be32(mw->rkey); wqe->bind.mw_len = cpu_to_be32(mw_bind->length); @@ -879,20 +879,13 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, (qhp->attr.mpa_attr.xmit_marker_enabled << 1) | (qhp->attr.mpa_attr.crc_enabled << 2); - /* - * XXX - The IWCM doesn't quite handle getting these - * attrs set before going into RTS. For now, just turn - * them on always... - */ -#if 0 - init_attr.qpcaps = qhp->attr.enableRdmaRead | - (qhp->attr.enableRdmaWrite << 1) | - (qhp->attr.enableBind << 2) | - (qhp->attr.enable_stag0_fastreg << 3) | - (qhp->attr.enable_stag0_fastreg << 4); -#else - init_attr.qpcaps = 0x1f; -#endif + init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE | + uP_RI_QP_RDMA_WRITE_ENABLE | + uP_RI_QP_BIND_ENABLE; + if (!qhp->ibqp.uobject) + init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE | + uP_RI_QP_FAST_REGISTER_ENABLE; + init_attr.tcp_emss = qhp->ep->emss; init_attr.ord = qhp->attr.max_ord; init_attr.ird = qhp->attr.max_ird; @@ -900,8 +893,6 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); init_attr.rqe_count = iwch_rqes_posted(qhp); init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0; - if (!qhp->ibqp.uobject) - init_attr.flags |= PRIV_QP; if (peer2peer) { init_attr.rtr_type = RTR_READ; if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator) diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 0b0618edd64..5d7b7855afb 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -156,6 +156,21 @@ struct ehca_mod_qp_parm { #define EHCA_MOD_QP_PARM_MAX 4 +#define QMAP_IDX_MASK 0xFFFFULL + +/* struct for tracking if cqes have been reported to the application */ +struct ehca_qmap_entry { + u16 app_wr_id; + u16 reported; +}; + +struct ehca_queue_map { + struct ehca_qmap_entry *map; + unsigned int entries; + unsigned int tail; + unsigned int left_to_poll; +}; + struct ehca_qp { union { struct ib_qp ib_qp; @@ -165,7 +180,9 @@ struct ehca_qp { enum ehca_ext_qp_type ext_type; enum ib_qp_state state; struct ipz_queue ipz_squeue; + struct ehca_queue_map sq_map; struct ipz_queue ipz_rqueue; + struct ehca_queue_map rq_map; struct h_galpas galpas; u32 qkey; u32 real_qp_num; @@ -195,6 +212,8 @@ struct ehca_qp { atomic_t nr_events; /* events seen */ wait_queue_head_t wait_completion; int mig_armed; + struct list_head sq_err_node; + struct list_head rq_err_node; }; #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) @@ -224,6 +243,8 @@ struct ehca_cq { /* mmap counter for resources mapped into user space */ u32 mm_count_queue; u32 mm_count_galpa; + struct list_head sqp_err_list; + struct list_head rqp_err_list; }; enum ehca_mr_flag { diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 5540b276a33..33647a95eb9 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -276,6 +276,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, for (i = 0; i < QP_HASHTAB_LEN; i++) INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]); + INIT_LIST_HEAD(&my_cq->sqp_err_list); + INIT_LIST_HEAD(&my_cq->rqp_err_list); + if (context) { struct ipz_queue *ipz_queue = &my_cq->ipz_queue; struct ehca_create_cq_resp resp; diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index a8a2ea585d2..8f7f282ead6 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -197,6 +197,8 @@ void ehca_poll_eqs(unsigned long data); int ehca_calc_ipd(struct ehca_shca *shca, int port, enum ib_rate path_rate, u32 *ipd); +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq); + #ifdef CONFIG_PPC_64K_PAGES void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h index 818803057eb..5d28e3e98a2 100644 --- a/drivers/infiniband/hw/ehca/ehca_qes.h +++ b/drivers/infiniband/hw/ehca/ehca_qes.h @@ -213,6 +213,7 @@ struct ehca_wqe { #define WC_STATUS_ERROR_BIT 0x80000000 #define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 #define WC_STATUS_PURGE_BIT 0x10 +#define WC_SEND_RECEIVE_BIT 0x80 struct ehca_cqe { u64 work_request_id; diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index ea13efddf17..4dbe2870e01 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -396,6 +396,50 @@ static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue, queue->is_small = (queue->page_size != 0); } +/* needs to be called with cq->spinlock held */ +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq) +{ + struct list_head *list, *node; + + /* TODO: support low latency QPs */ + if (qp->ext_type == EQPT_LLQP) + return; + + if (on_sq) { + list = &qp->send_cq->sqp_err_list; + node = &qp->sq_err_node; + } else { + list = &qp->recv_cq->rqp_err_list; + node = &qp->rq_err_node; + } + + if (list_empty(node)) + list_add_tail(node, list); + + return; +} + +static void del_from_err_list(struct ehca_cq *cq, struct list_head *node) +{ + unsigned long flags; + + spin_lock_irqsave(&cq->spinlock, flags); + + if (!list_empty(node)) + list_del_init(node); + + spin_unlock_irqrestore(&cq->spinlock, flags); +} + +static void reset_queue_map(struct ehca_queue_map *qmap) +{ + int i; + + qmap->tail = 0; + for (i = 0; i < qmap->entries; i++) + qmap->map[i].reported = 1; +} + /* * Create an ib_qp struct that is either a QP or an SRQ, depending on * the value of the is_srq parameter. If init_attr and srq_init_attr share @@ -407,7 +451,7 @@ static struct ehca_qp *internal_create_qp( struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata, int is_srq) { - struct ehca_qp *my_qp; + struct ehca_qp *my_qp, *my_srq = NULL; struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); @@ -456,8 +500,7 @@ static struct ehca_qp *internal_create_qp( /* handle SRQ base QPs */ if (init_attr->srq) { - struct ehca_qp *my_srq = - container_of(init_attr->srq, struct ehca_qp, ib_srq); + my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq); has_srq = 1; parms.ext_type = EQPT_SRQBASE; @@ -715,6 +758,19 @@ static struct ehca_qp *internal_create_qp( "and pages ret=%i", ret); goto create_qp_exit2; } + + my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / + my_qp->ipz_squeue.qe_size; + my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->sq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit3; + } + INIT_LIST_HEAD(&my_qp->sq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->sq_map); } if (HAS_RQ(my_qp)) { @@ -724,8 +780,27 @@ static struct ehca_qp *internal_create_qp( if (ret) { ehca_err(pd->device, "Couldn't initialize rqueue " "and pages ret=%i", ret); - goto create_qp_exit3; + goto create_qp_exit4; } + + my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / + my_qp->ipz_rqueue.qe_size; + my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->rq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit5; + } + INIT_LIST_HEAD(&my_qp->rq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->rq_map); + } else if (init_attr->srq) { + /* this is a base QP, use the queue map of the SRQ */ + my_qp->rq_map = my_srq->rq_map; + INIT_LIST_HEAD(&my_qp->rq_err_node); + + my_qp->ipz_rqueue = my_srq->ipz_rqueue; } if (is_srq) { @@ -770,7 +845,7 @@ static struct ehca_qp *internal_create_qp( if (!my_qp->mod_qp_parm) { ehca_err(pd->device, "Could not alloc mod_qp_parm"); - goto create_qp_exit4; + goto create_qp_exit5; } } } @@ -780,7 +855,7 @@ static struct ehca_qp *internal_create_qp( h_ret = ehca_define_sqp(shca, my_qp, init_attr); if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); - goto create_qp_exit5; + goto create_qp_exit6; } } @@ -789,7 +864,7 @@ static struct ehca_qp *internal_create_qp( if (ret) { ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%i", ret); - goto create_qp_exit5; + goto create_qp_exit7; } } @@ -815,22 +890,30 @@ static struct ehca_qp *internal_create_qp( if (ib_copy_to_udata(udata, &resp, sizeof resp)) { ehca_err(pd->device, "Copy to udata failed"); ret = -EINVAL; - goto create_qp_exit6; + goto create_qp_exit8; } } return my_qp; -create_qp_exit6: +create_qp_exit8: ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num); -create_qp_exit5: +create_qp_exit7: kfree(my_qp->mod_qp_parm); -create_qp_exit4: +create_qp_exit6: + if (HAS_RQ(my_qp)) + vfree(my_qp->rq_map.map); + +create_qp_exit5: if (HAS_RQ(my_qp)) ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); +create_qp_exit4: + if (HAS_SQ(my_qp)) + vfree(my_qp->sq_map.map); + create_qp_exit3: if (HAS_SQ(my_qp)) ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); @@ -1021,6 +1104,101 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, return 0; } +static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue, + struct ehca_queue_map *qmap) +{ + void *wqe_v; + u64 q_ofs; + u32 wqe_idx; + + /* convert real to abs address */ + wqe_p = wqe_p & (~(1UL << 63)); + + wqe_v = abs_to_virt(wqe_p); + + if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) { + ehca_gen_err("Invalid offset for calculating left cqes " + "wqe_p=%#lx wqe_v=%p\n", wqe_p, wqe_v); + return -EFAULT; + } + + wqe_idx = q_ofs / ipz_queue->qe_size; + if (wqe_idx < qmap->tail) + qmap->left_to_poll = (qmap->entries - qmap->tail) + wqe_idx; + else + qmap->left_to_poll = wqe_idx - qmap->tail; + + return 0; +} + +static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca) +{ + u64 h_ret; + void *send_wqe_p, *recv_wqe_p; + int ret; + unsigned long flags; + int qp_num = my_qp->ib_qp.qp_num; + + /* this hcall is not supported on base QPs */ + if (my_qp->ext_type != EQPT_SRQBASE) { + /* get send and receive wqe pointer */ + h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, &my_qp->pf, + &send_wqe_p, &recv_wqe_p, 4); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "disable_and_get_wqe() " + "failed ehca_qp=%p qp_num=%x h_ret=%li", + my_qp, qp_num, h_ret); + return ehca2ib_return_code(h_ret); + } + + /* + * acquire lock to ensure that nobody is polling the cq which + * could mean that the qmap->tail pointer is in an + * inconsistent state. + */ + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue, + &my_qp->sq_map); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + if (ret) + return ret; + + + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue, + &my_qp->rq_map); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); + if (ret) + return ret; + } else { + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + my_qp->sq_map.left_to_poll = 0; + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + my_qp->rq_map.left_to_poll = 0; + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); + } + + /* this assures flush cqes being generated only for pending wqes */ + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + ehca_add_to_err_list(my_qp, 1); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + + if (HAS_RQ(my_qp)) { + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + ehca_add_to_err_list(my_qp, 0); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, + flags); + } + } + + return 0; +} + /* * internal_modify_qp with circumvention to handle aqp0 properly * smi_reset2init indicates if this is an internal reset-to-init-call for @@ -1525,17 +1703,32 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit2; } } + if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) { + ret = check_for_left_cqes(my_qp, shca); + if (ret) + goto modify_qp_exit2; + } if (statetrans == IB_QPST_ANY2RESET) { ipz_qeit_reset(&my_qp->ipz_rqueue); ipz_qeit_reset(&my_qp->ipz_squeue); + + if (qp_cur_state == IB_QPS_ERR) { + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + + if (HAS_RQ(my_qp)) + del_from_err_list(my_qp->recv_cq, + &my_qp->rq_err_node); + } + reset_queue_map(&my_qp->sq_map); + + if (HAS_RQ(my_qp)) + reset_queue_map(&my_qp->rq_map); } if (attr_mask & IB_QP_QKEY) my_qp->qkey = attr->qkey; - my_qp->state = qp_new_state; - modify_qp_exit2: if (squeue_locked) { /* this means: sqe -> rts */ spin_unlock_irqrestore(&my_qp->spinlock_s, flags); @@ -1551,6 +1744,8 @@ modify_qp_exit1: int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { + int ret = 0; + struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, ib_device); struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); @@ -1597,12 +1792,18 @@ int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, attr->qp_state, my_qp->init_attr.port_num, ibqp->qp_type); spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - return 0; + goto out; } spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); } - return internal_modify_qp(ibqp, attr, attr_mask, 0); + ret = internal_modify_qp(ibqp, attr, attr_mask, 0); + +out: + if ((ret == 0) && (attr_mask & IB_QP_STATE)) + my_qp->state = attr->qp_state; + + return ret; } void ehca_recover_sqp(struct ib_qp *sqp) @@ -1938,6 +2139,16 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, idr_remove(&ehca_qp_idr, my_qp->token); write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + /* + * SRQs will never get into an error list and do not have a recv_cq, + * so we need to skip them here. + */ + if (HAS_RQ(my_qp) && !IS_SRQ(my_qp)) + del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); + + if (HAS_SQ(my_qp)) + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + /* now wait until all pending events have completed */ wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events)); @@ -1963,7 +2174,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, if (qp_type == IB_QPT_GSI) { struct ib_event event; ehca_info(dev, "device %s: port %x is inactive.", - shca->ib_device.name, port_num); + shca->ib_device.name, port_num); event.device = &shca->ib_device; event.event = IB_EVENT_PORT_ERR; event.element.port_num = port_num; @@ -1971,10 +2182,16 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, ib_dispatch_event(&event); } - if (HAS_RQ(my_qp)) + if (HAS_RQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); - if (HAS_SQ(my_qp)) + + vfree(my_qp->rq_map.map); + } + if (HAS_SQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); + + vfree(my_qp->sq_map.map); + } kmem_cache_free(qp_cache, my_qp); atomic_dec(&shca->num_qps); return 0; diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index dd9bc68f1c7..64928079eaf 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -42,7 +42,7 @@ */ -#include <asm-powerpc/system.h> +#include <asm/system.h> #include "ehca_classes.h" #include "ehca_tools.h" #include "ehca_qes.h" @@ -53,9 +53,25 @@ /* in RC traffic, insert an empty RDMA READ every this many packets */ #define ACK_CIRC_THRESHOLD 2000000 +static u64 replace_wr_id(u64 wr_id, u16 idx) +{ + u64 ret; + + ret = wr_id & ~QMAP_IDX_MASK; + ret |= idx & QMAP_IDX_MASK; + + return ret; +} + +static u16 get_app_wr_id(u64 wr_id) +{ + return wr_id & QMAP_IDX_MASK; +} + static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, struct ehca_wqe *wqe_p, - struct ib_recv_wr *recv_wr) + struct ib_recv_wr *recv_wr, + u32 rq_map_idx) { u8 cnt_ds; if (unlikely((recv_wr->num_sge < 0) || @@ -69,7 +85,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, /* clear wqe header until sglist */ memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - wqe_p->work_request_id = recv_wr->wr_id; + wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx); wqe_p->nr_of_data_seg = recv_wr->num_sge; for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { @@ -139,12 +155,14 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) static inline int ehca_write_swqe(struct ehca_qp *qp, struct ehca_wqe *wqe_p, const struct ib_send_wr *send_wr, + u32 sq_map_idx, int hidden) { u32 idx; u64 dma_length; struct ehca_av *my_av; u32 remote_qkey = send_wr->wr.ud.remote_qkey; + struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx]; if (unlikely((send_wr->num_sge < 0) || (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { @@ -157,7 +175,10 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, /* clear wqe header until sglist */ memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - wqe_p->work_request_id = send_wr->wr_id; + wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx); + + qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id); + qmap_entry->reported = 0; switch (send_wr->opcode) { case IB_WR_SEND: @@ -381,6 +402,7 @@ static inline int post_one_send(struct ehca_qp *my_qp, { struct ehca_wqe *wqe_p; int ret; + u32 sq_map_idx; u64 start_offset = my_qp->ipz_squeue.current_q_offset; /* get pointer next to free WQE */ @@ -393,8 +415,15 @@ static inline int post_one_send(struct ehca_qp *my_qp, "qp_num=%x", my_qp->ib_qp.qp_num); return -ENOMEM; } + + /* + * Get the index of the WQE in the send queue. The same index is used + * for writing into the sq_map. + */ + sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size; + /* write a SEND WQE into the QUEUE */ - ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, hidden); + ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden); /* * if something failed, * reset the free entry pointer to the start value @@ -483,7 +512,9 @@ static int internal_post_recv(struct ehca_qp *my_qp, struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; + u32 rq_map_idx; unsigned long flags; + struct ehca_qmap_entry *qmap_entry; if (unlikely(!HAS_RQ(my_qp))) { ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", @@ -511,8 +542,15 @@ static int internal_post_recv(struct ehca_qp *my_qp, } goto post_recv_exit0; } + /* + * Get the index of the WQE in the recv queue. The same index + * is used for writing into the rq_map. + */ + rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size; + /* write a RECV WQE into the QUEUE */ - ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr); + ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr, + rq_map_idx); /* * if something failed, * reset the free entry pointer to the start value @@ -527,6 +565,11 @@ static int internal_post_recv(struct ehca_qp *my_qp, } goto post_recv_exit0; } + + qmap_entry = &my_qp->rq_map.map[rq_map_idx]; + qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id); + qmap_entry->reported = 0; + wqe_cnt++; } /* eof for cur_recv_wr */ @@ -583,13 +626,15 @@ static const u8 ib_wc_opcode[255] = { /* internal function to poll one entry of cq */ static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) { - int ret = 0; + int ret = 0, qmap_tail_idx; struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); struct ehca_cqe *cqe; struct ehca_qp *my_qp; + struct ehca_qmap_entry *qmap_entry; + struct ehca_queue_map *qmap; int cqe_count = 0, is_error; -poll_cq_one_read_cqe: +repoll: cqe = (struct ehca_cqe *) ipz_qeit_get_inc_valid(&my_cq->ipz_queue); if (!cqe) { @@ -617,7 +662,7 @@ poll_cq_one_read_cqe: ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x", my_cq->cq_number, cqe->local_qp_number); /* ignore this purged cqe */ - goto poll_cq_one_read_cqe; + goto repoll; } spin_lock_irqsave(&qp->spinlock_s, flags); purgeflag = qp->sqerr_purgeflag; @@ -636,7 +681,7 @@ poll_cq_one_read_cqe: * that caused sqe and turn off purge flag */ qp->sqerr_purgeflag = 0; - goto poll_cq_one_read_cqe; + goto repoll; } } @@ -654,8 +699,59 @@ poll_cq_one_read_cqe: my_cq, my_cq->cq_number); } - /* we got a completion! */ - wc->wr_id = cqe->work_request_id; + read_lock(&ehca_qp_idr_lock); + my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); + read_unlock(&ehca_qp_idr_lock); + if (!my_qp) + goto repoll; + wc->qp = &my_qp->ib_qp; + + if (is_error) { + /* + * set left_to_poll to 0 because in error state, we will not + * get any additional CQEs + */ + ehca_add_to_err_list(my_qp, 1); + my_qp->sq_map.left_to_poll = 0; + + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + my_qp->rq_map.left_to_poll = 0; + } + + qmap_tail_idx = get_app_wr_id(cqe->work_request_id); + if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) + /* We got a send completion. */ + qmap = &my_qp->sq_map; + else + /* We got a receive completion. */ + qmap = &my_qp->rq_map; + + qmap_entry = &qmap->map[qmap_tail_idx]; + if (qmap_entry->reported) { + ehca_warn(cq->device, "Double cqe on qp_num=%#x", + my_qp->real_qp_num); + /* found a double cqe, discard it and read next one */ + goto repoll; + } + + wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id); + qmap_entry->reported = 1; + + /* this is a proper completion, we need to advance the tail pointer */ + if (++qmap->tail == qmap->entries) + qmap->tail = 0; + + /* if left_to_poll is decremented to 0, add the QP to the error list */ + if (qmap->left_to_poll > 0) { + qmap->left_to_poll--; + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + ehca_add_to_err_list(my_qp, 1); + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + } + } /* eval ib_wc_opcode */ wc->opcode = ib_wc_opcode[cqe->optype]-1; @@ -667,7 +763,7 @@ poll_cq_one_read_cqe: ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", my_cq, my_cq->cq_number); /* update also queue adder to throw away this entry!!! */ - goto poll_cq_one_exit0; + goto repoll; } /* eval ib_wc_status */ @@ -678,11 +774,6 @@ poll_cq_one_read_cqe: } else wc->status = IB_WC_SUCCESS; - read_lock(&ehca_qp_idr_lock); - my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); - wc->qp = &my_qp->ib_qp; - read_unlock(&ehca_qp_idr_lock); - wc->byte_len = cqe->nr_bytes_transferred; wc->pkey_index = cqe->pkey_index; wc->slid = cqe->rlid; @@ -699,13 +790,88 @@ poll_cq_one_exit0: return ret; } +static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq, + struct ib_wc *wc, int num_entries, + struct ipz_queue *ipz_queue, int on_sq) +{ + int nr = 0; + struct ehca_wqe *wqe; + u64 offset; + struct ehca_queue_map *qmap; + struct ehca_qmap_entry *qmap_entry; + + if (on_sq) + qmap = &my_qp->sq_map; + else + qmap = &my_qp->rq_map; + + qmap_entry = &qmap->map[qmap->tail]; + + while ((nr < num_entries) && (qmap_entry->reported == 0)) { + /* generate flush CQE */ + memset(wc, 0, sizeof(*wc)); + + offset = qmap->tail * ipz_queue->qe_size; + wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset); + if (!wqe) { + ehca_err(cq->device, "Invalid wqe offset=%#lx on " + "qp_num=%#x", offset, my_qp->real_qp_num); + return nr; + } + + wc->wr_id = replace_wr_id(wqe->work_request_id, + qmap_entry->app_wr_id); + + if (on_sq) { + switch (wqe->optype) { + case WQE_OPTYPE_SEND: + wc->opcode = IB_WC_SEND; + break; + case WQE_OPTYPE_RDMAWRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case WQE_OPTYPE_RDMAREAD: + wc->opcode = IB_WC_RDMA_READ; + break; + default: + ehca_err(cq->device, "Invalid optype=%x", + wqe->optype); + return nr; + } + } else + wc->opcode = IB_WC_RECV; + + if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) { + wc->ex.imm_data = wqe->immediate_data; + wc->wc_flags |= IB_WC_WITH_IMM; + } + + wc->status = IB_WC_WR_FLUSH_ERR; + + wc->qp = &my_qp->ib_qp; + + /* mark as reported and advance tail pointer */ + qmap_entry->reported = 1; + if (++qmap->tail == qmap->entries) + qmap->tail = 0; + qmap_entry = &qmap->map[qmap->tail]; + + wc++; nr++; + } + + return nr; + +} + int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) { struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); int nr; + struct ehca_qp *err_qp; struct ib_wc *current_wc = wc; int ret = 0; unsigned long flags; + int entries_left = num_entries; if (num_entries < 1) { ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " @@ -715,15 +881,40 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) } spin_lock_irqsave(&my_cq->spinlock, flags); - for (nr = 0; nr < num_entries; nr++) { + + /* generate flush cqes for send queues */ + list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_squeue, 1); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + /* generate flush cqes for receive queues */ + list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_rqueue, 0); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + for (nr = 0; nr < entries_left; nr++) { ret = ehca_poll_cq_one(cq, current_wc); if (ret) break; current_wc++; } /* eof for nr */ + entries_left -= nr; + spin_unlock_irqrestore(&my_cq->spinlock, flags); if (ret == -EAGAIN || !ret) - ret = nr; + ret = num_entries - entries_left; poll_cq_exit0: return ret; diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h index ec950bf8c47..21f7d06f14a 100644 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -54,7 +54,6 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/vmalloc.h> -#include <linux/version.h> #include <linux/notifier.h> #include <linux/cpu.h> #include <linux/device.h> diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index daad09a4591..ad0aab60b05 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1259,7 +1259,7 @@ reloop: */ ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf" " %x, len %x hdrq+%x rhf: %Lx\n", - etail, tlen, l, + etail, tlen, l, (unsigned long long) le64_to_cpu(*(__le64 *) rhf_addr)); if (ipath_debug & __IPATH_ERRPKTDBG) { u32 j, *d, dw = rsize-2; @@ -1457,7 +1457,8 @@ static void ipath_reset_availshadow(struct ipath_devdata *dd) 0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */ if (oldval != dd->ipath_pioavailshadow[i]) ipath_dbg("shadow[%d] was %Lx, now %lx\n", - i, oldval, dd->ipath_pioavailshadow[i]); + i, (unsigned long long) oldval, + dd->ipath_pioavailshadow[i]); } spin_unlock_irqrestore(&ipath_pioavail_lock, flags); } diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 23faba9d21e..8bb5170b4e4 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -31,7 +31,6 @@ * SOFTWARE. */ -#include <linux/version.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/mount.h> diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c index fb70712ac85..9839e20119b 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba7220.c +++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c @@ -528,7 +528,7 @@ static const struct ipath_cregs ipath_7220_cregs = { static char int_type[16] = "auto"; module_param_string(interrupt_type, int_type, sizeof(int_type), 0444); -MODULE_PARM_DESC(int_type, " interrupt_type=auto|force_msi|force_intx\n"); +MODULE_PARM_DESC(int_type, " interrupt_type=auto|force_msi|force_intx"); /* packet rate matching delay; chip has support */ static u8 rate_to_delay[2][2] = { @@ -1032,7 +1032,7 @@ static int ipath_7220_bringup_serdes(struct ipath_devdata *dd) ipath_cdbg(VERBOSE, "done: xgxs=%llx from %llx\n", (unsigned long long) ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig), - prev_val); + (unsigned long long) prev_val); guid = be64_to_cpu(dd->ipath_guid); @@ -1042,7 +1042,8 @@ static int ipath_7220_bringup_serdes(struct ipath_devdata *dd) ipath_dbg("No GUID for heartbeat, faking %llx\n", (unsigned long long)guid); } else - ipath_cdbg(VERBOSE, "Wrote %llX to HRTBT_GUID\n", guid); + ipath_cdbg(VERBOSE, "Wrote %llX to HRTBT_GUID\n", + (unsigned long long) guid); ipath_write_kreg(dd, dd->ipath_kregs->kr_hrtbt_guid, guid); return ret; } @@ -1719,7 +1720,7 @@ static void ipath_7220_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr, "not 2KB aligned!\n", pa); return; } - if (pa >= (1UL << IBA7220_TID_SZ_SHIFT)) { + if (chippa >= (1UL << IBA7220_TID_SZ_SHIFT)) { ipath_dev_err(dd, "BUG: Physical page address 0x%lx " "larger than supported\n", pa); @@ -2505,7 +2506,7 @@ done: if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) { ipath_dbg("Did not get to DDR INIT (%x) after %Lu msecs\n", ipath_ib_state(dd, dd->ipath_lastibcstat), - jiffies_to_msecs(jiffies)-startms); + (unsigned long long) jiffies_to_msecs(jiffies)-startms); dd->ipath_flags &= ~IPATH_IB_AUTONEG_INPROG; if (dd->ipath_autoneg_tries == IPATH_AUTONEG_TRIES) { dd->ipath_flags |= IPATH_IB_AUTONEG_FAILED; diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 26900b3b7a4..6c21b4b5ec7 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -356,9 +356,10 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, dd->ipath_cregs->cr_iblinkerrrecovcnt); if (linkrecov != dd->ipath_lastlinkrecov) { ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n", - ibcs, ib_linkstate(dd, ibcs), + (unsigned long long) ibcs, + ib_linkstate(dd, ibcs), ipath_ibcstatus_str[ltstate], - linkrecov); + (unsigned long long) linkrecov); /* and no more until active again */ dd->ipath_lastlinkrecov = 0; ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); @@ -1118,9 +1119,11 @@ irqreturn_t ipath_intr(int irq, void *data) if (unlikely(istat & ~dd->ipath_i_bitsextant)) ipath_dev_err(dd, "interrupt with unknown interrupts %Lx set\n", + (unsigned long long) istat & ~dd->ipath_i_bitsextant); else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */ - ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n", istat); + ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n", + (unsigned long long) istat); if (istat & INFINIPATH_I_ERROR) { ipath_stats.sps_errints++; @@ -1128,7 +1131,8 @@ irqreturn_t ipath_intr(int irq, void *data) dd->ipath_kregs->kr_errorstatus); if (!estat) dev_info(&dd->pcidev->dev, "error interrupt (%Lx), " - "but no error bits set!\n", istat); + "but no error bits set!\n", + (unsigned long long) istat); else if (estat == -1LL) /* * should we try clearing all, or hope next read diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 97710522624..7b93cda1a4b 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -675,7 +675,8 @@ static void send_rc_ack(struct ipath_qp *qp) hdr.lrh[0] = cpu_to_be16(lrh0); hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); - hdr.lrh[3] = cpu_to_be16(dd->ipath_lid); + hdr.lrh[3] = cpu_to_be16(dd->ipath_lid | + qp->remote_ah_attr.src_path_bits); ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK); diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index af051f75766..fc0f6d9e603 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -618,7 +618,8 @@ void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp, qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid); + qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid | + qp->remote_ah_attr.src_path_bits); bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index); bth0 |= extra_bytes << 20; ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22)); diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index eaba03273e4..284c9bca517 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -698,7 +698,7 @@ retry: addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr, tx->map_len, DMA_TO_DEVICE); - if (dma_mapping_error(addr)) { + if (dma_mapping_error(&dd->pcidev->dev, addr)) { ret = -EIO; goto unlock; } diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 36aa242c487..729446f56aa 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -267,6 +267,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) u16 lrh0; u16 lid; int ret = 0; + int next_cur; spin_lock_irqsave(&qp->s_lock, flags); @@ -290,8 +291,9 @@ int ipath_make_ud_req(struct ipath_qp *qp) goto bail; wqe = get_swqe_ptr(qp, qp->s_cur); - if (++qp->s_cur >= qp->s_size) - qp->s_cur = 0; + next_cur = qp->s_cur + 1; + if (next_cur >= qp->s_size) + next_cur = 0; /* Construct the header. */ ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; @@ -315,6 +317,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) qp->s_flags |= IPATH_S_WAIT_DMA; goto bail; } + qp->s_cur = next_cur; spin_unlock_irqrestore(&qp->s_lock, flags); ipath_ud_loopback(qp, wqe); spin_lock_irqsave(&qp->s_lock, flags); @@ -323,6 +326,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) } } + qp->s_cur = next_cur; extra_bytes = -wqe->length & 3; nwords = (wqe->length + extra_bytes) >> 2; diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c index 86e016916cd..82d9a0b5ca2 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c @@ -206,7 +206,7 @@ static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd, dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto free_unmap; } @@ -301,7 +301,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd, pages[j], 0, flen, DMA_TO_DEVICE); unsigned long fofs = addr & ~PAGE_MASK; - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto done; } @@ -508,7 +508,7 @@ static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd, if (page) { dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto free_pbc; } diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 55c71882882..eabc4247860 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -340,9 +340,16 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) int acc; int ret; unsigned long flags; + struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd; spin_lock_irqsave(&qp->s_lock, flags); + if (qp->ibqp.qp_type != IB_QPT_SMI && + !(dd->ipath_flags & IPATH_LINKACTIVE)) { + ret = -ENETDOWN; + goto bail; + } + /* Check that state is OK to post send. */ if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) goto bail_inval; @@ -1021,7 +1028,7 @@ static void sdma_complete(void *cookie, int status) struct ipath_verbs_txreq *tx = cookie; struct ipath_qp *qp = tx->qp; struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - unsigned int flags; + unsigned long flags; enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; @@ -1051,7 +1058,7 @@ static void sdma_complete(void *cookie, int status) static void decrement_dma_busy(struct ipath_qp *qp) { - unsigned int flags; + unsigned long flags; if (atomic_dec_and_test(&qp->s_dma_busy)) { spin_lock_irqsave(&qp->s_lock, flags); @@ -1221,7 +1228,7 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp, unsigned flush_wc; u32 control; int ret; - unsigned int flags; + unsigned long flags; piobuf = ipath_getpiobuf(dd, plen, NULL); if (unlikely(piobuf == NULL)) { diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 0b191a4842c..d0866a3636e 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -514,17 +515,17 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe, wc->vendor_err = cqe->vendor_err_syndrome; } -static int mlx4_ib_ipoib_csum_ok(__be32 status, __be16 checksum) +static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum) { - return ((status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4 | - MLX4_CQE_IPOIB_STATUS_IPV4F | - MLX4_CQE_IPOIB_STATUS_IPV4OPT | - MLX4_CQE_IPOIB_STATUS_IPV6 | - MLX4_CQE_IPOIB_STATUS_IPOK)) == - cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4 | - MLX4_CQE_IPOIB_STATUS_IPOK)) && - (status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_UDP | - MLX4_CQE_IPOIB_STATUS_TCP)) && + return ((status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | + MLX4_CQE_STATUS_IPV4F | + MLX4_CQE_STATUS_IPV4OPT | + MLX4_CQE_STATUS_IPV6 | + MLX4_CQE_STATUS_IPOK)) == + cpu_to_be16(MLX4_CQE_STATUS_IPV4 | + MLX4_CQE_STATUS_IPOK)) && + (status & cpu_to_be16(MLX4_CQE_STATUS_UDP | + MLX4_CQE_STATUS_TCP)) && checksum == cpu_to_be16(0xffff); } @@ -581,17 +582,17 @@ repoll: } if (!*cur_qp || - (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (*cur_qp)->mqp.qpn) { + (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) != (*cur_qp)->mqp.qpn) { /* * We do not have to take the QP table lock here, * because CQs will be locked while QPs are removed * from the table. */ mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev, - be32_to_cpu(cqe->my_qpn)); + be32_to_cpu(cqe->vlan_my_qpn)); if (unlikely(!mqp)) { printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n", - cq->mcq.cqn, be32_to_cpu(cqe->my_qpn) & 0xffffff); + cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK); return -EINVAL; } @@ -691,14 +692,13 @@ repoll: } wc->slid = be16_to_cpu(cqe->rlid); - wc->sl = cqe->sl >> 4; + wc->sl = be16_to_cpu(cqe->sl_vid >> 12); g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); wc->src_qp = g_mlpath_rqpn & 0xffffff; wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f; wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0; wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; - wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->ipoib_status, - cqe->checksum); + wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum); } return 0; @@ -766,7 +766,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) */ while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); - if ((be32_to_cpu(cqe->my_qpn) & 0xffffff) == qpn) { + if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) { if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index)); ++nfreed; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 38d6907ab52..a3c2851c054 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index d26a91317d4..6e2b0dc21b6 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index db2086faa4e..87f5c5a87b9 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -203,6 +204,8 @@ struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, if (err) goto err_mr; + mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; + return &mr->ibmr; err_mr: diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 02a99bc4442..baa01deb243 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -901,7 +902,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->mtu_msgmax = (IB_MTU_4096 << 5) | ilog2(dev->dev->caps.max_gso_sz); else - context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; + context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; } else if (attr_mask & IB_QP_PATH_MTU) { if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { printk(KERN_ERR "path MTU (%u) is invalid\n", @@ -1057,6 +1058,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, else sqd_event = 0; + if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + context->rlkey |= (1 << 4); + /* * Before passing a kernel QP to the HW, make sure that the * ownership bits of the send queue are set and the SQ @@ -1341,6 +1345,12 @@ static __be32 convert_access(int acc) static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) { struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); + int i; + + for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i) + wr->wr.fast_reg.page_list->page_list[i] = + cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] | + MLX4_MTT_FLAG_PRESENT); fseg->flags = convert_access(wr->wr.fast_reg.access_flags); fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey); diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 12d6bc6f800..d42565258fb 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h index e2d11be4525..13beedeeef9 100644 --- a/drivers/infiniband/hw/mlx4/user.h +++ b/drivers/infiniband/hw/mlx4/user.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index cc440f90000..65ad359fdf1 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -149,18 +149,10 @@ void mthca_start_catas_poll(struct mthca_dev *dev) ((pci_resource_len(dev->pdev, 0) - 1) & dev->catas_err.addr); - if (!request_mem_region(addr, dev->catas_err.size * 4, - DRV_NAME)) { - mthca_warn(dev, "couldn't request catastrophic error region " - "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); - return; - } - dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4); if (!dev->catas_err.map) { mthca_warn(dev, "couldn't map catastrophic error region " "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); - release_mem_region(addr, dev->catas_err.size * 4); return; } @@ -175,13 +167,8 @@ void mthca_stop_catas_poll(struct mthca_dev *dev) { del_timer_sync(&dev->catas_err.timer); - if (dev->catas_err.map) { + if (dev->catas_err.map) iounmap(dev->catas_err.map); - release_mem_region(pci_resource_start(dev->pdev, 0) + - ((pci_resource_len(dev->pdev, 0) - 1) & - dev->catas_err.addr), - dev->catas_err.size * 4); - } spin_lock_irq(&catas_lock); list_del(&dev->catas_err.list); diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 4e36aa7cb3d..28f0e0c40d7 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -652,27 +652,13 @@ static int mthca_map_reg(struct mthca_dev *dev, { unsigned long base = pci_resource_start(dev->pdev, 0); - if (!request_mem_region(base + offset, size, DRV_NAME)) - return -EBUSY; - *map = ioremap(base + offset, size); - if (!*map) { - release_mem_region(base + offset, size); + if (!*map) return -ENOMEM; - } return 0; } -static void mthca_unmap_reg(struct mthca_dev *dev, unsigned long offset, - unsigned long size, void __iomem *map) -{ - unsigned long base = pci_resource_start(dev->pdev, 0); - - release_mem_region(base + offset, size); - iounmap(map); -} - static int mthca_map_eq_regs(struct mthca_dev *dev) { if (mthca_is_memfree(dev)) { @@ -699,9 +685,7 @@ static int mthca_map_eq_regs(struct mthca_dev *dev) dev->fw.arbel.eq_arm_base) + 4, 4, &dev->eq_regs.arbel.eq_arm)) { mthca_err(dev, "Couldn't map EQ arm register, aborting.\n"); - mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & - dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, - dev->clr_base); + iounmap(dev->clr_base); return -ENOMEM; } @@ -710,12 +694,8 @@ static int mthca_map_eq_regs(struct mthca_dev *dev) MTHCA_EQ_SET_CI_SIZE, &dev->eq_regs.arbel.eq_set_ci_base)) { mthca_err(dev, "Couldn't map EQ CI register, aborting.\n"); - mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) & - dev->fw.arbel.eq_arm_base) + 4, 4, - dev->eq_regs.arbel.eq_arm); - mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & - dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, - dev->clr_base); + iounmap(dev->eq_regs.arbel.eq_arm); + iounmap(dev->clr_base); return -ENOMEM; } } else { @@ -731,8 +711,7 @@ static int mthca_map_eq_regs(struct mthca_dev *dev) &dev->eq_regs.tavor.ecr_base)) { mthca_err(dev, "Couldn't map ecr register, " "aborting.\n"); - mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE, - dev->clr_base); + iounmap(dev->clr_base); return -ENOMEM; } } @@ -744,22 +723,12 @@ static int mthca_map_eq_regs(struct mthca_dev *dev) static void mthca_unmap_eq_regs(struct mthca_dev *dev) { if (mthca_is_memfree(dev)) { - mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & - dev->fw.arbel.eq_set_ci_base, - MTHCA_EQ_SET_CI_SIZE, - dev->eq_regs.arbel.eq_set_ci_base); - mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) & - dev->fw.arbel.eq_arm_base) + 4, 4, - dev->eq_regs.arbel.eq_arm); - mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & - dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, - dev->clr_base); + iounmap(dev->eq_regs.arbel.eq_set_ci_base); + iounmap(dev->eq_regs.arbel.eq_arm); + iounmap(dev->clr_base); } else { - mthca_unmap_reg(dev, MTHCA_ECR_BASE, - MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE, - dev->eq_regs.tavor.ecr_base); - mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE, - dev->clr_base); + iounmap(dev->eq_regs.tavor.ecr_base); + iounmap(dev->clr_base); } } @@ -780,7 +749,7 @@ int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt) return -ENOMEM; dev->eq_table.icm_dma = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(dev->eq_table.icm_dma)) { + if (pci_dma_mapping_error(dev->pdev, dev->eq_table.icm_dma)) { __free_page(dev->eq_table.icm_page); return -ENOMEM; } diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index fb9f91b60f3..52f60f4eea0 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -921,58 +921,6 @@ err_uar_table_free: return err; } -static int mthca_request_regions(struct pci_dev *pdev, int ddr_hidden) -{ - int err; - - /* - * We can't just use pci_request_regions() because the MSI-X - * table is right in the middle of the first BAR. If we did - * pci_request_region and grab all of the first BAR, then - * setting up MSI-X would fail, since the PCI core wants to do - * request_mem_region on the MSI-X vector table. - * - * So just request what we need right now, and request any - * other regions we need when setting up EQs. - */ - if (!request_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, - MTHCA_HCR_SIZE, DRV_NAME)) - return -EBUSY; - - err = pci_request_region(pdev, 2, DRV_NAME); - if (err) - goto err_bar2_failed; - - if (!ddr_hidden) { - err = pci_request_region(pdev, 4, DRV_NAME); - if (err) - goto err_bar4_failed; - } - - return 0; - -err_bar4_failed: - pci_release_region(pdev, 2); - -err_bar2_failed: - release_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, - MTHCA_HCR_SIZE); - - return err; -} - -static void mthca_release_regions(struct pci_dev *pdev, - int ddr_hidden) -{ - if (!ddr_hidden) - pci_release_region(pdev, 4); - - pci_release_region(pdev, 2); - - release_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, - MTHCA_HCR_SIZE); -} - static int mthca_enable_msi_x(struct mthca_dev *mdev) { struct msix_entry entries[3]; @@ -1059,7 +1007,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) if (!(pci_resource_flags(pdev, 4) & IORESOURCE_MEM)) ddr_hidden = 1; - err = mthca_request_regions(pdev, ddr_hidden); + err = pci_request_regions(pdev, DRV_NAME); if (err) { dev_err(&pdev->dev, "Cannot obtain PCI resources, " "aborting.\n"); @@ -1196,7 +1144,7 @@ err_free_dev: ib_dealloc_device(&mdev->ib_dev); err_free_res: - mthca_release_regions(pdev, ddr_hidden); + pci_release_regions(pdev); err_disable_pdev: pci_disable_device(pdev); @@ -1240,8 +1188,7 @@ static void __mthca_remove_one(struct pci_dev *pdev) pci_disable_msix(pdev); ib_dealloc_device(&mdev->ib_dev); - mthca_release_regions(pdev, mdev->mthca_flags & - MTHCA_FLAG_DDR_HIDDEN); + pci_release_regions(pdev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index d2884e77809..a2b04d62b1a 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -70,27 +70,31 @@ int interrupt_mod_interval = 0; /* Interoperability */ int mpa_version = 1; -module_param(mpa_version, int, 0); +module_param(mpa_version, int, 0644); MODULE_PARM_DESC(mpa_version, "MPA version to be used int MPA Req/Resp (0 or 1)"); /* Interoperability */ int disable_mpa_crc = 0; -module_param(disable_mpa_crc, int, 0); +module_param(disable_mpa_crc, int, 0644); MODULE_PARM_DESC(disable_mpa_crc, "Disable checking of MPA CRC"); unsigned int send_first = 0; -module_param(send_first, int, 0); +module_param(send_first, int, 0644); MODULE_PARM_DESC(send_first, "Send RDMA Message First on Active Connection"); unsigned int nes_drv_opt = 0; -module_param(nes_drv_opt, int, 0); +module_param(nes_drv_opt, int, 0644); MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters"); unsigned int nes_debug_level = 0; module_param_named(debug_level, nes_debug_level, uint, 0644); MODULE_PARM_DESC(debug_level, "Enable debug output level"); +unsigned int wqm_quanta = 0x10000; +module_param(wqm_quanta, int, 0644); +MODULE_PARM_DESC(wqm_quanta, "WQM quanta"); + LIST_HEAD(nes_adapter_list); static LIST_HEAD(nes_dev_list); @@ -276,6 +280,7 @@ static void nes_cqp_rem_ref_callback(struct nes_device *nesdev, struct nes_cqp_r } nes_free_resource(nesadapter, nesadapter->allocated_qps, nesqp->hwqp.qp_id); + nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL; kfree(nesqp->allocated_buffer); } @@ -289,7 +294,6 @@ void nes_rem_ref(struct ib_qp *ibqp) struct nes_qp *nesqp; struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; struct nes_hw_cqp_wqe *cqp_wqe; struct nes_cqp_request *cqp_request; u32 opcode; @@ -303,8 +307,6 @@ void nes_rem_ref(struct ib_qp *ibqp) } if (atomic_dec_and_test(&nesqp->refcount)) { - nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL; - /* Destroy the QP */ cqp_request = nes_get_cqp_request(nesdev); if (cqp_request == NULL) { @@ -559,12 +561,32 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i goto bail5; } nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval; + nesdev->nesadapter->wqm_quanta = wqm_quanta; /* nesdev->base_doorbell_index = nesdev->nesadapter->pd_config_base[PCI_FUNC(nesdev->pcidev->devfn)]; */ nesdev->base_doorbell_index = 1; nesdev->doorbell_start = nesdev->nesadapter->doorbell_start; - nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn) % nesdev->nesadapter->port_count; + if (nesdev->nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) { + switch (PCI_FUNC(nesdev->pcidev->devfn) % + nesdev->nesadapter->port_count) { + case 1: + nesdev->mac_index = 2; + break; + case 2: + nesdev->mac_index = 1; + break; + case 3: + nesdev->mac_index = 3; + break; + case 0: + default: + nesdev->mac_index = 0; + } + } else { + nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn) % + nesdev->nesadapter->port_count; + } tasklet_init(&nesdev->dpc_tasklet, nes_dpc, (unsigned long)nesdev); @@ -583,7 +605,7 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i nesdev->int_req = (0x101 << PCI_FUNC(nesdev->pcidev->devfn)) | (1 << (PCI_FUNC(nesdev->pcidev->devfn)+16)); if (PCI_FUNC(nesdev->pcidev->devfn) < 4) { - nesdev->int_req |= (1 << (PCI_FUNC(nesdev->pcidev->devfn)+24)); + nesdev->int_req |= (1 << (PCI_FUNC(nesdev->mac_index)+24)); } /* TODO: This really should be the first driver to load, not function 0 */ @@ -774,14 +796,14 @@ static ssize_t nes_show_adapter(struct device_driver *ddp, char *buf) list_for_each_entry(nesdev, &nes_dev_list, list) { if (i == ee_flsh_adapter) { - devfn = nesdev->nesadapter->devfn; - bus_number = nesdev->nesadapter->bus_number; + devfn = nesdev->pcidev->devfn; + bus_number = nesdev->pcidev->bus->number; break; } i++; } - return snprintf(buf, PAGE_SIZE, "%x:%x", bus_number, devfn); + return snprintf(buf, PAGE_SIZE, "%x:%x\n", bus_number, devfn); } static ssize_t nes_store_adapter(struct device_driver *ddp, @@ -1052,6 +1074,55 @@ static ssize_t nes_store_idx_data(struct device_driver *ddp, return strnlen(buf, count); } + +/** + * nes_show_wqm_quanta + */ +static ssize_t nes_show_wqm_quanta(struct device_driver *ddp, char *buf) +{ + u32 wqm_quanta_value = 0xdead; + u32 i = 0; + struct nes_device *nesdev; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + wqm_quanta_value = nesdev->nesadapter->wqm_quanta; + break; + } + i++; + } + + return snprintf(buf, PAGE_SIZE, "0x%X\n", wqm_quanta); +} + + +/** + * nes_store_wqm_quanta + */ +static ssize_t nes_store_wqm_quanta(struct device_driver *ddp, + const char *buf, size_t count) +{ + unsigned long wqm_quanta_value; + u32 wqm_config1; + u32 i = 0; + struct nes_device *nesdev; + + strict_strtoul(buf, 0, &wqm_quanta_value); + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + nesdev->nesadapter->wqm_quanta = wqm_quanta_value; + wqm_config1 = nes_read_indexed(nesdev, + NES_IDX_WQM_CONFIG1); + nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1, + ((wqm_quanta_value << 1) | + (wqm_config1 & 0x00000001))); + break; + } + i++; + } + return strnlen(buf, count); +} + static DRIVER_ATTR(adapter, S_IRUSR | S_IWUSR, nes_show_adapter, nes_store_adapter); static DRIVER_ATTR(eeprom_cmd, S_IRUSR | S_IWUSR, @@ -1070,6 +1141,8 @@ static DRIVER_ATTR(idx_addr, S_IRUSR | S_IWUSR, nes_show_idx_addr, nes_store_idx_addr); static DRIVER_ATTR(idx_data, S_IRUSR | S_IWUSR, nes_show_idx_data, nes_store_idx_data); +static DRIVER_ATTR(wqm_quanta, S_IRUSR | S_IWUSR, + nes_show_wqm_quanta, nes_store_wqm_quanta); static int nes_create_driver_sysfs(struct pci_driver *drv) { @@ -1083,6 +1156,7 @@ static int nes_create_driver_sysfs(struct pci_driver *drv) error |= driver_create_file(&drv->driver, &driver_attr_nonidx_data); error |= driver_create_file(&drv->driver, &driver_attr_idx_addr); error |= driver_create_file(&drv->driver, &driver_attr_idx_data); + error |= driver_create_file(&drv->driver, &driver_attr_wqm_quanta); return error; } @@ -1097,6 +1171,7 @@ static void nes_remove_driver_sysfs(struct pci_driver *drv) driver_remove_file(&drv->driver, &driver_attr_nonidx_data); driver_remove_file(&drv->driver, &driver_attr_idx_addr); driver_remove_file(&drv->driver, &driver_attr_idx_data); + driver_remove_file(&drv->driver, &driver_attr_wqm_quanta); } /** diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 39bd897b40c..1595dc7bba9 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -43,7 +43,6 @@ #include <linux/dma-mapping.h> #include <linux/workqueue.h> #include <linux/slab.h> -#include <linux/version.h> #include <asm/io.h> #include <linux/crc32c.h> @@ -170,7 +169,7 @@ extern int disable_mpa_crc; extern unsigned int send_first; extern unsigned int nes_drv_opt; extern unsigned int nes_debug_level; - +extern unsigned int wqm_quanta; extern struct list_head nes_adapter_list; extern atomic_t cm_connects; diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 6aa531d5276..2caf9da81ad 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -52,7 +52,7 @@ #include <linux/random.h> #include <linux/list.h> #include <linux/threads.h> - +#include <net/arp.h> #include <net/neighbour.h> #include <net/route.h> #include <net/ip_fib.h> @@ -74,36 +74,59 @@ atomic_t cm_nodes_destroyed; atomic_t cm_accel_dropped_pkts; atomic_t cm_resets_recvd; -static inline int mini_cm_accelerated(struct nes_cm_core *, struct nes_cm_node *); +static inline int mini_cm_accelerated(struct nes_cm_core *, + struct nes_cm_node *); static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *, - struct nes_vnic *, struct nes_cm_info *); -static int add_ref_cm_node(struct nes_cm_node *); -static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *); + struct nes_vnic *, struct nes_cm_info *); static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *); -static struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *, - void *, u32, void *, u32, u8); -static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node); - static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *, - struct nes_vnic *, - struct ietf_mpa_frame *, - struct nes_cm_info *); + struct nes_vnic *, u16, void *, struct nes_cm_info *); +static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *); static int mini_cm_accept(struct nes_cm_core *, struct ietf_mpa_frame *, - struct nes_cm_node *); + struct nes_cm_node *); static int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *, - struct nes_cm_node *); -static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *); -static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, - struct sk_buff *); + struct nes_cm_node *); +static void mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, + struct sk_buff *); static int mini_cm_dealloc_core(struct nes_cm_core *); static int mini_cm_get(struct nes_cm_core *); static int mini_cm_set(struct nes_cm_core *, u32, u32); + +static struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *, + void *, u32, void *, u32, u8); +static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node); +static int add_ref_cm_node(struct nes_cm_node *); +static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *); + static int nes_cm_disconn_true(struct nes_qp *); static int nes_cm_post_event(struct nes_cm_event *event); static int nes_disconnect(struct nes_qp *nesqp, int abrupt); static void nes_disconnect_worker(struct work_struct *work); -static int send_ack(struct nes_cm_node *cm_node); + +static int send_mpa_request(struct nes_cm_node *, struct sk_buff *); +static int send_syn(struct nes_cm_node *, u32, struct sk_buff *); +static int send_reset(struct nes_cm_node *, struct sk_buff *); +static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb); static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb); +static void process_packet(struct nes_cm_node *, struct sk_buff *, + struct nes_cm_core *); + +static void active_open_err(struct nes_cm_node *, struct sk_buff *, int); +static void passive_open_err(struct nes_cm_node *, struct sk_buff *, int); +static void cleanup_retrans_entry(struct nes_cm_node *); +static void handle_rcv_mpa(struct nes_cm_node *, struct sk_buff *, + enum nes_cm_event_type); +static void free_retrans_entry(struct nes_cm_node *cm_node); +static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, + struct sk_buff *skb, int optionsize, int passive); + +/* CM event handler functions */ +static void cm_event_connected(struct nes_cm_event *); +static void cm_event_connect_error(struct nes_cm_event *); +static void cm_event_reset(struct nes_cm_event *); +static void cm_event_mpa_req(struct nes_cm_event *); + +static void print_core(struct nes_cm_core *core); /* External CM API Interface */ /* instance of function pointers for client API */ @@ -158,11 +181,11 @@ static struct nes_cm_event *create_event(struct nes_cm_node *cm_node, event->cm_info.loc_port = cm_node->loc_port; event->cm_info.cm_id = cm_node->cm_id; - nes_debug(NES_DBG_CM, "Created event=%p, type=%u, dst_addr=%08x[%x]," - " src_addr=%08x[%x]\n", - event, type, - event->cm_info.loc_addr, event->cm_info.loc_port, - event->cm_info.rem_addr, event->cm_info.rem_port); + nes_debug(NES_DBG_CM, "cm_node=%p Created event=%p, type=%u, " + "dst_addr=%08x[%x], src_addr=%08x[%x]\n", + cm_node, event, type, event->cm_info.loc_addr, + event->cm_info.loc_port, event->cm_info.rem_addr, + event->cm_info.rem_port); nes_cm_post_event(event); return event; @@ -172,14 +195,11 @@ static struct nes_cm_event *create_event(struct nes_cm_node *cm_node, /** * send_mpa_request */ -static int send_mpa_request(struct nes_cm_node *cm_node) +static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb) { - struct sk_buff *skb; int ret; - - skb = get_free_pkt(cm_node); if (!skb) { - nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + nes_debug(NES_DBG_CM, "skb set to NULL\n"); return -1; } @@ -188,9 +208,8 @@ static int send_mpa_request(struct nes_cm_node *cm_node) cm_node->mpa_frame_size, SET_ACK); ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); - if (ret < 0) { + if (ret < 0) return ret; - } return 0; } @@ -229,46 +248,12 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 len) /** - * handle_exception_pkt - process an exception packet. - * We have been in a TSA state, and we have now received SW - * TCP/IP traffic should be a FIN request or IP pkt with options - */ -static int handle_exception_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb) -{ - int ret = 0; - struct tcphdr *tcph = tcp_hdr(skb); - - /* first check to see if this a FIN pkt */ - if (tcph->fin) { - /* we need to ACK the FIN request */ - send_ack(cm_node); - - /* check which side we are (client/server) and set next state accordingly */ - if (cm_node->tcp_cntxt.client) - cm_node->state = NES_CM_STATE_CLOSING; - else { - /* we are the server side */ - cm_node->state = NES_CM_STATE_CLOSE_WAIT; - /* since this is a self contained CM we don't wait for */ - /* an APP to close us, just send final FIN immediately */ - ret = send_fin(cm_node, NULL); - cm_node->state = NES_CM_STATE_LAST_ACK; - } - } else { - ret = -EINVAL; - } - - return ret; -} - - -/** * form_cm_frame - get a free packet and build empty frame Use * node info to build. */ -static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm_node, - void *options, u32 optionsize, void *data, - u32 datasize, u8 flags) +static struct sk_buff *form_cm_frame(struct sk_buff *skb, + struct nes_cm_node *cm_node, void *options, u32 optionsize, + void *data, u32 datasize, u8 flags) { struct tcphdr *tcph; struct iphdr *iph; @@ -332,10 +317,12 @@ static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm cm_node->tcp_cntxt.loc_seq_num++; tcph->syn = 1; } else - cm_node->tcp_cntxt.loc_seq_num += datasize; /* data (no headers) */ + cm_node->tcp_cntxt.loc_seq_num += datasize; - if (flags & SET_FIN) + if (flags & SET_FIN) { + cm_node->tcp_cntxt.loc_seq_num++; tcph->fin = 1; + } if (flags & SET_RST) tcph->rst = 1; @@ -389,7 +376,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, int close_when_complete) { unsigned long flags; - struct nes_cm_core *cm_core; + struct nes_cm_core *cm_core = cm_node->cm_core; struct nes_timer_entry *new_send; int ret = 0; u32 was_timer_set; @@ -411,7 +398,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, new_send->close_when_complete = close_when_complete; if (type == NES_TIMER_TYPE_CLOSE) { - new_send->timetosend += (HZ/2); /* TODO: decide on the correct value here */ + new_send->timetosend += (HZ/10); spin_lock_irqsave(&cm_node->recv_list_lock, flags); list_add_tail(&new_send->list, &cm_node->recv_list); spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); @@ -420,36 +407,28 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, if (type == NES_TIMER_TYPE_SEND) { new_send->seq_num = ntohl(tcp_hdr(skb)->seq); atomic_inc(&new_send->skb->users); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + cm_node->send_entry = new_send; + add_ref_cm_node(cm_node); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + new_send->timetosend = jiffies + NES_RETRY_TIMEOUT; ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev); if (ret != NETDEV_TX_OK) { - nes_debug(NES_DBG_CM, "Error sending packet %p (jiffies = %lu)\n", - new_send, jiffies); + nes_debug(NES_DBG_CM, "Error sending packet %p " + "(jiffies = %lu)\n", new_send, jiffies); atomic_dec(&new_send->skb->users); new_send->timetosend = jiffies; } else { cm_packets_sent++; if (!send_retrans) { + cleanup_retrans_entry(cm_node); if (close_when_complete) - rem_ref_cm_node(cm_node->cm_core, cm_node); - dev_kfree_skb_any(new_send->skb); - kfree(new_send); + rem_ref_cm_node(cm_core, cm_node); return ret; } - new_send->timetosend = jiffies + NES_RETRY_TIMEOUT; } - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - list_add_tail(&new_send->list, &cm_node->retrans_list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - } - if (type == NES_TIMER_TYPE_RECV) { - new_send->seq_num = ntohl(tcp_hdr(skb)->seq); - new_send->timetosend = jiffies; - spin_lock_irqsave(&cm_node->recv_list_lock, flags); - list_add_tail(&new_send->list, &cm_node->recv_list); - spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); } - cm_core = cm_node->cm_core; was_timer_set = timer_pending(&cm_core->tcp_timer); @@ -476,23 +455,27 @@ static void nes_cm_timer_tick(unsigned long pass) struct list_head *list_node, *list_node_temp; struct nes_cm_core *cm_core = g_cm_core; struct nes_qp *nesqp; - struct sk_buff *skb; u32 settimer = 0; int ret = NETDEV_TX_OK; - int node_done; + enum nes_cm_node_state last_state; spin_lock_irqsave(&cm_core->ht_lock, flags); - list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) { + list_for_each_safe(list_node, list_core_temp, + &cm_core->connected_nodes) { cm_node = container_of(list_node, struct nes_cm_node, list); add_ref_cm_node(cm_node); spin_unlock_irqrestore(&cm_core->ht_lock, flags); spin_lock_irqsave(&cm_node->recv_list_lock, flags); - list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) { - recv_entry = container_of(list_core, struct nes_timer_entry, list); - if ((time_after(recv_entry->timetosend, jiffies)) && - (recv_entry->type == NES_TIMER_TYPE_CLOSE)) { - if (nexttimeout > recv_entry->timetosend || !settimer) { + list_for_each_safe(list_core, list_node_temp, + &cm_node->recv_list) { + recv_entry = container_of(list_core, + struct nes_timer_entry, list); + if (!recv_entry) + break; + if (time_after(recv_entry->timetosend, jiffies)) { + if (nexttimeout > recv_entry->timetosend || + !settimer) { nexttimeout = recv_entry->timetosend; settimer = 1; } @@ -501,157 +484,143 @@ static void nes_cm_timer_tick(unsigned long pass) list_del(&recv_entry->list); cm_id = cm_node->cm_id; spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); - if (recv_entry->type == NES_TIMER_TYPE_CLOSE) { - nesqp = (struct nes_qp *)recv_entry->skb; - spin_lock_irqsave(&nesqp->lock, qplockflags); - if (nesqp->cm_id) { - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, refcount = %d: " - "****** HIT A NES_TIMER_TYPE_CLOSE" - " with something to do!!! ******\n", - nesqp->hwqp.qp_id, cm_id, - atomic_read(&nesqp->refcount)); - nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; - nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; - nesqp->ibqp_state = IB_QPS_ERR; - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_cm_disconn(nesqp); - } else { - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, refcount = %d:" - " ****** HIT A NES_TIMER_TYPE_CLOSE" - " with nothing to do!!! ******\n", - nesqp->hwqp.qp_id, cm_id, - atomic_read(&nesqp->refcount)); - nes_rem_ref(&nesqp->ibqp); - } - if (cm_id) - cm_id->rem_ref(cm_id); + nesqp = (struct nes_qp *)recv_entry->skb; + spin_lock_irqsave(&nesqp->lock, qplockflags); + if (nesqp->cm_id) { + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " + "refcount = %d: HIT A " + "NES_TIMER_TYPE_CLOSE with something " + "to do!!!\n", nesqp->hwqp.qp_id, cm_id, + atomic_read(&nesqp->refcount)); + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; + nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; + nesqp->ibqp_state = IB_QPS_ERR; + spin_unlock_irqrestore(&nesqp->lock, + qplockflags); + nes_cm_disconn(nesqp); + } else { + spin_unlock_irqrestore(&nesqp->lock, + qplockflags); + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " + "refcount = %d: HIT A " + "NES_TIMER_TYPE_CLOSE with nothing " + "to do!!!\n", nesqp->hwqp.qp_id, cm_id, + atomic_read(&nesqp->refcount)); } + if (cm_id) + cm_id->rem_ref(cm_id); + kfree(recv_entry); spin_lock_irqsave(&cm_node->recv_list_lock, flags); } spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - node_done = 0; - list_for_each_safe(list_core, list_node_temp, &cm_node->retrans_list) { - if (node_done) { - break; - } - send_entry = container_of(list_core, struct nes_timer_entry, list); + do { + send_entry = cm_node->send_entry; + if (!send_entry) + continue; if (time_after(send_entry->timetosend, jiffies)) { if (cm_node->state != NES_CM_STATE_TSA) { - if ((nexttimeout > send_entry->timetosend) || !settimer) { - nexttimeout = send_entry->timetosend; + if ((nexttimeout > + send_entry->timetosend) || + !settimer) { + nexttimeout = + send_entry->timetosend; settimer = 1; + continue; } - node_done = 1; - continue; } else { - list_del(&send_entry->list); - skb = send_entry->skb; - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - dev_kfree_skb_any(skb); - kfree(send_entry); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + free_retrans_entry(cm_node); continue; } } - if (send_entry->type == NES_TIMER_NODE_CLEANUP) { - list_del(&send_entry->list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - kfree(send_entry); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - continue; - } - if ((send_entry->seq_num < cm_node->tcp_cntxt.rem_ack_num) || - (cm_node->state == NES_CM_STATE_TSA) || - (cm_node->state == NES_CM_STATE_CLOSED)) { - skb = send_entry->skb; - list_del(&send_entry->list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - kfree(send_entry); - dev_kfree_skb_any(skb); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + + if ((cm_node->state == NES_CM_STATE_TSA) || + (cm_node->state == NES_CM_STATE_CLOSED)) { + free_retrans_entry(cm_node); continue; } - if (!send_entry->retranscount || !send_entry->retrycount) { + if (!send_entry->retranscount || + !send_entry->retrycount) { cm_packets_dropped++; - skb = send_entry->skb; - list_del(&send_entry->list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - dev_kfree_skb_any(skb); - kfree(send_entry); - if (cm_node->state == NES_CM_STATE_SYN_RCVD) { - /* this node never even generated an indication up to the cm */ + last_state = cm_node->state; + cm_node->state = NES_CM_STATE_CLOSED; + free_retrans_entry(cm_node); + spin_unlock_irqrestore( + &cm_node->retrans_list_lock, flags); + if (last_state == NES_CM_STATE_SYN_RCVD) rem_ref_cm_node(cm_core, cm_node); - } else { - cm_node->state = NES_CM_STATE_CLOSED; - create_event(cm_node, NES_CM_EVENT_ABORTED); - } - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + else + create_event(cm_node, + NES_CM_EVENT_ABORTED); + spin_lock_irqsave(&cm_node->retrans_list_lock, + flags); continue; } - /* this seems like the correct place, but leave send entry unprotected */ - /* spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); */ atomic_inc(&send_entry->skb->users); cm_packets_retrans++; - nes_debug(NES_DBG_CM, "Retransmitting send_entry %p for node %p," - " jiffies = %lu, time to send = %lu, retranscount = %u, " - "send_entry->seq_num = 0x%08X, cm_node->tcp_cntxt.rem_ack_num = 0x%08X\n", - send_entry, cm_node, jiffies, send_entry->timetosend, send_entry->retranscount, - send_entry->seq_num, cm_node->tcp_cntxt.rem_ack_num); - - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + nes_debug(NES_DBG_CM, "Retransmitting send_entry %p " + "for node %p, jiffies = %lu, time to send = " + "%lu, retranscount = %u, send_entry->seq_num = " + "0x%08X, cm_node->tcp_cntxt.rem_ack_num = " + "0x%08X\n", send_entry, cm_node, jiffies, + send_entry->timetosend, + send_entry->retranscount, + send_entry->seq_num, + cm_node->tcp_cntxt.rem_ack_num); + + spin_unlock_irqrestore(&cm_node->retrans_list_lock, + flags); ret = nes_nic_cm_xmit(send_entry->skb, cm_node->netdev); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); if (ret != NETDEV_TX_OK) { + nes_debug(NES_DBG_CM, "rexmit failed for " + "node=%p\n", cm_node); cm_packets_bounced++; atomic_dec(&send_entry->skb->users); send_entry->retrycount--; nexttimeout = jiffies + NES_SHORT_TIME; settimer = 1; - node_done = 1; - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); continue; } else { cm_packets_sent++; } - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - list_del(&send_entry->list); - nes_debug(NES_DBG_CM, "Packet Sent: retrans count = %u, retry count = %u.\n", - send_entry->retranscount, send_entry->retrycount); + nes_debug(NES_DBG_CM, "Packet Sent: retrans count = " + "%u, retry count = %u.\n", + send_entry->retranscount, + send_entry->retrycount); if (send_entry->send_retrans) { send_entry->retranscount--; - send_entry->timetosend = jiffies + NES_RETRY_TIMEOUT; - if (nexttimeout > send_entry->timetosend || !settimer) { + send_entry->timetosend = jiffies + + NES_RETRY_TIMEOUT; + if (nexttimeout > send_entry->timetosend || + !settimer) { nexttimeout = send_entry->timetosend; settimer = 1; } - list_add(&send_entry->list, &cm_node->retrans_list); - continue; } else { int close_when_complete; - skb = send_entry->skb; - close_when_complete = send_entry->close_when_complete; - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - if (close_when_complete) { - BUG_ON(atomic_read(&cm_node->ref_count) == 1); - rem_ref_cm_node(cm_core, cm_node); - } - dev_kfree_skb_any(skb); - kfree(send_entry); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - continue; + close_when_complete = + send_entry->close_when_complete; + nes_debug(NES_DBG_CM, "cm_node=%p state=%d\n", + cm_node, cm_node->state); + free_retrans_entry(cm_node); + if (close_when_complete) + rem_ref_cm_node(cm_node->cm_core, + cm_node); } - } - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - - rem_ref_cm_node(cm_core, cm_node); + } while (0); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + rem_ref_cm_node(cm_node->cm_core, cm_node); spin_lock_irqsave(&cm_core->ht_lock, flags); - if (ret != NETDEV_TX_OK) + if (ret != NETDEV_TX_OK) { + nes_debug(NES_DBG_CM, "rexmit failed for cm_node=%p\n", + cm_node); break; + } } spin_unlock_irqrestore(&cm_core->ht_lock, flags); @@ -667,14 +636,14 @@ static void nes_cm_timer_tick(unsigned long pass) /** * send_syn */ -static int send_syn(struct nes_cm_node *cm_node, u32 sendack) +static int send_syn(struct nes_cm_node *cm_node, u32 sendack, + struct sk_buff *skb) { int ret; int flags = SET_SYN; - struct sk_buff *skb; char optionsbuffer[sizeof(struct option_mss) + - sizeof(struct option_windowscale) + - sizeof(struct option_base) + 1]; + sizeof(struct option_windowscale) + sizeof(struct option_base) + + TCP_OPTIONS_PADDING]; int optionssize = 0; /* Sending MSS option */ @@ -695,8 +664,7 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack) options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale; optionssize += sizeof(struct option_windowscale); - if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt) - ) { + if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt)) { options = (union all_known_options *)&optionsbuffer[optionssize]; options->as_base.optionnum = OPTION_NUMBER_WRITE0; options->as_base.length = sizeof(struct option_base); @@ -714,7 +682,8 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack) options->as_end = OPTION_NUMBER_END; optionssize += 1; - skb = get_free_pkt(cm_node); + if (!skb) + skb = get_free_pkt(cm_node); if (!skb) { nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); return -1; @@ -733,18 +702,18 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack) /** * send_reset */ -static int send_reset(struct nes_cm_node *cm_node) +static int send_reset(struct nes_cm_node *cm_node, struct sk_buff *skb) { int ret; - struct sk_buff *skb = get_free_pkt(cm_node); int flags = SET_RST | SET_ACK; + if (!skb) + skb = get_free_pkt(cm_node); if (!skb) { nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); return -1; } - add_ref_cm_node(cm_node); form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, flags); ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 1); @@ -755,10 +724,12 @@ static int send_reset(struct nes_cm_node *cm_node) /** * send_ack */ -static int send_ack(struct nes_cm_node *cm_node) +static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb) { int ret; - struct sk_buff *skb = get_free_pkt(cm_node); + + if (!skb) + skb = get_free_pkt(cm_node); if (!skb) { nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); @@ -922,7 +893,8 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node if (!cm_node || !cm_core) return -EINVAL; - nes_debug(NES_DBG_CM, "Adding Node to Active Connection HT\n"); + nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n", + cm_node); /* first, make an index into our hash table */ hashkey = make_hashkey(cm_node->loc_port, cm_node->loc_addr, @@ -946,10 +918,35 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node * mini_cm_dec_refcnt_listen */ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, - struct nes_cm_listener *listener, int free_hanging_nodes) + struct nes_cm_listener *listener, int free_hanging_nodes) { int ret = 1; unsigned long flags; + struct list_head *list_pos = NULL; + struct list_head *list_temp = NULL; + struct nes_cm_node *cm_node = NULL; + + nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, " + "refcnt=%d\n", listener, free_hanging_nodes, + atomic_read(&listener->ref_count)); + /* free non-accelerated child nodes for this listener */ + if (free_hanging_nodes) { + spin_lock_irqsave(&cm_core->ht_lock, flags); + list_for_each_safe(list_pos, list_temp, + &g_cm_core->connected_nodes) { + cm_node = container_of(list_pos, struct nes_cm_node, + list); + if ((cm_node->listener == listener) && + (!cm_node->accelerated)) { + cleanup_retrans_entry(cm_node); + spin_unlock_irqrestore(&cm_core->ht_lock, + flags); + send_reset(cm_node, NULL); + spin_lock_irqsave(&cm_core->ht_lock, flags); + } + } + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + } spin_lock_irqsave(&cm_core->listen_list_lock, flags); if (!atomic_dec_return(&listener->ref_count)) { list_del(&listener->list); @@ -1022,23 +1019,43 @@ static inline int mini_cm_accelerated(struct nes_cm_core *cm_core, /** - * nes_addr_send_arp + * nes_addr_resolve_neigh */ -static void nes_addr_send_arp(u32 dst_ip) +static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip) { struct rtable *rt; struct flowi fl; + struct neighbour *neigh; + int rc = -1; + DECLARE_MAC_BUF(mac); memset(&fl, 0, sizeof fl); fl.nl_u.ip4_u.daddr = htonl(dst_ip); if (ip_route_output_key(&init_net, &rt, &fl)) { printk("%s: ip_route_output_key failed for 0x%08X\n", __func__, dst_ip); - return; + return rc; + } + + neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, nesvnic->netdev); + if (neigh) { + if (neigh->nud_state & NUD_VALID) { + nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X" + " is %s, Gateway is 0x%08X \n", dst_ip, + print_mac(mac, neigh->ha), ntohl(rt->rt_gateway)); + nes_manage_arp_cache(nesvnic->netdev, neigh->ha, + dst_ip, NES_ARP_ADD); + rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL, + NES_ARP_RESOLVE); + } + neigh_release(neigh); } - neigh_event_send(rt->u.dst.neighbour, NULL); + if ((neigh == NULL) || (!(neigh->nud_state & NUD_VALID))) + neigh_event_send(rt->u.dst.neighbour, NULL); + ip_rt_put(rt); + return rc; } @@ -1067,18 +1084,18 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->loc_port = cm_info->loc_port; cm_node->rem_port = cm_info->rem_port; cm_node->send_write0 = send_first; - nes_debug(NES_DBG_CM, "Make node addresses : loc = " NIPQUAD_FMT ":%x, rem = " NIPQUAD_FMT ":%x\n", - HIPQUAD(cm_node->loc_addr), cm_node->loc_port, - HIPQUAD(cm_node->rem_addr), cm_node->rem_port); + nes_debug(NES_DBG_CM, "Make node addresses : loc = " NIPQUAD_FMT + ":%x, rem = " NIPQUAD_FMT ":%x\n", + HIPQUAD(cm_node->loc_addr), cm_node->loc_port, + HIPQUAD(cm_node->rem_addr), cm_node->rem_port); cm_node->listener = listener; cm_node->netdev = nesvnic->netdev; cm_node->cm_id = cm_info->cm_id; memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN); - nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", - cm_node->listener, cm_node->cm_id); + nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", cm_node->listener, + cm_node->cm_id); - INIT_LIST_HEAD(&cm_node->retrans_list); spin_lock_init(&cm_node->retrans_list_lock); INIT_LIST_HEAD(&cm_node->recv_list); spin_lock_init(&cm_node->recv_list_lock); @@ -1111,9 +1128,11 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, /* get the mac addr for the remote node */ arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE); if (arpindex < 0) { - kfree(cm_node); - nes_addr_send_arp(cm_info->rem_addr); - return NULL; + arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr); + if (arpindex < 0) { + kfree(cm_node); + return NULL; + } } /* copy the mac addr to node context */ @@ -1142,10 +1161,9 @@ static int add_ref_cm_node(struct nes_cm_node *cm_node) * rem_ref_cm_node - destroy an instance of a cm node */ static int rem_ref_cm_node(struct nes_cm_core *cm_core, - struct nes_cm_node *cm_node) + struct nes_cm_node *cm_node) { unsigned long flags, qplockflags; - struct nes_timer_entry *send_entry; struct nes_timer_entry *recv_entry; struct iw_cm_id *cm_id; struct list_head *list_core, *list_node_temp; @@ -1169,48 +1187,33 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core, atomic_dec(&cm_node->listener->pend_accepts_cnt); BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0); } - - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - list_for_each_safe(list_core, list_node_temp, &cm_node->retrans_list) { - send_entry = container_of(list_core, struct nes_timer_entry, list); - list_del(&send_entry->list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - dev_kfree_skb_any(send_entry->skb); - kfree(send_entry); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - continue; - } - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - + BUG_ON(cm_node->send_entry); spin_lock_irqsave(&cm_node->recv_list_lock, flags); list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) { - recv_entry = container_of(list_core, struct nes_timer_entry, list); + recv_entry = container_of(list_core, struct nes_timer_entry, + list); list_del(&recv_entry->list); cm_id = cm_node->cm_id; spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); - if (recv_entry->type == NES_TIMER_TYPE_CLOSE) { - nesqp = (struct nes_qp *)recv_entry->skb; - spin_lock_irqsave(&nesqp->lock, qplockflags); - if (nesqp->cm_id) { - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: ****** HIT A NES_TIMER_TYPE_CLOSE" - " with something to do!!! ******\n", - nesqp->hwqp.qp_id, cm_id); - nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; - nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; - nesqp->ibqp_state = IB_QPS_ERR; - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_cm_disconn(nesqp); - } else { - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: ****** HIT A NES_TIMER_TYPE_CLOSE" - " with nothing to do!!! ******\n", - nesqp->hwqp.qp_id, cm_id); - nes_rem_ref(&nesqp->ibqp); - } - cm_id->rem_ref(cm_id); - } else if (recv_entry->type == NES_TIMER_TYPE_RECV) { - dev_kfree_skb_any(recv_entry->skb); + nesqp = (struct nes_qp *)recv_entry->skb; + spin_lock_irqsave(&nesqp->lock, qplockflags); + if (nesqp->cm_id) { + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: HIT A " + "NES_TIMER_TYPE_CLOSE with something to do!\n", + nesqp->hwqp.qp_id, cm_id); + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; + nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; + nesqp->ibqp_state = IB_QPS_ERR; + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_cm_disconn(nesqp); + } else { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: HIT A " + "NES_TIMER_TYPE_CLOSE with nothing to do!\n", + nesqp->hwqp.qp_id, cm_id); } + cm_id->rem_ref(cm_id); + kfree(recv_entry); spin_lock_irqsave(&cm_node->recv_list_lock, flags); } @@ -1221,23 +1224,31 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core, } else { if (cm_node->apbvt_set && cm_node->nesvnic) { nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port, - PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn), - NES_MANAGE_APBVT_DEL); + PCI_FUNC( + cm_node->nesvnic->nesdev->pcidev->devfn), + NES_MANAGE_APBVT_DEL); } } - kfree(cm_node); atomic_dec(&cm_core->node_cnt); atomic_inc(&cm_nodes_destroyed); + nesqp = cm_node->nesqp; + if (nesqp) { + nesqp->cm_node = NULL; + nes_rem_ref(&nesqp->ibqp); + cm_node->nesqp = NULL; + } + cm_node->freed = 1; + kfree(cm_node); return 0; } - /** * process_options */ -static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 optionsize, u32 syn_packet) +static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, + u32 optionsize, u32 syn_packet) { u32 tmp; u32 offset = 0; @@ -1247,35 +1258,37 @@ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 opti while (offset < optionsize) { all_options = (union all_known_options *)(optionsloc + offset); switch (all_options->as_base.optionnum) { - case OPTION_NUMBER_END: - offset = optionsize; - break; - case OPTION_NUMBER_NONE: - offset += 1; - continue; - case OPTION_NUMBER_MSS: - nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d Size: %d\n", - __func__, - all_options->as_mss.length, offset, optionsize); - got_mss_option = 1; - if (all_options->as_mss.length != 4) { - return 1; - } else { - tmp = ntohs(all_options->as_mss.mss); - if (tmp > 0 && tmp < cm_node->tcp_cntxt.mss) - cm_node->tcp_cntxt.mss = tmp; - } - break; - case OPTION_NUMBER_WINDOW_SCALE: - cm_node->tcp_cntxt.snd_wscale = all_options->as_windowscale.shiftcount; - break; - case OPTION_NUMBER_WRITE0: - cm_node->send_write0 = 1; - break; - default: - nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n", - all_options->as_base.optionnum); - break; + case OPTION_NUMBER_END: + offset = optionsize; + break; + case OPTION_NUMBER_NONE: + offset += 1; + continue; + case OPTION_NUMBER_MSS: + nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d " + "Size: %d\n", __func__, + all_options->as_mss.length, offset, optionsize); + got_mss_option = 1; + if (all_options->as_mss.length != 4) { + return 1; + } else { + tmp = ntohs(all_options->as_mss.mss); + if (tmp > 0 && tmp < + cm_node->tcp_cntxt.mss) + cm_node->tcp_cntxt.mss = tmp; + } + break; + case OPTION_NUMBER_WINDOW_SCALE: + cm_node->tcp_cntxt.snd_wscale = + all_options->as_windowscale.shiftcount; + break; + case OPTION_NUMBER_WRITE0: + cm_node->send_write0 = 1; + break; + default: + nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n", + all_options->as_base.optionnum); + break; } offset += all_options->as_base.length; } @@ -1284,300 +1297,491 @@ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 opti return 0; } +static void drop_packet(struct sk_buff *skb) +{ + atomic_inc(&cm_accel_dropped_pkts); + dev_kfree_skb_any(skb); +} -/** - * process_packet - */ -static int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, - struct nes_cm_core *cm_core) +static void handle_fin_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) { - int optionsize; - int datasize; - int ret = 0; - struct tcphdr *tcph = tcp_hdr(skb); - u32 inc_sequence; - if (cm_node->state == NES_CM_STATE_SYN_SENT && tcph->syn) { - inc_sequence = ntohl(tcph->seq); - cm_node->tcp_cntxt.rcv_nxt = inc_sequence; + atomic_inc(&cm_resets_recvd); + nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. " + "refcnt=%d\n", cm_node, cm_node->state, + atomic_read(&cm_node->ref_count)); + cm_node->tcp_cntxt.rcv_nxt++; + cleanup_retrans_entry(cm_node); + switch (cm_node->state) { + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_MPAREQ_SENT: + cm_node->state = NES_CM_STATE_LAST_ACK; + send_fin(cm_node, skb); + break; + case NES_CM_STATE_FIN_WAIT1: + cm_node->state = NES_CM_STATE_CLOSING; + send_ack(cm_node, skb); + break; + case NES_CM_STATE_FIN_WAIT2: + cm_node->state = NES_CM_STATE_TIME_WAIT; + send_ack(cm_node, skb); + cm_node->state = NES_CM_STATE_CLOSED; + break; + case NES_CM_STATE_TSA: + default: + nes_debug(NES_DBG_CM, "Error Rcvd FIN for node-%p state = %d\n", + cm_node, cm_node->state); + drop_packet(skb); + break; } +} - if ((!tcph) || (cm_node->state == NES_CM_STATE_TSA)) { - BUG_ON(!tcph); - atomic_inc(&cm_accel_dropped_pkts); - return -1; - } - if (tcph->rst) { - atomic_inc(&cm_resets_recvd); - nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u. refcnt=%d\n", - cm_node, cm_node->state, atomic_read(&cm_node->ref_count)); - switch (cm_node->state) { - case NES_CM_STATE_LISTENING: - rem_ref_cm_node(cm_core, cm_node); - break; - case NES_CM_STATE_TSA: - case NES_CM_STATE_CLOSED: - break; - case NES_CM_STATE_SYN_RCVD: - nes_debug(NES_DBG_CM, "Received a reset for local 0x%08X:%04X," - " remote 0x%08X:%04X, node state = %u\n", - cm_node->loc_addr, cm_node->loc_port, - cm_node->rem_addr, cm_node->rem_port, - cm_node->state); - rem_ref_cm_node(cm_core, cm_node); - break; - case NES_CM_STATE_ONE_SIDE_ESTABLISHED: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_MPAREQ_SENT: - default: - nes_debug(NES_DBG_CM, "Received a reset for local 0x%08X:%04X," - " remote 0x%08X:%04X, node state = %u refcnt=%d\n", - cm_node->loc_addr, cm_node->loc_port, - cm_node->rem_addr, cm_node->rem_port, - cm_node->state, atomic_read(&cm_node->ref_count)); - /* create event */ - cm_node->state = NES_CM_STATE_CLOSED; +static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ - create_event(cm_node, NES_CM_EVENT_ABORTED); - break; + int reset = 0; /* whether to send reset in case of err.. */ + atomic_inc(&cm_resets_recvd); + nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u." + " refcnt=%d\n", cm_node, cm_node->state, + atomic_read(&cm_node->ref_count)); + cleanup_retrans_entry(cm_node); + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_MPAREQ_SENT: + nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " + "listener=%p state=%d\n", __func__, __LINE__, cm_node, + cm_node->listener, cm_node->state); + active_open_err(cm_node, skb, reset); + break; + /* For PASSIVE open states, remove the cm_node event */ + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_LISTENING: + nes_debug(NES_DBG_CM, "Bad state %s[%u]\n", __func__, __LINE__); + passive_open_err(cm_node, skb, reset); + break; + case NES_CM_STATE_TSA: + default: + break; + } +} +static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb, + enum nes_cm_event_type type) +{ + + int ret; + int datasize = skb->len; + u8 *dataloc = skb->data; + ret = parse_mpa(cm_node, dataloc, datasize); + if (ret < 0) { + nes_debug(NES_DBG_CM, "didn't like MPA Request\n"); + if (type == NES_CM_EVENT_CONNECTED) { + nes_debug(NES_DBG_CM, "%s[%u] create abort for " + "cm_node=%p listener=%p state=%d\n", __func__, + __LINE__, cm_node, cm_node->listener, + cm_node->state); + active_open_err(cm_node, skb, 1); + } else { + passive_open_err(cm_node, skb, 1); } - return -1; + } else { + cleanup_retrans_entry(cm_node); + dev_kfree_skb_any(skb); + if (type == NES_CM_EVENT_CONNECTED) + cm_node->state = NES_CM_STATE_TSA; + create_event(cm_node, type); + } + return ; +} + +static void indicate_pkt_err(struct nes_cm_node *cm_node, struct sk_buff *skb) +{ + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_MPAREQ_SENT: + nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " + "listener=%p state=%d\n", __func__, __LINE__, cm_node, + cm_node->listener, cm_node->state); + active_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_SYN_RCVD: + passive_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_TSA: + default: + drop_packet(skb); + } +} + +static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph, + struct sk_buff *skb) +{ + int err; + + err = ((ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num))? 0 : 1; + if (err) + active_open_err(cm_node, skb, 1); + + return err; +} + +static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph, + struct sk_buff *skb) +{ + int err = 0; + u32 seq; + u32 ack_seq; + u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num; + u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt; + u32 rcv_wnd; + seq = ntohl(tcph->seq); + ack_seq = ntohl(tcph->ack_seq); + rcv_wnd = cm_node->tcp_cntxt.rcv_wnd; + if (ack_seq != loc_seq_num) + err = 1; + else if ((seq + rcv_wnd) < rcv_nxt) + err = 1; + if (err) { + nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " + "listener=%p state=%d\n", __func__, __LINE__, cm_node, + cm_node->listener, cm_node->state); + indicate_pkt_err(cm_node, skb); + nes_debug(NES_DBG_CM, "seq ERROR cm_node =%p seq=0x%08X " + "rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt, + rcv_wnd); + } + return err; +} + +/* + * handle_syn_pkt() is for Passive node. The syn packet is received when a node + * is created with a listener or it may comein as rexmitted packet which in + * that case will be just dropped. + */ + +static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ + int ret; + u32 inc_sequence; + int optionsize; optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); + skb_pull(skb, tcph->doff << 2); + inc_sequence = ntohl(tcph->seq); - skb_pull(skb, ip_hdr(skb)->ihl << 2); + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_MPAREQ_SENT: + /* Rcvd syn on active open connection*/ + active_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_LISTENING: + /* Passive OPEN */ + cm_node->accept_pend = 1; + atomic_inc(&cm_node->listener->pend_accepts_cnt); + if (atomic_read(&cm_node->listener->pend_accepts_cnt) > + cm_node->listener->backlog) { + nes_debug(NES_DBG_CM, "drop syn due to backlog " + "pressure \n"); + cm_backlog_drops++; + passive_open_err(cm_node, skb, 0); + break; + } + ret = handle_tcp_options(cm_node, tcph, skb, optionsize, + 1); + if (ret) { + passive_open_err(cm_node, skb, 0); + /* drop pkt */ + break; + } + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1; + BUG_ON(cm_node->send_entry); + cm_node->state = NES_CM_STATE_SYN_RCVD; + send_syn(cm_node, 1, skb); + break; + case NES_CM_STATE_TSA: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_MPAREQ_RCVD: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_CLOSING: + case NES_CM_STATE_UNKNOWN: + case NES_CM_STATE_CLOSED: + default: + drop_packet(skb); + break; + } +} + +static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ + + int ret; + u32 inc_sequence; + int optionsize; + + optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); skb_pull(skb, tcph->doff << 2); + inc_sequence = ntohl(tcph->seq); + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + /* active open */ + if (check_syn(cm_node, tcph, skb)) + return; + cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); + /* setup options */ + ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 0); + if (ret) { + nes_debug(NES_DBG_CM, "cm_node=%p tcp_options failed\n", + cm_node); + break; + } + cleanup_retrans_entry(cm_node); + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1; + send_mpa_request(cm_node, skb); + cm_node->state = NES_CM_STATE_MPAREQ_SENT; + break; + case NES_CM_STATE_MPAREQ_RCVD: + /* passive open, so should not be here */ + passive_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_TSA: + case NES_CM_STATE_CLOSING: + case NES_CM_STATE_UNKNOWN: + case NES_CM_STATE_CLOSED: + case NES_CM_STATE_MPAREQ_SENT: + default: + drop_packet(skb); + break; + } +} - datasize = skb->len; +static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ + int datasize = 0; + u32 inc_sequence; + u32 rem_seq_ack; + u32 rem_seq; + if (check_seq(cm_node, tcph, skb)) + return; + + skb_pull(skb, tcph->doff << 2); inc_sequence = ntohl(tcph->seq); - nes_debug(NES_DBG_CM, "datasize = %u, sequence = 0x%08X, ack_seq = 0x%08X," - " rcv_nxt = 0x%08X Flags: %s %s.\n", - datasize, inc_sequence, ntohl(tcph->ack_seq), - cm_node->tcp_cntxt.rcv_nxt, (tcph->syn ? "SYN":""), - (tcph->ack ? "ACK":"")); - - if (!tcph->syn && (inc_sequence != cm_node->tcp_cntxt.rcv_nxt) - ) { - nes_debug(NES_DBG_CM, "dropping packet, datasize = %u, sequence = 0x%08X," - " ack_seq = 0x%08X, rcv_nxt = 0x%08X Flags: %s.\n", - datasize, inc_sequence, ntohl(tcph->ack_seq), - cm_node->tcp_cntxt.rcv_nxt, (tcph->ack ? "ACK":"")); - if (cm_node->state == NES_CM_STATE_LISTENING) { - rem_ref_cm_node(cm_core, cm_node); + rem_seq = ntohl(tcph->seq); + rem_seq_ack = ntohl(tcph->ack_seq); + datasize = skb->len; + + switch (cm_node->state) { + case NES_CM_STATE_SYN_RCVD: + /* Passive OPEN */ + cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); + cm_node->state = NES_CM_STATE_ESTABLISHED; + if (datasize) { + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + cm_node->state = NES_CM_STATE_MPAREQ_RCVD; + handle_rcv_mpa(cm_node, skb, NES_CM_EVENT_MPA_REQ); + } else { /* rcvd ACK only */ + dev_kfree_skb_any(skb); + cleanup_retrans_entry(cm_node); + } + break; + case NES_CM_STATE_ESTABLISHED: + /* Passive OPEN */ + /* We expect mpa frame to be received only */ + if (datasize) { + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + cm_node->state = NES_CM_STATE_MPAREQ_RCVD; + handle_rcv_mpa(cm_node, skb, + NES_CM_EVENT_MPA_REQ); + } else + drop_packet(skb); + break; + case NES_CM_STATE_MPAREQ_SENT: + cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); + if (datasize) { + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + handle_rcv_mpa(cm_node, skb, NES_CM_EVENT_CONNECTED); + } else { /* Could be just an ack pkt.. */ + cleanup_retrans_entry(cm_node); + dev_kfree_skb_any(skb); } - return -1; + break; + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_TSA: + case NES_CM_STATE_CLOSED: + case NES_CM_STATE_MPAREQ_RCVD: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_CLOSING: + case NES_CM_STATE_UNKNOWN: + default: + drop_packet(skb); + break; } +} - cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; +static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, + struct sk_buff *skb, int optionsize, int passive) +{ + u8 *optionsloc = (u8 *)&tcph[1]; if (optionsize) { - u8 *optionsloc = (u8 *)&tcph[1]; - if (process_options(cm_node, optionsloc, optionsize, (u32)tcph->syn)) { - nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", __func__, cm_node); - send_reset(cm_node); - if (cm_node->state != NES_CM_STATE_SYN_SENT) - rem_ref_cm_node(cm_core, cm_node); - return 0; + if (process_options(cm_node, optionsloc, optionsize, + (u32)tcph->syn)) { + nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", + __func__, cm_node); + if (passive) + passive_open_err(cm_node, skb, 0); + else + active_open_err(cm_node, skb, 0); + return 1; } - } else if (tcph->syn) - cm_node->tcp_cntxt.mss = NES_CM_DEFAULT_MSS; + } cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) << cm_node->tcp_cntxt.snd_wscale; - if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd) { + if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd) cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd; - } + return 0; +} - if (tcph->ack) { - cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); - switch (cm_node->state) { - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_SYN_SENT: - /* read and stash current sequence number */ - if (cm_node->tcp_cntxt.rem_ack_num != cm_node->tcp_cntxt.loc_seq_num) { - nes_debug(NES_DBG_CM, "ERROR - cm_node->tcp_cntxt.rem_ack_num !=" - " cm_node->tcp_cntxt.loc_seq_num\n"); - send_reset(cm_node); - return 0; - } - if (cm_node->state == NES_CM_STATE_SYN_SENT) - cm_node->state = NES_CM_STATE_ONE_SIDE_ESTABLISHED; - else { - cm_node->state = NES_CM_STATE_ESTABLISHED; - } - break; - case NES_CM_STATE_LAST_ACK: - cm_node->state = NES_CM_STATE_CLOSED; - break; - case NES_CM_STATE_FIN_WAIT1: - cm_node->state = NES_CM_STATE_FIN_WAIT2; - break; - case NES_CM_STATE_CLOSING: - cm_node->state = NES_CM_STATE_TIME_WAIT; - /* need to schedule this to happen in 2MSL timeouts */ - cm_node->state = NES_CM_STATE_CLOSED; - break; - case NES_CM_STATE_ONE_SIDE_ESTABLISHED: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_MPAREQ_SENT: - case NES_CM_STATE_CLOSE_WAIT: - case NES_CM_STATE_TIME_WAIT: - case NES_CM_STATE_CLOSED: - break; - case NES_CM_STATE_LISTENING: - nes_debug(NES_DBG_CM, "Received an ACK on a listening port (SYN %d)\n", tcph->syn); - cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); - send_reset(cm_node); - /* send_reset bumps refcount, this should have been a new node */ - rem_ref_cm_node(cm_core, cm_node); - return -1; - break; - case NES_CM_STATE_TSA: - nes_debug(NES_DBG_CM, "Received a packet with the ack bit set while in TSA state\n"); - break; - case NES_CM_STATE_UNKNOWN: - case NES_CM_STATE_INITED: - case NES_CM_STATE_ACCEPTING: - case NES_CM_STATE_FIN_WAIT2: - default: - nes_debug(NES_DBG_CM, "Received ack from unknown state: %x\n", - cm_node->state); - send_reset(cm_node); - break; - } - } +/* + * active_open_err() will send reset() if flag set.. + * It will also send ABORT event. + */ - if (tcph->syn) { - if (cm_node->state == NES_CM_STATE_LISTENING) { - /* do not exceed backlog */ - atomic_inc(&cm_node->listener->pend_accepts_cnt); - if (atomic_read(&cm_node->listener->pend_accepts_cnt) > - cm_node->listener->backlog) { - nes_debug(NES_DBG_CM, "drop syn due to backlog pressure \n"); - cm_backlog_drops++; - atomic_dec(&cm_node->listener->pend_accepts_cnt); - rem_ref_cm_node(cm_core, cm_node); - return 0; - } - cm_node->accept_pend = 1; +static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb, + int reset) +{ + cleanup_retrans_entry(cm_node); + if (reset) { + nes_debug(NES_DBG_CM, "ERROR active err called for cm_node=%p, " + "state=%d\n", cm_node, cm_node->state); + add_ref_cm_node(cm_node); + send_reset(cm_node, skb); + } else + dev_kfree_skb_any(skb); - } - if (datasize == 0) - cm_node->tcp_cntxt.rcv_nxt ++; + cm_node->state = NES_CM_STATE_CLOSED; + create_event(cm_node, NES_CM_EVENT_ABORTED); +} - if (cm_node->state == NES_CM_STATE_LISTENING) { - cm_node->state = NES_CM_STATE_SYN_RCVD; - send_syn(cm_node, 1); - } - if (cm_node->state == NES_CM_STATE_ONE_SIDE_ESTABLISHED) { - cm_node->state = NES_CM_STATE_ESTABLISHED; - /* send final handshake ACK */ - ret = send_ack(cm_node); - if (ret < 0) - return ret; +/* + * passive_open_err() will either do a reset() or will free up the skb and + * remove the cm_node. + */ - cm_node->state = NES_CM_STATE_MPAREQ_SENT; - ret = send_mpa_request(cm_node); - if (ret < 0) - return ret; - } +static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb, + int reset) +{ + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + if (reset) { + nes_debug(NES_DBG_CM, "passive_open_err sending RST for " + "cm_node=%p state =%d\n", cm_node, cm_node->state); + send_reset(cm_node, skb); + } else { + dev_kfree_skb_any(skb); + rem_ref_cm_node(cm_node->cm_core, cm_node); } +} - if (tcph->fin) { - cm_node->tcp_cntxt.rcv_nxt++; - switch (cm_node->state) { - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_SYN_SENT: - case NES_CM_STATE_ONE_SIDE_ESTABLISHED: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_ACCEPTING: - case NES_CM_STATE_MPAREQ_SENT: - cm_node->state = NES_CM_STATE_CLOSE_WAIT; - cm_node->state = NES_CM_STATE_LAST_ACK; - ret = send_fin(cm_node, NULL); - break; - case NES_CM_STATE_FIN_WAIT1: - cm_node->state = NES_CM_STATE_CLOSING; - ret = send_ack(cm_node); - break; - case NES_CM_STATE_FIN_WAIT2: - cm_node->state = NES_CM_STATE_TIME_WAIT; - cm_node->tcp_cntxt.loc_seq_num ++; - ret = send_ack(cm_node); - /* need to schedule this to happen in 2MSL timeouts */ - cm_node->state = NES_CM_STATE_CLOSED; - break; - case NES_CM_STATE_CLOSE_WAIT: - case NES_CM_STATE_LAST_ACK: - case NES_CM_STATE_CLOSING: - case NES_CM_STATE_TSA: - default: - nes_debug(NES_DBG_CM, "Received a fin while in %x state\n", - cm_node->state); - ret = -EINVAL; - break; - } +/* + * free_retrans_entry() routines assumes that the retrans_list_lock has + * been acquired before calling. + */ +static void free_retrans_entry(struct nes_cm_node *cm_node) +{ + struct nes_timer_entry *send_entry; + send_entry = cm_node->send_entry; + if (send_entry) { + cm_node->send_entry = NULL; + dev_kfree_skb_any(send_entry->skb); + kfree(send_entry); + rem_ref_cm_node(cm_node->cm_core, cm_node); } +} - if (datasize) { - u8 *dataloc = skb->data; - /* figure out what state we are in and handle transition to next state */ - switch (cm_node->state) { - case NES_CM_STATE_LISTENING: - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_SYN_SENT: - case NES_CM_STATE_FIN_WAIT1: - case NES_CM_STATE_FIN_WAIT2: - case NES_CM_STATE_CLOSE_WAIT: - case NES_CM_STATE_LAST_ACK: - case NES_CM_STATE_CLOSING: - break; - case NES_CM_STATE_MPAREQ_SENT: - /* recv the mpa res frame, ret=frame len (incl priv data) */ - ret = parse_mpa(cm_node, dataloc, datasize); - if (ret < 0) - break; - /* set the req frame payload len in skb */ - /* we are done handling this state, set node to a TSA state */ - cm_node->state = NES_CM_STATE_TSA; - send_ack(cm_node); - create_event(cm_node, NES_CM_EVENT_CONNECTED); - break; - - case NES_CM_STATE_ESTABLISHED: - /* we are expecting an MPA req frame */ - ret = parse_mpa(cm_node, dataloc, datasize); - if (ret < 0) { - break; - } - cm_node->state = NES_CM_STATE_TSA; - send_ack(cm_node); - /* we got a valid MPA request, create an event */ - create_event(cm_node, NES_CM_EVENT_MPA_REQ); - break; - case NES_CM_STATE_TSA: - handle_exception_pkt(cm_node, skb); - break; - case NES_CM_STATE_UNKNOWN: - case NES_CM_STATE_INITED: - default: - ret = -1; - } - } +static void cleanup_retrans_entry(struct nes_cm_node *cm_node) +{ + unsigned long flags; - return ret; + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + free_retrans_entry(cm_node); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); } +/** + * process_packet + * Returns skb if to be freed, else it will return NULL if already used.. + */ +static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct nes_cm_core *cm_core) +{ + enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN; + struct tcphdr *tcph = tcp_hdr(skb); + skb_pull(skb, ip_hdr(skb)->ihl << 2); + + nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d " + "ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn, + tcph->ack, tcph->rst, tcph->fin); + + if (tcph->rst) + pkt_type = NES_PKT_TYPE_RST; + else if (tcph->syn) { + pkt_type = NES_PKT_TYPE_SYN; + if (tcph->ack) + pkt_type = NES_PKT_TYPE_SYNACK; + } else if (tcph->fin) + pkt_type = NES_PKT_TYPE_FIN; + else if (tcph->ack) + pkt_type = NES_PKT_TYPE_ACK; + + switch (pkt_type) { + case NES_PKT_TYPE_SYN: + handle_syn_pkt(cm_node, skb, tcph); + break; + case NES_PKT_TYPE_SYNACK: + handle_synack_pkt(cm_node, skb, tcph); + break; + case NES_PKT_TYPE_ACK: + handle_ack_pkt(cm_node, skb, tcph); + break; + case NES_PKT_TYPE_RST: + handle_rst_pkt(cm_node, skb, tcph); + break; + case NES_PKT_TYPE_FIN: + handle_fin_pkt(cm_node, skb, tcph); + break; + default: + drop_packet(skb); + break; + } +} /** * mini_cm_listen - create a listen node with params */ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, - struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) + struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) { struct nes_cm_listener *listener; unsigned long flags; @@ -1645,36 +1849,35 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, * mini_cm_connect - make a connection node with params */ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, - struct nes_vnic *nesvnic, - struct ietf_mpa_frame *mpa_frame, - struct nes_cm_info *cm_info) + struct nes_vnic *nesvnic, u16 private_data_len, + void *private_data, struct nes_cm_info *cm_info) { int ret = 0; struct nes_cm_node *cm_node; struct nes_cm_listener *loopbackremotelistener; struct nes_cm_node *loopbackremotenode; struct nes_cm_info loopback_cm_info; - - u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + - ntohs(mpa_frame->priv_data_len); - - cm_info->loc_addr = htonl(cm_info->loc_addr); - cm_info->rem_addr = htonl(cm_info->rem_addr); - cm_info->loc_port = htons(cm_info->loc_port); - cm_info->rem_port = htons(cm_info->rem_port); + u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + private_data_len; + struct ietf_mpa_frame *mpa_frame = NULL; /* create a CM connection node */ cm_node = make_cm_node(cm_core, nesvnic, cm_info, NULL); if (!cm_node) return NULL; + mpa_frame = &cm_node->mpa_frame; + strcpy(mpa_frame->key, IEFT_MPA_KEY_REQ); + mpa_frame->flags = IETF_MPA_FLAGS_CRC; + mpa_frame->rev = IETF_MPA_VERSION; + mpa_frame->priv_data_len = htons(private_data_len); /* set our node side to client (active) side */ cm_node->tcp_cntxt.client = 1; cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; if (cm_info->loc_addr == cm_info->rem_addr) { - loopbackremotelistener = find_listener(cm_core, cm_node->rem_addr, - cm_node->rem_port, NES_CM_LISTENER_ACTIVE_STATE); + loopbackremotelistener = find_listener(cm_core, + ntohl(nesvnic->local_ipaddr), cm_node->rem_port, + NES_CM_LISTENER_ACTIVE_STATE); if (loopbackremotelistener == NULL) { create_event(cm_node, NES_CM_EVENT_ABORTED); } else { @@ -1683,26 +1886,35 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, loopback_cm_info.loc_port = cm_info->rem_port; loopback_cm_info.rem_port = cm_info->loc_port; loopback_cm_info.cm_id = loopbackremotelistener->cm_id; - loopbackremotenode = make_cm_node(cm_core, nesvnic, &loopback_cm_info, - loopbackremotelistener); + loopbackremotenode = make_cm_node(cm_core, nesvnic, + &loopback_cm_info, loopbackremotelistener); loopbackremotenode->loopbackpartner = cm_node; - loopbackremotenode->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; + loopbackremotenode->tcp_cntxt.rcv_wscale = + NES_CM_DEFAULT_RCV_WND_SCALE; cm_node->loopbackpartner = loopbackremotenode; - memcpy(loopbackremotenode->mpa_frame_buf, &mpa_frame->priv_data, - mpa_frame_size); - loopbackremotenode->mpa_frame_size = mpa_frame_size - - sizeof(struct ietf_mpa_frame); + memcpy(loopbackremotenode->mpa_frame_buf, private_data, + private_data_len); + loopbackremotenode->mpa_frame_size = private_data_len; - /* we are done handling this state, set node to a TSA state */ + /* we are done handling this state. */ + /* set node to a TSA state */ cm_node->state = NES_CM_STATE_TSA; - cm_node->tcp_cntxt.rcv_nxt = loopbackremotenode->tcp_cntxt.loc_seq_num; - loopbackremotenode->tcp_cntxt.rcv_nxt = cm_node->tcp_cntxt.loc_seq_num; - cm_node->tcp_cntxt.max_snd_wnd = loopbackremotenode->tcp_cntxt.rcv_wnd; - loopbackremotenode->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.rcv_wnd; - cm_node->tcp_cntxt.snd_wnd = loopbackremotenode->tcp_cntxt.rcv_wnd; - loopbackremotenode->tcp_cntxt.snd_wnd = cm_node->tcp_cntxt.rcv_wnd; - cm_node->tcp_cntxt.snd_wscale = loopbackremotenode->tcp_cntxt.rcv_wscale; - loopbackremotenode->tcp_cntxt.snd_wscale = cm_node->tcp_cntxt.rcv_wscale; + cm_node->tcp_cntxt.rcv_nxt = + loopbackremotenode->tcp_cntxt.loc_seq_num; + loopbackremotenode->tcp_cntxt.rcv_nxt = + cm_node->tcp_cntxt.loc_seq_num; + cm_node->tcp_cntxt.max_snd_wnd = + loopbackremotenode->tcp_cntxt.rcv_wnd; + loopbackremotenode->tcp_cntxt.max_snd_wnd = + cm_node->tcp_cntxt.rcv_wnd; + cm_node->tcp_cntxt.snd_wnd = + loopbackremotenode->tcp_cntxt.rcv_wnd; + loopbackremotenode->tcp_cntxt.snd_wnd = + cm_node->tcp_cntxt.rcv_wnd; + cm_node->tcp_cntxt.snd_wscale = + loopbackremotenode->tcp_cntxt.rcv_wscale; + loopbackremotenode->tcp_cntxt.snd_wscale = + cm_node->tcp_cntxt.rcv_wscale; create_event(loopbackremotenode, NES_CM_EVENT_MPA_REQ); } @@ -1712,16 +1924,29 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, /* set our node side to client (active) side */ cm_node->tcp_cntxt.client = 1; /* init our MPA frame ptr */ - memcpy(&cm_node->mpa_frame, mpa_frame, mpa_frame_size); + memcpy(mpa_frame->priv_data, private_data, private_data_len); + cm_node->mpa_frame_size = mpa_frame_size; /* send a syn and goto syn sent state */ cm_node->state = NES_CM_STATE_SYN_SENT; - ret = send_syn(cm_node, 0); + ret = send_syn(cm_node, 0, NULL); + + if (ret) { + /* error in sending the syn free up the cm_node struct */ + nes_debug(NES_DBG_CM, "Api - connect() FAILED: dest " + "addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n", + cm_node->rem_addr, cm_node->rem_port, cm_node, + cm_node->cm_id); + rem_ref_cm_node(cm_node->cm_core, cm_node); + cm_node = NULL; + } - nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X, port=0x%04x," - " cm_node=%p, cm_id = %p.\n", - cm_node->rem_addr, cm_node->rem_port, cm_node, cm_node->cm_id); + if (cm_node) + nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X," + "port=0x%04x, cm_node=%p, cm_id = %p.\n", + cm_node->rem_addr, cm_node->rem_port, cm_node, + cm_node->cm_id); return cm_node; } @@ -1731,8 +1956,8 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, * mini_cm_accept - accept a connection * This function is never called */ -static int mini_cm_accept(struct nes_cm_core *cm_core, struct ietf_mpa_frame *mpa_frame, - struct nes_cm_node *cm_node) +static int mini_cm_accept(struct nes_cm_core *cm_core, + struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node) { return 0; } @@ -1742,32 +1967,19 @@ static int mini_cm_accept(struct nes_cm_core *cm_core, struct ietf_mpa_frame *mp * mini_cm_reject - reject and teardown a connection */ static int mini_cm_reject(struct nes_cm_core *cm_core, - struct ietf_mpa_frame *mpa_frame, - struct nes_cm_node *cm_node) + struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node) { int ret = 0; - struct sk_buff *skb; - u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + - ntohs(mpa_frame->priv_data_len); - skb = get_free_pkt(cm_node); - if (!skb) { - nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); - return -1; - } + nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n", + __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state); - /* send an MPA Request frame */ - form_cm_frame(skb, cm_node, NULL, 0, mpa_frame, mpa_frame_size, SET_ACK | SET_FIN); - ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); - - cm_node->state = NES_CM_STATE_CLOSED; - ret = send_fin(cm_node, NULL); - - if (ret < 0) { - printk(KERN_INFO PFX "failed to send MPA Reply (reject)\n"); + if (cm_node->tcp_cntxt.client) return ret; - } + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + ret = send_reset(cm_node, NULL); return ret; } @@ -1783,37 +1995,40 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod return -EINVAL; switch (cm_node->state) { - /* if passed in node is null, create a reference key node for node search */ - /* check if we found an owner node for this pkt */ - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_SYN_SENT: - case NES_CM_STATE_ONE_SIDE_ESTABLISHED: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_ACCEPTING: - case NES_CM_STATE_MPAREQ_SENT: - cm_node->state = NES_CM_STATE_FIN_WAIT1; - send_fin(cm_node, NULL); - break; - case NES_CM_STATE_CLOSE_WAIT: - cm_node->state = NES_CM_STATE_LAST_ACK; - send_fin(cm_node, NULL); - break; - case NES_CM_STATE_FIN_WAIT1: - case NES_CM_STATE_FIN_WAIT2: - case NES_CM_STATE_LAST_ACK: - case NES_CM_STATE_TIME_WAIT: - case NES_CM_STATE_CLOSING: - ret = -1; - break; - case NES_CM_STATE_LISTENING: - case NES_CM_STATE_UNKNOWN: - case NES_CM_STATE_INITED: - case NES_CM_STATE_CLOSED: - case NES_CM_STATE_TSA: - ret = rem_ref_cm_node(cm_core, cm_node); - break; + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_ONE_SIDE_ESTABLISHED: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_ACCEPTING: + case NES_CM_STATE_MPAREQ_SENT: + case NES_CM_STATE_MPAREQ_RCVD: + cleanup_retrans_entry(cm_node); + send_reset(cm_node, NULL); + break; + case NES_CM_STATE_CLOSE_WAIT: + cm_node->state = NES_CM_STATE_LAST_ACK; + send_fin(cm_node, NULL); + break; + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_TIME_WAIT: + case NES_CM_STATE_CLOSING: + ret = -1; + break; + case NES_CM_STATE_LISTENING: + case NES_CM_STATE_UNKNOWN: + case NES_CM_STATE_INITED: + case NES_CM_STATE_CLOSED: + ret = rem_ref_cm_node(cm_core, cm_node); + break; + case NES_CM_STATE_TSA: + if (cm_node->send_entry) + printk(KERN_ERR "ERROR Close got called from STATE_TSA " + "send_entry=%p\n", cm_node->send_entry); + ret = rem_ref_cm_node(cm_core, cm_node); + break; } - cm_node->cm_id = NULL; return ret; } @@ -1822,25 +2037,30 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod * recv_pkt - recv an ETHERNET packet, and process it through CM * node state machine */ -static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvnic, - struct sk_buff *skb) +static void mini_cm_recv_pkt(struct nes_cm_core *cm_core, + struct nes_vnic *nesvnic, struct sk_buff *skb) { struct nes_cm_node *cm_node = NULL; struct nes_cm_listener *listener = NULL; struct iphdr *iph; struct tcphdr *tcph; struct nes_cm_info nfo; - int ret = 0; - if (!skb || skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) { - ret = -EINVAL; - goto out; + if (!skb) + return; + if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) { + dev_kfree_skb_any(skb); + return; } iph = (struct iphdr *)skb->data; tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr)); skb_reset_network_header(skb); skb_set_transport_header(skb, sizeof(*tcph)); + if (!tcph) { + dev_kfree_skb_any(skb); + return; + } skb->len = ntohs(iph->tot_len); nfo.loc_addr = ntohl(iph->daddr); @@ -1853,61 +2073,60 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvni NIPQUAD(iph->daddr), tcph->dest, NIPQUAD(iph->saddr), tcph->source); - /* note: this call is going to increment cm_node ref count */ - cm_node = find_node(cm_core, + do { + cm_node = find_node(cm_core, nfo.rem_port, nfo.rem_addr, nfo.loc_port, nfo.loc_addr); - if (!cm_node) { - listener = find_listener(cm_core, nfo.loc_addr, nfo.loc_port, - NES_CM_LISTENER_ACTIVE_STATE); - if (listener) { - nfo.cm_id = listener->cm_id; - nfo.conn_type = listener->conn_type; - } else { - nfo.cm_id = NULL; - nfo.conn_type = 0; - } - - cm_node = make_cm_node(cm_core, nesvnic, &nfo, listener); if (!cm_node) { - nes_debug(NES_DBG_CM, "Unable to allocate node\n"); + /* Only type of packet accepted are for */ + /* the PASSIVE open (syn only) */ + if ((!tcph->syn) || (tcph->ack)) { + cm_packets_dropped++; + break; + } + listener = find_listener(cm_core, nfo.loc_addr, + nfo.loc_port, + NES_CM_LISTENER_ACTIVE_STATE); if (listener) { - nes_debug(NES_DBG_CM, "unable to allocate node and decrementing listener refcount\n"); + nfo.cm_id = listener->cm_id; + nfo.conn_type = listener->conn_type; + } else { + nes_debug(NES_DBG_CM, "Unable to find listener " + "for the pkt\n"); + cm_packets_dropped++; + dev_kfree_skb_any(skb); + break; + } + + cm_node = make_cm_node(cm_core, nesvnic, &nfo, + listener); + if (!cm_node) { + nes_debug(NES_DBG_CM, "Unable to allocate " + "node\n"); + cm_packets_dropped++; atomic_dec(&listener->ref_count); + dev_kfree_skb_any(skb); + break; } - ret = -1; - goto out; - } - if (!listener) { - nes_debug(NES_DBG_CM, "Packet found for unknown port %x refcnt=%d\n", - nfo.loc_port, atomic_read(&cm_node->ref_count)); - if (!tcph->rst) { - nes_debug(NES_DBG_CM, "Packet found for unknown port=%d" - " rem_port=%d refcnt=%d\n", - nfo.loc_port, nfo.rem_port, atomic_read(&cm_node->ref_count)); - - cm_node->tcp_cntxt.rcv_nxt = ntohl(tcph->seq); - cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); - send_reset(cm_node); + if (!tcph->rst && !tcph->fin) { + cm_node->state = NES_CM_STATE_LISTENING; + } else { + cm_packets_dropped++; + rem_ref_cm_node(cm_core, cm_node); + dev_kfree_skb_any(skb); + break; } + add_ref_cm_node(cm_node); + } else if (cm_node->state == NES_CM_STATE_TSA) { rem_ref_cm_node(cm_core, cm_node); - ret = -1; - goto out; + atomic_inc(&cm_accel_dropped_pkts); + dev_kfree_skb_any(skb); + break; } - add_ref_cm_node(cm_node); - cm_node->state = NES_CM_STATE_LISTENING; - } - - nes_debug(NES_DBG_CM, "Processing Packet for node %p, data = (%p):\n", - cm_node, skb->data); - process_packet(cm_node, skb, cm_core); - - rem_ref_cm_node(cm_core, cm_node); - out: - if (skb) - dev_kfree_skb_any(skb); - return ret; + process_packet(cm_node, skb, cm_core); + rem_ref_cm_node(cm_core, cm_node); + } while (0); } @@ -2107,15 +2326,12 @@ int nes_cm_disconn(struct nes_qp *nesqp) if (nesqp->disconn_pending == 0) { nesqp->disconn_pending++; spin_unlock_irqrestore(&nesqp->lock, flags); - /* nes_add_ref(&nesqp->ibqp); */ /* init our disconnect work element, to */ INIT_WORK(&nesqp->disconn_work, nes_disconnect_worker); queue_work(g_cm_core->disconn_wq, &nesqp->disconn_work); - } else { + } else spin_unlock_irqrestore(&nesqp->lock, flags); - nes_rem_ref(&nesqp->ibqp); - } return 0; } @@ -2161,7 +2377,6 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) nes_debug(NES_DBG_CM, "QP%u disconnect_worker cmid is NULL\n", nesqp->hwqp.qp_id); spin_unlock_irqrestore(&nesqp->lock, flags); - nes_rem_ref(&nesqp->ibqp); return -1; } @@ -2184,28 +2399,30 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) { issued_disconnect_reset = 1; cm_event.status = IW_CM_EVENT_STATUS_RESET; - nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event (status reset) for " - " QP%u, cm_id = %p. \n", - nesqp->hwqp.qp_id, cm_id); - } else { + nes_debug(NES_DBG_CM, "Generating a CM " + "Disconnect Event (status reset) for " + "QP%u, cm_id = %p. \n", + nesqp->hwqp.qp_id, cm_id); + } else cm_event.status = IW_CM_EVENT_STATUS_OK; - } cm_event.local_addr = cm_id->local_addr; cm_event.remote_addr = cm_id->remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; - nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event for " - " QP%u, SQ Head = %u, SQ Tail = %u. cm_id = %p, refcount = %u.\n", - nesqp->hwqp.qp_id, - nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail, cm_id, - atomic_read(&nesqp->refcount)); + nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event" + " for QP%u, SQ Head = %u, SQ Tail = %u. " + "cm_id = %p, refcount = %u.\n", + nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, + nesqp->hwqp.sq_tail, cm_id, + atomic_read(&nesqp->refcount)); spin_unlock_irqrestore(&nesqp->lock, flags); ret = cm_id->event_handler(cm_id, &cm_event); if (ret) - nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); + nes_debug(NES_DBG_CM, "OFA CM event_handler " + "returned, ret=%d\n", ret); spin_lock_irqsave(&nesqp->lock, flags); } @@ -2247,31 +2464,24 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) if (nesqp->flush_issued == 0) { nesqp->flush_issued = 1; spin_unlock_irqrestore(&nesqp->lock, flags); - flush_wqes(nesvnic->nesdev, nesqp, NES_CQP_FLUSH_RQ, 1); - } else { + flush_wqes(nesvnic->nesdev, nesqp, + NES_CQP_FLUSH_RQ, 1); + } else spin_unlock_irqrestore(&nesqp->lock, flags); - } - - /* This reference is from either ModifyQP or the AE processing, - there is still a race here with modifyqp */ - nes_rem_ref(&nesqp->ibqp); - } else { cm_id = nesqp->cm_id; spin_unlock_irqrestore(&nesqp->lock, flags); /* check to see if the inbound reset beat the outbound reset */ if ((!cm_id) && (last_ae==NES_AEQE_AEID_RESET_SENT)) { - nes_debug(NES_DBG_CM, "QP%u: Decing refcount due to inbound reset" - " beating the outbound reset.\n", - nesqp->hwqp.qp_id); - nes_rem_ref(&nesqp->ibqp); + nes_debug(NES_DBG_CM, "QP%u: Decing refcount " + "due to inbound reset beating the " + "outbound reset.\n", nesqp->hwqp.qp_id); } } } else { nesqp->disconn_pending = 0; spin_unlock_irqrestore(&nesqp->lock, flags); } - nes_rem_ref(&nesqp->ibqp); return 0; } @@ -2313,7 +2523,6 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt) nes_debug(NES_DBG_CM, "Call close API\n"); g_cm_core->api->close(g_cm_core, nesqp->cm_node); - nesqp->cm_node = NULL; } return ret; @@ -2349,71 +2558,82 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nesdev = nesvnic->nesdev; adapter = nesdev->nesadapter; - nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n", - nesvnic, nesvnic->netdev, nesvnic->netdev->name); - - /* since this is from a listen, we were able to put node handle into cm_id */ cm_node = (struct nes_cm_node *)cm_id->provider_data; + nes_debug(NES_DBG_CM, "nes_accept: cm_node= %p nesvnic=%p, netdev=%p," + "%s\n", cm_node, nesvnic, nesvnic->netdev, + nesvnic->netdev->name); /* associate the node with the QP */ nesqp->cm_node = (void *)cm_node; + cm_node->nesqp = nesqp; + nes_add_ref(&nesqp->ibqp); - nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu\n", - nesqp->hwqp.qp_id, cm_node, jiffies); + nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu listener = %p\n", + nesqp->hwqp.qp_id, cm_node, jiffies, cm_node->listener); atomic_inc(&cm_accepts); nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n", atomic_read(&nesvnic->netdev->refcnt)); - /* allocate the ietf frame and space for private data */ - nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev, - sizeof(struct ietf_mpa_frame) + conn_param->private_data_len, - &nesqp->ietf_frame_pbase); - - if (!nesqp->ietf_frame) { - nes_debug(NES_DBG_CM, "Unable to allocate memory for private data\n"); - return -ENOMEM; - } + /* allocate the ietf frame and space for private data */ + nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev, + sizeof(struct ietf_mpa_frame) + conn_param->private_data_len, + &nesqp->ietf_frame_pbase); + if (!nesqp->ietf_frame) { + nes_debug(NES_DBG_CM, "Unable to allocate memory for private " + "data\n"); + return -ENOMEM; + } - /* setup the MPA frame */ - nesqp->private_data_len = conn_param->private_data_len; - memcpy(nesqp->ietf_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE); - memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data, - conn_param->private_data_len); + /* setup the MPA frame */ + nesqp->private_data_len = conn_param->private_data_len; + memcpy(nesqp->ietf_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE); - nesqp->ietf_frame->priv_data_len = cpu_to_be16(conn_param->private_data_len); - nesqp->ietf_frame->rev = mpa_version; - nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC; + memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data, + conn_param->private_data_len); - /* setup our first outgoing iWarp send WQE (the IETF frame response) */ - wqe = &nesqp->hwqp.sq_vbase[0]; + nesqp->ietf_frame->priv_data_len = + cpu_to_be16(conn_param->private_data_len); + nesqp->ietf_frame->rev = mpa_version; + nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC; - if (cm_id->remote_addr.sin_addr.s_addr != cm_id->local_addr.sin_addr.s_addr) { - u64temp = (unsigned long)nesqp; - u64temp |= NES_SW_CONTEXT_ALIGN>>1; - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, - u64temp); - wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = - cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING | NES_IWARP_SQ_WQE_WRPDU); - wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = - cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame)); - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = - cpu_to_le32((u32)nesqp->ietf_frame_pbase); - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = - cpu_to_le32((u32)((u64)nesqp->ietf_frame_pbase >> 32)); - wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = - cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame)); - wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; - - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32( - NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | NES_QPCONTEXT_ORDIRD_WRPDU); - } else { - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | - NES_QPCONTEXT_ORDIRD_WRPDU | NES_QPCONTEXT_ORDIRD_ALSMM)); - } - nesqp->skip_lsmm = 1; + /* setup our first outgoing iWarp send WQE (the IETF frame response) */ + wqe = &nesqp->hwqp.sq_vbase[0]; + + if (cm_id->remote_addr.sin_addr.s_addr != + cm_id->local_addr.sin_addr.s_addr) { + u64temp = (unsigned long)nesqp; + u64temp |= NES_SW_CONTEXT_ALIGN>>1; + set_wqe_64bit_value(wqe->wqe_words, + NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, + u64temp); + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = + cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING | + NES_IWARP_SQ_WQE_WRPDU); + wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = + cpu_to_le32(conn_param->private_data_len + + sizeof(struct ietf_mpa_frame)); + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = + cpu_to_le32((u32)nesqp->ietf_frame_pbase); + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = + cpu_to_le32((u32)((u64)nesqp->ietf_frame_pbase >> 32)); + wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = + cpu_to_le32(conn_param->private_data_len + + sizeof(struct ietf_mpa_frame)); + wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; + + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | + NES_QPCONTEXT_ORDIRD_WRPDU); + } else { + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32((NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | + NES_QPCONTEXT_ORDIRD_WRPDU | + NES_QPCONTEXT_ORDIRD_ALSMM)); + } + nesqp->skip_lsmm = 1; /* Cache the cm_id in the qp */ @@ -2424,55 +2644,75 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_id->provider_data = nesqp; nesqp->active_conn = 0; + if (cm_node->state == NES_CM_STATE_TSA) + nes_debug(NES_DBG_CM, "Already state = TSA for cm_node=%p\n", + cm_node); + nes_cm_init_tsa_conn(nesqp, cm_node); - nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); - nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); - nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); + nesqp->nesqp_context->tcpPorts[0] = + cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); + nesqp->nesqp_context->tcpPorts[1] = + cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); + + if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) + nesqp->nesqp_context->ip0 = + cpu_to_le32(ntohl(nesvnic->local_ipaddr)); + else + nesqp->nesqp_context->ip0 = + cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); nesqp->nesqp_context->misc2 |= cpu_to_le32( - (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); + (u32)PCI_FUNC(nesdev->pcidev->devfn) << + NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); - nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32( - nes_arp_table(nesdev, le32_to_cpu(nesqp->nesqp_context->ip0), NULL, + nesqp->nesqp_context->arp_index_vlan |= + cpu_to_le32(nes_arp_table(nesdev, + le32_to_cpu(nesqp->nesqp_context->ip0), NULL, NES_ARP_RESOLVE) << 16); nesqp->nesqp_context->ts_val_delta = cpu_to_le32( - jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); + jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id); nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32( - ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT)); - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord); + ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT)); + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32((u32)conn_param->ord); memset(&nes_quad, 0, sizeof(nes_quad)); - nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; - nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; - nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; + nes_quad.DstIpAdrIndex = + cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); + if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) + nes_quad.SrcIpadr = nesvnic->local_ipaddr; + else + nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; + nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; + nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; /* Produce hash key */ crc_value = get_crc_value(&nes_quad); nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n", - nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask); + nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask); nesqp->hte_index &= adapter->hte_index_mask; nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node); - nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = 0x%08X:0x%04X," - " rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + private data length=%zu.\n", - nesqp->hwqp.qp_id, + nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = " + "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + " + "private data length=%zu.\n", nesqp->hwqp.qp_id, ntohl(cm_id->remote_addr.sin_addr.s_addr), ntohs(cm_id->remote_addr.sin_port), ntohl(cm_id->local_addr.sin_addr.s_addr), ntohs(cm_id->local_addr.sin_port), le32_to_cpu(nesqp->nesqp_context->rcv_nxt), le32_to_cpu(nesqp->nesqp_context->snd_nxt), - conn_param->private_data_len+sizeof(struct ietf_mpa_frame)); + conn_param->private_data_len + + sizeof(struct ietf_mpa_frame)); attr.qp_state = IB_QPS_RTS; nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); @@ -2489,15 +2729,16 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_event.private_data_len = 0; ret = cm_id->event_handler(cm_id, &cm_event); if (cm_node->loopbackpartner) { - cm_node->loopbackpartner->mpa_frame_size = nesqp->private_data_len; + cm_node->loopbackpartner->mpa_frame_size = + nesqp->private_data_len; /* copy entire MPA frame to our cm_node's frame */ - memcpy(cm_node->loopbackpartner->mpa_frame_buf, nesqp->ietf_frame->priv_data, - nesqp->private_data_len); + memcpy(cm_node->loopbackpartner->mpa_frame_buf, + nesqp->ietf_frame->priv_data, nesqp->private_data_len); create_event(cm_node->loopbackpartner, NES_CM_EVENT_CONNECTED); } if (ret) - printk("%s[%u] OFA CM event_handler returned, ret=%d\n", - __func__, __LINE__, ret); + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " + "ret=%d\n", __func__, __LINE__, ret); return 0; } @@ -2555,74 +2796,62 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (!nesdev) return -EINVAL; - atomic_inc(&cm_connects); - - nesqp->ietf_frame = kzalloc(sizeof(struct ietf_mpa_frame) + - conn_param->private_data_len, GFP_KERNEL); - if (!nesqp->ietf_frame) - return -ENOMEM; + nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = " + "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id, + ntohl(nesvnic->local_ipaddr), + ntohl(cm_id->remote_addr.sin_addr.s_addr), + ntohs(cm_id->remote_addr.sin_port), + ntohl(cm_id->local_addr.sin_addr.s_addr), + ntohs(cm_id->local_addr.sin_port)); - /* set qp as having an active connection */ + atomic_inc(&cm_connects); nesqp->active_conn = 1; - nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", - nesqp->hwqp.qp_id, - ntohl(cm_id->remote_addr.sin_addr.s_addr), - ntohs(cm_id->remote_addr.sin_port), - ntohl(cm_id->local_addr.sin_addr.s_addr), - ntohs(cm_id->local_addr.sin_port)); - /* cache the cm_id in the qp */ nesqp->cm_id = cm_id; cm_id->provider_data = nesqp; - /* copy the private data */ - if (conn_param->private_data_len) { - memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data, - conn_param->private_data_len); - } - nesqp->private_data_len = conn_param->private_data_len; nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord); nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord); - nes_debug(NES_DBG_CM, "mpa private data len =%u\n", conn_param->private_data_len); - - strcpy(&nesqp->ietf_frame->key[0], IEFT_MPA_KEY_REQ); - nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC; - nesqp->ietf_frame->rev = IETF_MPA_VERSION; - nesqp->ietf_frame->priv_data_len = htons(conn_param->private_data_len); + nes_debug(NES_DBG_CM, "mpa private data len =%u\n", + conn_param->private_data_len); - if (cm_id->local_addr.sin_addr.s_addr != cm_id->remote_addr.sin_addr.s_addr) + if (cm_id->local_addr.sin_addr.s_addr != + cm_id->remote_addr.sin_addr.s_addr) nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), - PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); + PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); /* set up the connection params for the node */ - cm_info.loc_addr = (cm_id->local_addr.sin_addr.s_addr); - cm_info.loc_port = (cm_id->local_addr.sin_port); - cm_info.rem_addr = (cm_id->remote_addr.sin_addr.s_addr); - cm_info.rem_port = (cm_id->remote_addr.sin_port); + cm_info.loc_addr = htonl(cm_id->local_addr.sin_addr.s_addr); + cm_info.loc_port = htons(cm_id->local_addr.sin_port); + cm_info.rem_addr = htonl(cm_id->remote_addr.sin_addr.s_addr); + cm_info.rem_port = htons(cm_id->remote_addr.sin_port); cm_info.cm_id = cm_id; cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; cm_id->add_ref(cm_id); - nes_add_ref(&nesqp->ibqp); /* create a connect CM node connection */ - cm_node = g_cm_core->api->connect(g_cm_core, nesvnic, nesqp->ietf_frame, &cm_info); + cm_node = g_cm_core->api->connect(g_cm_core, nesvnic, + conn_param->private_data_len, (void *)conn_param->private_data, + &cm_info); if (!cm_node) { - if (cm_id->local_addr.sin_addr.s_addr != cm_id->remote_addr.sin_addr.s_addr) + if (cm_id->local_addr.sin_addr.s_addr != + cm_id->remote_addr.sin_addr.s_addr) nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), - PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); - nes_rem_ref(&nesqp->ibqp); - kfree(nesqp->ietf_frame); - nesqp->ietf_frame = NULL; + PCI_FUNC(nesdev->pcidev->devfn), + NES_MANAGE_APBVT_DEL); + cm_id->rem_ref(cm_id); return -ENOMEM; } cm_node->apbvt_set = 1; nesqp->cm_node = cm_node; + cm_node->nesqp = nesqp; + nes_add_ref(&nesqp->ibqp); return 0; } @@ -2664,7 +2893,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info); if (!cm_node) { - printk("%s[%u] Error returned from listen API call\n", + printk(KERN_ERR "%s[%u] Error returned from listen API call\n", __func__, __LINE__); return -ENOMEM; } @@ -2672,10 +2901,13 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) cm_id->provider_data = cm_node; if (!cm_node->reused_node) { - err = nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), - PCI_FUNC(nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); + err = nes_manage_apbvt(nesvnic, + ntohs(cm_id->local_addr.sin_port), + PCI_FUNC(nesvnic->nesdev->pcidev->devfn), + NES_MANAGE_APBVT_ADD); if (err) { - printk("nes_manage_apbvt call returned %d.\n", err); + printk(KERN_ERR "nes_manage_apbvt call returned %d.\n", + err); g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node); return err; } @@ -2795,53 +3027,70 @@ static void cm_event_connected(struct nes_cm_event *event) nes_cm_init_tsa_conn(nesqp, cm_node); /* set the QP tsa context */ - nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); - nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); - nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); + nesqp->nesqp_context->tcpPorts[0] = + cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); + nesqp->nesqp_context->tcpPorts[1] = + cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); + if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) + nesqp->nesqp_context->ip0 = + cpu_to_le32(ntohl(nesvnic->local_ipaddr)); + else + nesqp->nesqp_context->ip0 = + cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); nesqp->nesqp_context->misc2 |= cpu_to_le32( - (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); + (u32)PCI_FUNC(nesdev->pcidev->devfn) << + NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32( - nes_arp_table(nesdev, le32_to_cpu(nesqp->nesqp_context->ip0), + nes_arp_table(nesdev, + le32_to_cpu(nesqp->nesqp_context->ip0), NULL, NES_ARP_RESOLVE) << 16); nesqp->nesqp_context->ts_val_delta = cpu_to_le32( jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id); nesqp->nesqp_context->ird_ord_sizes |= - cpu_to_le32((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT); + cpu_to_le32((u32)1 << + NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT); /* Adjust tail for not having a LSMM */ nesqp->hwqp.sq_tail = 1; #if defined(NES_SEND_FIRST_WRITE) - if (cm_node->send_write0) { - nes_debug(NES_DBG_CM, "Sending first write.\n"); - wqe = &nesqp->hwqp.sq_vbase[0]; - u64temp = (unsigned long)nesqp; - u64temp |= NES_SW_CONTEXT_ALIGN>>1; - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, - u64temp); - wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(NES_IWARP_SQ_OP_RDMAW); - wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; - - /* use the reserved spot on the WQ for the extra first WQE */ - nesqp->nesqp_context->ird_ord_sizes &= cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | - NES_QPCONTEXT_ORDIRD_WRPDU | NES_QPCONTEXT_ORDIRD_ALSMM)); - nesqp->skip_lsmm = 1; - nesqp->hwqp.sq_tail = 0; - nes_write32(nesdev->regs + NES_WQE_ALLOC, - (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); - } + if (cm_node->send_write0) { + nes_debug(NES_DBG_CM, "Sending first write.\n"); + wqe = &nesqp->hwqp.sq_vbase[0]; + u64temp = (unsigned long)nesqp; + u64temp |= NES_SW_CONTEXT_ALIGN>>1; + set_wqe_64bit_value(wqe->wqe_words, + NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp); + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = + cpu_to_le32(NES_IWARP_SQ_OP_RDMAW); + wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; + + /* use the reserved spot on the WQ for the extra first WQE */ + nesqp->nesqp_context->ird_ord_sizes &= + cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | + NES_QPCONTEXT_ORDIRD_WRPDU | + NES_QPCONTEXT_ORDIRD_ALSMM)); + nesqp->skip_lsmm = 1; + nesqp->hwqp.sq_tail = 0; + nes_write32(nesdev->regs + NES_WQE_ALLOC, + (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); + } #endif memset(&nes_quad, 0, sizeof(nes_quad)); - nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; + nes_quad.DstIpAdrIndex = + cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); + if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) + nes_quad.SrcIpadr = nesvnic->local_ipaddr; + else + nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; @@ -2858,10 +3107,6 @@ static void cm_event_connected(struct nes_cm_event *event) nesqp->private_data_len = (u8) cm_node->mpa_frame_size; cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node); - /* modify QP state to rts */ - attr.qp_state = IB_QPS_RTS; - nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); - /* notify OF layer we successfully created the requested connection */ cm_event.event = IW_CM_EVENT_CONNECT_REPLY; cm_event.status = IW_CM_EVENT_STATUS_ACCEPTED; @@ -2870,20 +3115,21 @@ static void cm_event_connected(struct nes_cm_event *event) cm_event.local_addr.sin_port = cm_id->local_addr.sin_port; cm_event.remote_addr = cm_id->remote_addr; - cm_event.private_data = (void *)event->cm_node->mpa_frame_buf; - cm_event.private_data_len = (u8) event->cm_node->mpa_frame_size; + cm_event.private_data = (void *)event->cm_node->mpa_frame_buf; + cm_event.private_data_len = (u8) event->cm_node->mpa_frame_size; cm_event.local_addr.sin_addr.s_addr = event->cm_info.rem_addr; ret = cm_id->event_handler(cm_id, &cm_event); nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); if (ret) - printk("%s[%u] OFA CM event_handler returned, ret=%d\n", - __func__, __LINE__, ret); - nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = %lu\n", - nesqp->hwqp.qp_id, jiffies ); + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " + "ret=%d\n", __func__, __LINE__, ret); + attr.qp_state = IB_QPS_RTS; + nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); - nes_rem_ref(&nesqp->ibqp); + nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = " + "%lu\n", nesqp->hwqp.qp_id, jiffies); return; } @@ -2927,17 +3173,18 @@ static void cm_event_connect_error(struct nes_cm_event *event) cm_event.private_data = NULL; cm_event.private_data_len = 0; - nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, remove_addr=%08x\n", - cm_event.local_addr.sin_addr.s_addr, cm_event.remote_addr.sin_addr.s_addr); + nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, " + "remove_addr=%08x\n", cm_event.local_addr.sin_addr.s_addr, + cm_event.remote_addr.sin_addr.s_addr); ret = cm_id->event_handler(cm_id, &cm_event); nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); if (ret) - printk("%s[%u] OFA CM event_handler returned, ret=%d\n", - __func__, __LINE__, ret); - nes_rem_ref(&nesqp->ibqp); - cm_id->rem_ref(cm_id); + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " + "ret=%d\n", __func__, __LINE__, ret); + cm_id->rem_ref(cm_id); + rem_ref_cm_node(event->cm_node->cm_core, event->cm_node); return; } @@ -3040,7 +3287,8 @@ static int nes_cm_post_event(struct nes_cm_event *event) add_ref_cm_node(event->cm_node); event->cm_info.cm_id->add_ref(event->cm_info.cm_id); INIT_WORK(&event->event_work, nes_cm_event_handler); - nes_debug(NES_DBG_CM, "queue_work, event=%p\n", event); + nes_debug(NES_DBG_CM, "cm_node=%p queue_work, event=%p\n", + event->cm_node, event); queue_work(event->cm_node->cm_core->event_wq, &event->event_work); @@ -3056,46 +3304,48 @@ static int nes_cm_post_event(struct nes_cm_event *event) */ static void nes_cm_event_handler(struct work_struct *work) { - struct nes_cm_event *event = container_of(work, struct nes_cm_event, event_work); + struct nes_cm_event *event = container_of(work, struct nes_cm_event, + event_work); struct nes_cm_core *cm_core; - if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core)) { + if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core)) return; - } + cm_core = event->cm_node->cm_core; nes_debug(NES_DBG_CM, "event=%p, event->type=%u, events posted=%u\n", - event, event->type, atomic_read(&cm_core->events_posted)); + event, event->type, atomic_read(&cm_core->events_posted)); switch (event->type) { - case NES_CM_EVENT_MPA_REQ: - cm_event_mpa_req(event); - nes_debug(NES_DBG_CM, "CM Event: MPA REQUEST\n"); + case NES_CM_EVENT_MPA_REQ: + cm_event_mpa_req(event); + nes_debug(NES_DBG_CM, "cm_node=%p CM Event: MPA REQUEST\n", + event->cm_node); + break; + case NES_CM_EVENT_RESET: + nes_debug(NES_DBG_CM, "cm_node = %p CM Event: RESET\n", + event->cm_node); + cm_event_reset(event); + break; + case NES_CM_EVENT_CONNECTED: + if ((!event->cm_node->cm_id) || + (event->cm_node->state != NES_CM_STATE_TSA)) break; - case NES_CM_EVENT_RESET: - nes_debug(NES_DBG_CM, "CM Event: RESET\n"); - cm_event_reset(event); - break; - case NES_CM_EVENT_CONNECTED: - if ((!event->cm_node->cm_id) || - (event->cm_node->state != NES_CM_STATE_TSA)) { - break; - } - cm_event_connected(event); - nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n"); - break; - case NES_CM_EVENT_ABORTED: - if ((!event->cm_node->cm_id) || (event->cm_node->state == NES_CM_STATE_TSA)) { - break; - } - cm_event_connect_error(event); - nes_debug(NES_DBG_CM, "CM Event: ABORTED\n"); - break; - case NES_CM_EVENT_DROPPED_PKT: - nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n"); - break; - default: - nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n"); + cm_event_connected(event); + nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n"); + break; + case NES_CM_EVENT_ABORTED: + if ((!event->cm_node->cm_id) || + (event->cm_node->state == NES_CM_STATE_TSA)) break; + cm_event_connect_error(event); + nes_debug(NES_DBG_CM, "CM Event: ABORTED\n"); + break; + case NES_CM_EVENT_DROPPED_PKT: + nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n"); + break; + default: + nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n"); + break; } atomic_dec(&cm_core->events_posted); diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index 7717cb2ab50..367b3d29014 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -83,6 +83,8 @@ enum nes_timer_type { #define SET_FIN 4 #define SET_RST 8 +#define TCP_OPTIONS_PADDING 3 + struct option_base { u8 optionnum; u8 length; @@ -177,6 +179,7 @@ enum nes_cm_node_state { NES_CM_STATE_ESTABLISHED, NES_CM_STATE_ACCEPTING, NES_CM_STATE_MPAREQ_SENT, + NES_CM_STATE_MPAREQ_RCVD, NES_CM_STATE_TSA, NES_CM_STATE_FIN_WAIT1, NES_CM_STATE_FIN_WAIT2, @@ -187,6 +190,16 @@ enum nes_cm_node_state { NES_CM_STATE_CLOSED }; +enum nes_tcpip_pkt_type { + NES_PKT_TYPE_UNKNOWN, + NES_PKT_TYPE_SYN, + NES_PKT_TYPE_SYNACK, + NES_PKT_TYPE_ACK, + NES_PKT_TYPE_FIN, + NES_PKT_TYPE_RST +}; + + /* type of nes connection */ enum nes_cm_conn_type { NES_CM_IWARP_CONN_TYPE, @@ -257,7 +270,9 @@ struct nes_cm_node { struct net_device *netdev; struct nes_cm_node *loopbackpartner; - struct list_head retrans_list; + + struct nes_timer_entry *send_entry; + spinlock_t retrans_list_lock; struct list_head recv_list; spinlock_t recv_list_lock; @@ -276,6 +291,8 @@ struct nes_cm_node { struct nes_vnic *nesvnic; int apbvt_set; int accept_pend; + int freed; + struct nes_qp *nesqp; }; /* structure for client or CM to fill when making CM api calls. */ @@ -366,14 +383,14 @@ struct nes_cm_ops { struct nes_cm_info *); int (*stop_listener)(struct nes_cm_core *, struct nes_cm_listener *); struct nes_cm_node * (*connect)(struct nes_cm_core *, - struct nes_vnic *, struct ietf_mpa_frame *, + struct nes_vnic *, u16, void *, struct nes_cm_info *); int (*close)(struct nes_cm_core *, struct nes_cm_node *); int (*accept)(struct nes_cm_core *, struct ietf_mpa_frame *, struct nes_cm_node *); int (*reject)(struct nes_cm_core *, struct ietf_mpa_frame *, struct nes_cm_node *); - int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *, + void (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *); int (*destroy_cm_core)(struct nes_cm_core *); int (*get)(struct nes_cm_core *); diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 85f26d19a32..7c49cc882d7 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -55,18 +55,19 @@ u32 int_mod_cq_depth_24; u32 int_mod_cq_depth_16; u32 int_mod_cq_depth_4; u32 int_mod_cq_depth_1; - +static const u8 nes_max_critical_error_count = 100; #include "nes_cm.h" static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq); static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count); static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, - u8 OneG_Mode); + struct nes_adapter *nesadapter, u8 OneG_Mode); static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq); static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq); static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq); static void nes_process_iwarp_aeqe(struct nes_device *nesdev, struct nes_hw_aeqe *aeqe); +static void process_critical_error(struct nes_device *nesdev); static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number); static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode); @@ -222,11 +223,10 @@ static void nes_nic_tune_timer(struct nes_device *nesdev) } /* boundary checking */ - if (shared_timer->timer_in_use > NES_NIC_FAST_TIMER_HIGH) - shared_timer->timer_in_use = NES_NIC_FAST_TIMER_HIGH; - else if (shared_timer->timer_in_use < NES_NIC_FAST_TIMER_LOW) { - shared_timer->timer_in_use = NES_NIC_FAST_TIMER_LOW; - } + if (shared_timer->timer_in_use > shared_timer->threshold_high) + shared_timer->timer_in_use = shared_timer->threshold_high; + else if (shared_timer->timer_in_use < shared_timer->threshold_low) + shared_timer->timer_in_use = shared_timer->threshold_low; nesdev->currcq_count = 0; @@ -292,9 +292,6 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { if ((port_count = nes_reset_adapter_ne020(nesdev, &OneG_Mode)) == 0) return NULL; - if (nes_init_serdes(nesdev, hw_rev, port_count, OneG_Mode)) - return NULL; - nes_init_csr_ne020(nesdev, hw_rev, port_count); max_qp = nes_read_indexed(nesdev, NES_IDX_QP_CTX_SIZE); nes_debug(NES_DBG_INIT, "QP_CTX_SIZE=%u\n", max_qp); @@ -353,6 +350,22 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { nes_debug(NES_DBG_INIT, "Allocating new nesadapter @ %p, size = %u (actual size = %u).\n", nesadapter, (u32)sizeof(struct nes_adapter), adapter_size); + if (nes_read_eeprom_values(nesdev, nesadapter)) { + printk(KERN_ERR PFX "Unable to read EEPROM data.\n"); + kfree(nesadapter); + return NULL; + } + + if (nes_init_serdes(nesdev, hw_rev, port_count, nesadapter, + OneG_Mode)) { + kfree(nesadapter); + return NULL; + } + nes_init_csr_ne020(nesdev, hw_rev, port_count); + + memset(nesadapter->pft_mcast_map, 255, + sizeof nesadapter->pft_mcast_map); + /* populate the new nesadapter */ nesadapter->devfn = nesdev->pcidev->devfn; nesadapter->bus_number = nesdev->pcidev->bus->number; @@ -468,20 +481,25 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { /* setup port configuration */ if (nesadapter->port_count == 1) { - u32temp = 0x00000000; + nesadapter->log_port = 0x00000000; if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000002); else nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003); } else { - if (nesadapter->port_count == 2) - u32temp = 0x00000044; - else - u32temp = 0x000000e4; + if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) { + nesadapter->log_port = 0x000000D8; + } else { + if (nesadapter->port_count == 2) + nesadapter->log_port = 0x00000044; + else + nesadapter->log_port = 0x000000e4; + } nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003); } - nes_write_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT, u32temp); + nes_write_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT, + nesadapter->log_port); nes_debug(NES_DBG_INIT, "Probe time, LOG2PHY=%u\n", nes_read_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT)); @@ -706,23 +724,43 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ * nes_init_serdes */ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, - u8 OneG_Mode) + struct nes_adapter *nesadapter, u8 OneG_Mode) { int i; u32 u32temp; + u32 serdes_common_control; if (hw_rev != NE020_REV) { /* init serdes 0 */ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); - if (!OneG_Mode) + if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) { + serdes_common_control = nes_read_indexed(nesdev, + NES_IDX_ETH_SERDES_COMMON_CONTROL0); + serdes_common_control |= 0x000000100; + nes_write_indexed(nesdev, + NES_IDX_ETH_SERDES_COMMON_CONTROL0, + serdes_common_control); + } else if (!OneG_Mode) { nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000); - if (port_count > 1) { + } + if (((port_count > 1) && + (nesadapter->phy_type[0] != NES_PHY_TYPE_PUMA_1G)) || + ((port_count > 2) && + (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G))) { /* init serdes 1 */ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000FF); - if (!OneG_Mode) + if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) { + serdes_common_control = nes_read_indexed(nesdev, + NES_IDX_ETH_SERDES_COMMON_CONTROL1); + serdes_common_control |= 0x000000100; + nes_write_indexed(nesdev, + NES_IDX_ETH_SERDES_COMMON_CONTROL1, + serdes_common_control); + } else if (!OneG_Mode) { nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1, 0x11110000); } + } } else { /* init serdes 0 */ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008); @@ -826,7 +864,8 @@ static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_cou nes_write_indexed(nesdev, 0x00005000, 0x00018000); /* nes_write_indexed(nesdev, 0x00005000, 0x00010000); */ - nes_write_indexed(nesdev, 0x00005004, 0x00020001); + nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1, (wqm_quanta << 1) | + 0x00000001); nes_write_indexed(nesdev, 0x00005008, 0x1F1F1F1F); nes_write_indexed(nesdev, 0x00005010, 0x1F1F1F1F); nes_write_indexed(nesdev, 0x00005018, 0x1F1F1F1F); @@ -1226,6 +1265,7 @@ int nes_init_phy(struct nes_device *nesdev) if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_1G) { printk(PFX "%s: Programming mdc config for 1G\n", __func__); tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); + tx_config &= 0xFFFFFFE3; tx_config |= 0x04; nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); } @@ -1291,7 +1331,8 @@ int nes_init_phy(struct nes_device *nesdev) (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) { /* setup 10G MDIO operation */ tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); - tx_config |= 0x14; + tx_config &= 0xFFFFFFE3; + tx_config |= 0x15; nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); } if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) { @@ -1315,7 +1356,7 @@ int nes_init_phy(struct nes_device *nesdev) nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc319, 0x0008); nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc31a, 0x0098); nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0026, 0x0E00); - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0027, 0x0000); + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0027, 0x0001); nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0028, 0xA528); /* @@ -1759,9 +1800,14 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) */ void nes_destroy_nic_qp(struct nes_vnic *nesvnic) { + u64 u64temp; + dma_addr_t bus_address; struct nes_device *nesdev = nesvnic->nesdev; struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_hw_nic_sq_wqe *nic_sqe; struct nes_hw_nic_rq_wqe *nic_rqe; + __le16 *wqe_fragment_length; + u16 wqe_fragment_index; u64 wqe_frag; u32 cqp_head; unsigned long flags; @@ -1770,14 +1816,69 @@ void nes_destroy_nic_qp(struct nes_vnic *nesvnic) /* Free remaining NIC receive buffers */ while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) { nic_rqe = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail]; - wqe_frag = (u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]); - wqe_frag |= ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32; + wqe_frag = (u64)le32_to_cpu( + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]); + wqe_frag |= ((u64)le32_to_cpu( + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX]))<<32; pci_unmap_single(nesdev->pcidev, (dma_addr_t)wqe_frag, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); dev_kfree_skb(nesvnic->nic.rx_skb[nesvnic->nic.rq_tail++]); nesvnic->nic.rq_tail &= (nesvnic->nic.rq_size - 1); } + /* Free remaining NIC transmit buffers */ + while (nesvnic->nic.sq_head != nesvnic->nic.sq_tail) { + nic_sqe = &nesvnic->nic.sq_vbase[nesvnic->nic.sq_tail]; + wqe_fragment_index = 1; + wqe_fragment_length = (__le16 *) + &nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX]; + /* bump past the vlan tag */ + wqe_fragment_length++; + if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) { + u64temp = (u64)le32_to_cpu( + nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+ + wqe_fragment_index*2]); + u64temp += ((u64)le32_to_cpu( + nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX + + wqe_fragment_index*2]))<<32; + bus_address = (dma_addr_t)u64temp; + if (test_and_clear_bit(nesvnic->nic.sq_tail, + nesvnic->nic.first_frag_overflow)) { + pci_unmap_single(nesdev->pcidev, + bus_address, + le16_to_cpu(wqe_fragment_length[ + wqe_fragment_index++]), + PCI_DMA_TODEVICE); + } + for (; wqe_fragment_index < 5; wqe_fragment_index++) { + if (wqe_fragment_length[wqe_fragment_index]) { + u64temp = le32_to_cpu( + nic_sqe->wqe_words[ + NES_NIC_SQ_WQE_FRAG0_LOW_IDX+ + wqe_fragment_index*2]); + u64temp += ((u64)le32_to_cpu( + nic_sqe->wqe_words[ + NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+ + wqe_fragment_index*2]))<<32; + bus_address = (dma_addr_t)u64temp; + pci_unmap_page(nesdev->pcidev, + bus_address, + le16_to_cpu( + wqe_fragment_length[ + wqe_fragment_index]), + PCI_DMA_TODEVICE); + } else + break; + } + } + if (nesvnic->nic.tx_skb[nesvnic->nic.sq_tail]) + dev_kfree_skb( + nesvnic->nic.tx_skb[nesvnic->nic.sq_tail]); + + nesvnic->nic.sq_tail = (++nesvnic->nic.sq_tail) + & (nesvnic->nic.sq_size - 1); + } + spin_lock_irqsave(&nesdev->cqp.lock, flags); /* Destroy NIC QP */ @@ -1894,7 +1995,30 @@ int nes_napi_isr(struct nes_device *nesdev) } } - +static void process_critical_error(struct nes_device *nesdev) +{ + u32 debug_error; + u32 nes_idx_debug_error_masks0 = 0; + u16 error_module = 0; + + debug_error = nes_read_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS); + printk(KERN_ERR PFX "Critical Error reported by device!!! 0x%02X\n", + (u16)debug_error); + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS, + 0x01010000 | (debug_error & 0x0000ffff)); + if (crit_err_count++ > 10) + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 1 << 0x17); + error_module = (u16) (debug_error & 0x1F00) >> 8; + if (++nesdev->nesadapter->crit_error_count[error_module-1] >= + nes_max_critical_error_count) { + printk(KERN_ERR PFX "Masking off critical error for module " + "0x%02X\n", (u16)error_module); + nes_idx_debug_error_masks0 = nes_read_indexed(nesdev, + NES_IDX_DEBUG_ERROR_MASKS0); + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS0, + nes_idx_debug_error_masks0 | (1 << error_module)); + } +} /** * nes_dpc */ @@ -1909,7 +2033,6 @@ void nes_dpc(unsigned long param) u32 timer_stat; u32 temp_int_stat; u32 intf_int_stat; - u32 debug_error; u32 processed_intf_int = 0; u16 processed_timer_int = 0; u16 completion_ints = 0; @@ -1987,14 +2110,7 @@ void nes_dpc(unsigned long param) intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT); intf_int_stat &= nesdev->intf_int_req; if (NES_INTF_INT_CRITERR & intf_int_stat) { - debug_error = nes_read_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS); - printk(KERN_ERR PFX "Critical Error reported by device!!! 0x%02X\n", - (u16)debug_error); - nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS, - 0x01010000 | (debug_error & 0x0000ffff)); - /* BUG(); */ - if (crit_err_count++ > 10) - nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 1 << 0x17); + process_critical_error(nesdev); } if (NES_INTF_INT_PCIERR & intf_int_stat) { printk(KERN_ERR PFX "PCI Error reported by device!!!\n"); @@ -2258,7 +2374,8 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) spin_unlock_irqrestore(&nesadapter->phy_lock, flags); } /* read the PHY interrupt status register */ - if (nesadapter->OneG_Mode) { + if ((nesadapter->OneG_Mode) && + (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) { do { nes_read_1G_phy_reg(nesdev, 0x1a, nesadapter->phy_index[mac_index], &phy_data); @@ -2814,7 +2931,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, nesqp = *((struct nes_qp **)&context); if (atomic_inc_return(&nesqp->close_timer_started) == 1) { nesqp->cm_id->add_ref(nesqp->cm_id); - nes_add_ref(&nesqp->ibqp); schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp, NES_TIMER_TYPE_CLOSE, 1, 0); nes_debug(NES_DBG_AEQ, "QP%u Not decrementing QP refcount (%d)," @@ -2838,7 +2954,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, if (async_event_id == NES_AEQE_AEID_RESET_SENT) { tcp_state = NES_AEQE_TCP_STATE_CLOSED; } - nes_add_ref(&nesqp->ibqp); spin_lock_irqsave(&nesqp->lock, flags); nesqp->hw_iwarp_state = iwarp_state; nesqp->hw_tcp_state = tcp_state; @@ -2876,7 +2991,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, } spin_unlock_irqrestore(&nesqp->lock, flags); if (next_iwarp_state) { - nes_add_ref(&nesqp->ibqp); nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X," " also added another reference\n", nesqp->hwqp.qp_id, next_iwarp_state); @@ -2888,7 +3002,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, /* FIN Received but ib state not RTS, close complete will be on its way */ spin_unlock_irqrestore(&nesqp->lock, flags); - nes_rem_ref(&nesqp->ibqp); return; } spin_unlock_irqrestore(&nesqp->lock, flags); @@ -2922,7 +3035,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) || ((nesqp->ibqp_state == IB_QPS_RTS)&& (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { - nes_add_ref(&nesqp->ibqp); nes_cm_disconn(nesqp); } else { nesqp->in_disconnect = 0; @@ -2931,7 +3043,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, break; case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES: nesqp = *((struct nes_qp **)&context); - nes_add_ref(&nesqp->ibqp); spin_lock_irqsave(&nesqp->lock, flags); nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR; nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; @@ -3042,7 +3153,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); } /* tell cm to disconnect, cm will queue work to thread */ - nes_add_ref(&nesqp->ibqp); nes_cm_disconn(nesqp); break; case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE: @@ -3062,7 +3172,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); } /* tell cm to disconnect, cm will queue work to thread */ - nes_add_ref(&nesqp->ibqp); nes_cm_disconn(nesqp); break; case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR: @@ -3082,10 +3191,25 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); } /* tell cm to disconnect, cm will queue work to thread */ - nes_add_ref(&nesqp->ibqp); nes_cm_disconn(nesqp); break; /* TODO: additional AEs need to be here */ + case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION: + nesqp = *((struct nes_qp **)&context); + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + spin_unlock_irqrestore(&nesqp->lock, flags); + if (nesqp->ibqp.event_handler) { + ibevent.device = nesqp->ibqp.device; + ibevent.element.qp = &nesqp->ibqp; + ibevent.event = IB_EVENT_QP_ACCESS_ERR; + nesqp->ibqp.event_handler(&ibevent, + nesqp->ibqp.qp_context); + } + nes_cm_disconn(nesqp); + break; default: nes_debug(NES_DBG_AEQ, "Processing an iWARP related AE for QP, misc = 0x%04X\n", async_event_id); diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 7b81e0ae007..610b9d85959 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -156,6 +156,7 @@ enum indexed_regs { NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI = 0x7004, NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO = 0x7008, NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI = 0x700c, + NES_IDX_WQM_CONFIG1 = 0x5004, NES_IDX_CM_CONFIG = 0x5100, NES_IDX_NIC_LOGPORT_TO_PHYPORT = 0x6000, NES_IDX_NIC_PHYPORT_TO_USW = 0x6008, @@ -967,6 +968,7 @@ struct nes_arp_entry { #define DEFAULT_JUMBO_NES_QL_TARGET 40 #define DEFAULT_JUMBO_NES_QL_HIGH 128 #define NES_NIC_CQ_DOWNWARD_TREND 16 +#define NES_PFT_SIZE 48 struct nes_hw_tune_timer { /* u16 cq_count; */ @@ -1079,6 +1081,7 @@ struct nes_adapter { u32 et_rx_max_coalesced_frames_high; u32 et_rate_sample_interval; u32 timer_int_limit; + u32 wqm_quanta; /* Adapter base MAC address */ u32 mac_addr_low; @@ -1094,12 +1097,14 @@ struct nes_adapter { u16 pd_config_base[4]; u16 link_interrupt_count[4]; + u8 crit_error_count[32]; /* the phy index for each port */ u8 phy_index[4]; u8 mac_sw_state[4]; u8 mac_link_down[4]; u8 phy_type[4]; + u8 log_port; /* PCI information */ unsigned int devfn; @@ -1113,6 +1118,7 @@ struct nes_adapter { u8 virtwq; u8 et_use_adaptive_rx_coalesce; u8 adapter_fcn_count; + u8 pft_mcast_map[NES_PFT_SIZE]; }; struct nes_pbl { diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 1b0938c8777..730358637bb 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -91,6 +91,7 @@ static struct nic_qp_map *nic_qp_mapping_per_function[] = { static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN; static int debug = -1; +static int nics_per_function = 1; /** * nes_netdev_poll @@ -201,7 +202,8 @@ static int nes_netdev_open(struct net_device *netdev) nes_debug(NES_DBG_NETDEV, "i=%d, perfect filter table index= %d, PERF FILTER LOW" " (Addr:%08X) = %08X, HIGH = %08X.\n", i, nesvnic->qp_nic_index[i], - NES_IDX_PERFECT_FILTER_LOW+((nesvnic->perfect_filter_index + i) * 8), + NES_IDX_PERFECT_FILTER_LOW+ + (nesvnic->qp_nic_index[i] * 8), macaddr_low, (u32)macaddr_high | NES_MAC_ADDR_VALID | ((((u32)nesvnic->nic_index) << 16))); @@ -272,14 +274,18 @@ static int nes_netdev_stop(struct net_device *netdev) break; } - if (first_nesvnic->netdev_open == 0) + if ((first_nesvnic->netdev_open == 1) && (first_nesvnic != nesvnic) && + (PCI_FUNC(first_nesvnic->nesdev->pcidev->devfn) != + PCI_FUNC(nesvnic->nesdev->pcidev->devfn))) { + nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+ + (0x200*nesdev->mac_index), 0xffffffff); + nes_write_indexed(first_nesvnic->nesdev, + NES_IDX_MAC_INT_MASK+ + (0x200*first_nesvnic->nesdev->mac_index), + ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT | + NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); + } else { nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+(0x200*nesdev->mac_index), 0xffffffff); - else if ((first_nesvnic != nesvnic) && - (PCI_FUNC(first_nesvnic->nesdev->pcidev->devfn) != PCI_FUNC(nesvnic->nesdev->pcidev->devfn))) { - nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK + (0x200 * nesdev->mac_index), 0xffffffff); - nes_write_indexed(first_nesvnic->nesdev, NES_IDX_MAC_INT_MASK + (0x200 * first_nesvnic->nesdev->mac_index), - ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT | - NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); } nic_active_mask = ~((u32)(1 << nesvnic->nic_index)); @@ -437,7 +443,7 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) struct nes_hw_nic_sq_wqe *nic_sqe; struct tcphdr *tcph; /* struct udphdr *udph; */ -#define NES_MAX_TSO_FRAGS 18 +#define NES_MAX_TSO_FRAGS MAX_SKB_FRAGS /* 64K segment plus overflow on each side */ dma_addr_t tso_bus_address[NES_MAX_TSO_FRAGS]; dma_addr_t bus_address; @@ -605,6 +611,8 @@ tso_sq_no_longer_full: wqe_fragment_length[wqe_fragment_index] = 0; set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX, bus_address); + tso_wqe_length += skb_headlen(skb) - + original_first_length; } while (wqe_fragment_index < 5) { wqe_fragment_length[wqe_fragment_index] = @@ -827,6 +835,7 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) { struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; struct dev_mc_list *multicast_addr; u32 nic_active_bit; u32 nic_active; @@ -836,7 +845,12 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) u8 mc_all_on = 0; u8 mc_index; int mc_nic_index = -1; + u8 pft_entries_preallocated = max(nesadapter->adapter_fcn_count * + nics_per_function, 4); + u8 max_pft_entries_avaiable = NES_PFT_SIZE - pft_entries_preallocated; + unsigned long flags; + spin_lock_irqsave(&nesadapter->resource_lock, flags); nic_active_bit = 1 << nesvnic->nic_index; if (netdev->flags & IFF_PROMISC) { @@ -847,7 +861,7 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) nic_active |= nic_active_bit; nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); mc_all_on = 1; - } else if ((netdev->flags & IFF_ALLMULTI) || (netdev->mc_count > NES_MULTICAST_PF_MAX) || + } else if ((netdev->flags & IFF_ALLMULTI) || (nesvnic->nic_index > 3)) { nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); nic_active |= nic_active_bit; @@ -866,17 +880,34 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) } nes_debug(NES_DBG_NIC_RX, "Number of MC entries = %d, Promiscous = %d, All Multicast = %d.\n", - netdev->mc_count, (netdev->flags & IFF_PROMISC)?1:0, - (netdev->flags & IFF_ALLMULTI)?1:0); + netdev->mc_count, !!(netdev->flags & IFF_PROMISC), + !!(netdev->flags & IFF_ALLMULTI)); if (!mc_all_on) { multicast_addr = netdev->mc_list; - perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW + 0x80; - perfect_filter_register_address += nesvnic->nic_index*0x40; - for (mc_index=0; mc_index < NES_MULTICAST_PF_MAX; mc_index++) { - while (multicast_addr && nesvnic->mcrq_mcast_filter && ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic, multicast_addr->dmi_addr)) == 0)) + perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW + + pft_entries_preallocated * 0x8; + for (mc_index = 0; mc_index < max_pft_entries_avaiable; + mc_index++) { + while (multicast_addr && nesvnic->mcrq_mcast_filter && + ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic, + multicast_addr->dmi_addr)) == 0)) { multicast_addr = multicast_addr->next; + } if (mc_nic_index < 0) mc_nic_index = nesvnic->nic_index; + while (nesadapter->pft_mcast_map[mc_index] < 16 && + nesadapter->pft_mcast_map[mc_index] != + nesvnic->nic_index && + mc_index < max_pft_entries_avaiable) { + nes_debug(NES_DBG_NIC_RX, + "mc_index=%d skipping nic_index=%d,\ + used for=%d \n", mc_index, + nesvnic->nic_index, + nesadapter->pft_mcast_map[mc_index]); + mc_index++; + } + if (mc_index >= max_pft_entries_avaiable) + break; if (multicast_addr) { DECLARE_MAC_BUF(mac); nes_debug(NES_DBG_NIC_RX, "Assigning MC Address %s to register 0x%04X nic_idx=%d\n", @@ -897,15 +928,33 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) (u32)macaddr_high | NES_MAC_ADDR_VALID | ((((u32)(1<<mc_nic_index)) << 16))); multicast_addr = multicast_addr->next; + nesadapter->pft_mcast_map[mc_index] = + nesvnic->nic_index; } else { nes_debug(NES_DBG_NIC_RX, "Clearing MC Address at register 0x%04X\n", perfect_filter_register_address+(mc_index * 8)); nes_write_indexed(nesdev, perfect_filter_register_address+4+(mc_index * 8), 0); + nesadapter->pft_mcast_map[mc_index] = 255; } } + /* PFT is not large enough */ + if (multicast_addr && multicast_addr->next) { + nic_active = nes_read_indexed(nesdev, + NES_IDX_NIC_MULTICAST_ALL); + nic_active |= nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, + nic_active); + nic_active = nes_read_indexed(nesdev, + NES_IDX_NIC_UNICAST_ALL); + nic_active &= ~nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, + nic_active); + } } + + spin_unlock_irqrestore(&nesadapter->resource_lock, flags); } @@ -918,6 +967,10 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) struct nes_device *nesdev = nesvnic->nesdev; int ret = 0; u8 jumbomode = 0; + u32 nic_active; + u32 nic_active_bit; + u32 uc_all_active; + u32 mc_all_active; if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu)) return -EINVAL; @@ -931,8 +984,24 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) nes_nic_init_timer_defaults(nesdev, jumbomode); if (netif_running(netdev)) { + nic_active_bit = 1 << nesvnic->nic_index; + mc_all_active = nes_read_indexed(nesdev, + NES_IDX_NIC_MULTICAST_ALL) & nic_active_bit; + uc_all_active = nes_read_indexed(nesdev, + NES_IDX_NIC_UNICAST_ALL) & nic_active_bit; + nes_netdev_stop(netdev); nes_netdev_open(netdev); + + nic_active = nes_read_indexed(nesdev, + NES_IDX_NIC_MULTICAST_ALL); + nic_active |= mc_all_active; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, + nic_active); + + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); + nic_active |= uc_all_active; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); } return ret; @@ -1208,10 +1277,12 @@ static void nes_netdev_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; strcpy(drvinfo->driver, DRV_NAME); strcpy(drvinfo->bus_info, pci_name(nesvnic->nesdev->pcidev)); - strcpy(drvinfo->fw_version, "TBD"); + sprintf(drvinfo->fw_version, "%u.%u", nesadapter->firmware_version>>16, + nesadapter->firmware_version & 0x000000ff); strcpy(drvinfo->version, DRV_VERSION); drvinfo->n_stats = nes_netdev_get_stats_count(netdev); drvinfo->testinfo_len = 0; @@ -1587,7 +1658,9 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, nesvnic, (unsigned long)netdev->features, nesvnic->nic.qp_id, nesvnic->nic_index, nesvnic->logical_port, nesdev->mac_index); - if (nesvnic->nesdev->nesadapter->port_count == 1) { + if (nesvnic->nesdev->nesadapter->port_count == 1 && + nesvnic->nesdev->nesadapter->adapter_fcn_count == 1) { + nesvnic->qp_nic_index[0] = nesvnic->nic_index; nesvnic->qp_nic_index[1] = nesvnic->nic_index + 1; if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) { @@ -1598,11 +1671,14 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, nesvnic->qp_nic_index[3] = nesvnic->nic_index + 3; } } else { - if (nesvnic->nesdev->nesadapter->port_count == 2) { - nesvnic->qp_nic_index[0] = nesvnic->nic_index; - nesvnic->qp_nic_index[1] = nesvnic->nic_index + 2; - nesvnic->qp_nic_index[2] = 0xf; - nesvnic->qp_nic_index[3] = 0xf; + if (nesvnic->nesdev->nesadapter->port_count == 2 || + (nesvnic->nesdev->nesadapter->port_count == 1 && + nesvnic->nesdev->nesadapter->adapter_fcn_count == 2)) { + nesvnic->qp_nic_index[0] = nesvnic->nic_index; + nesvnic->qp_nic_index[1] = nesvnic->nic_index + + 2; + nesvnic->qp_nic_index[2] = 0xf; + nesvnic->qp_nic_index[3] = 0xf; } else { nesvnic->qp_nic_index[0] = nesvnic->nic_index; nesvnic->qp_nic_index[1] = 0xf; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index e3939d13484..932e56fcf77 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1467,7 +1467,6 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, default: nes_debug(NES_DBG_QP, "Invalid QP type: %d\n", init_attr->qp_type); return ERR_PTR(-EINVAL); - break; } /* update the QP table */ @@ -2498,7 +2497,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, nes_debug(NES_DBG_MR, "Leaving, ibmr=%p", ibmr); return ibmr; - break; case IWNES_MEMREG_TYPE_QP: case IWNES_MEMREG_TYPE_CQ: nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL); @@ -2572,7 +2570,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, nesmr->ibmr.lkey = -1; nesmr->mode = req.reg_type; return &nesmr->ibmr; - break; } return ERR_PTR(-ENOSYS); @@ -2867,7 +2864,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id, attr->qp_state, nesqp->ibqp_state, nesqp->iwarp_state, atomic_read(&nesqp->refcount)); - nes_add_ref(&nesqp->ibqp); spin_lock_irqsave(&nesqp->lock, qplockflags); nes_debug(NES_DBG_MOD_QP, "QP%u: hw_iwarp_state=0x%X, hw_tcp_state=0x%X," @@ -2882,7 +2878,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id); if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_IDLE) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE; @@ -2893,7 +2888,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id); if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_IDLE) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE; @@ -2904,14 +2898,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id); if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_RTS) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } if (nesqp->cm_id == NULL) { nes_debug(NES_DBG_MOD_QP, "QP%u: Failing attempt to move QP to RTS without a CM_ID. \n", nesqp->hwqp.qp_id ); spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } next_iwarp_state = NES_CQP_QP_IWARP_STATE_RTS; @@ -2929,7 +2921,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail); if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_CLOSING) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return 0; } else { if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_CLOSING) { @@ -2937,7 +2928,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, " ignored due to current iWARP state\n", nesqp->hwqp.qp_id); spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } if (nesqp->hw_iwarp_state != NES_AEQE_IWARP_STATE_RTS) { @@ -2969,7 +2959,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id); if (nesqp->iwarp_state>=(u32)NES_CQP_QP_IWARP_STATE_TERMINATE) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */ @@ -2982,7 +2971,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, case IB_QPS_RESET: if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_ERROR) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n", @@ -3008,7 +2996,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, break; default: spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; break; } @@ -3088,7 +3075,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), original_last_aeq, nesqp->last_aeq); /* this one is for the cm_disconnect thread */ - nes_add_ref(&nesqp->ibqp); spin_lock_irqsave(&nesqp->lock, qplockflags); nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; @@ -3097,14 +3083,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } else { nes_debug(NES_DBG_MOD_QP, "QP%u No fake disconnect, QP refcount=%d\n", nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount)); - nes_rem_ref(&nesqp->ibqp); } } else { spin_lock_irqsave(&nesqp->lock, qplockflags); if (nesqp->cm_id) { /* These two are for the timer thread */ if (atomic_inc_return(&nesqp->close_timer_started) == 1) { - nes_add_ref(&nesqp->ibqp); nesqp->cm_id->add_ref(nesqp->cm_id); nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d)," " need ae to finish up, original_last_aeq = 0x%04X." @@ -3128,14 +3112,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), original_last_aeq, nesqp->last_aeq); - nes_rem_ref(&nesqp->ibqp); } } else { nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up," " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), original_last_aeq, nesqp->last_aeq); - nes_rem_ref(&nesqp->ibqp); } err = 0; diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index 691525cf394..9d9a9dc51f1 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig @@ -11,16 +11,17 @@ config INFINIBAND_IPOIB config INFINIBAND_IPOIB_CM bool "IP-over-InfiniBand Connected Mode support" - depends on INFINIBAND_IPOIB && EXPERIMENTAL + depends on INFINIBAND_IPOIB default n ---help--- - This option enables experimental support for IPoIB connected mode. - After enabling this option, you need to switch to connected mode through - /sys/class/net/ibXXX/mode to actually create connections, and then increase - the interface MTU with e.g. ifconfig ib0 mtu 65520. + This option enables support for IPoIB connected mode. After + enabling this option, you need to switch to connected mode + through /sys/class/net/ibXXX/mode to actually create + connections, and then increase the interface MTU with + e.g. ifconfig ib0 mtu 65520. - WARNING: Enabling connected mode will trigger some - packet drops for multicast and UD mode traffic from this interface, + WARNING: Enabling connected mode will trigger some packet + drops for multicast and UD mode traffic from this interface, unless you limit mtu for these destinations to 2044. config INFINIBAND_IPOIB_DEBUG @@ -33,9 +34,10 @@ config INFINIBAND_IPOIB_DEBUG debug_level and mcast_debug_level module parameters (which can also be set after the driver is loaded through sysfs). - This option also creates an "ipoib_debugfs," which can be - mounted to expose debugging information about IB multicast - groups used by the IPoIB driver. + This option also creates a directory tree under ipoib/ in + debugfs, which contains files that expose debugging + information about IB multicast groups used by the IPoIB + driver. config INFINIBAND_IPOIB_DEBUG_DATA bool "IP-over-InfiniBand data path debugging" diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index b0ffc9abe8c..68ba5c3482e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -268,10 +268,9 @@ struct ipoib_lro { }; /* - * Device private locking: tx_lock protects members used in TX fast - * path (and we use LLTX so upper layers don't do extra locking). - * lock protects everything else. lock nests inside of tx_lock (ie - * tx_lock must be acquired first if needed). + * Device private locking: network stack tx_lock protects members used + * in TX fast path, lock protects everything else. lock nests inside + * of tx_lock (ie tx_lock must be acquired first if needed). */ struct ipoib_dev_priv { spinlock_t lock; @@ -293,6 +292,7 @@ struct ipoib_dev_priv { struct delayed_work pkey_poll_task; struct delayed_work mcast_task; + struct work_struct carrier_on_task; struct work_struct flush_light; struct work_struct flush_normal; struct work_struct flush_heavy; @@ -319,7 +319,6 @@ struct ipoib_dev_priv { struct ipoib_rx_buf *rx_ring; - spinlock_t tx_lock; struct ipoib_tx_buf *tx_ring; unsigned tx_head; unsigned tx_tail; @@ -464,6 +463,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); void ipoib_dev_cleanup(struct net_device *dev); void ipoib_mcast_join_task(struct work_struct *work); +void ipoib_mcast_carrier_on_task(struct work_struct *work); void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb); void ipoib_mcast_restart_task(struct work_struct *work); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 0f2d3045061..7b14c2c3950 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -202,7 +202,7 @@ static void ipoib_cm_free_rx_ring(struct net_device *dev, dev_kfree_skb_any(rx_ring[i].skb); } - kfree(rx_ring); + vfree(rx_ring); } static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv) @@ -337,7 +337,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev, sge[i].length = PAGE_SIZE; wr->next = NULL; - wr->sg_list = priv->cm.rx_sge; + wr->sg_list = sge; wr->num_sge = priv->cm.num_frags; } @@ -352,9 +352,14 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i int ret; int i; - rx->rx_ring = kcalloc(ipoib_recvq_size, sizeof *rx->rx_ring, GFP_KERNEL); - if (!rx->rx_ring) + rx->rx_ring = vmalloc(ipoib_recvq_size * sizeof *rx->rx_ring); + if (!rx->rx_ring) { + printk(KERN_WARNING "%s: failed to allocate CM non-SRQ ring (%d entries)\n", + priv->ca->name, ipoib_recvq_size); return -ENOMEM; + } + + memset(rx->rx_ring, 0, ipoib_recvq_size * sizeof *rx->rx_ring); t = kmalloc(sizeof *t, GFP_KERNEL); if (!t) { @@ -781,7 +786,8 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) dev_kfree_skb_any(tx_req->skb); - spin_lock_irqsave(&priv->tx_lock, flags); + netif_tx_lock(dev); + ++tx->tx_tail; if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && netif_queue_stopped(dev) && @@ -796,7 +802,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); neigh = tx->neigh; if (neigh) { @@ -816,10 +822,10 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } - spin_unlock_irqrestore(&priv->tx_lock, flags); + netif_tx_unlock(dev); } int ipoib_cm_dev_open(struct net_device *dev) @@ -1144,7 +1150,6 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) { struct ipoib_dev_priv *priv = netdev_priv(p->dev); struct ipoib_cm_tx_buf *tx_req; - unsigned long flags; unsigned long begin; ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n", @@ -1175,12 +1180,12 @@ timeout: DMA_TO_DEVICE); dev_kfree_skb_any(tx_req->skb); ++p->tx_tail; - spin_lock_irqsave(&priv->tx_lock, flags); + netif_tx_lock_bh(p->dev); if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && netif_queue_stopped(p->dev) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) netif_wake_queue(p->dev); - spin_unlock_irqrestore(&priv->tx_lock, flags); + netif_tx_unlock_bh(p->dev); } if (p->qp) @@ -1197,6 +1202,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, struct ipoib_dev_priv *priv = netdev_priv(tx->dev); struct net_device *dev = priv->dev; struct ipoib_neigh *neigh; + unsigned long flags; int ret; switch (event->event) { @@ -1215,8 +1221,8 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, case IB_CM_REJ_RECEIVED: case IB_CM_TIMEWAIT_EXIT: ipoib_dbg(priv, "CM error %d.\n", event->event); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); neigh = tx->neigh; if (neigh) { @@ -1234,8 +1240,8 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, queue_work(ipoib_workqueue, &priv->cm.reap_task); } - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); break; default: break; @@ -1289,19 +1295,24 @@ static void ipoib_cm_tx_start(struct work_struct *work) struct ib_sa_path_rec pathrec; u32 qpn; - spin_lock_irqsave(&priv->tx_lock, flags); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); + while (!list_empty(&priv->cm.start_list)) { p = list_entry(priv->cm.start_list.next, typeof(*p), list); list_del_init(&p->list); neigh = p->neigh; qpn = IPOIB_QPN(neigh->neighbour->ha); memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); - spin_unlock(&priv->lock); - spin_unlock_irqrestore(&priv->tx_lock, flags); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); + ret = ipoib_cm_tx_init(p, qpn, &pathrec); - spin_lock_irqsave(&priv->tx_lock, flags); - spin_lock(&priv->lock); + + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); + if (ret) { neigh = p->neigh; if (neigh) { @@ -1315,44 +1326,52 @@ static void ipoib_cm_tx_start(struct work_struct *work) kfree(p); } } - spin_unlock(&priv->lock); - spin_unlock_irqrestore(&priv->tx_lock, flags); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); } static void ipoib_cm_tx_reap(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, cm.reap_task); + struct net_device *dev = priv->dev; struct ipoib_cm_tx *p; + unsigned long flags; + + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); while (!list_empty(&priv->cm.reap_list)) { p = list_entry(priv->cm.reap_list.next, typeof(*p), list); list_del(&p->list); - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); ipoib_cm_tx_destroy(p); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); } - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); } static void ipoib_cm_skb_reap(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, cm.skb_task); + struct net_device *dev = priv->dev; struct sk_buff *skb; - + unsigned long flags; unsigned mtu = priv->mcast_mtu; - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); + while ((skb = skb_dequeue(&priv->cm.skb_queue))) { - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); + if (skb->protocol == htons(ETH_P_IP)) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -1360,11 +1379,13 @@ static void ipoib_cm_skb_reap(struct work_struct *work) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, priv->dev); #endif dev_kfree_skb_any(skb); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); } - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); } void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, @@ -1494,14 +1515,16 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge) return; } - priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring, - GFP_KERNEL); + priv->cm.srq_ring = vmalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring); if (!priv->cm.srq_ring) { printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n", priv->ca->name, ipoib_recvq_size); ib_destroy_srq(priv->cm.srq); priv->cm.srq = NULL; + return; } + + memset(priv->cm.srq_ring, 0, ipoib_recvq_size * sizeof *priv->cm.srq_ring); } int ipoib_cm_dev_init(struct net_device *dev) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 66cafa20c24..0e748aeeae9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -468,21 +468,22 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr) static void drain_tx_cq(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - unsigned long flags; - spin_lock_irqsave(&priv->tx_lock, flags); + netif_tx_lock(dev); while (poll_tx(priv)) ; /* nothing */ if (netif_queue_stopped(dev)) mod_timer(&priv->poll_timer, jiffies + 1); - spin_unlock_irqrestore(&priv->tx_lock, flags); + netif_tx_unlock(dev); } void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr) { - drain_tx_cq((struct net_device *)dev_ptr); + struct ipoib_dev_priv *priv = netdev_priv(dev_ptr); + + mod_timer(&priv->poll_timer, jiffies); } static inline int post_send(struct ipoib_dev_priv *priv, @@ -614,17 +615,20 @@ static void __ipoib_reap_ah(struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_ah *ah, *tah; LIST_HEAD(remove_list); + unsigned long flags; + + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) if ((int) priv->tx_tail - (int) ah->last_send >= 0) { list_del(&ah->list); ib_destroy_ah(ah->ah); kfree(ah); } - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); } void ipoib_reap_ah(struct work_struct *work) @@ -761,6 +765,14 @@ void ipoib_drain_cq(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); int i, n; + + /* + * We call completion handling routines that expect to be + * called from the BH-disabled NAPI poll context, so disable + * BHs here too. + */ + local_bh_disable(); + do { n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc); for (i = 0; i < n; ++i) { @@ -784,6 +796,8 @@ void ipoib_drain_cq(struct net_device *dev) while (poll_tx(priv)) ; /* nothing */ + + local_bh_enable(); } int ipoib_ib_dev_stop(struct net_device *dev, int flush) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index f51201b17bf..c0ee514396d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -156,14 +156,8 @@ static int ipoib_stop(struct net_device *dev) netif_stop_queue(dev); - /* - * Now flush workqueue to make sure a scheduled task doesn't - * bring our internal state back up. - */ - flush_workqueue(ipoib_workqueue); - - ipoib_ib_dev_down(dev, 1); - ipoib_ib_dev_stop(dev, 1); + ipoib_ib_dev_down(dev, 0); + ipoib_ib_dev_stop(dev, 0); if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { struct ipoib_dev_priv *cpriv; @@ -379,9 +373,10 @@ void ipoib_flush_paths(struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path, *tp; LIST_HEAD(remove_list); + unsigned long flags; - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); list_splice_init(&priv->path_list, &remove_list); @@ -391,15 +386,16 @@ void ipoib_flush_paths(struct net_device *dev) list_for_each_entry_safe(path, tp, &remove_list, list) { if (path->query) ib_sa_cancel_query(path->query_id, path->query); - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); wait_for_completion(&path->done); path_free(dev, path); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); } - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); } static void path_rec_completion(int status, @@ -410,7 +406,7 @@ static void path_rec_completion(int status, struct net_device *dev = path->dev; struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_ah *ah = NULL; - struct ipoib_ah *old_ah; + struct ipoib_ah *old_ah = NULL; struct ipoib_neigh *neigh, *tn; struct sk_buff_head skqueue; struct sk_buff *skb; @@ -434,12 +430,12 @@ static void path_rec_completion(int status, spin_lock_irqsave(&priv->lock, flags); - old_ah = path->ah; - path->ah = ah; - if (ah) { path->pathrec = *pathrec; + old_ah = path->ah; + path->ah = ah; + ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n", ah, be16_to_cpu(pathrec->dlid), pathrec->sl); @@ -561,6 +557,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; struct ipoib_neigh *neigh; + unsigned long flags; neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev); if (!neigh) { @@ -569,11 +566,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) return; } - /* - * We can only be called from ipoib_start_xmit, so we're - * inside tx_lock -- no need to save/restore flags. - */ - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); path = __path_find(dev, skb->dst->neighbour->ha + 4); if (!path) { @@ -620,7 +613,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) __skb_queue_tail(&neigh->queue, skb); } - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); return; err_list: @@ -632,7 +625,7 @@ err_drop: ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev) @@ -656,12 +649,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; + unsigned long flags; - /* - * We can only be called from ipoib_start_xmit, so we're - * inside tx_lock -- no need to save/restore flags. - */ - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); path = __path_find(dev, phdr->hwaddr + 4); if (!path || !path->valid) { @@ -673,7 +663,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, __skb_queue_tail(&path->queue, skb); if (path_rec_start(dev, path)) { - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); path_free(dev, path); return; } else @@ -683,7 +673,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, dev_kfree_skb_any(skb); } - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); return; } @@ -702,7 +692,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, dev_kfree_skb_any(skb); } - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) @@ -711,13 +701,10 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) struct ipoib_neigh *neigh; unsigned long flags; - if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags))) - return NETDEV_TX_LOCKED; - if (likely(skb->dst && skb->dst->neighbour)) { if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) { ipoib_path_lookup(skb, dev); - goto out; + return NETDEV_TX_OK; } neigh = *to_ipoib_neigh(skb->dst->neighbour); @@ -727,7 +714,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) skb->dst->neighbour->ha + 4, sizeof(union ib_gid))) || (neigh->dev != dev))) { - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); /* * It's safe to call ipoib_put_ah() inside * priv->lock here, because we know that @@ -738,25 +725,25 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) ipoib_put_ah(neigh->ah); list_del(&neigh->list); ipoib_neigh_free(dev, neigh); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); ipoib_path_lookup(skb, dev); - goto out; + return NETDEV_TX_OK; } if (ipoib_cm_get(neigh)) { if (ipoib_cm_up(neigh)) { ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); - goto out; + return NETDEV_TX_OK; } } else if (neigh->ah) { ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha)); - goto out; + return NETDEV_TX_OK; } if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); __skb_queue_tail(&neigh->queue, skb); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } else { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -785,16 +772,13 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) IPOIB_GID_RAW_ARG(phdr->hwaddr + 4)); dev_kfree_skb_any(skb); ++dev->stats.tx_dropped; - goto out; + return NETDEV_TX_OK; } unicast_arp_send(skb, dev, phdr); } } -out: - spin_unlock_irqrestore(&priv->tx_lock, flags); - return NETDEV_TX_OK; } @@ -1058,7 +1042,6 @@ static void ipoib_setup(struct net_device *dev) dev->type = ARPHRD_INFINIBAND; dev->tx_queue_len = ipoib_sendq_size * 2; dev->features = (NETIF_F_VLAN_CHALLENGED | - NETIF_F_LLTX | NETIF_F_HIGHDMA); memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); @@ -1070,7 +1053,6 @@ static void ipoib_setup(struct net_device *dev) ipoib_lro_setup(priv); spin_lock_init(&priv->lock); - spin_lock_init(&priv->tx_lock); mutex_init(&priv->vlan_mutex); @@ -1081,6 +1063,7 @@ static void ipoib_setup(struct net_device *dev) INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); + INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task); INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal); INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); @@ -1314,7 +1297,7 @@ sysfs_failed: register_failed: ib_unregister_event_handler(&priv->event_handler); - flush_scheduled_work(); + flush_workqueue(ipoib_workqueue); event_failed: ipoib_dev_cleanup(priv->dev); @@ -1373,7 +1356,12 @@ static void ipoib_remove_one(struct ib_device *device) list_for_each_entry_safe(priv, tmp, dev_list, list) { ib_unregister_event_handler(&priv->event_handler); - flush_scheduled_work(); + + rtnl_lock(); + dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); + rtnl_unlock(); + + flush_workqueue(ipoib_workqueue); unregister_netdev(priv->dev); ipoib_dev_cleanup(priv->dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 8950e9546f4..d9d1223c3fd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -69,14 +69,13 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) struct net_device *dev = mcast->dev; struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_neigh *neigh, *tmp; - unsigned long flags; int tx_dropped = 0; ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group " IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mcast->mcmember.mgid)); - spin_lock_irqsave(&priv->lock, flags); + spin_lock_irq(&priv->lock); list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) { /* @@ -90,7 +89,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) ipoib_neigh_free(dev, neigh); } - spin_unlock_irqrestore(&priv->lock, flags); + spin_unlock_irq(&priv->lock); if (mcast->ah) ipoib_put_ah(mcast->ah); @@ -100,9 +99,9 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); } - spin_lock_irqsave(&priv->tx_lock, flags); + netif_tx_lock_bh(dev); dev->stats.tx_dropped += tx_dropped; - spin_unlock_irqrestore(&priv->tx_lock, flags); + netif_tx_unlock_bh(dev); kfree(mcast); } @@ -259,10 +258,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, } /* actually send any queued packets */ - spin_lock_irq(&priv->tx_lock); + netif_tx_lock_bh(dev); while (!skb_queue_empty(&mcast->pkt_queue)) { struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); - spin_unlock_irq(&priv->tx_lock); + netif_tx_unlock_bh(dev); skb->dev = dev; @@ -273,9 +272,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, if (dev_queue_xmit(skb)) ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); - spin_lock_irq(&priv->tx_lock); + netif_tx_lock_bh(dev); } - spin_unlock_irq(&priv->tx_lock); + netif_tx_unlock_bh(dev); return 0; } @@ -286,7 +285,6 @@ ipoib_mcast_sendonly_join_complete(int status, { struct ipoib_mcast *mcast = multicast->context; struct net_device *dev = mcast->dev; - struct ipoib_dev_priv *priv = netdev_priv(dev); /* We trap for port events ourselves. */ if (status == -ENETRESET) @@ -302,12 +300,12 @@ ipoib_mcast_sendonly_join_complete(int status, IPOIB_GID_ARG(mcast->mcmember.mgid), status); /* Flush out any queued packets */ - spin_lock_irq(&priv->tx_lock); + netif_tx_lock_bh(dev); while (!skb_queue_empty(&mcast->pkt_queue)) { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); } - spin_unlock_irq(&priv->tx_lock); + netif_tx_unlock_bh(dev); /* Clear the busy flag so we try again */ status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, @@ -366,6 +364,21 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) return ret; } +void ipoib_mcast_carrier_on_task(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, + carrier_on_task); + + /* + * Take rtnl_lock to avoid racing with ipoib_stop() and + * turning the carrier back on while a device is being + * removed. + */ + rtnl_lock(); + netif_carrier_on(priv->dev); + rtnl_unlock(); +} + static int ipoib_mcast_join_complete(int status, struct ib_sa_multicast *multicast) { @@ -392,8 +405,12 @@ static int ipoib_mcast_join_complete(int status, &priv->mcast_task, 0); mutex_unlock(&mcast_mutex); + /* + * Defer carrier on work to ipoib_workqueue to avoid a + * deadlock on rtnl_lock here. + */ if (mcast == priv->broadcast) - netif_carrier_on(dev); + queue_work(ipoib_workqueue, &priv->carrier_on_task); return 0; } @@ -643,12 +660,9 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_mcast *mcast; + unsigned long flags; - /* - * We can only be called from ipoib_start_xmit, so we're - * inside tx_lock -- no need to save/restore flags. - */ - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) || !priv->broadcast || @@ -719,7 +733,7 @@ out: } unlock: - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } void ipoib_mcast_dev_flush(struct net_device *dev) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 63462ecca14..26ff6214a81 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -33,7 +33,6 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/delay.h> -#include <linux/version.h> #include "iscsi_iser.h" |