diff options
author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2008-08-08 16:21:02 -0400 |
---|---|---|
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2008-08-08 16:21:02 -0400 |
commit | e4ddcb0a6bf04d53ce77b4eb87bbbb32c4261d11 (patch) | |
tree | d27d2fea50a384d97aa2d0cf5c8657c916f761d4 /drivers/infiniband | |
parent | f2afa7711f8585ffc088ba538b9a510e0d5dca12 (diff) | |
parent | 6e86841d05f371b5b9b86ce76c02aaee83352298 (diff) |
Merge commit 'v2.6.27-rc1' into for-linus
Diffstat (limited to 'drivers/infiniband')
32 files changed, 1539 insertions, 1013 deletions
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 55738eead3b..922d35f4fc0 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -44,6 +44,7 @@ #include <linux/spinlock.h> #include <linux/sysfs.h> #include <linux/workqueue.h> +#include <linux/kdev_t.h> #include <rdma/ib_cache.h> #include <rdma/ib_cm.h> @@ -162,8 +163,8 @@ struct cm_port { struct cm_device { struct list_head list; - struct ib_device *device; - struct kobject dev_obj; + struct ib_device *ib_device; + struct device *device; u8 ack_delay; struct cm_port *port[0]; }; @@ -339,7 +340,7 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, { av->port = port; av->pkey_index = wc->pkey_index; - ib_init_ah_from_wc(port->cm_dev->device, port->port_num, wc, + ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc, grh, &av->ah_attr); } @@ -353,7 +354,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) read_lock_irqsave(&cm.device_lock, flags); list_for_each_entry(cm_dev, &cm.device_list, list) { - if (!ib_find_cached_gid(cm_dev->device, &path->sgid, + if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, &p, NULL)) { port = cm_dev->port[p-1]; break; @@ -364,13 +365,13 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) if (!port) return -EINVAL; - ret = ib_find_cached_pkey(cm_dev->device, port->port_num, + ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num, be16_to_cpu(path->pkey), &av->pkey_index); if (ret) return ret; av->port = port; - ib_init_ah_from_path(cm_dev->device, port->port_num, path, + ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path, &av->ah_attr); av->timeout = path->packet_life_time + 1; return 0; @@ -1515,7 +1516,7 @@ static int cm_req_handler(struct cm_work *work) req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL); + cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); @@ -1550,7 +1551,7 @@ static int cm_req_handler(struct cm_work *work) cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); if (ret) { - ib_get_cached_gid(work->port->cm_dev->device, + ib_get_cached_gid(work->port->cm_dev->ib_device, work->port->port_num, 0, &work->path[0].sgid); ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, &work->path[0].sgid, sizeof work->path[0].sgid, @@ -2950,7 +2951,7 @@ static int cm_sidr_req_handler(struct cm_work *work) struct cm_sidr_req_msg *sidr_req_msg; struct ib_wc *wc; - cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL); + cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); cm_id_priv = container_of(cm_id, struct cm_id_private, id); @@ -3578,7 +3579,7 @@ static void cm_get_ack_delay(struct cm_device *cm_dev) { struct ib_device_attr attr; - if (ib_query_device(cm_dev->device, &attr)) + if (ib_query_device(cm_dev->ib_device, &attr)) cm_dev->ack_delay = 0; /* acks will rely on packet life time */ else cm_dev->ack_delay = attr.local_ca_ack_delay; @@ -3618,18 +3619,6 @@ static struct kobj_type cm_port_obj_type = { .release = cm_release_port_obj }; -static void cm_release_dev_obj(struct kobject *obj) -{ - struct cm_device *cm_dev; - - cm_dev = container_of(obj, struct cm_device, dev_obj); - kfree(cm_dev); -} - -static struct kobj_type cm_dev_obj_type = { - .release = cm_release_dev_obj -}; - struct class cm_class = { .name = "infiniband_cm", }; @@ -3640,7 +3629,7 @@ static int cm_create_port_fs(struct cm_port *port) int i, ret; ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type, - &port->cm_dev->dev_obj, + &port->cm_dev->device->kobj, "%d", port->port_num); if (ret) { kfree(port); @@ -3676,7 +3665,7 @@ static void cm_remove_port_fs(struct cm_port *port) kobject_put(&port->port_obj); } -static void cm_add_one(struct ib_device *device) +static void cm_add_one(struct ib_device *ib_device) { struct cm_device *cm_dev; struct cm_port *port; @@ -3691,26 +3680,27 @@ static void cm_add_one(struct ib_device *device) int ret; u8 i; - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + if (rdma_node_get_transport(ib_device->node_type) != RDMA_TRANSPORT_IB) return; cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) * - device->phys_port_cnt, GFP_KERNEL); + ib_device->phys_port_cnt, GFP_KERNEL); if (!cm_dev) return; - cm_dev->device = device; + cm_dev->ib_device = ib_device; cm_get_ack_delay(cm_dev); - ret = kobject_init_and_add(&cm_dev->dev_obj, &cm_dev_obj_type, - &cm_class.subsys.kobj, "%s", device->name); - if (ret) { + cm_dev->device = device_create_drvdata(&cm_class, &ib_device->dev, + MKDEV(0, 0), NULL, + "%s", ib_device->name); + if (!cm_dev->device) { kfree(cm_dev); return; } set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); - for (i = 1; i <= device->phys_port_cnt; i++) { + for (i = 1; i <= ib_device->phys_port_cnt; i++) { port = kzalloc(sizeof *port, GFP_KERNEL); if (!port) goto error1; @@ -3723,7 +3713,7 @@ static void cm_add_one(struct ib_device *device) if (ret) goto error1; - port->mad_agent = ib_register_mad_agent(device, i, + port->mad_agent = ib_register_mad_agent(ib_device, i, IB_QPT_GSI, ®_req, 0, @@ -3733,11 +3723,11 @@ static void cm_add_one(struct ib_device *device) if (IS_ERR(port->mad_agent)) goto error2; - ret = ib_modify_port(device, i, 0, &port_modify); + ret = ib_modify_port(ib_device, i, 0, &port_modify); if (ret) goto error3; } - ib_set_client_data(device, &cm_client, cm_dev); + ib_set_client_data(ib_device, &cm_client, cm_dev); write_lock_irqsave(&cm.device_lock, flags); list_add_tail(&cm_dev->list, &cm.device_list); @@ -3753,14 +3743,14 @@ error1: port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; while (--i) { port = cm_dev->port[i-1]; - ib_modify_port(device, port->port_num, 0, &port_modify); + ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); } - kobject_put(&cm_dev->dev_obj); + device_unregister(cm_dev->device); } -static void cm_remove_one(struct ib_device *device) +static void cm_remove_one(struct ib_device *ib_device) { struct cm_device *cm_dev; struct cm_port *port; @@ -3770,7 +3760,7 @@ static void cm_remove_one(struct ib_device *device) unsigned long flags; int i; - cm_dev = ib_get_client_data(device, &cm_client); + cm_dev = ib_get_client_data(ib_device, &cm_client); if (!cm_dev) return; @@ -3778,14 +3768,14 @@ static void cm_remove_one(struct ib_device *device) list_del(&cm_dev->list); write_unlock_irqrestore(&cm.device_lock, flags); - for (i = 1; i <= device->phys_port_cnt; i++) { + for (i = 1; i <= ib_device->phys_port_cnt; i++) { port = cm_dev->port[i-1]; - ib_modify_port(device, port->port_num, 0, &port_modify); + ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); flush_workqueue(cm.wq); cm_remove_port_fs(port); } - kobject_put(&cm_dev->dev_obj); + device_unregister(cm_dev->device); } static int __init ib_cm_init(void) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index ae11d5cc74d..e980ff3335d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -168,6 +168,12 @@ struct cma_work { struct rdma_cm_event event; }; +struct cma_ndev_work { + struct work_struct work; + struct rdma_id_private *id; + struct rdma_cm_event event; +}; + union cma_ip_addr { struct in6_addr ip6; struct { @@ -914,7 +920,10 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) struct rdma_cm_event event; int ret = 0; - if (cma_disable_callback(id_priv, CMA_CONNECT)) + if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && + cma_disable_callback(id_priv, CMA_CONNECT)) || + (ib_event->event == IB_CM_TIMEWAIT_EXIT && + cma_disable_callback(id_priv, CMA_DISCONNECT))) return 0; memset(&event, 0, sizeof event); @@ -950,6 +959,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) event.event = RDMA_CM_EVENT_DISCONNECTED; break; case IB_CM_TIMEWAIT_EXIT: + event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT; + break; case IB_CM_MRA_RECEIVED: /* ignore event */ goto out; @@ -1598,6 +1609,30 @@ out: kfree(work); } +static void cma_ndev_work_handler(struct work_struct *_work) +{ + struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work); + struct rdma_id_private *id_priv = work->id; + int destroy = 0; + + mutex_lock(&id_priv->handler_mutex); + if (id_priv->state == CMA_DESTROYING || + id_priv->state == CMA_DEVICE_REMOVAL) + goto out; + + if (id_priv->id.event_handler(&id_priv->id, &work->event)) { + cma_exch(id_priv, CMA_DESTROYING); + destroy = 1; + } + +out: + mutex_unlock(&id_priv->handler_mutex); + cma_deref_id(id_priv); + if (destroy) + rdma_destroy_id(&id_priv->id); + kfree(work); +} + static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) { struct rdma_route *route = &id_priv->id.route; @@ -2723,6 +2758,65 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) } EXPORT_SYMBOL(rdma_leave_multicast); +static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv) +{ + struct rdma_dev_addr *dev_addr; + struct cma_ndev_work *work; + + dev_addr = &id_priv->id.route.addr.dev_addr; + + if ((dev_addr->src_dev == ndev) && + memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) { + printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", + ndev->name, &id_priv->id); + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + INIT_WORK(&work->work, cma_ndev_work_handler); + work->id = id_priv; + work->event.event = RDMA_CM_EVENT_ADDR_CHANGE; + atomic_inc(&id_priv->refcount); + queue_work(cma_wq, &work->work); + } + + return 0; +} + +static int cma_netdev_callback(struct notifier_block *self, unsigned long event, + void *ctx) +{ + struct net_device *ndev = (struct net_device *)ctx; + struct cma_device *cma_dev; + struct rdma_id_private *id_priv; + int ret = NOTIFY_DONE; + + if (dev_net(ndev) != &init_net) + return NOTIFY_DONE; + + if (event != NETDEV_BONDING_FAILOVER) + return NOTIFY_DONE; + + if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING)) + return NOTIFY_DONE; + + mutex_lock(&lock); + list_for_each_entry(cma_dev, &dev_list, list) + list_for_each_entry(id_priv, &cma_dev->id_list, list) { + ret = cma_netdev_change(ndev, id_priv); + if (ret) + goto out; + } + +out: + mutex_unlock(&lock); + return ret; +} + +static struct notifier_block cma_nb = { + .notifier_call = cma_netdev_callback +}; + static void cma_add_one(struct ib_device *device) { struct cma_device *cma_dev; @@ -2831,6 +2925,7 @@ static int cma_init(void) ib_sa_register_client(&sa_client); rdma_addr_register_client(&addr_client); + register_netdevice_notifier(&cma_nb); ret = ib_register_client(&cma_client); if (ret) @@ -2838,6 +2933,7 @@ static int cma_init(void) return 0; err: + unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); @@ -2847,6 +2943,7 @@ err: static void cma_cleanup(void) { ib_unregister_client(&cma_client); + unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 81c9195b512..8f9509e1ebf 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -942,8 +942,7 @@ static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, case IW_CM_STATE_CONN_RECV: case IW_CM_STATE_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; - qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE| + qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE| IB_ACCESS_REMOTE_READ; ret = 0; break; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 1341de793e5..7863a50d56f 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1064,7 +1064,8 @@ static void ib_sa_remove_one(struct ib_device *device) for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { ib_unregister_mad_agent(sa_dev->port[i].agent); - kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); + if (sa_dev->port[i].sm_ah) + kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); } kfree(sa_dev); diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 9494005d1c9..e603736682b 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -43,7 +43,6 @@ #include <linux/cdev.h> #include <linux/idr.h> #include <linux/mutex.h> -#include <linux/smp_lock.h> #include <asm/uaccess.h> @@ -1154,11 +1153,18 @@ static unsigned int ib_ucm_poll(struct file *filp, return mask; } +/* + * ib_ucm_open() does not need the BKL: + * + * - no global state is referred to; + * - there is no ioctl method to race against; + * - no further module initialization is required for open to work + * after the device is registered. + */ static int ib_ucm_open(struct inode *inode, struct file *filp) { struct ib_ucm_file *file; - cycle_kernel_lock(); file = kmalloc(sizeof(*file), GFP_KERNEL); if (!file) return -ENOMEM; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 195f97302fe..b41dd26bbfa 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -38,7 +38,6 @@ #include <linux/in.h> #include <linux/in6.h> #include <linux/miscdevice.h> -#include <linux/smp_lock.h> #include <rdma/rdma_user_cm.h> #include <rdma/ib_marshall.h> @@ -1149,6 +1148,14 @@ static unsigned int ucma_poll(struct file *filp, struct poll_table_struct *wait) return mask; } +/* + * ucma_open() does not need the BKL: + * + * - no global state is referred to; + * - there is no ioctl method to race against; + * - no further module initialization is required for open to work + * after the device is registered. + */ static int ucma_open(struct inode *inode, struct file *filp) { struct ucma_file *file; @@ -1157,7 +1164,6 @@ static int ucma_open(struct inode *inode, struct file *filp) if (!file) return -ENOMEM; - lock_kernel(); INIT_LIST_HEAD(&file->event_list); INIT_LIST_HEAD(&file->ctx_list); init_waitqueue_head(&file->poll_wait); @@ -1165,7 +1171,6 @@ static int ucma_open(struct inode *inode, struct file *filp) filp->private_data = file; file->filp = filp; - unlock_kernel(); return 0; } diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 1e9e99a1393..0b0618edd64 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -194,6 +194,7 @@ struct ehca_qp { u32 packet_count; atomic_t nr_events; /* events seen */ wait_queue_head_t wait_completion; + int mig_armed; }; #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index bc3b37d2070..46288220cfb 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -114,7 +114,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) } props->max_pkeys = 16; - props->local_ca_ack_delay = min_t(u8, rblock->local_ca_ack_delay, 255); + /* Some FW versions say 0 here; insert sensible value in that case */ + props->local_ca_ack_delay = rblock->local_ca_ack_delay ? + min_t(u8, rblock->local_ca_ack_delay, 255) : 12; props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp); props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp); props->max_mcast_grp = limit_uint(rblock->max_mcast_grp); diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 0792d930c48..cb55be04442 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -178,6 +178,10 @@ static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp, { struct ib_event event; + /* PATH_MIG without the QP ever having been armed is false alarm */ + if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed) + return; + event.device = &shca->ib_device; event.event = event_type; @@ -646,8 +650,8 @@ static inline int find_next_online_cpu(struct ehca_comp_pool *pool) ehca_dmp(&cpu_online_map, sizeof(cpumask_t), ""); spin_lock_irqsave(&pool->last_cpu_lock, flags); - cpu = next_cpu(pool->last_cpu, cpu_online_map); - if (cpu == NR_CPUS) + cpu = next_cpu_nr(pool->last_cpu, cpu_online_map); + if (cpu >= nr_cpu_ids) cpu = first_cpu(cpu_online_map); pool->last_cpu = cpu; spin_unlock_irqrestore(&pool->last_cpu_lock, flags); diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 3f59587338e..ea13efddf17 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -1460,6 +1460,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit2; } mqpcb->path_migration_state = attr->path_mig_state + 1; + if (attr->path_mig_state == IB_MIG_REARM) + my_qp->mig_armed = 1; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); } diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c index 661f8db6270..c3a32846543 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -163,6 +163,7 @@ static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) out: ehca_err(pd->ib_pd.device, "failed to allocate small queue page"); + mutex_unlock(&pd->lock); return 0; } diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 35f301c88b5..56c0eda3c07 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -2455,7 +2455,7 @@ static int init_cdev(int minor, char *name, const struct file_operations *fops, goto err_cdev; } - device = device_create(ipath_class, NULL, dev, name); + device = device_create_drvdata(ipath_class, NULL, dev, NULL, name); if (IS_ERR(device)) { ret = PTR_ERR(device); diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index eaba03273e4..284c9bca517 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -698,7 +698,7 @@ retry: addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr, tx->map_len, DMA_TO_DEVICE); - if (dma_mapping_error(addr)) { + if (dma_mapping_error(&dd->pcidev->dev, addr)) { ret = -EIO; goto unlock; } diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c index 86e016916cd..82d9a0b5ca2 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c @@ -206,7 +206,7 @@ static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd, dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto free_unmap; } @@ -301,7 +301,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd, pages[j], 0, flen, DMA_TO_DEVICE); unsigned long fofs = addr & ~PAGE_MASK; - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto done; } @@ -508,7 +508,7 @@ static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd, if (page) { dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto free_pbc; } diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 299f20832ab..a1464574bfd 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -637,6 +638,7 @@ repoll: case MLX4_OPCODE_SEND_IMM: wc->wc_flags |= IB_WC_WITH_IMM; case MLX4_OPCODE_SEND: + case MLX4_OPCODE_SEND_INVAL: wc->opcode = IB_WC_SEND; break; case MLX4_OPCODE_RDMA_READ: @@ -657,6 +659,12 @@ repoll: case MLX4_OPCODE_LSO: wc->opcode = IB_WC_LSO; break; + case MLX4_OPCODE_FMR: + wc->opcode = IB_WC_FAST_REG_MR; + break; + case MLX4_OPCODE_LOCAL_INVAL: + wc->opcode = IB_WC_LOCAL_INV; + break; } } else { wc->byte_len = be32_to_cpu(cqe->byte_cnt); @@ -667,6 +675,11 @@ repoll: wc->wc_flags = IB_WC_WITH_IMM; wc->ex.imm_data = cqe->immed_rss_invalid; break; + case MLX4_RECV_OPCODE_SEND_INVAL: + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_INVALIDATE; + wc->ex.invalidate_rkey = be32_to_cpu(cqe->immed_rss_invalid); + break; case MLX4_RECV_OPCODE_SEND: wc->opcode = IB_WC_RECV; wc->wc_flags = 0; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index bcf50648fa1..a3c2851c054 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -104,6 +105,12 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; if (dev->dev->caps.max_gso_sz) props->device_cap_flags |= IB_DEVICE_UD_TSO; + if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) + props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; + if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) && + (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) && + (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR)) + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; @@ -127,6 +134,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs; props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; props->max_srq_sge = dev->dev->caps.max_srq_sge; + props->max_fast_reg_page_list_len = PAGE_SIZE / sizeof (u64); props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? IB_ATOMIC_HCA : IB_ATOMIC_NONE; @@ -565,6 +573,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); ibdev->ib_dev.owner = THIS_MODULE; ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; + ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey; ibdev->ib_dev.phys_port_cnt = dev->caps.num_ports; ibdev->ib_dev.num_comp_vectors = 1; ibdev->ib_dev.dma_device = &dev->pdev->dev; @@ -627,6 +636,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; + ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr; + ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; + ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list; ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; ibdev->ib_dev.process_mad = mlx4_ib_process_mad; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index c4cf5b69eef..6e2b0dc21b6 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -83,6 +84,11 @@ struct mlx4_ib_mr { struct ib_umem *umem; }; +struct mlx4_ib_fast_reg_page_list { + struct ib_fast_reg_page_list ibfrpl; + dma_addr_t map; +}; + struct mlx4_ib_fmr { struct ib_fmr ibfmr; struct mlx4_fmr mfmr; @@ -199,6 +205,11 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr) return container_of(ibmr, struct mlx4_ib_mr, ibmr); } +static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) +{ + return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl); +} + static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) { return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr); @@ -239,6 +250,11 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int mlx4_ib_dereg_mr(struct ib_mr *mr); +struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, + int max_page_list_len); +struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, + int page_list_len); +void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 68e92485fc7..a4cdb465cd1 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -183,6 +184,76 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr) return 0; } +struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, + int max_page_list_len) +{ + struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_mr *mr; + int err; + + mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0, + max_page_list_len, 0, &mr->mmr); + if (err) + goto err_free; + + err = mlx4_mr_enable(dev->dev, &mr->mmr); + if (err) + goto err_mr; + + return &mr->ibmr; + +err_mr: + mlx4_mr_free(dev->dev, &mr->mmr); + +err_free: + kfree(mr); + return ERR_PTR(err); +} + +struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, + int page_list_len) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct mlx4_ib_fast_reg_page_list *mfrpl; + int size = page_list_len * sizeof (u64); + + if (size > PAGE_SIZE) + return ERR_PTR(-EINVAL); + + mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL); + if (!mfrpl) + return ERR_PTR(-ENOMEM); + + mfrpl->ibfrpl.page_list = dma_alloc_coherent(&dev->dev->pdev->dev, + size, &mfrpl->map, + GFP_KERNEL); + if (!mfrpl->ibfrpl.page_list) + goto err_free; + + WARN_ON(mfrpl->map & 0x3f); + + return &mfrpl->ibfrpl; + +err_free: + kfree(mfrpl); + return ERR_PTR(-ENOMEM); +} + +void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) +{ + struct mlx4_ib_dev *dev = to_mdev(page_list->device); + struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); + int size = page_list->max_page_list_len * sizeof (u64); + + dma_free_coherent(&dev->dev->pdev->dev, size, page_list->page_list, + mfrpl->map); + kfree(mfrpl); +} + struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, struct ib_fmr_attr *fmr_attr) { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 89eb6cbe592..f7bc7dd8578 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -78,6 +79,9 @@ static const __be32 mlx4_ib_opcode[] = { [IB_WR_RDMA_READ] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ), [IB_WR_ATOMIC_CMP_AND_SWP] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), [IB_WR_ATOMIC_FETCH_AND_ADD] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), + [IB_WR_SEND_WITH_INV] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_INVAL), + [IB_WR_LOCAL_INV] = __constant_cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), + [IB_WR_FAST_REG_MR] = __constant_cpu_to_be32(MLX4_OPCODE_FMR), }; static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) @@ -976,6 +980,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn); context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); + /* Set "fast registration enabled" for all kernel QPs */ + if (!qp->ibqp.uobject) + context->params1 |= cpu_to_be32(1 << 11); + if (attr_mask & IB_QP_RNR_RETRY) { context->params1 |= cpu_to_be32(attr->rnr_retry << 13); optpar |= MLX4_QP_OPTPAR_RNR_RETRY; @@ -1322,6 +1330,38 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq return cur + nreq >= wq->max_post; } +static __be32 convert_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC) : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) | + (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ) : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) | + cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); +} + +static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) +{ + struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); + + fseg->flags = convert_access(wr->wr.fast_reg.access_flags); + fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey); + fseg->buf_list = cpu_to_be64(mfrpl->map); + fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); + fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length); + fseg->offset = 0; /* XXX -- is this just for ZBVA? */ + fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift); + fseg->reserved[0] = 0; + fseg->reserved[1] = 0; +} + +static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) +{ + iseg->flags = 0; + iseg->mem_key = cpu_to_be32(rkey); + iseg->guest_id = 0; + iseg->pa = 0; +} + static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, u64 remote_addr, u32 rkey) { @@ -1395,7 +1435,7 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) dseg->addr = cpu_to_be64(sg->addr); } -static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr, +static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, struct mlx4_ib_qp *qp, unsigned *lso_seg_len) { unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); @@ -1423,6 +1463,21 @@ static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr, return 0; } +static __be32 send_ieth(struct ib_send_wr *wr) +{ + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + case IB_WR_RDMA_WRITE_WITH_IMM: + return wr->ex.imm_data; + + case IB_WR_SEND_WITH_INV: + return cpu_to_be32(wr->ex.invalidate_rkey); + + default: + return 0; + } +} + int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -1469,11 +1524,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) | qp->sq_signal_bits; - if (wr->opcode == IB_WR_SEND_WITH_IMM || - wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) - ctrl->imm = wr->ex.imm_data; - else - ctrl->imm = 0; + ctrl->imm = send_ieth(wr); wqe += sizeof *ctrl; size = sizeof *ctrl / 16; @@ -1505,6 +1556,18 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += sizeof (struct mlx4_wqe_raddr_seg) / 16; break; + case IB_WR_LOCAL_INV: + set_local_inv_seg(wqe, wr->ex.invalidate_rkey); + wqe += sizeof (struct mlx4_wqe_local_inval_seg); + size += sizeof (struct mlx4_wqe_local_inval_seg) / 16; + break; + + case IB_WR_FAST_REG_MR: + set_fmr_seg(wqe, wr); + wqe += sizeof (struct mlx4_wqe_fmr_seg); + size += sizeof (struct mlx4_wqe_fmr_seg) / 16; + break; + default: /* No extra segments required for sends */ break; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 12d6bc6f800..d42565258fb 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h index e2d11be4525..13beedeeef9 100644 --- a/drivers/infiniband/hw/mlx4/user.h +++ b/drivers/infiniband/hw/mlx4/user.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index ee4d073c889..252590116df 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -202,6 +202,7 @@ struct mthca_pd_table { struct mthca_buddy { unsigned long **bits; + int *num_free; int max_order; spinlock_t lock; }; diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 4e36aa7cb3d..cc6858f0b65 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -780,7 +780,7 @@ int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt) return -ENOMEM; dev->eq_table.icm_dma = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(dev->eq_table.icm_dma)) { + if (pci_dma_mapping_error(dev->pdev, dev->eq_table.icm_dma)) { __free_page(dev->eq_table.icm_page); return -ENOMEM; } diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 8489b1e81c0..882e6b73591 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -89,23 +89,26 @@ static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order) spin_lock(&buddy->lock); - for (o = order; o <= buddy->max_order; ++o) { - m = 1 << (buddy->max_order - o); - seg = find_first_bit(buddy->bits[o], m); - if (seg < m) - goto found; - } + for (o = order; o <= buddy->max_order; ++o) + if (buddy->num_free[o]) { + m = 1 << (buddy->max_order - o); + seg = find_first_bit(buddy->bits[o], m); + if (seg < m) + goto found; + } spin_unlock(&buddy->lock); return -1; found: clear_bit(seg, buddy->bits[o]); + --buddy->num_free[o]; while (o > order) { --o; seg <<= 1; set_bit(seg ^ 1, buddy->bits[o]); + ++buddy->num_free[o]; } spin_unlock(&buddy->lock); @@ -123,11 +126,13 @@ static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order) while (test_bit(seg ^ 1, buddy->bits[order])) { clear_bit(seg ^ 1, buddy->bits[order]); + --buddy->num_free[order]; seg >>= 1; ++order; } set_bit(seg, buddy->bits[order]); + ++buddy->num_free[order]; spin_unlock(&buddy->lock); } @@ -141,7 +146,9 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order) buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *), GFP_KERNEL); - if (!buddy->bits) + buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *), + GFP_KERNEL); + if (!buddy->bits || !buddy->num_free) goto err_out; for (i = 0; i <= buddy->max_order; ++i) { @@ -154,6 +161,7 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order) } set_bit(0, buddy->bits[buddy->max_order]); + buddy->num_free[buddy->max_order] = 1; return 0; @@ -161,9 +169,10 @@ err_out_free: for (i = 0; i <= buddy->max_order; ++i) kfree(buddy->bits[i]); +err_out: kfree(buddy->bits); + kfree(buddy->num_free); -err_out: return -ENOMEM; } @@ -175,6 +184,7 @@ static void mthca_buddy_cleanup(struct mthca_buddy *buddy) kfree(buddy->bits[i]); kfree(buddy->bits); + kfree(buddy->num_free); } static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order, diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index d2884e77809..b0cab64e5e3 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -276,6 +276,7 @@ static void nes_cqp_rem_ref_callback(struct nes_device *nesdev, struct nes_cqp_r } nes_free_resource(nesadapter, nesadapter->allocated_qps, nesqp->hwqp.qp_id); + nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL; kfree(nesqp->allocated_buffer); } @@ -289,7 +290,6 @@ void nes_rem_ref(struct ib_qp *ibqp) struct nes_qp *nesqp; struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; struct nes_hw_cqp_wqe *cqp_wqe; struct nes_cqp_request *cqp_request; u32 opcode; @@ -303,8 +303,6 @@ void nes_rem_ref(struct ib_qp *ibqp) } if (atomic_dec_and_test(&nesqp->refcount)) { - nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL; - /* Destroy the QP */ cqp_request = nes_get_cqp_request(nesdev); if (cqp_request == NULL) { diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 6aa531d5276..9f0b964b2c9 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -74,36 +74,59 @@ atomic_t cm_nodes_destroyed; atomic_t cm_accel_dropped_pkts; atomic_t cm_resets_recvd; -static inline int mini_cm_accelerated(struct nes_cm_core *, struct nes_cm_node *); +static inline int mini_cm_accelerated(struct nes_cm_core *, + struct nes_cm_node *); static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *, - struct nes_vnic *, struct nes_cm_info *); -static int add_ref_cm_node(struct nes_cm_node *); -static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *); + struct nes_vnic *, struct nes_cm_info *); static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *); -static struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *, - void *, u32, void *, u32, u8); -static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node); - static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *, - struct nes_vnic *, - struct ietf_mpa_frame *, - struct nes_cm_info *); + struct nes_vnic *, u16, void *, struct nes_cm_info *); +static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *); static int mini_cm_accept(struct nes_cm_core *, struct ietf_mpa_frame *, - struct nes_cm_node *); + struct nes_cm_node *); static int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *, - struct nes_cm_node *); -static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *); -static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, - struct sk_buff *); + struct nes_cm_node *); +static void mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, + struct sk_buff *); static int mini_cm_dealloc_core(struct nes_cm_core *); static int mini_cm_get(struct nes_cm_core *); static int mini_cm_set(struct nes_cm_core *, u32, u32); + +static struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *, + void *, u32, void *, u32, u8); +static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node); +static int add_ref_cm_node(struct nes_cm_node *); +static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *); + static int nes_cm_disconn_true(struct nes_qp *); static int nes_cm_post_event(struct nes_cm_event *event); static int nes_disconnect(struct nes_qp *nesqp, int abrupt); static void nes_disconnect_worker(struct work_struct *work); -static int send_ack(struct nes_cm_node *cm_node); + +static int send_mpa_request(struct nes_cm_node *, struct sk_buff *); +static int send_syn(struct nes_cm_node *, u32, struct sk_buff *); +static int send_reset(struct nes_cm_node *, struct sk_buff *); +static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb); static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb); +static void process_packet(struct nes_cm_node *, struct sk_buff *, + struct nes_cm_core *); + +static void active_open_err(struct nes_cm_node *, struct sk_buff *, int); +static void passive_open_err(struct nes_cm_node *, struct sk_buff *, int); +static void cleanup_retrans_entry(struct nes_cm_node *); +static void handle_rcv_mpa(struct nes_cm_node *, struct sk_buff *, + enum nes_cm_event_type); +static void free_retrans_entry(struct nes_cm_node *cm_node); +static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, + struct sk_buff *skb, int optionsize, int passive); + +/* CM event handler functions */ +static void cm_event_connected(struct nes_cm_event *); +static void cm_event_connect_error(struct nes_cm_event *); +static void cm_event_reset(struct nes_cm_event *); +static void cm_event_mpa_req(struct nes_cm_event *); + +static void print_core(struct nes_cm_core *core); /* External CM API Interface */ /* instance of function pointers for client API */ @@ -158,11 +181,11 @@ static struct nes_cm_event *create_event(struct nes_cm_node *cm_node, event->cm_info.loc_port = cm_node->loc_port; event->cm_info.cm_id = cm_node->cm_id; - nes_debug(NES_DBG_CM, "Created event=%p, type=%u, dst_addr=%08x[%x]," - " src_addr=%08x[%x]\n", - event, type, - event->cm_info.loc_addr, event->cm_info.loc_port, - event->cm_info.rem_addr, event->cm_info.rem_port); + nes_debug(NES_DBG_CM, "cm_node=%p Created event=%p, type=%u, " + "dst_addr=%08x[%x], src_addr=%08x[%x]\n", + cm_node, event, type, event->cm_info.loc_addr, + event->cm_info.loc_port, event->cm_info.rem_addr, + event->cm_info.rem_port); nes_cm_post_event(event); return event; @@ -172,14 +195,11 @@ static struct nes_cm_event *create_event(struct nes_cm_node *cm_node, /** * send_mpa_request */ -static int send_mpa_request(struct nes_cm_node *cm_node) +static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb) { - struct sk_buff *skb; int ret; - - skb = get_free_pkt(cm_node); if (!skb) { - nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + nes_debug(NES_DBG_CM, "skb set to NULL\n"); return -1; } @@ -188,9 +208,8 @@ static int send_mpa_request(struct nes_cm_node *cm_node) cm_node->mpa_frame_size, SET_ACK); ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); - if (ret < 0) { + if (ret < 0) return ret; - } return 0; } @@ -229,46 +248,12 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 len) /** - * handle_exception_pkt - process an exception packet. - * We have been in a TSA state, and we have now received SW - * TCP/IP traffic should be a FIN request or IP pkt with options - */ -static int handle_exception_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb) -{ - int ret = 0; - struct tcphdr *tcph = tcp_hdr(skb); - - /* first check to see if this a FIN pkt */ - if (tcph->fin) { - /* we need to ACK the FIN request */ - send_ack(cm_node); - - /* check which side we are (client/server) and set next state accordingly */ - if (cm_node->tcp_cntxt.client) - cm_node->state = NES_CM_STATE_CLOSING; - else { - /* we are the server side */ - cm_node->state = NES_CM_STATE_CLOSE_WAIT; - /* since this is a self contained CM we don't wait for */ - /* an APP to close us, just send final FIN immediately */ - ret = send_fin(cm_node, NULL); - cm_node->state = NES_CM_STATE_LAST_ACK; - } - } else { - ret = -EINVAL; - } - - return ret; -} - - -/** * form_cm_frame - get a free packet and build empty frame Use * node info to build. */ -static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm_node, - void *options, u32 optionsize, void *data, - u32 datasize, u8 flags) +static struct sk_buff *form_cm_frame(struct sk_buff *skb, + struct nes_cm_node *cm_node, void *options, u32 optionsize, + void *data, u32 datasize, u8 flags) { struct tcphdr *tcph; struct iphdr *iph; @@ -332,10 +317,12 @@ static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm cm_node->tcp_cntxt.loc_seq_num++; tcph->syn = 1; } else - cm_node->tcp_cntxt.loc_seq_num += datasize; /* data (no headers) */ + cm_node->tcp_cntxt.loc_seq_num += datasize; - if (flags & SET_FIN) + if (flags & SET_FIN) { + cm_node->tcp_cntxt.loc_seq_num++; tcph->fin = 1; + } if (flags & SET_RST) tcph->rst = 1; @@ -389,7 +376,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, int close_when_complete) { unsigned long flags; - struct nes_cm_core *cm_core; + struct nes_cm_core *cm_core = cm_node->cm_core; struct nes_timer_entry *new_send; int ret = 0; u32 was_timer_set; @@ -411,7 +398,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, new_send->close_when_complete = close_when_complete; if (type == NES_TIMER_TYPE_CLOSE) { - new_send->timetosend += (HZ/2); /* TODO: decide on the correct value here */ + new_send->timetosend += (HZ/10); spin_lock_irqsave(&cm_node->recv_list_lock, flags); list_add_tail(&new_send->list, &cm_node->recv_list); spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); @@ -420,36 +407,28 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, if (type == NES_TIMER_TYPE_SEND) { new_send->seq_num = ntohl(tcp_hdr(skb)->seq); atomic_inc(&new_send->skb->users); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + cm_node->send_entry = new_send; + add_ref_cm_node(cm_node); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + new_send->timetosend = jiffies + NES_RETRY_TIMEOUT; ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev); if (ret != NETDEV_TX_OK) { - nes_debug(NES_DBG_CM, "Error sending packet %p (jiffies = %lu)\n", - new_send, jiffies); + nes_debug(NES_DBG_CM, "Error sending packet %p " + "(jiffies = %lu)\n", new_send, jiffies); atomic_dec(&new_send->skb->users); new_send->timetosend = jiffies; } else { cm_packets_sent++; if (!send_retrans) { + cleanup_retrans_entry(cm_node); if (close_when_complete) - rem_ref_cm_node(cm_node->cm_core, cm_node); - dev_kfree_skb_any(new_send->skb); - kfree(new_send); + rem_ref_cm_node(cm_core, cm_node); return ret; } - new_send->timetosend = jiffies + NES_RETRY_TIMEOUT; } - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - list_add_tail(&new_send->list, &cm_node->retrans_list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - } - if (type == NES_TIMER_TYPE_RECV) { - new_send->seq_num = ntohl(tcp_hdr(skb)->seq); - new_send->timetosend = jiffies; - spin_lock_irqsave(&cm_node->recv_list_lock, flags); - list_add_tail(&new_send->list, &cm_node->recv_list); - spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); } - cm_core = cm_node->cm_core; was_timer_set = timer_pending(&cm_core->tcp_timer); @@ -476,23 +455,27 @@ static void nes_cm_timer_tick(unsigned long pass) struct list_head *list_node, *list_node_temp; struct nes_cm_core *cm_core = g_cm_core; struct nes_qp *nesqp; - struct sk_buff *skb; u32 settimer = 0; int ret = NETDEV_TX_OK; - int node_done; + enum nes_cm_node_state last_state; spin_lock_irqsave(&cm_core->ht_lock, flags); - list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) { + list_for_each_safe(list_node, list_core_temp, + &cm_core->connected_nodes) { cm_node = container_of(list_node, struct nes_cm_node, list); add_ref_cm_node(cm_node); spin_unlock_irqrestore(&cm_core->ht_lock, flags); spin_lock_irqsave(&cm_node->recv_list_lock, flags); - list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) { - recv_entry = container_of(list_core, struct nes_timer_entry, list); - if ((time_after(recv_entry->timetosend, jiffies)) && - (recv_entry->type == NES_TIMER_TYPE_CLOSE)) { - if (nexttimeout > recv_entry->timetosend || !settimer) { + list_for_each_safe(list_core, list_node_temp, + &cm_node->recv_list) { + recv_entry = container_of(list_core, + struct nes_timer_entry, list); + if (!recv_entry) + break; + if (time_after(recv_entry->timetosend, jiffies)) { + if (nexttimeout > recv_entry->timetosend || + !settimer) { nexttimeout = recv_entry->timetosend; settimer = 1; } @@ -501,157 +484,143 @@ static void nes_cm_timer_tick(unsigned long pass) list_del(&recv_entry->list); cm_id = cm_node->cm_id; spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); - if (recv_entry->type == NES_TIMER_TYPE_CLOSE) { - nesqp = (struct nes_qp *)recv_entry->skb; - spin_lock_irqsave(&nesqp->lock, qplockflags); - if (nesqp->cm_id) { - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, refcount = %d: " - "****** HIT A NES_TIMER_TYPE_CLOSE" - " with something to do!!! ******\n", - nesqp->hwqp.qp_id, cm_id, - atomic_read(&nesqp->refcount)); - nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; - nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; - nesqp->ibqp_state = IB_QPS_ERR; - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_cm_disconn(nesqp); - } else { - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, refcount = %d:" - " ****** HIT A NES_TIMER_TYPE_CLOSE" - " with nothing to do!!! ******\n", - nesqp->hwqp.qp_id, cm_id, - atomic_read(&nesqp->refcount)); - nes_rem_ref(&nesqp->ibqp); - } - if (cm_id) - cm_id->rem_ref(cm_id); + nesqp = (struct nes_qp *)recv_entry->skb; + spin_lock_irqsave(&nesqp->lock, qplockflags); + if (nesqp->cm_id) { + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " + "refcount = %d: HIT A " + "NES_TIMER_TYPE_CLOSE with something " + "to do!!!\n", nesqp->hwqp.qp_id, cm_id, + atomic_read(&nesqp->refcount)); + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; + nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; + nesqp->ibqp_state = IB_QPS_ERR; + spin_unlock_irqrestore(&nesqp->lock, + qplockflags); + nes_cm_disconn(nesqp); + } else { + spin_unlock_irqrestore(&nesqp->lock, + qplockflags); + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " + "refcount = %d: HIT A " + "NES_TIMER_TYPE_CLOSE with nothing " + "to do!!!\n", nesqp->hwqp.qp_id, cm_id, + atomic_read(&nesqp->refcount)); } + if (cm_id) + cm_id->rem_ref(cm_id); + kfree(recv_entry); spin_lock_irqsave(&cm_node->recv_list_lock, flags); } spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - node_done = 0; - list_for_each_safe(list_core, list_node_temp, &cm_node->retrans_list) { - if (node_done) { - break; - } - send_entry = container_of(list_core, struct nes_timer_entry, list); + do { + send_entry = cm_node->send_entry; + if (!send_entry) + continue; if (time_after(send_entry->timetosend, jiffies)) { if (cm_node->state != NES_CM_STATE_TSA) { - if ((nexttimeout > send_entry->timetosend) || !settimer) { - nexttimeout = send_entry->timetosend; + if ((nexttimeout > + send_entry->timetosend) || + !settimer) { + nexttimeout = + send_entry->timetosend; settimer = 1; + continue; } - node_done = 1; - continue; } else { - list_del(&send_entry->list); - skb = send_entry->skb; - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - dev_kfree_skb_any(skb); - kfree(send_entry); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + free_retrans_entry(cm_node); continue; } } - if (send_entry->type == NES_TIMER_NODE_CLEANUP) { - list_del(&send_entry->list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - kfree(send_entry); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - continue; - } - if ((send_entry->seq_num < cm_node->tcp_cntxt.rem_ack_num) || - (cm_node->state == NES_CM_STATE_TSA) || - (cm_node->state == NES_CM_STATE_CLOSED)) { - skb = send_entry->skb; - list_del(&send_entry->list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - kfree(send_entry); - dev_kfree_skb_any(skb); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + + if ((cm_node->state == NES_CM_STATE_TSA) || + (cm_node->state == NES_CM_STATE_CLOSED)) { + free_retrans_entry(cm_node); continue; } - if (!send_entry->retranscount || !send_entry->retrycount) { + if (!send_entry->retranscount || + !send_entry->retrycount) { cm_packets_dropped++; - skb = send_entry->skb; - list_del(&send_entry->list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - dev_kfree_skb_any(skb); - kfree(send_entry); - if (cm_node->state == NES_CM_STATE_SYN_RCVD) { - /* this node never even generated an indication up to the cm */ + last_state = cm_node->state; + cm_node->state = NES_CM_STATE_CLOSED; + free_retrans_entry(cm_node); + spin_unlock_irqrestore( + &cm_node->retrans_list_lock, flags); + if (last_state == NES_CM_STATE_SYN_RCVD) rem_ref_cm_node(cm_core, cm_node); - } else { - cm_node->state = NES_CM_STATE_CLOSED; - create_event(cm_node, NES_CM_EVENT_ABORTED); - } - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + else + create_event(cm_node, + NES_CM_EVENT_ABORTED); + spin_lock_irqsave(&cm_node->retrans_list_lock, + flags); continue; } - /* this seems like the correct place, but leave send entry unprotected */ - /* spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); */ atomic_inc(&send_entry->skb->users); cm_packets_retrans++; - nes_debug(NES_DBG_CM, "Retransmitting send_entry %p for node %p," - " jiffies = %lu, time to send = %lu, retranscount = %u, " - "send_entry->seq_num = 0x%08X, cm_node->tcp_cntxt.rem_ack_num = 0x%08X\n", - send_entry, cm_node, jiffies, send_entry->timetosend, send_entry->retranscount, - send_entry->seq_num, cm_node->tcp_cntxt.rem_ack_num); - - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + nes_debug(NES_DBG_CM, "Retransmitting send_entry %p " + "for node %p, jiffies = %lu, time to send = " + "%lu, retranscount = %u, send_entry->seq_num = " + "0x%08X, cm_node->tcp_cntxt.rem_ack_num = " + "0x%08X\n", send_entry, cm_node, jiffies, + send_entry->timetosend, + send_entry->retranscount, + send_entry->seq_num, + cm_node->tcp_cntxt.rem_ack_num); + + spin_unlock_irqrestore(&cm_node->retrans_list_lock, + flags); ret = nes_nic_cm_xmit(send_entry->skb, cm_node->netdev); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); if (ret != NETDEV_TX_OK) { + nes_debug(NES_DBG_CM, "rexmit failed for " + "node=%p\n", cm_node); cm_packets_bounced++; atomic_dec(&send_entry->skb->users); send_entry->retrycount--; nexttimeout = jiffies + NES_SHORT_TIME; settimer = 1; - node_done = 1; - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); continue; } else { cm_packets_sent++; } - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - list_del(&send_entry->list); - nes_debug(NES_DBG_CM, "Packet Sent: retrans count = %u, retry count = %u.\n", - send_entry->retranscount, send_entry->retrycount); + nes_debug(NES_DBG_CM, "Packet Sent: retrans count = " + "%u, retry count = %u.\n", + send_entry->retranscount, + send_entry->retrycount); if (send_entry->send_retrans) { send_entry->retranscount--; - send_entry->timetosend = jiffies + NES_RETRY_TIMEOUT; - if (nexttimeout > send_entry->timetosend || !settimer) { + send_entry->timetosend = jiffies + + NES_RETRY_TIMEOUT; + if (nexttimeout > send_entry->timetosend || + !settimer) { nexttimeout = send_entry->timetosend; settimer = 1; } - list_add(&send_entry->list, &cm_node->retrans_list); - continue; } else { int close_when_complete; - skb = send_entry->skb; - close_when_complete = send_entry->close_when_complete; - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - if (close_when_complete) { - BUG_ON(atomic_read(&cm_node->ref_count) == 1); - rem_ref_cm_node(cm_core, cm_node); - } - dev_kfree_skb_any(skb); - kfree(send_entry); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - continue; + close_when_complete = + send_entry->close_when_complete; + nes_debug(NES_DBG_CM, "cm_node=%p state=%d\n", + cm_node, cm_node->state); + free_retrans_entry(cm_node); + if (close_when_complete) + rem_ref_cm_node(cm_node->cm_core, + cm_node); } - } - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - - rem_ref_cm_node(cm_core, cm_node); + } while (0); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + rem_ref_cm_node(cm_node->cm_core, cm_node); spin_lock_irqsave(&cm_core->ht_lock, flags); - if (ret != NETDEV_TX_OK) + if (ret != NETDEV_TX_OK) { + nes_debug(NES_DBG_CM, "rexmit failed for cm_node=%p\n", + cm_node); break; + } } spin_unlock_irqrestore(&cm_core->ht_lock, flags); @@ -667,14 +636,14 @@ static void nes_cm_timer_tick(unsigned long pass) /** * send_syn */ -static int send_syn(struct nes_cm_node *cm_node, u32 sendack) +static int send_syn(struct nes_cm_node *cm_node, u32 sendack, + struct sk_buff *skb) { int ret; int flags = SET_SYN; - struct sk_buff *skb; char optionsbuffer[sizeof(struct option_mss) + - sizeof(struct option_windowscale) + - sizeof(struct option_base) + 1]; + sizeof(struct option_windowscale) + sizeof(struct option_base) + + TCP_OPTIONS_PADDING]; int optionssize = 0; /* Sending MSS option */ @@ -695,8 +664,7 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack) options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale; optionssize += sizeof(struct option_windowscale); - if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt) - ) { + if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt)) { options = (union all_known_options *)&optionsbuffer[optionssize]; options->as_base.optionnum = OPTION_NUMBER_WRITE0; options->as_base.length = sizeof(struct option_base); @@ -714,7 +682,8 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack) options->as_end = OPTION_NUMBER_END; optionssize += 1; - skb = get_free_pkt(cm_node); + if (!skb) + skb = get_free_pkt(cm_node); if (!skb) { nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); return -1; @@ -733,18 +702,18 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack) /** * send_reset */ -static int send_reset(struct nes_cm_node *cm_node) +static int send_reset(struct nes_cm_node *cm_node, struct sk_buff *skb) { int ret; - struct sk_buff *skb = get_free_pkt(cm_node); int flags = SET_RST | SET_ACK; + if (!skb) + skb = get_free_pkt(cm_node); if (!skb) { nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); return -1; } - add_ref_cm_node(cm_node); form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, flags); ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 1); @@ -755,10 +724,12 @@ static int send_reset(struct nes_cm_node *cm_node) /** * send_ack */ -static int send_ack(struct nes_cm_node *cm_node) +static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb) { int ret; - struct sk_buff *skb = get_free_pkt(cm_node); + + if (!skb) + skb = get_free_pkt(cm_node); if (!skb) { nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); @@ -922,7 +893,8 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node if (!cm_node || !cm_core) return -EINVAL; - nes_debug(NES_DBG_CM, "Adding Node to Active Connection HT\n"); + nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n", + cm_node); /* first, make an index into our hash table */ hashkey = make_hashkey(cm_node->loc_port, cm_node->loc_addr, @@ -946,10 +918,35 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node * mini_cm_dec_refcnt_listen */ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, - struct nes_cm_listener *listener, int free_hanging_nodes) + struct nes_cm_listener *listener, int free_hanging_nodes) { int ret = 1; unsigned long flags; + struct list_head *list_pos = NULL; + struct list_head *list_temp = NULL; + struct nes_cm_node *cm_node = NULL; + + nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, " + "refcnt=%d\n", listener, free_hanging_nodes, + atomic_read(&listener->ref_count)); + /* free non-accelerated child nodes for this listener */ + if (free_hanging_nodes) { + spin_lock_irqsave(&cm_core->ht_lock, flags); + list_for_each_safe(list_pos, list_temp, + &g_cm_core->connected_nodes) { + cm_node = container_of(list_pos, struct nes_cm_node, + list); + if ((cm_node->listener == listener) && + (!cm_node->accelerated)) { + cleanup_retrans_entry(cm_node); + spin_unlock_irqrestore(&cm_core->ht_lock, + flags); + send_reset(cm_node, NULL); + spin_lock_irqsave(&cm_core->ht_lock, flags); + } + } + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + } spin_lock_irqsave(&cm_core->listen_list_lock, flags); if (!atomic_dec_return(&listener->ref_count)) { list_del(&listener->list); @@ -1067,18 +1064,18 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->loc_port = cm_info->loc_port; cm_node->rem_port = cm_info->rem_port; cm_node->send_write0 = send_first; - nes_debug(NES_DBG_CM, "Make node addresses : loc = " NIPQUAD_FMT ":%x, rem = " NIPQUAD_FMT ":%x\n", - HIPQUAD(cm_node->loc_addr), cm_node->loc_port, - HIPQUAD(cm_node->rem_addr), cm_node->rem_port); + nes_debug(NES_DBG_CM, "Make node addresses : loc = " NIPQUAD_FMT + ":%x, rem = " NIPQUAD_FMT ":%x\n", + HIPQUAD(cm_node->loc_addr), cm_node->loc_port, + HIPQUAD(cm_node->rem_addr), cm_node->rem_port); cm_node->listener = listener; cm_node->netdev = nesvnic->netdev; cm_node->cm_id = cm_info->cm_id; memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN); - nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", - cm_node->listener, cm_node->cm_id); + nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", cm_node->listener, + cm_node->cm_id); - INIT_LIST_HEAD(&cm_node->retrans_list); spin_lock_init(&cm_node->retrans_list_lock); INIT_LIST_HEAD(&cm_node->recv_list); spin_lock_init(&cm_node->recv_list_lock); @@ -1142,10 +1139,9 @@ static int add_ref_cm_node(struct nes_cm_node *cm_node) * rem_ref_cm_node - destroy an instance of a cm node */ static int rem_ref_cm_node(struct nes_cm_core *cm_core, - struct nes_cm_node *cm_node) + struct nes_cm_node *cm_node) { unsigned long flags, qplockflags; - struct nes_timer_entry *send_entry; struct nes_timer_entry *recv_entry; struct iw_cm_id *cm_id; struct list_head *list_core, *list_node_temp; @@ -1169,48 +1165,33 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core, atomic_dec(&cm_node->listener->pend_accepts_cnt); BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0); } - - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - list_for_each_safe(list_core, list_node_temp, &cm_node->retrans_list) { - send_entry = container_of(list_core, struct nes_timer_entry, list); - list_del(&send_entry->list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - dev_kfree_skb_any(send_entry->skb); - kfree(send_entry); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - continue; - } - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - + BUG_ON(cm_node->send_entry); spin_lock_irqsave(&cm_node->recv_list_lock, flags); list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) { - recv_entry = container_of(list_core, struct nes_timer_entry, list); + recv_entry = container_of(list_core, struct nes_timer_entry, + list); list_del(&recv_entry->list); cm_id = cm_node->cm_id; spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); - if (recv_entry->type == NES_TIMER_TYPE_CLOSE) { - nesqp = (struct nes_qp *)recv_entry->skb; - spin_lock_irqsave(&nesqp->lock, qplockflags); - if (nesqp->cm_id) { - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: ****** HIT A NES_TIMER_TYPE_CLOSE" - " with something to do!!! ******\n", - nesqp->hwqp.qp_id, cm_id); - nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; - nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; - nesqp->ibqp_state = IB_QPS_ERR; - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_cm_disconn(nesqp); - } else { - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: ****** HIT A NES_TIMER_TYPE_CLOSE" - " with nothing to do!!! ******\n", - nesqp->hwqp.qp_id, cm_id); - nes_rem_ref(&nesqp->ibqp); - } - cm_id->rem_ref(cm_id); - } else if (recv_entry->type == NES_TIMER_TYPE_RECV) { - dev_kfree_skb_any(recv_entry->skb); + nesqp = (struct nes_qp *)recv_entry->skb; + spin_lock_irqsave(&nesqp->lock, qplockflags); + if (nesqp->cm_id) { + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: HIT A " + "NES_TIMER_TYPE_CLOSE with something to do!\n", + nesqp->hwqp.qp_id, cm_id); + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; + nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; + nesqp->ibqp_state = IB_QPS_ERR; + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_cm_disconn(nesqp); + } else { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: HIT A " + "NES_TIMER_TYPE_CLOSE with nothing to do!\n", + nesqp->hwqp.qp_id, cm_id); } + cm_id->rem_ref(cm_id); + kfree(recv_entry); spin_lock_irqsave(&cm_node->recv_list_lock, flags); } @@ -1221,23 +1202,31 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core, } else { if (cm_node->apbvt_set && cm_node->nesvnic) { nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port, - PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn), - NES_MANAGE_APBVT_DEL); + PCI_FUNC( + cm_node->nesvnic->nesdev->pcidev->devfn), + NES_MANAGE_APBVT_DEL); } } - kfree(cm_node); atomic_dec(&cm_core->node_cnt); atomic_inc(&cm_nodes_destroyed); + nesqp = cm_node->nesqp; + if (nesqp) { + nesqp->cm_node = NULL; + nes_rem_ref(&nesqp->ibqp); + cm_node->nesqp = NULL; + } + cm_node->freed = 1; + kfree(cm_node); return 0; } - /** * process_options */ -static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 optionsize, u32 syn_packet) +static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, + u32 optionsize, u32 syn_packet) { u32 tmp; u32 offset = 0; @@ -1247,35 +1236,37 @@ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 opti while (offset < optionsize) { all_options = (union all_known_options *)(optionsloc + offset); switch (all_options->as_base.optionnum) { - case OPTION_NUMBER_END: - offset = optionsize; - break; - case OPTION_NUMBER_NONE: - offset += 1; - continue; - case OPTION_NUMBER_MSS: - nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d Size: %d\n", - __func__, - all_options->as_mss.length, offset, optionsize); - got_mss_option = 1; - if (all_options->as_mss.length != 4) { - return 1; - } else { - tmp = ntohs(all_options->as_mss.mss); - if (tmp > 0 && tmp < cm_node->tcp_cntxt.mss) - cm_node->tcp_cntxt.mss = tmp; - } - break; - case OPTION_NUMBER_WINDOW_SCALE: - cm_node->tcp_cntxt.snd_wscale = all_options->as_windowscale.shiftcount; - break; - case OPTION_NUMBER_WRITE0: - cm_node->send_write0 = 1; - break; - default: - nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n", - all_options->as_base.optionnum); - break; + case OPTION_NUMBER_END: + offset = optionsize; + break; + case OPTION_NUMBER_NONE: + offset += 1; + continue; + case OPTION_NUMBER_MSS: + nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d " + "Size: %d\n", __func__, + all_options->as_mss.length, offset, optionsize); + got_mss_option = 1; + if (all_options->as_mss.length != 4) { + return 1; + } else { + tmp = ntohs(all_options->as_mss.mss); + if (tmp > 0 && tmp < + cm_node->tcp_cntxt.mss) + cm_node->tcp_cntxt.mss = tmp; + } + break; + case OPTION_NUMBER_WINDOW_SCALE: + cm_node->tcp_cntxt.snd_wscale = + all_options->as_windowscale.shiftcount; + break; + case OPTION_NUMBER_WRITE0: + cm_node->send_write0 = 1; + break; + default: + nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n", + all_options->as_base.optionnum); + break; } offset += all_options->as_base.length; } @@ -1284,300 +1275,491 @@ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 opti return 0; } +static void drop_packet(struct sk_buff *skb) +{ + atomic_inc(&cm_accel_dropped_pkts); + dev_kfree_skb_any(skb); +} -/** - * process_packet - */ -static int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, - struct nes_cm_core *cm_core) +static void handle_fin_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) { - int optionsize; - int datasize; - int ret = 0; - struct tcphdr *tcph = tcp_hdr(skb); - u32 inc_sequence; - if (cm_node->state == NES_CM_STATE_SYN_SENT && tcph->syn) { - inc_sequence = ntohl(tcph->seq); - cm_node->tcp_cntxt.rcv_nxt = inc_sequence; + atomic_inc(&cm_resets_recvd); + nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. " + "refcnt=%d\n", cm_node, cm_node->state, + atomic_read(&cm_node->ref_count)); + cm_node->tcp_cntxt.rcv_nxt++; + cleanup_retrans_entry(cm_node); + switch (cm_node->state) { + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_MPAREQ_SENT: + cm_node->state = NES_CM_STATE_LAST_ACK; + send_fin(cm_node, skb); + break; + case NES_CM_STATE_FIN_WAIT1: + cm_node->state = NES_CM_STATE_CLOSING; + send_ack(cm_node, skb); + break; + case NES_CM_STATE_FIN_WAIT2: + cm_node->state = NES_CM_STATE_TIME_WAIT; + send_ack(cm_node, skb); + cm_node->state = NES_CM_STATE_CLOSED; + break; + case NES_CM_STATE_TSA: + default: + nes_debug(NES_DBG_CM, "Error Rcvd FIN for node-%p state = %d\n", + cm_node, cm_node->state); + drop_packet(skb); + break; } +} - if ((!tcph) || (cm_node->state == NES_CM_STATE_TSA)) { - BUG_ON(!tcph); - atomic_inc(&cm_accel_dropped_pkts); - return -1; - } - if (tcph->rst) { - atomic_inc(&cm_resets_recvd); - nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u. refcnt=%d\n", - cm_node, cm_node->state, atomic_read(&cm_node->ref_count)); - switch (cm_node->state) { - case NES_CM_STATE_LISTENING: - rem_ref_cm_node(cm_core, cm_node); - break; - case NES_CM_STATE_TSA: - case NES_CM_STATE_CLOSED: - break; - case NES_CM_STATE_SYN_RCVD: - nes_debug(NES_DBG_CM, "Received a reset for local 0x%08X:%04X," - " remote 0x%08X:%04X, node state = %u\n", - cm_node->loc_addr, cm_node->loc_port, - cm_node->rem_addr, cm_node->rem_port, - cm_node->state); - rem_ref_cm_node(cm_core, cm_node); - break; - case NES_CM_STATE_ONE_SIDE_ESTABLISHED: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_MPAREQ_SENT: - default: - nes_debug(NES_DBG_CM, "Received a reset for local 0x%08X:%04X," - " remote 0x%08X:%04X, node state = %u refcnt=%d\n", - cm_node->loc_addr, cm_node->loc_port, - cm_node->rem_addr, cm_node->rem_port, - cm_node->state, atomic_read(&cm_node->ref_count)); - /* create event */ - cm_node->state = NES_CM_STATE_CLOSED; +static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ - create_event(cm_node, NES_CM_EVENT_ABORTED); - break; + int reset = 0; /* whether to send reset in case of err.. */ + atomic_inc(&cm_resets_recvd); + nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u." + " refcnt=%d\n", cm_node, cm_node->state, + atomic_read(&cm_node->ref_count)); + cleanup_retrans_entry(cm_node); + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_MPAREQ_SENT: + nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " + "listener=%p state=%d\n", __func__, __LINE__, cm_node, + cm_node->listener, cm_node->state); + active_open_err(cm_node, skb, reset); + break; + /* For PASSIVE open states, remove the cm_node event */ + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_LISTENING: + nes_debug(NES_DBG_CM, "Bad state %s[%u]\n", __func__, __LINE__); + passive_open_err(cm_node, skb, reset); + break; + case NES_CM_STATE_TSA: + default: + break; + } +} +static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb, + enum nes_cm_event_type type) +{ + + int ret; + int datasize = skb->len; + u8 *dataloc = skb->data; + ret = parse_mpa(cm_node, dataloc, datasize); + if (ret < 0) { + nes_debug(NES_DBG_CM, "didn't like MPA Request\n"); + if (type == NES_CM_EVENT_CONNECTED) { + nes_debug(NES_DBG_CM, "%s[%u] create abort for " + "cm_node=%p listener=%p state=%d\n", __func__, + __LINE__, cm_node, cm_node->listener, + cm_node->state); + active_open_err(cm_node, skb, 1); + } else { + passive_open_err(cm_node, skb, 1); } - return -1; + } else { + cleanup_retrans_entry(cm_node); + dev_kfree_skb_any(skb); + if (type == NES_CM_EVENT_CONNECTED) + cm_node->state = NES_CM_STATE_TSA; + create_event(cm_node, type); + + } + return ; +} + +static void indicate_pkt_err(struct nes_cm_node *cm_node, struct sk_buff *skb) +{ + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_MPAREQ_SENT: + nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " + "listener=%p state=%d\n", __func__, __LINE__, cm_node, + cm_node->listener, cm_node->state); + active_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_SYN_RCVD: + passive_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_TSA: + default: + drop_packet(skb); } +} + +static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph, + struct sk_buff *skb) +{ + int err; + + err = ((ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num))? 0 : 1; + if (err) + active_open_err(cm_node, skb, 1); + + return err; +} + +static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph, + struct sk_buff *skb) +{ + int err = 0; + u32 seq; + u32 ack_seq; + u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num; + u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt; + u32 rcv_wnd; + seq = ntohl(tcph->seq); + ack_seq = ntohl(tcph->ack_seq); + rcv_wnd = cm_node->tcp_cntxt.rcv_wnd; + if (ack_seq != loc_seq_num) + err = 1; + else if ((seq + rcv_wnd) < rcv_nxt) + err = 1; + if (err) { + nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " + "listener=%p state=%d\n", __func__, __LINE__, cm_node, + cm_node->listener, cm_node->state); + indicate_pkt_err(cm_node, skb); + nes_debug(NES_DBG_CM, "seq ERROR cm_node =%p seq=0x%08X " + "rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt, + rcv_wnd); + } + return err; +} + +/* + * handle_syn_pkt() is for Passive node. The syn packet is received when a node + * is created with a listener or it may comein as rexmitted packet which in + * that case will be just dropped. + */ + +static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ + int ret; + u32 inc_sequence; + int optionsize; optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); + skb_pull(skb, tcph->doff << 2); + inc_sequence = ntohl(tcph->seq); - skb_pull(skb, ip_hdr(skb)->ihl << 2); + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_MPAREQ_SENT: + /* Rcvd syn on active open connection*/ + active_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_LISTENING: + /* Passive OPEN */ + cm_node->accept_pend = 1; + atomic_inc(&cm_node->listener->pend_accepts_cnt); + if (atomic_read(&cm_node->listener->pend_accepts_cnt) > + cm_node->listener->backlog) { + nes_debug(NES_DBG_CM, "drop syn due to backlog " + "pressure \n"); + cm_backlog_drops++; + passive_open_err(cm_node, skb, 0); + break; + } + ret = handle_tcp_options(cm_node, tcph, skb, optionsize, + 1); + if (ret) { + passive_open_err(cm_node, skb, 0); + /* drop pkt */ + break; + } + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1; + BUG_ON(cm_node->send_entry); + cm_node->state = NES_CM_STATE_SYN_RCVD; + send_syn(cm_node, 1, skb); + break; + case NES_CM_STATE_TSA: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_MPAREQ_RCVD: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_CLOSING: + case NES_CM_STATE_UNKNOWN: + case NES_CM_STATE_CLOSED: + default: + drop_packet(skb); + break; + } +} + +static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ + + int ret; + u32 inc_sequence; + int optionsize; + + optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); skb_pull(skb, tcph->doff << 2); + inc_sequence = ntohl(tcph->seq); + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + /* active open */ + if (check_syn(cm_node, tcph, skb)) + return; + cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); + /* setup options */ + ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 0); + if (ret) { + nes_debug(NES_DBG_CM, "cm_node=%p tcp_options failed\n", + cm_node); + break; + } + cleanup_retrans_entry(cm_node); + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1; + send_mpa_request(cm_node, skb); + cm_node->state = NES_CM_STATE_MPAREQ_SENT; + break; + case NES_CM_STATE_MPAREQ_RCVD: + /* passive open, so should not be here */ + passive_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_TSA: + case NES_CM_STATE_CLOSING: + case NES_CM_STATE_UNKNOWN: + case NES_CM_STATE_CLOSED: + case NES_CM_STATE_MPAREQ_SENT: + default: + drop_packet(skb); + break; + } +} - datasize = skb->len; +static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ + int datasize = 0; + u32 inc_sequence; + u32 rem_seq_ack; + u32 rem_seq; + if (check_seq(cm_node, tcph, skb)) + return; + + skb_pull(skb, tcph->doff << 2); inc_sequence = ntohl(tcph->seq); - nes_debug(NES_DBG_CM, "datasize = %u, sequence = 0x%08X, ack_seq = 0x%08X," - " rcv_nxt = 0x%08X Flags: %s %s.\n", - datasize, inc_sequence, ntohl(tcph->ack_seq), - cm_node->tcp_cntxt.rcv_nxt, (tcph->syn ? "SYN":""), - (tcph->ack ? "ACK":"")); - - if (!tcph->syn && (inc_sequence != cm_node->tcp_cntxt.rcv_nxt) - ) { - nes_debug(NES_DBG_CM, "dropping packet, datasize = %u, sequence = 0x%08X," - " ack_seq = 0x%08X, rcv_nxt = 0x%08X Flags: %s.\n", - datasize, inc_sequence, ntohl(tcph->ack_seq), - cm_node->tcp_cntxt.rcv_nxt, (tcph->ack ? "ACK":"")); - if (cm_node->state == NES_CM_STATE_LISTENING) { - rem_ref_cm_node(cm_core, cm_node); + rem_seq = ntohl(tcph->seq); + rem_seq_ack = ntohl(tcph->ack_seq); + datasize = skb->len; + + switch (cm_node->state) { + case NES_CM_STATE_SYN_RCVD: + /* Passive OPEN */ + cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); + cm_node->state = NES_CM_STATE_ESTABLISHED; + if (datasize) { + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + cm_node->state = NES_CM_STATE_MPAREQ_RCVD; + handle_rcv_mpa(cm_node, skb, NES_CM_EVENT_MPA_REQ); + } else { /* rcvd ACK only */ + dev_kfree_skb_any(skb); + cleanup_retrans_entry(cm_node); + } + break; + case NES_CM_STATE_ESTABLISHED: + /* Passive OPEN */ + /* We expect mpa frame to be received only */ + if (datasize) { + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + cm_node->state = NES_CM_STATE_MPAREQ_RCVD; + handle_rcv_mpa(cm_node, skb, + NES_CM_EVENT_MPA_REQ); + } else + drop_packet(skb); + break; + case NES_CM_STATE_MPAREQ_SENT: + cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); + if (datasize) { + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + handle_rcv_mpa(cm_node, skb, NES_CM_EVENT_CONNECTED); + } else { /* Could be just an ack pkt.. */ + cleanup_retrans_entry(cm_node); + dev_kfree_skb_any(skb); } - return -1; + break; + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_TSA: + case NES_CM_STATE_CLOSED: + case NES_CM_STATE_MPAREQ_RCVD: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_CLOSING: + case NES_CM_STATE_UNKNOWN: + default: + drop_packet(skb); + break; } +} - cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; +static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, + struct sk_buff *skb, int optionsize, int passive) +{ + u8 *optionsloc = (u8 *)&tcph[1]; if (optionsize) { - u8 *optionsloc = (u8 *)&tcph[1]; - if (process_options(cm_node, optionsloc, optionsize, (u32)tcph->syn)) { - nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", __func__, cm_node); - send_reset(cm_node); - if (cm_node->state != NES_CM_STATE_SYN_SENT) - rem_ref_cm_node(cm_core, cm_node); - return 0; + if (process_options(cm_node, optionsloc, optionsize, + (u32)tcph->syn)) { + nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", + __func__, cm_node); + if (passive) + passive_open_err(cm_node, skb, 0); + else + active_open_err(cm_node, skb, 0); + return 1; } - } else if (tcph->syn) - cm_node->tcp_cntxt.mss = NES_CM_DEFAULT_MSS; + } cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) << cm_node->tcp_cntxt.snd_wscale; - if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd) { + if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd) cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd; - } + return 0; +} - if (tcph->ack) { - cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); - switch (cm_node->state) { - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_SYN_SENT: - /* read and stash current sequence number */ - if (cm_node->tcp_cntxt.rem_ack_num != cm_node->tcp_cntxt.loc_seq_num) { - nes_debug(NES_DBG_CM, "ERROR - cm_node->tcp_cntxt.rem_ack_num !=" - " cm_node->tcp_cntxt.loc_seq_num\n"); - send_reset(cm_node); - return 0; - } - if (cm_node->state == NES_CM_STATE_SYN_SENT) - cm_node->state = NES_CM_STATE_ONE_SIDE_ESTABLISHED; - else { - cm_node->state = NES_CM_STATE_ESTABLISHED; - } - break; - case NES_CM_STATE_LAST_ACK: - cm_node->state = NES_CM_STATE_CLOSED; - break; - case NES_CM_STATE_FIN_WAIT1: - cm_node->state = NES_CM_STATE_FIN_WAIT2; - break; - case NES_CM_STATE_CLOSING: - cm_node->state = NES_CM_STATE_TIME_WAIT; - /* need to schedule this to happen in 2MSL timeouts */ - cm_node->state = NES_CM_STATE_CLOSED; - break; - case NES_CM_STATE_ONE_SIDE_ESTABLISHED: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_MPAREQ_SENT: - case NES_CM_STATE_CLOSE_WAIT: - case NES_CM_STATE_TIME_WAIT: - case NES_CM_STATE_CLOSED: - break; - case NES_CM_STATE_LISTENING: - nes_debug(NES_DBG_CM, "Received an ACK on a listening port (SYN %d)\n", tcph->syn); - cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); - send_reset(cm_node); - /* send_reset bumps refcount, this should have been a new node */ - rem_ref_cm_node(cm_core, cm_node); - return -1; - break; - case NES_CM_STATE_TSA: - nes_debug(NES_DBG_CM, "Received a packet with the ack bit set while in TSA state\n"); - break; - case NES_CM_STATE_UNKNOWN: - case NES_CM_STATE_INITED: - case NES_CM_STATE_ACCEPTING: - case NES_CM_STATE_FIN_WAIT2: - default: - nes_debug(NES_DBG_CM, "Received ack from unknown state: %x\n", - cm_node->state); - send_reset(cm_node); - break; - } - } +/* + * active_open_err() will send reset() if flag set.. + * It will also send ABORT event. + */ - if (tcph->syn) { - if (cm_node->state == NES_CM_STATE_LISTENING) { - /* do not exceed backlog */ - atomic_inc(&cm_node->listener->pend_accepts_cnt); - if (atomic_read(&cm_node->listener->pend_accepts_cnt) > - cm_node->listener->backlog) { - nes_debug(NES_DBG_CM, "drop syn due to backlog pressure \n"); - cm_backlog_drops++; - atomic_dec(&cm_node->listener->pend_accepts_cnt); - rem_ref_cm_node(cm_core, cm_node); - return 0; - } - cm_node->accept_pend = 1; +static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb, + int reset) +{ + cleanup_retrans_entry(cm_node); + if (reset) { + nes_debug(NES_DBG_CM, "ERROR active err called for cm_node=%p, " + "state=%d\n", cm_node, cm_node->state); + add_ref_cm_node(cm_node); + send_reset(cm_node, skb); + } else + dev_kfree_skb_any(skb); - } - if (datasize == 0) - cm_node->tcp_cntxt.rcv_nxt ++; + cm_node->state = NES_CM_STATE_CLOSED; + create_event(cm_node, NES_CM_EVENT_ABORTED); +} - if (cm_node->state == NES_CM_STATE_LISTENING) { - cm_node->state = NES_CM_STATE_SYN_RCVD; - send_syn(cm_node, 1); - } - if (cm_node->state == NES_CM_STATE_ONE_SIDE_ESTABLISHED) { - cm_node->state = NES_CM_STATE_ESTABLISHED; - /* send final handshake ACK */ - ret = send_ack(cm_node); - if (ret < 0) - return ret; +/* + * passive_open_err() will either do a reset() or will free up the skb and + * remove the cm_node. + */ - cm_node->state = NES_CM_STATE_MPAREQ_SENT; - ret = send_mpa_request(cm_node); - if (ret < 0) - return ret; - } +static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb, + int reset) +{ + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + if (reset) { + nes_debug(NES_DBG_CM, "passive_open_err sending RST for " + "cm_node=%p state =%d\n", cm_node, cm_node->state); + send_reset(cm_node, skb); + } else { + dev_kfree_skb_any(skb); + rem_ref_cm_node(cm_node->cm_core, cm_node); } +} - if (tcph->fin) { - cm_node->tcp_cntxt.rcv_nxt++; - switch (cm_node->state) { - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_SYN_SENT: - case NES_CM_STATE_ONE_SIDE_ESTABLISHED: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_ACCEPTING: - case NES_CM_STATE_MPAREQ_SENT: - cm_node->state = NES_CM_STATE_CLOSE_WAIT; - cm_node->state = NES_CM_STATE_LAST_ACK; - ret = send_fin(cm_node, NULL); - break; - case NES_CM_STATE_FIN_WAIT1: - cm_node->state = NES_CM_STATE_CLOSING; - ret = send_ack(cm_node); - break; - case NES_CM_STATE_FIN_WAIT2: - cm_node->state = NES_CM_STATE_TIME_WAIT; - cm_node->tcp_cntxt.loc_seq_num ++; - ret = send_ack(cm_node); - /* need to schedule this to happen in 2MSL timeouts */ - cm_node->state = NES_CM_STATE_CLOSED; - break; - case NES_CM_STATE_CLOSE_WAIT: - case NES_CM_STATE_LAST_ACK: - case NES_CM_STATE_CLOSING: - case NES_CM_STATE_TSA: - default: - nes_debug(NES_DBG_CM, "Received a fin while in %x state\n", - cm_node->state); - ret = -EINVAL; - break; - } +/* + * free_retrans_entry() routines assumes that the retrans_list_lock has + * been acquired before calling. + */ +static void free_retrans_entry(struct nes_cm_node *cm_node) +{ + struct nes_timer_entry *send_entry; + send_entry = cm_node->send_entry; + if (send_entry) { + cm_node->send_entry = NULL; + dev_kfree_skb_any(send_entry->skb); + kfree(send_entry); + rem_ref_cm_node(cm_node->cm_core, cm_node); } +} - if (datasize) { - u8 *dataloc = skb->data; - /* figure out what state we are in and handle transition to next state */ - switch (cm_node->state) { - case NES_CM_STATE_LISTENING: - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_SYN_SENT: - case NES_CM_STATE_FIN_WAIT1: - case NES_CM_STATE_FIN_WAIT2: - case NES_CM_STATE_CLOSE_WAIT: - case NES_CM_STATE_LAST_ACK: - case NES_CM_STATE_CLOSING: - break; - case NES_CM_STATE_MPAREQ_SENT: - /* recv the mpa res frame, ret=frame len (incl priv data) */ - ret = parse_mpa(cm_node, dataloc, datasize); - if (ret < 0) - break; - /* set the req frame payload len in skb */ - /* we are done handling this state, set node to a TSA state */ - cm_node->state = NES_CM_STATE_TSA; - send_ack(cm_node); - create_event(cm_node, NES_CM_EVENT_CONNECTED); - break; - - case NES_CM_STATE_ESTABLISHED: - /* we are expecting an MPA req frame */ - ret = parse_mpa(cm_node, dataloc, datasize); - if (ret < 0) { - break; - } - cm_node->state = NES_CM_STATE_TSA; - send_ack(cm_node); - /* we got a valid MPA request, create an event */ - create_event(cm_node, NES_CM_EVENT_MPA_REQ); - break; - case NES_CM_STATE_TSA: - handle_exception_pkt(cm_node, skb); - break; - case NES_CM_STATE_UNKNOWN: - case NES_CM_STATE_INITED: - default: - ret = -1; - } - } +static void cleanup_retrans_entry(struct nes_cm_node *cm_node) +{ + unsigned long flags; - return ret; + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + free_retrans_entry(cm_node); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); } +/** + * process_packet + * Returns skb if to be freed, else it will return NULL if already used.. + */ +static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct nes_cm_core *cm_core) +{ + enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN; + struct tcphdr *tcph = tcp_hdr(skb); + skb_pull(skb, ip_hdr(skb)->ihl << 2); + + nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d " + "ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn, + tcph->ack, tcph->rst, tcph->fin); + + if (tcph->rst) + pkt_type = NES_PKT_TYPE_RST; + else if (tcph->syn) { + pkt_type = NES_PKT_TYPE_SYN; + if (tcph->ack) + pkt_type = NES_PKT_TYPE_SYNACK; + } else if (tcph->fin) + pkt_type = NES_PKT_TYPE_FIN; + else if (tcph->ack) + pkt_type = NES_PKT_TYPE_ACK; + + switch (pkt_type) { + case NES_PKT_TYPE_SYN: + handle_syn_pkt(cm_node, skb, tcph); + break; + case NES_PKT_TYPE_SYNACK: + handle_synack_pkt(cm_node, skb, tcph); + break; + case NES_PKT_TYPE_ACK: + handle_ack_pkt(cm_node, skb, tcph); + break; + case NES_PKT_TYPE_RST: + handle_rst_pkt(cm_node, skb, tcph); + break; + case NES_PKT_TYPE_FIN: + handle_fin_pkt(cm_node, skb, tcph); + break; + default: + drop_packet(skb); + break; + } +} /** * mini_cm_listen - create a listen node with params */ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, - struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) + struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) { struct nes_cm_listener *listener; unsigned long flags; @@ -1644,37 +1826,36 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, /** * mini_cm_connect - make a connection node with params */ -static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, - struct nes_vnic *nesvnic, - struct ietf_mpa_frame *mpa_frame, - struct nes_cm_info *cm_info) +struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, + struct nes_vnic *nesvnic, u16 private_data_len, + void *private_data, struct nes_cm_info *cm_info) { int ret = 0; struct nes_cm_node *cm_node; struct nes_cm_listener *loopbackremotelistener; struct nes_cm_node *loopbackremotenode; struct nes_cm_info loopback_cm_info; - - u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + - ntohs(mpa_frame->priv_data_len); - - cm_info->loc_addr = htonl(cm_info->loc_addr); - cm_info->rem_addr = htonl(cm_info->rem_addr); - cm_info->loc_port = htons(cm_info->loc_port); - cm_info->rem_port = htons(cm_info->rem_port); + u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + private_data_len; + struct ietf_mpa_frame *mpa_frame = NULL; /* create a CM connection node */ cm_node = make_cm_node(cm_core, nesvnic, cm_info, NULL); if (!cm_node) return NULL; + mpa_frame = &cm_node->mpa_frame; + strcpy(mpa_frame->key, IEFT_MPA_KEY_REQ); + mpa_frame->flags = IETF_MPA_FLAGS_CRC; + mpa_frame->rev = IETF_MPA_VERSION; + mpa_frame->priv_data_len = htons(private_data_len); /* set our node side to client (active) side */ cm_node->tcp_cntxt.client = 1; cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; if (cm_info->loc_addr == cm_info->rem_addr) { - loopbackremotelistener = find_listener(cm_core, cm_node->rem_addr, - cm_node->rem_port, NES_CM_LISTENER_ACTIVE_STATE); + loopbackremotelistener = find_listener(cm_core, + ntohl(nesvnic->local_ipaddr), cm_node->rem_port, + NES_CM_LISTENER_ACTIVE_STATE); if (loopbackremotelistener == NULL) { create_event(cm_node, NES_CM_EVENT_ABORTED); } else { @@ -1683,26 +1864,35 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, loopback_cm_info.loc_port = cm_info->rem_port; loopback_cm_info.rem_port = cm_info->loc_port; loopback_cm_info.cm_id = loopbackremotelistener->cm_id; - loopbackremotenode = make_cm_node(cm_core, nesvnic, &loopback_cm_info, - loopbackremotelistener); + loopbackremotenode = make_cm_node(cm_core, nesvnic, + &loopback_cm_info, loopbackremotelistener); loopbackremotenode->loopbackpartner = cm_node; - loopbackremotenode->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; + loopbackremotenode->tcp_cntxt.rcv_wscale = + NES_CM_DEFAULT_RCV_WND_SCALE; cm_node->loopbackpartner = loopbackremotenode; - memcpy(loopbackremotenode->mpa_frame_buf, &mpa_frame->priv_data, - mpa_frame_size); - loopbackremotenode->mpa_frame_size = mpa_frame_size - - sizeof(struct ietf_mpa_frame); + memcpy(loopbackremotenode->mpa_frame_buf, private_data, + private_data_len); + loopbackremotenode->mpa_frame_size = private_data_len; - /* we are done handling this state, set node to a TSA state */ + /* we are done handling this state. */ + /* set node to a TSA state */ cm_node->state = NES_CM_STATE_TSA; - cm_node->tcp_cntxt.rcv_nxt = loopbackremotenode->tcp_cntxt.loc_seq_num; - loopbackremotenode->tcp_cntxt.rcv_nxt = cm_node->tcp_cntxt.loc_seq_num; - cm_node->tcp_cntxt.max_snd_wnd = loopbackremotenode->tcp_cntxt.rcv_wnd; - loopbackremotenode->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.rcv_wnd; - cm_node->tcp_cntxt.snd_wnd = loopbackremotenode->tcp_cntxt.rcv_wnd; - loopbackremotenode->tcp_cntxt.snd_wnd = cm_node->tcp_cntxt.rcv_wnd; - cm_node->tcp_cntxt.snd_wscale = loopbackremotenode->tcp_cntxt.rcv_wscale; - loopbackremotenode->tcp_cntxt.snd_wscale = cm_node->tcp_cntxt.rcv_wscale; + cm_node->tcp_cntxt.rcv_nxt = + loopbackremotenode->tcp_cntxt.loc_seq_num; + loopbackremotenode->tcp_cntxt.rcv_nxt = + cm_node->tcp_cntxt.loc_seq_num; + cm_node->tcp_cntxt.max_snd_wnd = + loopbackremotenode->tcp_cntxt.rcv_wnd; + loopbackremotenode->tcp_cntxt.max_snd_wnd = + cm_node->tcp_cntxt.rcv_wnd; + cm_node->tcp_cntxt.snd_wnd = + loopbackremotenode->tcp_cntxt.rcv_wnd; + loopbackremotenode->tcp_cntxt.snd_wnd = + cm_node->tcp_cntxt.rcv_wnd; + cm_node->tcp_cntxt.snd_wscale = + loopbackremotenode->tcp_cntxt.rcv_wscale; + loopbackremotenode->tcp_cntxt.snd_wscale = + cm_node->tcp_cntxt.rcv_wscale; create_event(loopbackremotenode, NES_CM_EVENT_MPA_REQ); } @@ -1712,16 +1902,29 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, /* set our node side to client (active) side */ cm_node->tcp_cntxt.client = 1; /* init our MPA frame ptr */ - memcpy(&cm_node->mpa_frame, mpa_frame, mpa_frame_size); + memcpy(mpa_frame->priv_data, private_data, private_data_len); + cm_node->mpa_frame_size = mpa_frame_size; /* send a syn and goto syn sent state */ cm_node->state = NES_CM_STATE_SYN_SENT; - ret = send_syn(cm_node, 0); + ret = send_syn(cm_node, 0, NULL); + + if (ret) { + /* error in sending the syn free up the cm_node struct */ + nes_debug(NES_DBG_CM, "Api - connect() FAILED: dest " + "addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n", + cm_node->rem_addr, cm_node->rem_port, cm_node, + cm_node->cm_id); + rem_ref_cm_node(cm_node->cm_core, cm_node); + cm_node = NULL; + } - nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X, port=0x%04x," - " cm_node=%p, cm_id = %p.\n", - cm_node->rem_addr, cm_node->rem_port, cm_node, cm_node->cm_id); + if (cm_node) + nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X," + "port=0x%04x, cm_node=%p, cm_id = %p.\n", + cm_node->rem_addr, cm_node->rem_port, cm_node, + cm_node->cm_id); return cm_node; } @@ -1731,8 +1934,8 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, * mini_cm_accept - accept a connection * This function is never called */ -static int mini_cm_accept(struct nes_cm_core *cm_core, struct ietf_mpa_frame *mpa_frame, - struct nes_cm_node *cm_node) +static int mini_cm_accept(struct nes_cm_core *cm_core, + struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node) { return 0; } @@ -1742,32 +1945,26 @@ static int mini_cm_accept(struct nes_cm_core *cm_core, struct ietf_mpa_frame *mp * mini_cm_reject - reject and teardown a connection */ static int mini_cm_reject(struct nes_cm_core *cm_core, - struct ietf_mpa_frame *mpa_frame, - struct nes_cm_node *cm_node) + struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node) { int ret = 0; - struct sk_buff *skb; - u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + - ntohs(mpa_frame->priv_data_len); - skb = get_free_pkt(cm_node); - if (!skb) { - nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); - return -1; - } - - /* send an MPA Request frame */ - form_cm_frame(skb, cm_node, NULL, 0, mpa_frame, mpa_frame_size, SET_ACK | SET_FIN); - ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); + nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n", + __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state); + if (cm_node->tcp_cntxt.client) + return ret; + cleanup_retrans_entry(cm_node); cm_node->state = NES_CM_STATE_CLOSED; ret = send_fin(cm_node, NULL); - if (ret < 0) { - printk(KERN_INFO PFX "failed to send MPA Reply (reject)\n"); - return ret; + if (cm_node->accept_pend) { + BUG_ON(!cm_node->listener); + atomic_dec(&cm_node->listener->pend_accepts_cnt); + BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0); } + ret = send_reset(cm_node, NULL); return ret; } @@ -1783,35 +1980,39 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod return -EINVAL; switch (cm_node->state) { - /* if passed in node is null, create a reference key node for node search */ - /* check if we found an owner node for this pkt */ - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_SYN_SENT: - case NES_CM_STATE_ONE_SIDE_ESTABLISHED: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_ACCEPTING: - case NES_CM_STATE_MPAREQ_SENT: - cm_node->state = NES_CM_STATE_FIN_WAIT1; - send_fin(cm_node, NULL); - break; - case NES_CM_STATE_CLOSE_WAIT: - cm_node->state = NES_CM_STATE_LAST_ACK; - send_fin(cm_node, NULL); - break; - case NES_CM_STATE_FIN_WAIT1: - case NES_CM_STATE_FIN_WAIT2: - case NES_CM_STATE_LAST_ACK: - case NES_CM_STATE_TIME_WAIT: - case NES_CM_STATE_CLOSING: - ret = -1; - break; - case NES_CM_STATE_LISTENING: - case NES_CM_STATE_UNKNOWN: - case NES_CM_STATE_INITED: - case NES_CM_STATE_CLOSED: - case NES_CM_STATE_TSA: - ret = rem_ref_cm_node(cm_core, cm_node); - break; + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_ONE_SIDE_ESTABLISHED: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_ACCEPTING: + case NES_CM_STATE_MPAREQ_SENT: + case NES_CM_STATE_MPAREQ_RCVD: + cleanup_retrans_entry(cm_node); + send_reset(cm_node, NULL); + break; + case NES_CM_STATE_CLOSE_WAIT: + cm_node->state = NES_CM_STATE_LAST_ACK; + send_fin(cm_node, NULL); + break; + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_TIME_WAIT: + case NES_CM_STATE_CLOSING: + ret = -1; + break; + case NES_CM_STATE_LISTENING: + case NES_CM_STATE_UNKNOWN: + case NES_CM_STATE_INITED: + case NES_CM_STATE_CLOSED: + ret = rem_ref_cm_node(cm_core, cm_node); + break; + case NES_CM_STATE_TSA: + if (cm_node->send_entry) + printk(KERN_ERR "ERROR Close got called from STATE_TSA " + "send_entry=%p\n", cm_node->send_entry); + ret = rem_ref_cm_node(cm_core, cm_node); + break; } cm_node->cm_id = NULL; return ret; @@ -1822,25 +2023,30 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod * recv_pkt - recv an ETHERNET packet, and process it through CM * node state machine */ -static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvnic, - struct sk_buff *skb) +static void mini_cm_recv_pkt(struct nes_cm_core *cm_core, + struct nes_vnic *nesvnic, struct sk_buff *skb) { struct nes_cm_node *cm_node = NULL; struct nes_cm_listener *listener = NULL; struct iphdr *iph; struct tcphdr *tcph; struct nes_cm_info nfo; - int ret = 0; - if (!skb || skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) { - ret = -EINVAL; - goto out; + if (!skb) + return; + if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) { + dev_kfree_skb_any(skb); + return; } iph = (struct iphdr *)skb->data; tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr)); skb_reset_network_header(skb); skb_set_transport_header(skb, sizeof(*tcph)); + if (!tcph) { + dev_kfree_skb_any(skb); + return; + } skb->len = ntohs(iph->tot_len); nfo.loc_addr = ntohl(iph->daddr); @@ -1853,61 +2059,60 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvni NIPQUAD(iph->daddr), tcph->dest, NIPQUAD(iph->saddr), tcph->source); - /* note: this call is going to increment cm_node ref count */ - cm_node = find_node(cm_core, + do { + cm_node = find_node(cm_core, nfo.rem_port, nfo.rem_addr, nfo.loc_port, nfo.loc_addr); - if (!cm_node) { - listener = find_listener(cm_core, nfo.loc_addr, nfo.loc_port, - NES_CM_LISTENER_ACTIVE_STATE); - if (listener) { - nfo.cm_id = listener->cm_id; - nfo.conn_type = listener->conn_type; - } else { - nfo.cm_id = NULL; - nfo.conn_type = 0; - } - - cm_node = make_cm_node(cm_core, nesvnic, &nfo, listener); if (!cm_node) { - nes_debug(NES_DBG_CM, "Unable to allocate node\n"); + /* Only type of packet accepted are for */ + /* the PASSIVE open (syn only) */ + if ((!tcph->syn) || (tcph->ack)) { + cm_packets_dropped++; + break; + } + listener = find_listener(cm_core, nfo.loc_addr, + nfo.loc_port, + NES_CM_LISTENER_ACTIVE_STATE); if (listener) { - nes_debug(NES_DBG_CM, "unable to allocate node and decrementing listener refcount\n"); + nfo.cm_id = listener->cm_id; + nfo.conn_type = listener->conn_type; + } else { + nes_debug(NES_DBG_CM, "Unable to find listener " + "for the pkt\n"); + cm_packets_dropped++; + dev_kfree_skb_any(skb); + break; + } + + cm_node = make_cm_node(cm_core, nesvnic, &nfo, + listener); + if (!cm_node) { + nes_debug(NES_DBG_CM, "Unable to allocate " + "node\n"); + cm_packets_dropped++; atomic_dec(&listener->ref_count); + dev_kfree_skb_any(skb); + break; } - ret = -1; - goto out; - } - if (!listener) { - nes_debug(NES_DBG_CM, "Packet found for unknown port %x refcnt=%d\n", - nfo.loc_port, atomic_read(&cm_node->ref_count)); - if (!tcph->rst) { - nes_debug(NES_DBG_CM, "Packet found for unknown port=%d" - " rem_port=%d refcnt=%d\n", - nfo.loc_port, nfo.rem_port, atomic_read(&cm_node->ref_count)); - - cm_node->tcp_cntxt.rcv_nxt = ntohl(tcph->seq); - cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); - send_reset(cm_node); + if (!tcph->rst && !tcph->fin) { + cm_node->state = NES_CM_STATE_LISTENING; + } else { + cm_packets_dropped++; + rem_ref_cm_node(cm_core, cm_node); + dev_kfree_skb_any(skb); + break; } + add_ref_cm_node(cm_node); + } else if (cm_node->state == NES_CM_STATE_TSA) { rem_ref_cm_node(cm_core, cm_node); - ret = -1; - goto out; + atomic_inc(&cm_accel_dropped_pkts); + dev_kfree_skb_any(skb); + break; } - add_ref_cm_node(cm_node); - cm_node->state = NES_CM_STATE_LISTENING; - } - - nes_debug(NES_DBG_CM, "Processing Packet for node %p, data = (%p):\n", - cm_node, skb->data); - process_packet(cm_node, skb, cm_core); - - rem_ref_cm_node(cm_core, cm_node); - out: - if (skb) - dev_kfree_skb_any(skb); - return ret; + process_packet(cm_node, skb, cm_core); + rem_ref_cm_node(cm_core, cm_node); + } while (0); } @@ -2107,15 +2312,12 @@ int nes_cm_disconn(struct nes_qp *nesqp) if (nesqp->disconn_pending == 0) { nesqp->disconn_pending++; spin_unlock_irqrestore(&nesqp->lock, flags); - /* nes_add_ref(&nesqp->ibqp); */ /* init our disconnect work element, to */ INIT_WORK(&nesqp->disconn_work, nes_disconnect_worker); queue_work(g_cm_core->disconn_wq, &nesqp->disconn_work); - } else { + } else spin_unlock_irqrestore(&nesqp->lock, flags); - nes_rem_ref(&nesqp->ibqp); - } return 0; } @@ -2161,7 +2363,6 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) nes_debug(NES_DBG_CM, "QP%u disconnect_worker cmid is NULL\n", nesqp->hwqp.qp_id); spin_unlock_irqrestore(&nesqp->lock, flags); - nes_rem_ref(&nesqp->ibqp); return -1; } @@ -2182,30 +2383,31 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) atomic_inc(&cm_disconnects); cm_event.event = IW_CM_EVENT_DISCONNECT; if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) { - issued_disconnect_reset = 1; cm_event.status = IW_CM_EVENT_STATUS_RESET; - nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event (status reset) for " - " QP%u, cm_id = %p. \n", - nesqp->hwqp.qp_id, cm_id); - } else { + nes_debug(NES_DBG_CM, "Generating a CM " + "Disconnect Event (status reset) for " + "QP%u, cm_id = %p. \n", + nesqp->hwqp.qp_id, cm_id); + } else cm_event.status = IW_CM_EVENT_STATUS_OK; - } cm_event.local_addr = cm_id->local_addr; cm_event.remote_addr = cm_id->remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; - nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event for " - " QP%u, SQ Head = %u, SQ Tail = %u. cm_id = %p, refcount = %u.\n", - nesqp->hwqp.qp_id, - nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail, cm_id, - atomic_read(&nesqp->refcount)); + nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event" + " for QP%u, SQ Head = %u, SQ Tail = %u. " + "cm_id = %p, refcount = %u.\n", + nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, + nesqp->hwqp.sq_tail, cm_id, + atomic_read(&nesqp->refcount)); spin_unlock_irqrestore(&nesqp->lock, flags); ret = cm_id->event_handler(cm_id, &cm_event); if (ret) - nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); + nes_debug(NES_DBG_CM, "OFA CM event_handler " + "returned, ret=%d\n", ret); spin_lock_irqsave(&nesqp->lock, flags); } @@ -2247,31 +2449,24 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) if (nesqp->flush_issued == 0) { nesqp->flush_issued = 1; spin_unlock_irqrestore(&nesqp->lock, flags); - flush_wqes(nesvnic->nesdev, nesqp, NES_CQP_FLUSH_RQ, 1); - } else { + flush_wqes(nesvnic->nesdev, nesqp, + NES_CQP_FLUSH_RQ, 1); + } else spin_unlock_irqrestore(&nesqp->lock, flags); - } - - /* This reference is from either ModifyQP or the AE processing, - there is still a race here with modifyqp */ - nes_rem_ref(&nesqp->ibqp); - } else { cm_id = nesqp->cm_id; spin_unlock_irqrestore(&nesqp->lock, flags); /* check to see if the inbound reset beat the outbound reset */ if ((!cm_id) && (last_ae==NES_AEQE_AEID_RESET_SENT)) { - nes_debug(NES_DBG_CM, "QP%u: Decing refcount due to inbound reset" - " beating the outbound reset.\n", - nesqp->hwqp.qp_id); - nes_rem_ref(&nesqp->ibqp); + nes_debug(NES_DBG_CM, "QP%u: Decing refcount " + "due to inbound reset beating the " + "outbound reset.\n", nesqp->hwqp.qp_id); } } } else { nesqp->disconn_pending = 0; spin_unlock_irqrestore(&nesqp->lock, flags); } - nes_rem_ref(&nesqp->ibqp); return 0; } @@ -2349,71 +2544,82 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nesdev = nesvnic->nesdev; adapter = nesdev->nesadapter; - nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n", - nesvnic, nesvnic->netdev, nesvnic->netdev->name); - - /* since this is from a listen, we were able to put node handle into cm_id */ cm_node = (struct nes_cm_node *)cm_id->provider_data; + nes_debug(NES_DBG_CM, "nes_accept: cm_node= %p nesvnic=%p, netdev=%p," + "%s\n", cm_node, nesvnic, nesvnic->netdev, + nesvnic->netdev->name); /* associate the node with the QP */ nesqp->cm_node = (void *)cm_node; + cm_node->nesqp = nesqp; + nes_add_ref(&nesqp->ibqp); - nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu\n", - nesqp->hwqp.qp_id, cm_node, jiffies); + nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu listener = %p\n", + nesqp->hwqp.qp_id, cm_node, jiffies, cm_node->listener); atomic_inc(&cm_accepts); nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n", atomic_read(&nesvnic->netdev->refcnt)); - /* allocate the ietf frame and space for private data */ - nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev, - sizeof(struct ietf_mpa_frame) + conn_param->private_data_len, - &nesqp->ietf_frame_pbase); - - if (!nesqp->ietf_frame) { - nes_debug(NES_DBG_CM, "Unable to allocate memory for private data\n"); - return -ENOMEM; - } + /* allocate the ietf frame and space for private data */ + nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev, + sizeof(struct ietf_mpa_frame) + conn_param->private_data_len, + &nesqp->ietf_frame_pbase); + if (!nesqp->ietf_frame) { + nes_debug(NES_DBG_CM, "Unable to allocate memory for private " + "data\n"); + return -ENOMEM; + } - /* setup the MPA frame */ - nesqp->private_data_len = conn_param->private_data_len; - memcpy(nesqp->ietf_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE); - memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data, - conn_param->private_data_len); + /* setup the MPA frame */ + nesqp->private_data_len = conn_param->private_data_len; + memcpy(nesqp->ietf_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE); - nesqp->ietf_frame->priv_data_len = cpu_to_be16(conn_param->private_data_len); - nesqp->ietf_frame->rev = mpa_version; - nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC; + memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data, + conn_param->private_data_len); - /* setup our first outgoing iWarp send WQE (the IETF frame response) */ - wqe = &nesqp->hwqp.sq_vbase[0]; + nesqp->ietf_frame->priv_data_len = + cpu_to_be16(conn_param->private_data_len); + nesqp->ietf_frame->rev = mpa_version; + nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC; - if (cm_id->remote_addr.sin_addr.s_addr != cm_id->local_addr.sin_addr.s_addr) { - u64temp = (unsigned long)nesqp; - u64temp |= NES_SW_CONTEXT_ALIGN>>1; - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, - u64temp); - wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = - cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING | NES_IWARP_SQ_WQE_WRPDU); - wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = - cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame)); - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = - cpu_to_le32((u32)nesqp->ietf_frame_pbase); - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = - cpu_to_le32((u32)((u64)nesqp->ietf_frame_pbase >> 32)); - wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = - cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame)); - wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; - - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32( - NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | NES_QPCONTEXT_ORDIRD_WRPDU); - } else { - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | - NES_QPCONTEXT_ORDIRD_WRPDU | NES_QPCONTEXT_ORDIRD_ALSMM)); - } - nesqp->skip_lsmm = 1; + /* setup our first outgoing iWarp send WQE (the IETF frame response) */ + wqe = &nesqp->hwqp.sq_vbase[0]; + + if (cm_id->remote_addr.sin_addr.s_addr != + cm_id->local_addr.sin_addr.s_addr) { + u64temp = (unsigned long)nesqp; + u64temp |= NES_SW_CONTEXT_ALIGN>>1; + set_wqe_64bit_value(wqe->wqe_words, + NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, + u64temp); + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = + cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING | + NES_IWARP_SQ_WQE_WRPDU); + wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = + cpu_to_le32(conn_param->private_data_len + + sizeof(struct ietf_mpa_frame)); + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = + cpu_to_le32((u32)nesqp->ietf_frame_pbase); + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = + cpu_to_le32((u32)((u64)nesqp->ietf_frame_pbase >> 32)); + wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = + cpu_to_le32(conn_param->private_data_len + + sizeof(struct ietf_mpa_frame)); + wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; + + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | + NES_QPCONTEXT_ORDIRD_WRPDU); + } else { + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32((NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | + NES_QPCONTEXT_ORDIRD_WRPDU | + NES_QPCONTEXT_ORDIRD_ALSMM)); + } + nesqp->skip_lsmm = 1; /* Cache the cm_id in the qp */ @@ -2424,55 +2630,75 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_id->provider_data = nesqp; nesqp->active_conn = 0; + if (cm_node->state == NES_CM_STATE_TSA) + nes_debug(NES_DBG_CM, "Already state = TSA for cm_node=%p\n", + cm_node); + nes_cm_init_tsa_conn(nesqp, cm_node); - nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); - nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); - nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); + nesqp->nesqp_context->tcpPorts[0] = + cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); + nesqp->nesqp_context->tcpPorts[1] = + cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); + + if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) + nesqp->nesqp_context->ip0 = + cpu_to_le32(ntohl(nesvnic->local_ipaddr)); + else + nesqp->nesqp_context->ip0 = + cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); nesqp->nesqp_context->misc2 |= cpu_to_le32( - (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); + (u32)PCI_FUNC(nesdev->pcidev->devfn) << + NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); - nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32( - nes_arp_table(nesdev, le32_to_cpu(nesqp->nesqp_context->ip0), NULL, + nesqp->nesqp_context->arp_index_vlan |= + cpu_to_le32(nes_arp_table(nesdev, + le32_to_cpu(nesqp->nesqp_context->ip0), NULL, NES_ARP_RESOLVE) << 16); nesqp->nesqp_context->ts_val_delta = cpu_to_le32( - jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); + jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id); nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32( - ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT)); - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord); + ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT)); + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32((u32)conn_param->ord); memset(&nes_quad, 0, sizeof(nes_quad)); - nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; - nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; - nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; + nes_quad.DstIpAdrIndex = + cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); + if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) + nes_quad.SrcIpadr = nesvnic->local_ipaddr; + else + nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; + nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; + nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; /* Produce hash key */ crc_value = get_crc_value(&nes_quad); nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n", - nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask); + nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask); nesqp->hte_index &= adapter->hte_index_mask; nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node); - nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = 0x%08X:0x%04X," - " rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + private data length=%zu.\n", - nesqp->hwqp.qp_id, + nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = " + "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + " + "private data length=%zu.\n", nesqp->hwqp.qp_id, ntohl(cm_id->remote_addr.sin_addr.s_addr), ntohs(cm_id->remote_addr.sin_port), ntohl(cm_id->local_addr.sin_addr.s_addr), ntohs(cm_id->local_addr.sin_port), le32_to_cpu(nesqp->nesqp_context->rcv_nxt), le32_to_cpu(nesqp->nesqp_context->snd_nxt), - conn_param->private_data_len+sizeof(struct ietf_mpa_frame)); + conn_param->private_data_len + + sizeof(struct ietf_mpa_frame)); attr.qp_state = IB_QPS_RTS; nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); @@ -2489,15 +2715,16 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_event.private_data_len = 0; ret = cm_id->event_handler(cm_id, &cm_event); if (cm_node->loopbackpartner) { - cm_node->loopbackpartner->mpa_frame_size = nesqp->private_data_len; + cm_node->loopbackpartner->mpa_frame_size = + nesqp->private_data_len; /* copy entire MPA frame to our cm_node's frame */ - memcpy(cm_node->loopbackpartner->mpa_frame_buf, nesqp->ietf_frame->priv_data, - nesqp->private_data_len); + memcpy(cm_node->loopbackpartner->mpa_frame_buf, + nesqp->ietf_frame->priv_data, nesqp->private_data_len); create_event(cm_node->loopbackpartner, NES_CM_EVENT_CONNECTED); } if (ret) - printk("%s[%u] OFA CM event_handler returned, ret=%d\n", - __func__, __LINE__, ret); + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " + "ret=%d\n", __func__, __LINE__, ret); return 0; } @@ -2555,74 +2782,61 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (!nesdev) return -EINVAL; - atomic_inc(&cm_connects); - - nesqp->ietf_frame = kzalloc(sizeof(struct ietf_mpa_frame) + - conn_param->private_data_len, GFP_KERNEL); - if (!nesqp->ietf_frame) - return -ENOMEM; + nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = " + "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id, + ntohl(nesvnic->local_ipaddr), + ntohl(cm_id->remote_addr.sin_addr.s_addr), + ntohs(cm_id->remote_addr.sin_port), + ntohl(cm_id->local_addr.sin_addr.s_addr), + ntohs(cm_id->local_addr.sin_port)); - /* set qp as having an active connection */ + atomic_inc(&cm_connects); nesqp->active_conn = 1; - nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", - nesqp->hwqp.qp_id, - ntohl(cm_id->remote_addr.sin_addr.s_addr), - ntohs(cm_id->remote_addr.sin_port), - ntohl(cm_id->local_addr.sin_addr.s_addr), - ntohs(cm_id->local_addr.sin_port)); - /* cache the cm_id in the qp */ nesqp->cm_id = cm_id; cm_id->provider_data = nesqp; - /* copy the private data */ - if (conn_param->private_data_len) { - memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data, - conn_param->private_data_len); - } - nesqp->private_data_len = conn_param->private_data_len; nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord); nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord); - nes_debug(NES_DBG_CM, "mpa private data len =%u\n", conn_param->private_data_len); - - strcpy(&nesqp->ietf_frame->key[0], IEFT_MPA_KEY_REQ); - nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC; - nesqp->ietf_frame->rev = IETF_MPA_VERSION; - nesqp->ietf_frame->priv_data_len = htons(conn_param->private_data_len); + nes_debug(NES_DBG_CM, "mpa private data len =%u\n", + conn_param->private_data_len); - if (cm_id->local_addr.sin_addr.s_addr != cm_id->remote_addr.sin_addr.s_addr) + if (cm_id->local_addr.sin_addr.s_addr != + cm_id->remote_addr.sin_addr.s_addr) nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), - PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); + PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); /* set up the connection params for the node */ - cm_info.loc_addr = (cm_id->local_addr.sin_addr.s_addr); - cm_info.loc_port = (cm_id->local_addr.sin_port); - cm_info.rem_addr = (cm_id->remote_addr.sin_addr.s_addr); - cm_info.rem_port = (cm_id->remote_addr.sin_port); + cm_info.loc_addr = htonl(cm_id->local_addr.sin_addr.s_addr); + cm_info.loc_port = htons(cm_id->local_addr.sin_port); + cm_info.rem_addr = htonl(cm_id->remote_addr.sin_addr.s_addr); + cm_info.rem_port = htons(cm_id->remote_addr.sin_port); cm_info.cm_id = cm_id; cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; cm_id->add_ref(cm_id); - nes_add_ref(&nesqp->ibqp); /* create a connect CM node connection */ - cm_node = g_cm_core->api->connect(g_cm_core, nesvnic, nesqp->ietf_frame, &cm_info); + cm_node = g_cm_core->api->connect(g_cm_core, nesvnic, + conn_param->private_data_len, (void *)conn_param->private_data, + &cm_info); if (!cm_node) { - if (cm_id->local_addr.sin_addr.s_addr != cm_id->remote_addr.sin_addr.s_addr) + if (cm_id->local_addr.sin_addr.s_addr != + cm_id->remote_addr.sin_addr.s_addr) nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), - PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); - nes_rem_ref(&nesqp->ibqp); - kfree(nesqp->ietf_frame); - nesqp->ietf_frame = NULL; + PCI_FUNC(nesdev->pcidev->devfn), + NES_MANAGE_APBVT_DEL); + cm_id->rem_ref(cm_id); return -ENOMEM; } cm_node->apbvt_set = 1; nesqp->cm_node = cm_node; + cm_node->nesqp = nesqp; return 0; } @@ -2664,7 +2878,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info); if (!cm_node) { - printk("%s[%u] Error returned from listen API call\n", + printk(KERN_ERR "%s[%u] Error returned from listen API call\n", __func__, __LINE__); return -ENOMEM; } @@ -2672,10 +2886,13 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) cm_id->provider_data = cm_node; if (!cm_node->reused_node) { - err = nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), - PCI_FUNC(nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); + err = nes_manage_apbvt(nesvnic, + ntohs(cm_id->local_addr.sin_port), + PCI_FUNC(nesvnic->nesdev->pcidev->devfn), + NES_MANAGE_APBVT_ADD); if (err) { - printk("nes_manage_apbvt call returned %d.\n", err); + printk(KERN_ERR "nes_manage_apbvt call returned %d.\n", + err); g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node); return err; } @@ -2795,53 +3012,70 @@ static void cm_event_connected(struct nes_cm_event *event) nes_cm_init_tsa_conn(nesqp, cm_node); /* set the QP tsa context */ - nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); - nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); - nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); + nesqp->nesqp_context->tcpPorts[0] = + cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); + nesqp->nesqp_context->tcpPorts[1] = + cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); + if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) + nesqp->nesqp_context->ip0 = + cpu_to_le32(ntohl(nesvnic->local_ipaddr)); + else + nesqp->nesqp_context->ip0 = + cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); nesqp->nesqp_context->misc2 |= cpu_to_le32( - (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); + (u32)PCI_FUNC(nesdev->pcidev->devfn) << + NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32( - nes_arp_table(nesdev, le32_to_cpu(nesqp->nesqp_context->ip0), + nes_arp_table(nesdev, + le32_to_cpu(nesqp->nesqp_context->ip0), NULL, NES_ARP_RESOLVE) << 16); nesqp->nesqp_context->ts_val_delta = cpu_to_le32( jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id); nesqp->nesqp_context->ird_ord_sizes |= - cpu_to_le32((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT); + cpu_to_le32((u32)1 << + NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT); /* Adjust tail for not having a LSMM */ nesqp->hwqp.sq_tail = 1; #if defined(NES_SEND_FIRST_WRITE) - if (cm_node->send_write0) { - nes_debug(NES_DBG_CM, "Sending first write.\n"); - wqe = &nesqp->hwqp.sq_vbase[0]; - u64temp = (unsigned long)nesqp; - u64temp |= NES_SW_CONTEXT_ALIGN>>1; - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, - u64temp); - wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(NES_IWARP_SQ_OP_RDMAW); - wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; - - /* use the reserved spot on the WQ for the extra first WQE */ - nesqp->nesqp_context->ird_ord_sizes &= cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | - NES_QPCONTEXT_ORDIRD_WRPDU | NES_QPCONTEXT_ORDIRD_ALSMM)); - nesqp->skip_lsmm = 1; - nesqp->hwqp.sq_tail = 0; - nes_write32(nesdev->regs + NES_WQE_ALLOC, - (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); - } + if (cm_node->send_write0) { + nes_debug(NES_DBG_CM, "Sending first write.\n"); + wqe = &nesqp->hwqp.sq_vbase[0]; + u64temp = (unsigned long)nesqp; + u64temp |= NES_SW_CONTEXT_ALIGN>>1; + set_wqe_64bit_value(wqe->wqe_words, + NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp); + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = + cpu_to_le32(NES_IWARP_SQ_OP_RDMAW); + wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; + + /* use the reserved spot on the WQ for the extra first WQE */ + nesqp->nesqp_context->ird_ord_sizes &= + cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | + NES_QPCONTEXT_ORDIRD_WRPDU | + NES_QPCONTEXT_ORDIRD_ALSMM)); + nesqp->skip_lsmm = 1; + nesqp->hwqp.sq_tail = 0; + nes_write32(nesdev->regs + NES_WQE_ALLOC, + (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); + } #endif memset(&nes_quad, 0, sizeof(nes_quad)); - nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; + nes_quad.DstIpAdrIndex = + cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); + if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) + nes_quad.SrcIpadr = nesvnic->local_ipaddr; + else + nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; @@ -2858,10 +3092,6 @@ static void cm_event_connected(struct nes_cm_event *event) nesqp->private_data_len = (u8) cm_node->mpa_frame_size; cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node); - /* modify QP state to rts */ - attr.qp_state = IB_QPS_RTS; - nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); - /* notify OF layer we successfully created the requested connection */ cm_event.event = IW_CM_EVENT_CONNECT_REPLY; cm_event.status = IW_CM_EVENT_STATUS_ACCEPTED; @@ -2870,20 +3100,21 @@ static void cm_event_connected(struct nes_cm_event *event) cm_event.local_addr.sin_port = cm_id->local_addr.sin_port; cm_event.remote_addr = cm_id->remote_addr; - cm_event.private_data = (void *)event->cm_node->mpa_frame_buf; - cm_event.private_data_len = (u8) event->cm_node->mpa_frame_size; + cm_event.private_data = (void *)event->cm_node->mpa_frame_buf; + cm_event.private_data_len = (u8) event->cm_node->mpa_frame_size; cm_event.local_addr.sin_addr.s_addr = event->cm_info.rem_addr; ret = cm_id->event_handler(cm_id, &cm_event); nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); if (ret) - printk("%s[%u] OFA CM event_handler returned, ret=%d\n", - __func__, __LINE__, ret); - nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = %lu\n", - nesqp->hwqp.qp_id, jiffies ); + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " + "ret=%d\n", __func__, __LINE__, ret); + attr.qp_state = IB_QPS_RTS; + nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); - nes_rem_ref(&nesqp->ibqp); + nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = " + "%lu\n", nesqp->hwqp.qp_id, jiffies); return; } @@ -2927,17 +3158,19 @@ static void cm_event_connect_error(struct nes_cm_event *event) cm_event.private_data = NULL; cm_event.private_data_len = 0; - nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, remove_addr=%08x\n", - cm_event.local_addr.sin_addr.s_addr, cm_event.remote_addr.sin_addr.s_addr); + nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, " + "remove_addr=%08x\n", cm_event.local_addr.sin_addr.s_addr, + cm_event.remote_addr.sin_addr.s_addr); ret = cm_id->event_handler(cm_id, &cm_event); nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); if (ret) - printk("%s[%u] OFA CM event_handler returned, ret=%d\n", - __func__, __LINE__, ret); + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " + "ret=%d\n", __func__, __LINE__, ret); nes_rem_ref(&nesqp->ibqp); - cm_id->rem_ref(cm_id); + cm_id->rem_ref(cm_id); + rem_ref_cm_node(event->cm_node->cm_core, event->cm_node); return; } @@ -3040,7 +3273,8 @@ static int nes_cm_post_event(struct nes_cm_event *event) add_ref_cm_node(event->cm_node); event->cm_info.cm_id->add_ref(event->cm_info.cm_id); INIT_WORK(&event->event_work, nes_cm_event_handler); - nes_debug(NES_DBG_CM, "queue_work, event=%p\n", event); + nes_debug(NES_DBG_CM, "cm_node=%p queue_work, event=%p\n", + event->cm_node, event); queue_work(event->cm_node->cm_core->event_wq, &event->event_work); @@ -3056,46 +3290,48 @@ static int nes_cm_post_event(struct nes_cm_event *event) */ static void nes_cm_event_handler(struct work_struct *work) { - struct nes_cm_event *event = container_of(work, struct nes_cm_event, event_work); + struct nes_cm_event *event = container_of(work, struct nes_cm_event, + event_work); struct nes_cm_core *cm_core; - if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core)) { + if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core)) return; - } + cm_core = event->cm_node->cm_core; nes_debug(NES_DBG_CM, "event=%p, event->type=%u, events posted=%u\n", - event, event->type, atomic_read(&cm_core->events_posted)); + event, event->type, atomic_read(&cm_core->events_posted)); switch (event->type) { - case NES_CM_EVENT_MPA_REQ: - cm_event_mpa_req(event); - nes_debug(NES_DBG_CM, "CM Event: MPA REQUEST\n"); - break; - case NES_CM_EVENT_RESET: - nes_debug(NES_DBG_CM, "CM Event: RESET\n"); - cm_event_reset(event); - break; - case NES_CM_EVENT_CONNECTED: - if ((!event->cm_node->cm_id) || - (event->cm_node->state != NES_CM_STATE_TSA)) { - break; - } - cm_event_connected(event); - nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n"); + case NES_CM_EVENT_MPA_REQ: + cm_event_mpa_req(event); + nes_debug(NES_DBG_CM, "cm_node=%p CM Event: MPA REQUEST\n", + event->cm_node); + break; + case NES_CM_EVENT_RESET: + nes_debug(NES_DBG_CM, "cm_node = %p CM Event: RESET\n", + event->cm_node); + cm_event_reset(event); + break; + case NES_CM_EVENT_CONNECTED: + if ((!event->cm_node->cm_id) || + (event->cm_node->state != NES_CM_STATE_TSA)) break; - case NES_CM_EVENT_ABORTED: - if ((!event->cm_node->cm_id) || (event->cm_node->state == NES_CM_STATE_TSA)) { - break; - } - cm_event_connect_error(event); - nes_debug(NES_DBG_CM, "CM Event: ABORTED\n"); - break; - case NES_CM_EVENT_DROPPED_PKT: - nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n"); - break; - default: - nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n"); + cm_event_connected(event); + nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n"); + break; + case NES_CM_EVENT_ABORTED: + if ((!event->cm_node->cm_id) || + (event->cm_node->state == NES_CM_STATE_TSA)) break; + cm_event_connect_error(event); + nes_debug(NES_DBG_CM, "CM Event: ABORTED\n"); + break; + case NES_CM_EVENT_DROPPED_PKT: + nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n"); + break; + default: + nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n"); + break; } atomic_dec(&cm_core->events_posted); diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index 7717cb2ab50..367b3d29014 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -83,6 +83,8 @@ enum nes_timer_type { #define SET_FIN 4 #define SET_RST 8 +#define TCP_OPTIONS_PADDING 3 + struct option_base { u8 optionnum; u8 length; @@ -177,6 +179,7 @@ enum nes_cm_node_state { NES_CM_STATE_ESTABLISHED, NES_CM_STATE_ACCEPTING, NES_CM_STATE_MPAREQ_SENT, + NES_CM_STATE_MPAREQ_RCVD, NES_CM_STATE_TSA, NES_CM_STATE_FIN_WAIT1, NES_CM_STATE_FIN_WAIT2, @@ -187,6 +190,16 @@ enum nes_cm_node_state { NES_CM_STATE_CLOSED }; +enum nes_tcpip_pkt_type { + NES_PKT_TYPE_UNKNOWN, + NES_PKT_TYPE_SYN, + NES_PKT_TYPE_SYNACK, + NES_PKT_TYPE_ACK, + NES_PKT_TYPE_FIN, + NES_PKT_TYPE_RST +}; + + /* type of nes connection */ enum nes_cm_conn_type { NES_CM_IWARP_CONN_TYPE, @@ -257,7 +270,9 @@ struct nes_cm_node { struct net_device *netdev; struct nes_cm_node *loopbackpartner; - struct list_head retrans_list; + + struct nes_timer_entry *send_entry; + spinlock_t retrans_list_lock; struct list_head recv_list; spinlock_t recv_list_lock; @@ -276,6 +291,8 @@ struct nes_cm_node { struct nes_vnic *nesvnic; int apbvt_set; int accept_pend; + int freed; + struct nes_qp *nesqp; }; /* structure for client or CM to fill when making CM api calls. */ @@ -366,14 +383,14 @@ struct nes_cm_ops { struct nes_cm_info *); int (*stop_listener)(struct nes_cm_core *, struct nes_cm_listener *); struct nes_cm_node * (*connect)(struct nes_cm_core *, - struct nes_vnic *, struct ietf_mpa_frame *, + struct nes_vnic *, u16, void *, struct nes_cm_info *); int (*close)(struct nes_cm_core *, struct nes_cm_node *); int (*accept)(struct nes_cm_core *, struct ietf_mpa_frame *, struct nes_cm_node *); int (*reject)(struct nes_cm_core *, struct ietf_mpa_frame *, struct nes_cm_node *); - int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *, + void (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *); int (*destroy_cm_core)(struct nes_cm_core *); int (*get)(struct nes_cm_core *); diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 85f26d19a32..1513d4066f1 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -2814,7 +2814,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, nesqp = *((struct nes_qp **)&context); if (atomic_inc_return(&nesqp->close_timer_started) == 1) { nesqp->cm_id->add_ref(nesqp->cm_id); - nes_add_ref(&nesqp->ibqp); schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp, NES_TIMER_TYPE_CLOSE, 1, 0); nes_debug(NES_DBG_AEQ, "QP%u Not decrementing QP refcount (%d)," @@ -2838,7 +2837,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, if (async_event_id == NES_AEQE_AEID_RESET_SENT) { tcp_state = NES_AEQE_TCP_STATE_CLOSED; } - nes_add_ref(&nesqp->ibqp); spin_lock_irqsave(&nesqp->lock, flags); nesqp->hw_iwarp_state = iwarp_state; nesqp->hw_tcp_state = tcp_state; @@ -2876,7 +2874,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, } spin_unlock_irqrestore(&nesqp->lock, flags); if (next_iwarp_state) { - nes_add_ref(&nesqp->ibqp); nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X," " also added another reference\n", nesqp->hwqp.qp_id, next_iwarp_state); @@ -2888,7 +2885,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, /* FIN Received but ib state not RTS, close complete will be on its way */ spin_unlock_irqrestore(&nesqp->lock, flags); - nes_rem_ref(&nesqp->ibqp); return; } spin_unlock_irqrestore(&nesqp->lock, flags); @@ -2922,7 +2918,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) || ((nesqp->ibqp_state == IB_QPS_RTS)&& (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { - nes_add_ref(&nesqp->ibqp); nes_cm_disconn(nesqp); } else { nesqp->in_disconnect = 0; @@ -2931,7 +2926,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, break; case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES: nesqp = *((struct nes_qp **)&context); - nes_add_ref(&nesqp->ibqp); spin_lock_irqsave(&nesqp->lock, flags); nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR; nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; @@ -3042,7 +3036,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); } /* tell cm to disconnect, cm will queue work to thread */ - nes_add_ref(&nesqp->ibqp); nes_cm_disconn(nesqp); break; case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE: @@ -3062,7 +3055,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); } /* tell cm to disconnect, cm will queue work to thread */ - nes_add_ref(&nesqp->ibqp); nes_cm_disconn(nesqp); break; case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR: @@ -3082,7 +3074,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); } /* tell cm to disconnect, cm will queue work to thread */ - nes_add_ref(&nesqp->ibqp); nes_cm_disconn(nesqp); break; /* TODO: additional AEs need to be here */ diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index e3939d13484..d79942e8497 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -2867,7 +2867,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id, attr->qp_state, nesqp->ibqp_state, nesqp->iwarp_state, atomic_read(&nesqp->refcount)); - nes_add_ref(&nesqp->ibqp); spin_lock_irqsave(&nesqp->lock, qplockflags); nes_debug(NES_DBG_MOD_QP, "QP%u: hw_iwarp_state=0x%X, hw_tcp_state=0x%X," @@ -2882,7 +2881,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id); if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_IDLE) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE; @@ -2893,7 +2891,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id); if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_IDLE) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE; @@ -2904,14 +2901,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id); if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_RTS) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } if (nesqp->cm_id == NULL) { nes_debug(NES_DBG_MOD_QP, "QP%u: Failing attempt to move QP to RTS without a CM_ID. \n", nesqp->hwqp.qp_id ); spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } next_iwarp_state = NES_CQP_QP_IWARP_STATE_RTS; @@ -2929,7 +2924,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail); if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_CLOSING) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return 0; } else { if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_CLOSING) { @@ -2937,7 +2931,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, " ignored due to current iWARP state\n", nesqp->hwqp.qp_id); spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } if (nesqp->hw_iwarp_state != NES_AEQE_IWARP_STATE_RTS) { @@ -2969,7 +2962,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id); if (nesqp->iwarp_state>=(u32)NES_CQP_QP_IWARP_STATE_TERMINATE) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */ @@ -2982,7 +2974,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, case IB_QPS_RESET: if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_ERROR) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n", @@ -3008,7 +2999,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, break; default: spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; break; } @@ -3088,7 +3078,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), original_last_aeq, nesqp->last_aeq); /* this one is for the cm_disconnect thread */ - nes_add_ref(&nesqp->ibqp); spin_lock_irqsave(&nesqp->lock, qplockflags); nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; @@ -3097,14 +3086,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } else { nes_debug(NES_DBG_MOD_QP, "QP%u No fake disconnect, QP refcount=%d\n", nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount)); - nes_rem_ref(&nesqp->ibqp); } } else { spin_lock_irqsave(&nesqp->lock, qplockflags); if (nesqp->cm_id) { /* These two are for the timer thread */ if (atomic_inc_return(&nesqp->close_timer_started) == 1) { - nes_add_ref(&nesqp->ibqp); nesqp->cm_id->add_ref(nesqp->cm_id); nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d)," " need ae to finish up, original_last_aeq = 0x%04X." @@ -3128,14 +3115,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), original_last_aeq, nesqp->last_aeq); - nes_rem_ref(&nesqp->ibqp); } } else { nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up," " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), original_last_aeq, nesqp->last_aeq); - nes_rem_ref(&nesqp->ibqp); } err = 0; diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index 691525cf394..9d9a9dc51f1 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig @@ -11,16 +11,17 @@ config INFINIBAND_IPOIB config INFINIBAND_IPOIB_CM bool "IP-over-InfiniBand Connected Mode support" - depends on INFINIBAND_IPOIB && EXPERIMENTAL + depends on INFINIBAND_IPOIB default n ---help--- - This option enables experimental support for IPoIB connected mode. - After enabling this option, you need to switch to connected mode through - /sys/class/net/ibXXX/mode to actually create connections, and then increase - the interface MTU with e.g. ifconfig ib0 mtu 65520. + This option enables support for IPoIB connected mode. After + enabling this option, you need to switch to connected mode + through /sys/class/net/ibXXX/mode to actually create + connections, and then increase the interface MTU with + e.g. ifconfig ib0 mtu 65520. - WARNING: Enabling connected mode will trigger some - packet drops for multicast and UD mode traffic from this interface, + WARNING: Enabling connected mode will trigger some packet + drops for multicast and UD mode traffic from this interface, unless you limit mtu for these destinations to 2044. config INFINIBAND_IPOIB_DEBUG @@ -33,9 +34,10 @@ config INFINIBAND_IPOIB_DEBUG debug_level and mcast_debug_level module parameters (which can also be set after the driver is loaded through sysfs). - This option also creates an "ipoib_debugfs," which can be - mounted to expose debugging information about IB multicast - groups used by the IPoIB driver. + This option also creates a directory tree under ipoib/ in + debugfs, which contains files that expose debugging + information about IB multicast groups used by the IPoIB + driver. config INFINIBAND_IPOIB_DEBUG_DATA bool "IP-over-InfiniBand data path debugging" diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 8be9ea0436e..f51201b17bf 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -548,7 +548,7 @@ static int path_rec_start(struct net_device *dev, path_rec_completion, path, &path->query); if (path->query_id < 0) { - ipoib_warn(priv, "ib_sa_path_rec_get failed\n"); + ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id); path->query = NULL; return path->query_id; } diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 3a917c1f796..63462ecca14 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -483,6 +483,7 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve break; case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_DEVICE_REMOVAL: + case RDMA_CM_EVENT_ADDR_CHANGE: iser_disconnected_handler(cma_id); break; default: |