aboutsummaryrefslogtreecommitdiff
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/addr.c275
-rw-r--r--drivers/infiniband/core/cma.c133
-rw-r--r--drivers/infiniband/core/sa_query.c6
-rw-r--r--drivers/infiniband/core/ucma.c57
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c2
-rw-r--r--drivers/infiniband/hw/amso1100/c2_qp.c14
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c32
-rw-r--r--drivers/infiniband/hw/ehca/ehca_reqs.c67
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c10
-rw-r--r--drivers/infiniband/hw/mlx4/main.c2
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c25
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c1
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c122
13 files changed, 375 insertions, 371 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index bd07803e918..abbb06996f9 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -36,7 +36,6 @@
#include <linux/mutex.h>
#include <linux/inetdevice.h>
#include <linux/workqueue.h>
-#include <linux/if_arp.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
@@ -92,22 +91,12 @@ EXPORT_SYMBOL(rdma_addr_unregister_client);
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
const unsigned char *dst_dev_addr)
{
- switch (dev->type) {
- case ARPHRD_INFINIBAND:
- dev_addr->dev_type = RDMA_NODE_IB_CA;
- break;
- case ARPHRD_ETHER:
- dev_addr->dev_type = RDMA_NODE_RNIC;
- break;
- default:
- return -EADDRNOTAVAIL;
- }
-
+ dev_addr->dev_type = dev->type;
memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
if (dst_dev_addr)
memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
- dev_addr->src_dev = dev;
+ dev_addr->bound_dev_if = dev->ifindex;
return 0;
}
EXPORT_SYMBOL(rdma_copy_addr);
@@ -117,6 +106,15 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
struct net_device *dev;
int ret = -EADDRNOTAVAIL;
+ if (dev_addr->bound_dev_if) {
+ dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ if (!dev)
+ return -ENODEV;
+ ret = rdma_copy_addr(dev_addr, dev, NULL);
+ dev_put(dev);
+ return ret;
+ }
+
switch (addr->sa_family) {
case AF_INET:
dev = ip_dev_find(&init_net,
@@ -131,6 +129,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6:
+ read_lock(&dev_base_lock);
for_each_netdev(&init_net, dev) {
if (ipv6_chk_addr(&init_net,
&((struct sockaddr_in6 *) addr)->sin6_addr,
@@ -139,6 +138,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
break;
}
}
+ read_unlock(&dev_base_lock);
break;
#endif
}
@@ -176,48 +176,9 @@ static void queue_req(struct addr_req *req)
mutex_unlock(&lock);
}
-static void addr_send_arp(struct sockaddr *dst_in)
-{
- struct rtable *rt;
- struct flowi fl;
-
- memset(&fl, 0, sizeof fl);
-
- switch (dst_in->sa_family) {
- case AF_INET:
- fl.nl_u.ip4_u.daddr =
- ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
-
- if (ip_route_output_key(&init_net, &rt, &fl))
- return;
-
- neigh_event_send(rt->u.dst.neighbour, NULL);
- ip_rt_put(rt);
- break;
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- case AF_INET6:
- {
- struct dst_entry *dst;
-
- fl.nl_u.ip6_u.daddr =
- ((struct sockaddr_in6 *) dst_in)->sin6_addr;
-
- dst = ip6_route_output(&init_net, NULL, &fl);
- if (!dst)
- return;
-
- neigh_event_send(dst->neighbour, NULL);
- dst_release(dst);
- break;
- }
-#endif
- }
-}
-
-static int addr4_resolve_remote(struct sockaddr_in *src_in,
- struct sockaddr_in *dst_in,
- struct rdma_dev_addr *addr)
+static int addr4_resolve(struct sockaddr_in *src_in,
+ struct sockaddr_in *dst_in,
+ struct rdma_dev_addr *addr)
{
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
@@ -229,10 +190,22 @@ static int addr4_resolve_remote(struct sockaddr_in *src_in,
memset(&fl, 0, sizeof fl);
fl.nl_u.ip4_u.daddr = dst_ip;
fl.nl_u.ip4_u.saddr = src_ip;
+ fl.oif = addr->bound_dev_if;
+
ret = ip_route_output_key(&init_net, &rt, &fl);
if (ret)
goto out;
+ src_in->sin_family = AF_INET;
+ src_in->sin_addr.s_addr = rt->rt_src;
+
+ if (rt->idev->dev->flags & IFF_LOOPBACK) {
+ ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+ if (!ret)
+ memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+ goto put;
+ }
+
/* If the device does ARP internally, return 'done' */
if (rt->idev->dev->flags & IFF_NOARP) {
rdma_copy_addr(addr, rt->idev->dev, NULL);
@@ -240,21 +213,14 @@ static int addr4_resolve_remote(struct sockaddr_in *src_in,
}
neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
- if (!neigh) {
+ if (!neigh || !(neigh->nud_state & NUD_VALID)) {
+ neigh_event_send(rt->u.dst.neighbour, NULL);
ret = -ENODATA;
+ if (neigh)
+ goto release;
goto put;
}
- if (!(neigh->nud_state & NUD_VALID)) {
- ret = -ENODATA;
- goto release;
- }
-
- if (!src_ip) {
- src_in->sin_family = dst_in->sin_family;
- src_in->sin_addr.s_addr = rt->rt_src;
- }
-
ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
release:
neigh_release(neigh);
@@ -265,52 +231,77 @@ out:
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
- struct sockaddr_in6 *dst_in,
- struct rdma_dev_addr *addr)
+static int addr6_resolve(struct sockaddr_in6 *src_in,
+ struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr)
{
struct flowi fl;
struct neighbour *neigh;
struct dst_entry *dst;
- int ret = -ENODATA;
+ int ret;
memset(&fl, 0, sizeof fl);
- fl.nl_u.ip6_u.daddr = dst_in->sin6_addr;
- fl.nl_u.ip6_u.saddr = src_in->sin6_addr;
+ ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr);
+ ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr);
+ fl.oif = addr->bound_dev_if;
dst = ip6_route_output(&init_net, NULL, &fl);
- if (!dst)
- return ret;
+ if ((ret = dst->error))
+ goto put;
+
+ if (ipv6_addr_any(&fl.fl6_src)) {
+ ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
+ &fl.fl6_dst, 0, &fl.fl6_src);
+ if (ret)
+ goto put;
+
+ src_in->sin6_family = AF_INET6;
+ ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src);
+ }
+
+ if (dst->dev->flags & IFF_LOOPBACK) {
+ ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+ if (!ret)
+ memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+ goto put;
+ }
+ /* If the device does ARP internally, return 'done' */
if (dst->dev->flags & IFF_NOARP) {
ret = rdma_copy_addr(addr, dst->dev, NULL);
- } else {
- neigh = dst->neighbour;
- if (neigh && (neigh->nud_state & NUD_VALID))
- ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
+ goto put;
+ }
+
+ neigh = dst->neighbour;
+ if (!neigh || !(neigh->nud_state & NUD_VALID)) {
+ neigh_event_send(dst->neighbour, NULL);
+ ret = -ENODATA;
+ goto put;
}
+ ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
+put:
dst_release(dst);
return ret;
}
#else
-static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
- struct sockaddr_in6 *dst_in,
- struct rdma_dev_addr *addr)
+static int addr6_resolve(struct sockaddr_in6 *src_in,
+ struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr)
{
return -EADDRNOTAVAIL;
}
#endif
-static int addr_resolve_remote(struct sockaddr *src_in,
- struct sockaddr *dst_in,
- struct rdma_dev_addr *addr)
+static int addr_resolve(struct sockaddr *src_in,
+ struct sockaddr *dst_in,
+ struct rdma_dev_addr *addr)
{
if (src_in->sa_family == AF_INET) {
- return addr4_resolve_remote((struct sockaddr_in *) src_in,
+ return addr4_resolve((struct sockaddr_in *) src_in,
(struct sockaddr_in *) dst_in, addr);
} else
- return addr6_resolve_remote((struct sockaddr_in6 *) src_in,
+ return addr6_resolve((struct sockaddr_in6 *) src_in,
(struct sockaddr_in6 *) dst_in, addr);
}
@@ -327,8 +318,7 @@ static void process_req(struct work_struct *work)
if (req->status == -ENODATA) {
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
- req->status = addr_resolve_remote(src_in, dst_in,
- req->addr);
+ req->status = addr_resolve(src_in, dst_in, req->addr);
if (req->status && time_after_eq(jiffies, req->timeout))
req->status = -ETIMEDOUT;
else if (req->status == -ENODATA)
@@ -352,82 +342,6 @@ static void process_req(struct work_struct *work)
}
}
-static int addr_resolve_local(struct sockaddr *src_in,
- struct sockaddr *dst_in,
- struct rdma_dev_addr *addr)
-{
- struct net_device *dev;
- int ret;
-
- switch (dst_in->sa_family) {
- case AF_INET:
- {
- __be32 src_ip = ((struct sockaddr_in *) src_in)->sin_addr.s_addr;
- __be32 dst_ip = ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
-
- dev = ip_dev_find(&init_net, dst_ip);
- if (!dev)
- return -EADDRNOTAVAIL;
-
- if (ipv4_is_zeronet(src_ip)) {
- src_in->sa_family = dst_in->sa_family;
- ((struct sockaddr_in *) src_in)->sin_addr.s_addr = dst_ip;
- ret = rdma_copy_addr(addr, dev, dev->dev_addr);
- } else if (ipv4_is_loopback(src_ip)) {
- ret = rdma_translate_ip(dst_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- } else {
- ret = rdma_translate_ip(src_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- }
- dev_put(dev);
- break;
- }
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- case AF_INET6:
- {
- struct in6_addr *a;
-
- for_each_netdev(&init_net, dev)
- if (ipv6_chk_addr(&init_net,
- &((struct sockaddr_in6 *) dst_in)->sin6_addr,
- dev, 1))
- break;
-
- if (!dev)
- return -EADDRNOTAVAIL;
-
- a = &((struct sockaddr_in6 *) src_in)->sin6_addr;
-
- if (ipv6_addr_any(a)) {
- src_in->sa_family = dst_in->sa_family;
- ((struct sockaddr_in6 *) src_in)->sin6_addr =
- ((struct sockaddr_in6 *) dst_in)->sin6_addr;
- ret = rdma_copy_addr(addr, dev, dev->dev_addr);
- } else if (ipv6_addr_loopback(a)) {
- ret = rdma_translate_ip(dst_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- } else {
- ret = rdma_translate_ip(src_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- }
- break;
- }
-#endif
-
- default:
- ret = -EADDRNOTAVAIL;
- break;
- }
-
- return ret;
-}
-
int rdma_resolve_ip(struct rdma_addr_client *client,
struct sockaddr *src_addr, struct sockaddr *dst_addr,
struct rdma_dev_addr *addr, int timeout_ms,
@@ -443,22 +357,28 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
if (!req)
return -ENOMEM;
- if (src_addr)
- memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
- memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
+ src_in = (struct sockaddr *) &req->src_addr;
+ dst_in = (struct sockaddr *) &req->dst_addr;
+
+ if (src_addr) {
+ if (src_addr->sa_family != dst_addr->sa_family) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ memcpy(src_in, src_addr, ip_addr_size(src_addr));
+ } else {
+ src_in->sa_family = dst_addr->sa_family;
+ }
+
+ memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
req->addr = addr;
req->callback = callback;
req->context = context;
req->client = client;
atomic_inc(&client->refcount);
- src_in = (struct sockaddr *) &req->src_addr;
- dst_in = (struct sockaddr *) &req->dst_addr;
-
- req->status = addr_resolve_local(src_in, dst_in, addr);
- if (req->status == -EADDRNOTAVAIL)
- req->status = addr_resolve_remote(src_in, dst_in, addr);
-
+ req->status = addr_resolve(src_in, dst_in, addr);
switch (req->status) {
case 0:
req->timeout = jiffies;
@@ -467,15 +387,16 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
case -ENODATA:
req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
queue_req(req);
- addr_send_arp(dst_in);
break;
default:
ret = req->status;
atomic_dec(&client->refcount);
- kfree(req);
- break;
+ goto err;
}
return ret;
+err:
+ kfree(req);
+ return ret;
}
EXPORT_SYMBOL(rdma_resolve_ip);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 075317884b5..fbdd7310600 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -330,17 +330,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
union ib_gid gid;
int ret = -ENODEV;
- switch (rdma_node_get_transport(dev_addr->dev_type)) {
- case RDMA_TRANSPORT_IB:
- ib_addr_get_sgid(dev_addr, &gid);
- break;
- case RDMA_TRANSPORT_IWARP:
- iw_addr_get_sgid(dev_addr, &gid);
- break;
- default:
- return -ENODEV;
- }
-
+ rdma_addr_get_sgid(dev_addr, &gid);
list_for_each_entry(cma_dev, &dev_list, list) {
ret = ib_find_cached_gid(cma_dev->device, &gid,
&id_priv->id.port_num, NULL);
@@ -1032,11 +1022,17 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
if (rt->num_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
- ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
- ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
- &id->route.addr.dev_addr);
- if (ret)
- goto destroy_id;
+ if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
+ rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
+ rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
+ ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey);
+ } else {
+ ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
+ &rt->addr.dev_addr);
+ if (ret)
+ goto destroy_id;
+ }
+ rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->state = CMA_CONNECT;
@@ -1071,10 +1067,12 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
cma_save_net_info(&id->route.addr, &listen_id->route.addr,
ip_ver, port, src, dst);
- ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
- &id->route.addr.dev_addr);
- if (ret)
- goto err;
+ if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
+ ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
+ &id->route.addr.dev_addr);
+ if (ret)
+ goto err;
+ }
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->state = CMA_CONNECT;
@@ -1474,15 +1472,6 @@ static void cma_listen_on_all(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
-static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
-{
- struct sockaddr_storage addr_in;
-
- memset(&addr_in, 0, sizeof addr_in);
- addr_in.ss_family = af;
- return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
-}
-
int rdma_listen(struct rdma_cm_id *id, int backlog)
{
struct rdma_id_private *id_priv;
@@ -1490,7 +1479,8 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
id_priv = container_of(id, struct rdma_id_private, id);
if (id_priv->state == CMA_IDLE) {
- ret = cma_bind_any(id, AF_INET);
+ ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
+ ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
if (ret)
return ret;
}
@@ -1565,8 +1555,8 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
struct sockaddr_in6 *sin6;
memset(&path_rec, 0, sizeof path_rec);
- ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
- ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
+ rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
+ rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
path_rec.numb_path = 1;
path_rec.reversible = 1;
@@ -1781,7 +1771,11 @@ port_found:
if (ret)
goto out;
- ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+ id_priv->id.route.addr.dev_addr.dev_type =
+ (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ?
+ ARPHRD_INFINIBAND : ARPHRD_ETHER;
+
+ rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
id_priv->id.port_num = p;
cma_attach_to_dev(id_priv, cma_dev);
@@ -1839,7 +1833,7 @@ out:
static int cma_resolve_loopback(struct rdma_id_private *id_priv)
{
struct cma_work *work;
- struct sockaddr_in *src_in, *dst_in;
+ struct sockaddr *src, *dst;
union ib_gid gid;
int ret;
@@ -1853,14 +1847,19 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
goto err;
}
- ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
- ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
+ rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+ rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
- if (cma_zero_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)) {
- src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
- dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
- src_in->sin_family = dst_in->sin_family;
- src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
+ src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
+ if (cma_zero_addr(src)) {
+ dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
+ if ((src->sa_family = dst->sa_family) == AF_INET) {
+ ((struct sockaddr_in *) src)->sin_addr.s_addr =
+ ((struct sockaddr_in *) dst)->sin_addr.s_addr;
+ } else {
+ ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr,
+ &((struct sockaddr_in6 *) dst)->sin6_addr);
+ }
}
work->id = id_priv;
@@ -1878,10 +1877,14 @@ err:
static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
struct sockaddr *dst_addr)
{
- if (src_addr && src_addr->sa_family)
- return rdma_bind_addr(id, src_addr);
- else
- return cma_bind_any(id, dst_addr->sa_family);
+ if (!src_addr || !src_addr->sa_family) {
+ src_addr = (struct sockaddr *) &id->route.addr.src_addr;
+ if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) {
+ ((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
+ ((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
+ }
+ }
+ return rdma_bind_addr(id, src_addr);
}
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
@@ -2077,6 +2080,25 @@ static int cma_get_port(struct rdma_id_private *id_priv)
return ret;
}
+static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
+ struct sockaddr *addr)
+{
+#if defined(CONFIG_IPv6) || defined(CONFIG_IPV6_MODULE)
+ struct sockaddr_in6 *sin6;
+
+ if (addr->sa_family != AF_INET6)
+ return 0;
+
+ sin6 = (struct sockaddr_in6 *) addr;
+ if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
+ !sin6->sin6_scope_id)
+ return -EINVAL;
+
+ dev_addr->bound_dev_if = sin6->sin6_scope_id;
+#endif
+ return 0;
+}
+
int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
{
struct rdma_id_private *id_priv;
@@ -2089,7 +2111,13 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
return -EINVAL;
- if (!cma_any_addr(addr)) {
+ ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
+ if (ret)
+ goto err1;
+
+ if (cma_loopback_addr(addr)) {
+ ret = cma_bind_loopback(id_priv);
+ } else if (!cma_zero_addr(addr)) {
ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
if (ret)
goto err1;
@@ -2108,7 +2136,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
return 0;
err2:
- if (!cma_any_addr(addr)) {
+ if (id_priv->cma_dev) {
mutex_lock(&lock);
cma_detach_from_dev(id_priv);
mutex_unlock(&lock);
@@ -2687,10 +2715,15 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
if (cma_any_addr(addr)) {
memset(mgid, 0, sizeof *mgid);
} else if ((addr->sa_family == AF_INET6) &&
- ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
+ ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
0xFF10A01B)) {
/* IPv6 address is an SA assigned MGID. */
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
+ } else if ((addr->sa_family == AF_INET6)) {
+ ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
+ if (id_priv->id.ps == RDMA_PS_UDP)
+ mc_map[7] = 0x01; /* Use RDMA CM signature */
+ *mgid = *(union ib_gid *) (mc_map + 4);
} else {
ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
@@ -2716,7 +2749,7 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
if (id_priv->id.ps == RDMA_PS_UDP)
rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
- ib_addr_get_sgid(dev_addr, &rec.port_gid);
+ rdma_addr_get_sgid(dev_addr, &rec.port_gid);
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = 1;
@@ -2815,7 +2848,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
dev_addr = &id_priv->id.route.addr.dev_addr;
- if ((dev_addr->src_dev == ndev) &&
+ if ((dev_addr->bound_dev_if == ndev->ifindex) &&
memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
ndev->name, &id_priv->id);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 82543716d59..7e1ffd8ccd5 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -604,6 +604,12 @@ retry:
return ret ? ret : id;
}
+void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
+{
+ ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
+}
+EXPORT_SYMBOL(ib_sa_unpack_path);
+
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index bb96d3c4b0f..b2e16c332d5 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -43,6 +43,7 @@
#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
+#include <rdma/rdma_cm_ib.h>
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
@@ -562,10 +563,10 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
switch (route->num_paths) {
case 0:
dev_addr = &route->addr.dev_addr;
- ib_addr_get_dgid(dev_addr,
- (union ib_gid *) &resp->ib_route[0].dgid);
- ib_addr_get_sgid(dev_addr,
- (union ib_gid *) &resp->ib_route[0].sgid);
+ rdma_addr_get_dgid(dev_addr,
+ (union ib_gid *) &resp->ib_route[0].dgid);
+ rdma_addr_get_sgid(dev_addr,
+ (union ib_gid *) &resp->ib_route[0].sgid);
resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
break;
case 2:
@@ -812,6 +813,51 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
return ret;
}
+static int ucma_set_ib_path(struct ucma_context *ctx,
+ struct ib_path_rec_data *path_data, size_t optlen)
+{
+ struct ib_sa_path_rec sa_path;
+ struct rdma_cm_event event;
+ int ret;
+
+ if (optlen % sizeof(*path_data))
+ return -EINVAL;
+
+ for (; optlen; optlen -= sizeof(*path_data), path_data++) {
+ if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
+ IB_PATH_BIDIRECTIONAL))
+ break;
+ }
+
+ if (!optlen)
+ return -EINVAL;
+
+ ib_sa_unpack_path(path_data->path_rec, &sa_path);
+ ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
+ if (ret)
+ return ret;
+
+ memset(&event, 0, sizeof event);
+ event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+ return ucma_event_handler(ctx->cm_id, &event);
+}
+
+static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
+ void *optval, size_t optlen)
+{
+ int ret;
+
+ switch (optname) {
+ case RDMA_OPTION_IB_PATH:
+ ret = ucma_set_ib_path(ctx, optval, optlen);
+ break;
+ default:
+ ret = -ENOSYS;
+ }
+
+ return ret;
+}
+
static int ucma_set_option_level(struct ucma_context *ctx, int level,
int optname, void *optval, size_t optlen)
{
@@ -821,6 +867,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level,
case RDMA_OPTION_ID:
ret = ucma_set_option_id(ctx, optname, optval, optlen);
break;
+ case RDMA_OPTION_IB:
+ ret = ucma_set_option_ib(ctx, optname, optval, optlen);
+ break;
default:
ret = -ENOSYS;
}
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 56feab6c251..112d3970222 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -285,7 +285,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
ucontext = ibdev->alloc_ucontext(ibdev, &udata);
if (IS_ERR(ucontext)) {
- ret = PTR_ERR(file->ucontext);
+ ret = PTR_ERR(ucontext);
goto err;
}
diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c
index a6d89440ad2..ad518868df7 100644
--- a/drivers/infiniband/hw/amso1100/c2_qp.c
+++ b/drivers/infiniband/hw/amso1100/c2_qp.c
@@ -798,8 +798,10 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
u8 actual_sge_count;
u32 msg_size;
- if (qp->state > IB_QPS_RTS)
- return -EINVAL;
+ if (qp->state > IB_QPS_RTS) {
+ err = -EINVAL;
+ goto out;
+ }
while (ib_wr) {
@@ -930,6 +932,7 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
ib_wr = ib_wr->next;
}
+out:
if (err)
*bad_wr = ib_wr;
return err;
@@ -944,8 +947,10 @@ int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
unsigned long lock_flags;
int err = 0;
- if (qp->state > IB_QPS_RTS)
- return -EINVAL;
+ if (qp->state > IB_QPS_RTS) {
+ err = -EINVAL;
+ goto out;
+ }
/*
* Try and post each work request
@@ -998,6 +1003,7 @@ int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
ib_wr = ib_wr->next;
}
+out:
if (err)
*bad_wr = ib_wr;
return err;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 1cecf98829a..3eb8cecf81d 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -365,18 +365,19 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
spin_lock_irqsave(&qhp->lock, flag);
if (qhp->attr.state > IWCH_QP_STATE_RTS) {
spin_unlock_irqrestore(&qhp->lock, flag);
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
qhp->wq.sq_size_log2);
if (num_wrs <= 0) {
spin_unlock_irqrestore(&qhp->lock, flag);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto out;
}
while (wr) {
if (num_wrs == 0) {
err = -ENOMEM;
- *bad_wr = wr;
break;
}
idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@@ -428,10 +429,8 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
wr->opcode);
err = -EINVAL;
}
- if (err) {
- *bad_wr = wr;
+ if (err)
break;
- }
wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
sqp->wr_id = wr->wr_id;
sqp->opcode = wr2opcode(t3_wr_opcode);
@@ -454,6 +453,10 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
spin_unlock_irqrestore(&qhp->lock, flag);
ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+
+out:
+ if (err)
+ *bad_wr = wr;
return err;
}
@@ -471,18 +474,19 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
spin_lock_irqsave(&qhp->lock, flag);
if (qhp->attr.state > IWCH_QP_STATE_RTS) {
spin_unlock_irqrestore(&qhp->lock, flag);
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
qhp->wq.rq_size_log2) - 1;
if (!wr) {
spin_unlock_irqrestore(&qhp->lock, flag);
- return -EINVAL;
+ err = -ENOMEM;
+ goto out;
}
while (wr) {
if (wr->num_sge > T3_MAX_SGE) {
err = -EINVAL;
- *bad_wr = wr;
break;
}
idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@@ -494,10 +498,10 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
err = build_zero_stag_recv(qhp, wqe, wr);
else
err = -ENOMEM;
- if (err) {
- *bad_wr = wr;
+
+ if (err)
break;
- }
+
build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
@@ -511,6 +515,10 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
}
spin_unlock_irqrestore(&qhp->lock, flag);
ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+
+out:
+ if (err)
+ *bad_wr = wr;
return err;
}
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index 8fd88cd828f..e3ec7fdd67b 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -400,7 +400,6 @@ static inline void map_ib_wc_status(u32 cqe_status,
static inline int post_one_send(struct ehca_qp *my_qp,
struct ib_send_wr *cur_send_wr,
- struct ib_send_wr **bad_send_wr,
int hidden)
{
struct ehca_wqe *wqe_p;
@@ -412,8 +411,6 @@ static inline int post_one_send(struct ehca_qp *my_qp,
wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
if (unlikely(!wqe_p)) {
/* too many posted work requests: queue overflow */
- if (bad_send_wr)
- *bad_send_wr = cur_send_wr;
ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
"qp_num=%x", my_qp->ib_qp.qp_num);
return -ENOMEM;
@@ -433,8 +430,6 @@ static inline int post_one_send(struct ehca_qp *my_qp,
*/
if (unlikely(ret)) {
my_qp->ipz_squeue.current_q_offset = start_offset;
- if (bad_send_wr)
- *bad_send_wr = cur_send_wr;
ehca_err(my_qp->ib_qp.device, "Could not write WQE "
"qp_num=%x", my_qp->ib_qp.qp_num);
return -EINVAL;
@@ -448,7 +443,6 @@ int ehca_post_send(struct ib_qp *qp,
struct ib_send_wr **bad_send_wr)
{
struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
- struct ib_send_wr *cur_send_wr;
int wqe_cnt = 0;
int ret = 0;
unsigned long flags;
@@ -457,7 +451,8 @@ int ehca_post_send(struct ib_qp *qp,
if (unlikely(my_qp->state < IB_QPS_RTS)) {
ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
my_qp->state, qp->qp_num);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
/* LOCK the QUEUE */
@@ -476,24 +471,21 @@ int ehca_post_send(struct ib_qp *qp,
struct ib_send_wr circ_wr;
memset(&circ_wr, 0, sizeof(circ_wr));
circ_wr.opcode = IB_WR_RDMA_READ;
- post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */
+ post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */
wqe_cnt++;
ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num);
my_qp->message_count = my_qp->packet_count = 0;
}
/* loop processes list of send reqs */
- for (cur_send_wr = send_wr; cur_send_wr != NULL;
- cur_send_wr = cur_send_wr->next) {
- ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0);
+ while (send_wr) {
+ ret = post_one_send(my_qp, send_wr, 0);
if (unlikely(ret)) {
- /* if one or more WQEs were successful, don't fail */
- if (wqe_cnt)
- ret = 0;
goto post_send_exit0;
}
wqe_cnt++;
- } /* eof for cur_send_wr */
+ send_wr = send_wr->next;
+ }
post_send_exit0:
iosync(); /* serialize GAL register access */
@@ -503,6 +495,10 @@ post_send_exit0:
my_qp, qp->qp_num, wqe_cnt, ret);
my_qp->message_count += wqe_cnt;
spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
+
+out:
+ if (ret)
+ *bad_send_wr = send_wr;
return ret;
}
@@ -511,7 +507,6 @@ static int internal_post_recv(struct ehca_qp *my_qp,
struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr)
{
- struct ib_recv_wr *cur_recv_wr;
struct ehca_wqe *wqe_p;
int wqe_cnt = 0;
int ret = 0;
@@ -522,27 +517,23 @@ static int internal_post_recv(struct ehca_qp *my_qp,
if (unlikely(!HAS_RQ(my_qp))) {
ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
my_qp, my_qp->real_qp_num, my_qp->ext_type);
- return -ENODEV;
+ ret = -ENODEV;
+ goto out;
}
/* LOCK the QUEUE */
spin_lock_irqsave(&my_qp->spinlock_r, flags);
- /* loop processes list of send reqs */
- for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
- cur_recv_wr = cur_recv_wr->next) {
+ /* loop processes list of recv reqs */
+ while (recv_wr) {
u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
/* get pointer next to free WQE */
wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
if (unlikely(!wqe_p)) {
/* too many posted work requests: queue overflow */
- if (bad_recv_wr)
- *bad_recv_wr = cur_recv_wr;
- if (wqe_cnt == 0) {
- ret = -ENOMEM;
- ehca_err(dev, "Too many posted WQEs "
- "qp_num=%x", my_qp->real_qp_num);
- }
+ ret = -ENOMEM;
+ ehca_err(dev, "Too many posted WQEs "
+ "qp_num=%x", my_qp->real_qp_num);
goto post_recv_exit0;
}
/*
@@ -552,7 +543,7 @@ static int internal_post_recv(struct ehca_qp *my_qp,
rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
/* write a RECV WQE into the QUEUE */
- ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr,
+ ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr,
rq_map_idx);
/*
* if something failed,
@@ -560,22 +551,20 @@ static int internal_post_recv(struct ehca_qp *my_qp,
*/
if (unlikely(ret)) {
my_qp->ipz_rqueue.current_q_offset = start_offset;
- *bad_recv_wr = cur_recv_wr;
- if (wqe_cnt == 0) {
- ret = -EINVAL;
- ehca_err(dev, "Could not write WQE "
- "qp_num=%x", my_qp->real_qp_num);
- }
+ ret = -EINVAL;
+ ehca_err(dev, "Could not write WQE "
+ "qp_num=%x", my_qp->real_qp_num);
goto post_recv_exit0;
}
qmap_entry = &my_qp->rq_map.map[rq_map_idx];
- qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
+ qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);
qmap_entry->reported = 0;
qmap_entry->cqe_req = 1;
wqe_cnt++;
- } /* eof for cur_recv_wr */
+ recv_wr = recv_wr->next;
+ } /* eof for recv_wr */
post_recv_exit0:
iosync(); /* serialize GAL register access */
@@ -584,6 +573,11 @@ post_recv_exit0:
ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
my_qp, my_qp->real_qp_num, wqe_cnt, ret);
spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
+
+out:
+ if (ret)
+ *bad_recv_wr = recv_wr;
+
return ret;
}
@@ -597,6 +591,7 @@ int ehca_post_recv(struct ib_qp *qp,
if (unlikely(my_qp->state == IB_QPS_RESET)) {
ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
my_qp->state, qp->qp_num);
+ *bad_recv_wr = recv_wr;
return -EINVAL;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 013d1380e77..d2787fe8030 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -39,6 +39,7 @@
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
+#include <linux/bitmap.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
@@ -1697,7 +1698,7 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
unsigned len, int avail)
{
unsigned long flags;
- unsigned end, cnt = 0, next;
+ unsigned end, cnt = 0;
/* There are two bits per send buffer (busy and generation) */
start *= 2;
@@ -1748,12 +1749,7 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
if (dd->ipath_pioupd_thresh) {
end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
- next = find_first_bit(dd->ipath_pioavailkernel, end);
- while (next < end) {
- cnt++;
- next = find_next_bit(dd->ipath_pioavailkernel, end,
- next + 1);
- }
+ cnt = bitmap_weight(dd->ipath_pioavailkernel, end);
}
spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 3cb3f47a10b..e596537ff35 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -103,7 +103,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
- if (dev->dev->caps.max_gso_sz)
+ if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)
props->device_cap_flags |= IB_DEVICE_UD_TSO;
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 219b10397b4..847030c89a8 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -54,7 +54,8 @@ enum {
/*
* Largest possible UD header: send with GRH and immediate data.
*/
- MLX4_IB_UD_HEADER_SIZE = 72
+ MLX4_IB_UD_HEADER_SIZE = 72,
+ MLX4_IB_LSO_HEADER_SPARE = 128,
};
struct mlx4_ib_sqp {
@@ -67,7 +68,8 @@ struct mlx4_ib_sqp {
};
enum {
- MLX4_IB_MIN_SQ_STRIDE = 6
+ MLX4_IB_MIN_SQ_STRIDE = 6,
+ MLX4_IB_CACHE_LINE_SIZE = 64,
};
static const __be32 mlx4_ib_opcode[] = {
@@ -261,7 +263,7 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
case IB_QPT_UD:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_datagram_seg) +
- ((flags & MLX4_IB_QP_LSO) ? 64 : 0);
+ ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
case IB_QPT_UC:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_raddr_seg);
@@ -897,7 +899,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
(to_mlx4_st(ibqp->qp_type) << 16));
- context->flags |= cpu_to_be32(1 << 8); /* DE? */
if (!(attr_mask & IB_QP_PATH_MIG_STATE))
context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
@@ -1467,16 +1468,12 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
- __be32 *lso_hdr_sz)
+ __be32 *lso_hdr_sz, __be32 *blh)
{
unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
- /*
- * This is a temporary limitation and will be removed in
- * a forthcoming FW release:
- */
- if (unlikely(halign > 64))
- return -EINVAL;
+ if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
+ *blh = cpu_to_be32(1 << 6);
if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
wr->num_sge > qp->sq.max_gs - (halign >> 4)))
@@ -1522,6 +1519,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
__be32 dummy;
__be32 *lso_wqe;
__be32 uninitialized_var(lso_hdr_sz);
+ __be32 blh;
int i;
spin_lock_irqsave(&qp->sq.lock, flags);
@@ -1530,6 +1528,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
for (nreq = 0; wr; ++nreq, wr = wr->next) {
lso_wqe = &dummy;
+ blh = 0;
if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
err = -ENOMEM;
@@ -1616,7 +1615,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
if (wr->opcode == IB_WR_LSO) {
- err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz);
+ err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -1687,7 +1686,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
- (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
+ (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
stamp = ind + qp->sq_spare_wqes;
ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 2bf5116deec..df3eb8c9fd9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -884,6 +884,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour,
neigh->neighbour = neighbour;
neigh->dev = dev;
+ memset(&neigh->dgid.raw, 0, sizeof (union ib_gid));
*to_ipoib_neigh(neighbour) = neigh;
skb_queue_head_init(&neigh->queue);
ipoib_cm_set(neigh, NULL);
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index b9453d068e9..274c883ef3e 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -209,6 +209,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
mem_copy->copy_buf = NULL;
}
+#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
+
/**
* iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
* and returns the length of resulting physical address array (may be less than
@@ -221,62 +223,52 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
* where --few fragments of the same page-- are present in the SG as
* consecutive elements. Also, it handles one entry SG.
*/
+
static int iser_sg_to_page_vec(struct iser_data_buf *data,
struct iser_page_vec *page_vec,
struct ib_device *ibdev)
{
- struct scatterlist *sgl = (struct scatterlist *)data->buf;
- struct scatterlist *sg;
- u64 first_addr, last_addr, page;
- int end_aligned;
- unsigned int cur_page = 0;
+ struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf;
+ u64 start_addr, end_addr, page, chunk_start = 0;
unsigned long total_sz = 0;
- int i;
+ unsigned int dma_len;
+ int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
/* compute the offset of first element */
page_vec->offset = (u64) sgl[0].offset & ~MASK_4K;
+ new_chunk = 1;
+ cur_page = 0;
for_each_sg(sgl, sg, data->dma_nents, i) {
- unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
-
+ start_addr = ib_sg_dma_address(ibdev, sg);
+ if (new_chunk)
+ chunk_start = start_addr;
+ dma_len = ib_sg_dma_len(ibdev, sg);
+ end_addr = start_addr + dma_len;
total_sz += dma_len;
- first_addr = ib_sg_dma_address(ibdev, sg);
- last_addr = first_addr + dma_len;
-
- end_aligned = !(last_addr & ~MASK_4K);
-
- /* continue to collect page fragments till aligned or SG ends */
- while (!end_aligned && (i + 1 < data->dma_nents)) {
- sg = sg_next(sg);
- i++;
- dma_len = ib_sg_dma_len(ibdev, sg);
- total_sz += dma_len;
- last_addr = ib_sg_dma_address(ibdev, sg) + dma_len;
- end_aligned = !(last_addr & ~MASK_4K);
+ /* collect page fragments until aligned or end of SG list */
+ if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
+ new_chunk = 0;
+ continue;
}
-
- /* handle the 1st page in the 1st DMA element */
- if (cur_page == 0) {
- page = first_addr & MASK_4K;
- page_vec->pages[cur_page] = page;
- cur_page++;
+ new_chunk = 1;
+
+ /* address of the first page in the contiguous chunk;
+ masking relevant for the very first SG entry,
+ which might be unaligned */
+ page = chunk_start & MASK_4K;
+ do {
+ page_vec->pages[cur_page++] = page;
page += SIZE_4K;
- } else
- page = first_addr;
-
- for (; page < last_addr; page += SIZE_4K) {
- page_vec->pages[cur_page] = page;
- cur_page++;
- }
-
+ } while (page < end_addr);
}
+
page_vec->data_size = total_sz;
iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page);
return cur_page;
}
-#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
/**
* iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
@@ -284,42 +276,40 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
* the number of entries which are aligned correctly. Supports the case where
* consecutive SG elements are actually fragments of the same physcial page.
*/
-static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data,
- struct ib_device *ibdev)
+static int iser_data_buf_aligned_len(struct iser_data_buf *data,
+ struct ib_device *ibdev)
{
- struct scatterlist *sgl, *sg;
- u64 end_addr, next_addr;
- int i, cnt;
- unsigned int ret_len = 0;
+ struct scatterlist *sgl, *sg, *next_sg = NULL;
+ u64 start_addr, end_addr;
+ int i, ret_len, start_check = 0;
+
+ if (data->dma_nents == 1)
+ return 1;
sgl = (struct scatterlist *)data->buf;
+ start_addr = ib_sg_dma_address(ibdev, sgl);
- cnt = 0;
for_each_sg(sgl, sg, data->dma_nents, i) {
- /* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX "
- "offset: %ld sz: %ld\n", i,
- (unsigned long)sg_phys(sg),
- (unsigned long)sg->offset,
- (unsigned long)sg->length); */
- end_addr = ib_sg_dma_address(ibdev, sg) +
- ib_sg_dma_len(ibdev, sg);
- /* iser_dbg("Checking sg iobuf end address "
- "0x%08lX\n", end_addr); */
- if (i + 1 < data->dma_nents) {
- next_addr = ib_sg_dma_address(ibdev, sg_next(sg));
- /* are i, i+1 fragments of the same page? */
- if (end_addr == next_addr) {
- cnt++;
- continue;
- } else if (!IS_4K_ALIGNED(end_addr)) {
- ret_len = cnt + 1;
- break;
- }
- }
- cnt++;
+ if (start_check && !IS_4K_ALIGNED(start_addr))
+ break;
+
+ next_sg = sg_next(sg);
+ if (!next_sg)
+ break;
+
+ end_addr = start_addr + ib_sg_dma_len(ibdev, sg);
+ start_addr = ib_sg_dma_address(ibdev, next_sg);
+
+ if (end_addr == start_addr) {
+ start_check = 0;
+ continue;
+ } else
+ start_check = 1;
+
+ if (!IS_4K_ALIGNED(end_addr))
+ break;
}
- if (i == data->dma_nents)
- ret_len = cnt; /* loop ended */
+ ret_len = (next_sg) ? i : i+1;
iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
ret_len, data->dma_nents, data);
return ret_len;