129 files changed, 25254 insertions, 2694 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 69a53d476b5..9edfacee7d8 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -14,7 +14,7 @@ config INFINIBAND_USER_MAD
 	---help---
 	  Userspace InfiniBand Management Datagram (MAD) support.  This
 	  is the kernel side of the userspace MAD support, which allows
-	  userspace processes to send and receive MADs. You will also 
+	  userspace processes to send and receive MADs. You will also
 	  need libibumad from <http://www.openib.org>.
 
 config INFINIBAND_USER_ACCESS
@@ -36,6 +36,8 @@ config INFINIBAND_ADDR_TRANS
 
 source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/ipath/Kconfig"
+source "drivers/infiniband/hw/ehca/Kconfig"
+source "drivers/infiniband/hw/amso1100/Kconfig"
 
 source "drivers/infiniband/ulp/ipoib/Kconfig"
 
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index c7ff58c1d0e..2b5d1098ef4 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -1,6 +1,8 @@
 obj-$(CONFIG_INFINIBAND)		+= core/
 obj-$(CONFIG_INFINIBAND_MTHCA)		+= hw/mthca/
-obj-$(CONFIG_IPATH_CORE)		+= hw/ipath/
+obj-$(CONFIG_INFINIBAND_IPATH)		+= hw/ipath/
+obj-$(CONFIG_INFINIBAND_EHCA)		+= hw/ehca/
+obj-$(CONFIG_INFINIBAND_AMSO1100)	+= hw/amso1100/
 obj-$(CONFIG_INFINIBAND_IPOIB)		+= ulp/ipoib/
 obj-$(CONFIG_INFINIBAND_SRP)		+= ulp/srp/
 obj-$(CONFIG_INFINIBAND_ISER)		+= ulp/iser/
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 68e73ec2d1f..163d991eb8c 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,7 +1,7 @@
 infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS)	:= ib_addr.o rdma_cm.o
 
 obj-$(CONFIG_INFINIBAND) +=		ib_core.o ib_mad.o ib_sa.o \
-					ib_cm.o $(infiniband-y)
+					ib_cm.o iw_cm.o $(infiniband-y)
 obj-$(CONFIG_INFINIBAND_USER_MAD) +=	ib_umad.o
 obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o
 
@@ -14,6 +14,8 @@ ib_sa-y :=			sa_query.o
 
 ib_cm-y :=			cm.o
 
+iw_cm-y :=			iwcm.o
+
 rdma_cm-y :=			cma.o
 
 ib_addr-y :=			addr.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 1205e802782..9cbf09e2052 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -61,12 +61,15 @@ static LIST_HEAD(req_list);
 static DECLARE_WORK(work, process_req, NULL);
 static struct workqueue_struct *addr_wq;
 
-static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
-		     unsigned char *dst_dev_addr)
+int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
+		     const unsigned char *dst_dev_addr)
 {
 	switch (dev->type) {
 	case ARPHRD_INFINIBAND:
-		dev_addr->dev_type = IB_NODE_CA;
+		dev_addr->dev_type = RDMA_NODE_IB_CA;
+		break;
+	case ARPHRD_ETHER:
+		dev_addr->dev_type = RDMA_NODE_RNIC;
 		break;
 	default:
 		return -EADDRNOTAVAIL;
@@ -78,6 +81,7 @@ static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
 	return 0;
 }
+EXPORT_SYMBOL(rdma_copy_addr);
 
 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 {
@@ -89,7 +93,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 	if (!dev)
 		return -EADDRNOTAVAIL;
 
-	ret = copy_addr(dev_addr, dev, NULL);
+	ret = rdma_copy_addr(dev_addr, dev, NULL);
 	dev_put(dev);
 	return ret;
 }
@@ -161,7 +165,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in,
 
 	/* If the device does ARP internally, return 'done' */
 	if (rt->idev->dev->flags & IFF_NOARP) {
-		copy_addr(addr, rt->idev->dev, NULL);
+		rdma_copy_addr(addr, rt->idev->dev, NULL);
 		goto put;
 	}
 
@@ -181,7 +185,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in,
 		src_in->sin_addr.s_addr = rt->rt_src;
 	}
 
-	ret = copy_addr(addr, neigh->dev, neigh->ha);
+	ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
 release:
 	neigh_release(neigh);
 put:
@@ -245,7 +249,7 @@ static int addr_resolve_local(struct sockaddr_in *src_in,
 	if (ZERONET(src_ip)) {
 		src_in->sin_family = dst_in->sin_family;
 		src_in->sin_addr.s_addr = dst_ip;
-		ret = copy_addr(addr, dev, dev->dev_addr);
+		ret = rdma_copy_addr(addr, dev, dev->dev_addr);
 	} else if (LOOPBACK(src_ip)) {
 		ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
 		if (!ret)
@@ -327,10 +331,10 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
 }
 EXPORT_SYMBOL(rdma_addr_cancel);
 
-static int netevent_callback(struct notifier_block *self, unsigned long event, 
+static int netevent_callback(struct notifier_block *self, unsigned long event,
 	void *ctx)
 {
-	if (event == NETEVENT_NEIGH_UPDATE) {  
+	if (event == NETEVENT_NEIGH_UPDATE) {
 		struct neighbour *neigh = ctx;
 
 		if (neigh->dev->type == ARPHRD_INFINIBAND &&
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 75313ade2e0..20e9f64e67a 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -62,12 +62,13 @@ struct ib_update_work {
 
 static inline int start_port(struct ib_device *device)
 {
-	return device->node_type == IB_NODE_SWITCH ? 0 : 1;
+	return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
 }
 
 static inline int end_port(struct ib_device *device)
 {
-	return device->node_type == IB_NODE_SWITCH ? 0 : device->phys_port_cnt;
+	return (device->node_type == RDMA_NODE_IB_SWITCH) ?
+		0 : device->phys_port_cnt;
 }
 
 int ib_get_cached_gid(struct ib_device *device,
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 0de335b7bfc..f35fcc4c063 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004-2006 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
  * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
@@ -41,6 +41,7 @@
 #include <linux/idr.h>
 #include <linux/interrupt.h>
 #include <linux/pci.h>
+#include <linux/random.h>
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
@@ -73,6 +74,7 @@ static struct ib_cm {
 	struct rb_root remote_id_table;
 	struct rb_root remote_sidr_table;
 	struct idr local_id_table;
+	__be32 random_id_operand;
 	struct workqueue_struct *wq;
 } cm;
 
@@ -177,7 +179,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
 	if (IS_ERR(ah))
 		return PTR_ERR(ah);
 
-	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, 
+	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
 			       cm_id_priv->av.pkey_index,
 			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
 			       GFP_ATOMIC);
@@ -299,15 +301,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 static int cm_alloc_id(struct cm_id_private *cm_id_priv)
 {
 	unsigned long flags;
-	int ret;
+	int ret, id;
 	static int next_id;
 
 	do {
 		spin_lock_irqsave(&cm.lock, flags);
-		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id++,
-					(__force int *) &cm_id_priv->id.local_id);
+		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
+					next_id++, &id);
 		spin_unlock_irqrestore(&cm.lock, flags);
 	} while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
+
+	cm_id_priv->id.local_id = (__force __be32) (id ^ cm.random_id_operand);
 	return ret;
 }
 
@@ -316,7 +320,8 @@ static void cm_free_id(__be32 local_id)
 	unsigned long flags;
 
 	spin_lock_irqsave(&cm.lock, flags);
-	idr_remove(&cm.local_id_table, (__force int) local_id);
+	idr_remove(&cm.local_id_table,
+		   (__force int) (local_id ^ cm.random_id_operand));
 	spin_unlock_irqrestore(&cm.lock, flags);
 }
 
@@ -324,7 +329,8 @@ static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
 {
 	struct cm_id_private *cm_id_priv;
 
-	cm_id_priv = idr_find(&cm.local_id_table, (__force int) local_id);
+	cm_id_priv = idr_find(&cm.local_id_table,
+			      (__force int) (local_id ^ cm.random_id_operand));
 	if (cm_id_priv) {
 		if (cm_id_priv->id.remote_id == remote_id)
 			atomic_inc(&cm_id_priv->refcount);
@@ -679,6 +685,8 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
 {
 	int wait_time;
 
+	cm_cleanup_timewait(cm_id_priv->timewait_info);
+
 	/*
 	 * The cm_id could be destroyed by the user before we exit timewait.
 	 * To protect against this, we search for the cm_id after exiting
@@ -1354,7 +1362,7 @@ static int cm_req_handler(struct cm_work *work)
 							    id.local_id);
 	if (IS_ERR(cm_id_priv->timewait_info)) {
 		ret = PTR_ERR(cm_id_priv->timewait_info);
-		goto error1;
+		goto destroy;
 	}
 	cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
 	cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
@@ -1363,7 +1371,8 @@ static int cm_req_handler(struct cm_work *work)
 	listen_cm_id_priv = cm_match_req(work, cm_id_priv);
 	if (!listen_cm_id_priv) {
 		ret = -EINVAL;
-		goto error2;
+		kfree(cm_id_priv->timewait_info);
+		goto destroy;
 	}
 
 	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
@@ -1373,12 +1382,22 @@ static int cm_req_handler(struct cm_work *work)
 
 	cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
 	ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
-	if (ret)
-		goto error3;
+	if (ret) {
+		ib_get_cached_gid(work->port->cm_dev->device,
+				  work->port->port_num, 0, &work->path[0].sgid);
+		ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+			       &work->path[0].sgid, sizeof work->path[0].sgid,
+			       NULL, 0);
+		goto rejected;
+	}
 	if (req_msg->alt_local_lid) {
 		ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
-		if (ret)
-			goto error3;
+		if (ret) {
+			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
+				       &work->path[0].sgid,
+				       sizeof work->path[0].sgid, NULL, 0);
+			goto rejected;
+		}
 	}
 	cm_id_priv->tid = req_msg->hdr.tid;
 	cm_id_priv->timeout_ms = cm_convert_to_ms(
@@ -1400,12 +1419,11 @@ static int cm_req_handler(struct cm_work *work)
 	cm_deref_id(listen_cm_id_priv);
 	return 0;
 
-error3:	atomic_dec(&cm_id_priv->refcount);
+rejected:
+	atomic_dec(&cm_id_priv->refcount);
 	cm_deref_id(listen_cm_id_priv);
-	cm_cleanup_timewait(cm_id_priv->timewait_info);
-error2:	kfree(cm_id_priv->timewait_info);
-	cm_id_priv->timewait_info = NULL;
-error1:	ib_destroy_cm_id(&cm_id_priv->id);
+destroy:
+	ib_destroy_cm_id(cm_id);
 	return ret;
 }
 
@@ -2072,8 +2090,9 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
 			spin_unlock_irqrestore(&cm.lock, flags);
 			return NULL;
 		}
-		cm_id_priv = idr_find(&cm.local_id_table,
-				      (__force int) timewait_info->work.local_id);
+		cm_id_priv = idr_find(&cm.local_id_table, (__force int)
+				      (timewait_info->work.local_id ^
+				       cm.random_id_operand));
 		if (cm_id_priv) {
 			if (cm_id_priv->id.remote_id == remote_id)
 				atomic_inc(&cm_id_priv->refcount);
@@ -3125,7 +3144,8 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
 		qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
 					   IB_ACCESS_REMOTE_WRITE;
 		if (cm_id_priv->responder_resources)
-			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ;
+			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
+						    IB_ACCESS_REMOTE_ATOMIC;
 		qp_attr->pkey_index = cm_id_priv->av.pkey_index;
 		qp_attr->port_num = cm_id_priv->av.port->port_num;
 		ret = 0;
@@ -3262,6 +3282,9 @@ static void cm_add_one(struct ib_device *device)
 	int ret;
 	u8 i;
 
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
 	cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) *
 			 device->phys_port_cnt, GFP_KERNEL);
 	if (!cm_dev)
@@ -3349,6 +3372,7 @@ static int __init ib_cm_init(void)
 	cm.remote_qp_table = RB_ROOT;
 	cm.remote_sidr_table = RB_ROOT;
 	idr_init(&cm.local_id_table);
+	get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
 	idr_pre_get(&cm.local_id_table, GFP_KERNEL);
 
 	cm.wq = create_workqueue("ib_cm");
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index d6f99d5720f..1178bd434d1 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -35,6 +35,7 @@
 #include <linux/mutex.h>
 #include <linux/random.h>
 #include <linux/idr.h>
+#include <linux/inetdevice.h>
 
 #include <net/tcp.h>
 
@@ -43,13 +44,14 @@
 #include <rdma/ib_cache.h>
 #include <rdma/ib_cm.h>
 #include <rdma/ib_sa.h>
+#include <rdma/iw_cm.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("Generic RDMA CM Agent");
 MODULE_LICENSE("Dual BSD/GPL");
 
 #define CMA_CM_RESPONSE_TIMEOUT 20
-#define CMA_MAX_CM_RETRIES 3
+#define CMA_MAX_CM_RETRIES 15
 
 static void cma_add_one(struct ib_device *device);
 static void cma_remove_one(struct ib_device *device);
@@ -60,6 +62,7 @@ static struct ib_client cma_client = {
 	.remove = cma_remove_one
 };
 
+static struct ib_sa_client sa_client;
 static LIST_HEAD(dev_list);
 static LIST_HEAD(listen_any_list);
 static DEFINE_MUTEX(lock);
@@ -124,6 +127,7 @@ struct rdma_id_private {
 	int			query_id;
 	union {
 		struct ib_cm_id	*ib;
+		struct iw_cm_id	*iw;
 	} cm_id;
 
 	u32			seq_num;
@@ -259,15 +263,24 @@ static void cma_detach_from_dev(struct rdma_id_private *id_priv)
 	id_priv->cma_dev = NULL;
 }
 
-static int cma_acquire_ib_dev(struct rdma_id_private *id_priv)
+static int cma_acquire_dev(struct rdma_id_private *id_priv)
 {
+	enum rdma_node_type dev_type = id_priv->id.route.addr.dev_addr.dev_type;
 	struct cma_device *cma_dev;
 	union ib_gid gid;
 	int ret = -ENODEV;
 
-	ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid),
+	switch (rdma_node_get_transport(dev_type)) {
+	case RDMA_TRANSPORT_IB:
+		ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+		break;
+	case RDMA_TRANSPORT_IWARP:
+		iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+		break;
+	default:
+		return -ENODEV;
+	}
 
-	mutex_lock(&lock);
 	list_for_each_entry(cma_dev, &dev_list, list) {
 		ret = ib_find_cached_gid(cma_dev->device, &gid,
 					 &id_priv->id.port_num, NULL);
@@ -276,20 +289,9 @@ static int cma_acquire_ib_dev(struct rdma_id_private *id_priv)
 			break;
 		}
 	}
-	mutex_unlock(&lock);
 	return ret;
 }
 
-static int cma_acquire_dev(struct rdma_id_private *id_priv)
-{
-	switch (id_priv->id.route.addr.dev_addr.dev_type) {
-	case IB_NODE_CA:
-		return cma_acquire_ib_dev(id_priv);
-	default:
-		return -ENODEV;
-	}
-}
-
 static void cma_deref_id(struct rdma_id_private *id_priv)
 {
 	if (atomic_dec_and_test(&id_priv->refcount))
@@ -347,6 +349,16 @@ static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
 					  IB_QP_PKEY_INDEX | IB_QP_PORT);
 }
 
+static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+{
+	struct ib_qp_attr qp_attr;
+
+	qp_attr.qp_state = IB_QPS_INIT;
+	qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+
+	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS);
+}
+
 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
 		   struct ib_qp_init_attr *qp_init_attr)
 {
@@ -362,10 +374,13 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
 	if (IS_ERR(qp))
 		return PTR_ERR(qp);
 
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = cma_init_ib_qp(id_priv, qp);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_init_iw_qp(id_priv, qp);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -451,13 +466,17 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	switch (id_priv->id.device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
 					 qp_attr_mask);
 		if (qp_attr->qp_state == IB_QPS_RTR)
 			qp_attr->rq_psn = id_priv->seq_num;
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
+					qp_attr_mask);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -590,8 +609,8 @@ static int cma_notify_user(struct rdma_id_private *id_priv,
 
 static void cma_cancel_route(struct rdma_id_private *id_priv)
 {
-	switch (id_priv->id.device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		if (id_priv->query)
 			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
 		break;
@@ -611,11 +630,15 @@ static void cma_destroy_listen(struct rdma_id_private *id_priv)
 	cma_exch(id_priv, CMA_DESTROYING);
 
 	if (id_priv->cma_dev) {
-		switch (id_priv->id.device->node_type) {
-		case IB_NODE_CA:
-	 		if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+		switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+		case RDMA_TRANSPORT_IB:
+			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
 				ib_destroy_cm_id(id_priv->cm_id.ib);
 			break;
+		case RDMA_TRANSPORT_IWARP:
+			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
+				iw_destroy_cm_id(id_priv->cm_id.iw);
+			break;
 		default:
 			break;
 		}
@@ -689,19 +712,25 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 	state = cma_exch(id_priv, CMA_DESTROYING);
 	cma_cancel_operation(id_priv, state);
 
+	mutex_lock(&lock);
 	if (id_priv->cma_dev) {
-		switch (id->device->node_type) {
-		case IB_NODE_CA:
-	 		if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+		mutex_unlock(&lock);
+		switch (rdma_node_get_transport(id->device->node_type)) {
+		case RDMA_TRANSPORT_IB:
+			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
 				ib_destroy_cm_id(id_priv->cm_id.ib);
 			break;
+		case RDMA_TRANSPORT_IWARP:
+			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
+				iw_destroy_cm_id(id_priv->cm_id.iw);
+			break;
 		default:
 			break;
 		}
-	  	mutex_lock(&lock);
+		mutex_lock(&lock);
 		cma_detach_from_dev(id_priv);
-		mutex_unlock(&lock);
 	}
+	mutex_unlock(&lock);
 
 	cma_release_port(id_priv);
 	cma_deref_id(id_priv);
@@ -869,7 +898,7 @@ static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id,
 	ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
 	ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
 	ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
-	rt->addr.dev_addr.dev_type = IB_NODE_CA;
+	rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 	id_priv->state = CMA_CONNECT;
@@ -898,7 +927,9 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	}
 
 	atomic_inc(&conn_id->dev_remove);
-	ret = cma_acquire_ib_dev(conn_id);
+	mutex_lock(&lock);
+	ret = cma_acquire_dev(conn_id);
+	mutex_unlock(&lock);
 	if (ret) {
 		ret = -ENODEV;
 		cma_release_remove(conn_id);
@@ -982,6 +1013,130 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
 	}
 }
 
+static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
+{
+	struct rdma_id_private *id_priv = iw_id->context;
+	enum rdma_cm_event_type event = 0;
+	struct sockaddr_in *sin;
+	int ret = 0;
+
+	atomic_inc(&id_priv->dev_remove);
+
+	switch (iw_event->event) {
+	case IW_CM_EVENT_CLOSE:
+		event = RDMA_CM_EVENT_DISCONNECTED;
+		break;
+	case IW_CM_EVENT_CONNECT_REPLY:
+		sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+		*sin = iw_event->local_addr;
+		sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
+		*sin = iw_event->remote_addr;
+		if (iw_event->status)
+			event = RDMA_CM_EVENT_REJECTED;
+		else
+			event = RDMA_CM_EVENT_ESTABLISHED;
+		break;
+	case IW_CM_EVENT_ESTABLISHED:
+		event = RDMA_CM_EVENT_ESTABLISHED;
+		break;
+	default:
+		BUG_ON(1);
+	}
+
+	ret = cma_notify_user(id_priv, event, iw_event->status,
+			      iw_event->private_data,
+			      iw_event->private_data_len);
+	if (ret) {
+		/* Destroy the CM ID by returning a non-zero value. */
+		id_priv->cm_id.iw = NULL;
+		cma_exch(id_priv, CMA_DESTROYING);
+		cma_release_remove(id_priv);
+		rdma_destroy_id(&id_priv->id);
+		return ret;
+	}
+
+	cma_release_remove(id_priv);
+	return ret;
+}
+
+static int iw_conn_req_handler(struct iw_cm_id *cm_id,
+			       struct iw_cm_event *iw_event)
+{
+	struct rdma_cm_id *new_cm_id;
+	struct rdma_id_private *listen_id, *conn_id;
+	struct sockaddr_in *sin;
+	struct net_device *dev = NULL;
+	int ret;
+
+	listen_id = cm_id->context;
+	atomic_inc(&listen_id->dev_remove);
+	if (!cma_comp(listen_id, CMA_LISTEN)) {
+		ret = -ECONNABORTED;
+		goto out;
+	}
+
+	/* Create a new RDMA id for the new IW CM ID */
+	new_cm_id = rdma_create_id(listen_id->id.event_handler,
+				   listen_id->id.context,
+				   RDMA_PS_TCP);
+	if (!new_cm_id) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	conn_id = container_of(new_cm_id, struct rdma_id_private, id);
+	atomic_inc(&conn_id->dev_remove);
+	conn_id->state = CMA_CONNECT;
+
+	dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr);
+	if (!dev) {
+		ret = -EADDRNOTAVAIL;
+		cma_release_remove(conn_id);
+		rdma_destroy_id(new_cm_id);
+		goto out;
+	}
+	ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
+	if (ret) {
+		cma_release_remove(conn_id);
+		rdma_destroy_id(new_cm_id);
+		goto out;
+	}
+
+	mutex_lock(&lock);
+	ret = cma_acquire_dev(conn_id);
+	mutex_unlock(&lock);
+	if (ret) {
+		cma_release_remove(conn_id);
+		rdma_destroy_id(new_cm_id);
+		goto out;
+	}
+
+	conn_id->cm_id.iw = cm_id;
+	cm_id->context = conn_id;
+	cm_id->cm_handler = cma_iw_handler;
+
+	sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
+	*sin = iw_event->local_addr;
+	sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
+	*sin = iw_event->remote_addr;
+
+	ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0,
+			      iw_event->private_data,
+			      iw_event->private_data_len);
+	if (ret) {
+		/* User wants to destroy the CM ID */
+		conn_id->cm_id.iw = NULL;
+		cma_exch(conn_id, CMA_DESTROYING);
+		cma_release_remove(conn_id);
+		rdma_destroy_id(&conn_id->id);
+	}
+
+out:
+	if (dev)
+		dev_put(dev);
+	cma_release_remove(listen_id);
+	return ret;
+}
+
 static int cma_ib_listen(struct rdma_id_private *id_priv)
 {
 	struct ib_cm_compare_data compare_data;
@@ -1011,6 +1166,30 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
 	return ret;
 }
 
+static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
+{
+	int ret;
+	struct sockaddr_in *sin;
+
+	id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
+					    iw_conn_req_handler,
+					    id_priv);
+	if (IS_ERR(id_priv->cm_id.iw))
+		return PTR_ERR(id_priv->cm_id.iw);
+
+	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+	id_priv->cm_id.iw->local_addr = *sin;
+
+	ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
+
+	if (ret) {
+		iw_destroy_cm_id(id_priv->cm_id.iw);
+		id_priv->cm_id.iw = NULL;
+	}
+
+	return ret;
+}
+
 static int cma_listen_handler(struct rdma_cm_id *id,
 			      struct rdma_cm_event *event)
 {
@@ -1087,12 +1266,17 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
 
 	id_priv->backlog = backlog;
 	if (id->device) {
-		switch (id->device->node_type) {
-		case IB_NODE_CA:
+		switch (rdma_node_get_transport(id->device->node_type)) {
+		case RDMA_TRANSPORT_IB:
 			ret = cma_ib_listen(id_priv);
 			if (ret)
 				goto err;
 			break;
+		case RDMA_TRANSPORT_IWARP:
+			ret = cma_iw_listen(id_priv, backlog);
+			if (ret)
+				goto err;
+			break;
 		default:
 			ret = -ENOSYS;
 			goto err;
@@ -1140,7 +1324,7 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
 	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
 	path_rec.numb_path = 1;
 
-	id_priv->query_id = ib_sa_path_rec_get(id_priv->id.device,
+	id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
 				id_priv->id.port_num, &path_rec,
 				IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
 				IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH,
@@ -1231,6 +1415,23 @@ err:
 }
 EXPORT_SYMBOL(rdma_set_ib_paths);
 
+static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
+{
+	struct cma_work *work;
+
+	work = kzalloc(sizeof *work, GFP_KERNEL);
+	if (!work)
+		return -ENOMEM;
+
+	work->id = id_priv;
+	INIT_WORK(&work->work, cma_work_handler, work);
+	work->old_state = CMA_ROUTE_QUERY;
+	work->new_state = CMA_ROUTE_RESOLVED;
+	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+	queue_work(cma_wq, &work->work);
+	return 0;
+}
+
 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 {
 	struct rdma_id_private *id_priv;
@@ -1241,10 +1442,13 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 		return -EINVAL;
 
 	atomic_inc(&id_priv->refcount);
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = cma_resolve_ib_route(id_priv, timeout_ms);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_resolve_iw_route(id_priv, timeout_ms);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -1309,16 +1513,26 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 	enum rdma_cm_event_type event;
 
 	atomic_inc(&id_priv->dev_remove);
-	if (!id_priv->cma_dev && !status)
+
+	/*
+	 * Grab mutex to block rdma_destroy_id() from removing the device while
+	 * we're trying to acquire it.
+	 */
+	mutex_lock(&lock);
+	if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
+		mutex_unlock(&lock);
+		goto out;
+	}
+
+	if (!status && !id_priv->cma_dev)
 		status = cma_acquire_dev(id_priv);
+	mutex_unlock(&lock);
 
 	if (status) {
-		if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND))
+		if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
 			goto out;
 		event = RDMA_CM_EVENT_ADDR_ERROR;
 	} else {
-		if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED))
-			goto out;
 		memcpy(&id_priv->id.route.addr.src_addr, src_addr,
 		       ip_addr_size(src_addr));
 		event = RDMA_CM_EVENT_ADDR_RESOLVED;
@@ -1492,7 +1706,7 @@ static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
 	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
 		if (cma_any_addr(&cur_id->id.route.addr.src_addr))
 			return -EADDRNOTAVAIL;
-		
+
 		cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
 		if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
 			return -EADDRINUSE;
@@ -1542,8 +1756,11 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 
 	if (!cma_any_addr(addr)) {
 		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
-		if (!ret)
+		if (!ret) {
+			mutex_lock(&lock);
 			ret = cma_acquire_dev(id_priv);
+			mutex_unlock(&lock);
+		}
 		if (ret)
 			goto err;
 	}
@@ -1649,6 +1866,47 @@ out:
 	return ret;
 }
 
+static int cma_connect_iw(struct rdma_id_private *id_priv,
+			  struct rdma_conn_param *conn_param)
+{
+	struct iw_cm_id *cm_id;
+	struct sockaddr_in* sin;
+	int ret;
+	struct iw_cm_conn_param iw_param;
+
+	cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
+	if (IS_ERR(cm_id)) {
+		ret = PTR_ERR(cm_id);
+		goto out;
+	}
+
+	id_priv->cm_id.iw = cm_id;
+
+	sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr;
+	cm_id->local_addr = *sin;
+
+	sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
+	cm_id->remote_addr = *sin;
+
+	ret = cma_modify_qp_rtr(&id_priv->id);
+	if (ret) {
+		iw_destroy_cm_id(cm_id);
+		return ret;
+	}
+
+	iw_param.ord = conn_param->initiator_depth;
+	iw_param.ird = conn_param->responder_resources;
+	iw_param.private_data = conn_param->private_data;
+	iw_param.private_data_len = conn_param->private_data_len;
+	if (id_priv->id.qp)
+		iw_param.qpn = id_priv->qp_num;
+	else
+		iw_param.qpn = conn_param->qp_num;
+	ret = iw_cm_connect(cm_id, &iw_param);
+out:
+	return ret;
+}
+
 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
 	struct rdma_id_private *id_priv;
@@ -1664,10 +1922,13 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 		id_priv->srq = conn_param->srq;
 	}
 
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = cma_connect_ib(id_priv, conn_param);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_connect_iw(id_priv, conn_param);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -1708,6 +1969,28 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
 	return ib_send_cm_rep(id_priv->cm_id.ib, &rep);
 }
 
+static int cma_accept_iw(struct rdma_id_private *id_priv,
+		  struct rdma_conn_param *conn_param)
+{
+	struct iw_cm_conn_param iw_param;
+	int ret;
+
+	ret = cma_modify_qp_rtr(&id_priv->id);
+	if (ret)
+		return ret;
+
+	iw_param.ord = conn_param->initiator_depth;
+	iw_param.ird = conn_param->responder_resources;
+	iw_param.private_data = conn_param->private_data;
+	iw_param.private_data_len = conn_param->private_data_len;
+	if (id_priv->id.qp) {
+		iw_param.qpn = id_priv->qp_num;
+	} else
+		iw_param.qpn = conn_param->qp_num;
+
+	return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
+}
+
 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
 	struct rdma_id_private *id_priv;
@@ -1723,13 +2006,16 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 		id_priv->srq = conn_param->srq;
 	}
 
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		if (conn_param)
 			ret = cma_accept_ib(id_priv, conn_param);
 		else
 			ret = cma_rep_recv(id_priv);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_accept_iw(id_priv, conn_param);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -1756,12 +2042,16 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
 	if (!cma_comp(id_priv, CMA_CONNECT))
 		return -EINVAL;
 
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = ib_send_cm_rej(id_priv->cm_id.ib,
 				     IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
 				     private_data, private_data_len);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = iw_cm_reject(id_priv->cm_id.iw,
+				   private_data, private_data_len);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -1780,17 +2070,20 @@ int rdma_disconnect(struct rdma_cm_id *id)
 	    !cma_comp(id_priv, CMA_DISCONNECT))
 		return -EINVAL;
 
-	ret = cma_modify_qp_err(id);
-	if (ret)
-		goto out;
-
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
+		ret = cma_modify_qp_err(id);
+		if (ret)
+			goto out;
 		/* Initiate or respond to a disconnect. */
 		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
 			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
+		break;
 	default:
+		ret = -EINVAL;
 		break;
 	}
 out:
@@ -1907,12 +2200,15 @@ static int cma_init(void)
 	if (!cma_wq)
 		return -ENOMEM;
 
+	ib_sa_register_client(&sa_client);
+
 	ret = ib_register_client(&cma_client);
 	if (ret)
 		goto err;
 	return 0;
 
 err:
+	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);
 	return ret;
 }
@@ -1920,6 +2216,7 @@ err:
 static void cma_cleanup(void)
 {
 	ib_unregister_client(&cma_client);
+	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);
 	idr_destroy(&sdp_ps);
 	idr_destroy(&tcp_ps);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index b2f3cb91d9b..63d2a39fb82 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -385,7 +385,7 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
 EXPORT_SYMBOL(ib_get_client_data);
 
 /**
- * ib_set_client_data - Get IB client context
+ * ib_set_client_data - Set IB client context
  * @device:Device to set context for
  * @client:Client to set context for
  * @data:Context to set
@@ -505,7 +505,7 @@ int ib_query_port(struct ib_device *device,
 		  u8 port_num,
 		  struct ib_port_attr *port_attr)
 {
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		if (port_num)
 			return -EINVAL;
 	} else if (port_num < 1 || port_num > device->phys_port_cnt)
@@ -580,7 +580,7 @@ int ib_modify_port(struct ib_device *device,
 		   u8 port_num, int port_modify_mask,
 		   struct ib_port_modify *port_modify)
 {
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		if (port_num)
 			return -EINVAL;
 	} else if (port_num < 1 || port_num > device->phys_port_cnt)
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
new file mode 100644
index 00000000000..c3fb304a4e8
--- /dev/null
+++ b/drivers/infiniband/core/iwcm.c
@@ -0,0 +1,1019 @@
+/*
+ * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_addr.h>
+
+#include "iwcm.h"
+
+MODULE_AUTHOR("Tom Tucker");
+MODULE_DESCRIPTION("iWARP CM");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static struct workqueue_struct *iwcm_wq;
+struct iwcm_work {
+	struct work_struct work;
+	struct iwcm_id_private *cm_id;
+	struct list_head list;
+	struct iw_cm_event event;
+	struct list_head free_list;
+};
+
+/*
+ * The following services provide a mechanism for pre-allocating iwcm_work
+ * elements.  The design pre-allocates them  based on the cm_id type:
+ *	LISTENING IDS: 	Get enough elements preallocated to handle the
+ *			listen backlog.
+ *	ACTIVE IDS:	4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE
+ *	PASSIVE IDS:	3: ESTABLISHED, DISCONNECT, CLOSE
+ *
+ * Allocating them in connect and listen avoids having to deal
+ * with allocation failures on the event upcall from the provider (which
+ * is called in the interrupt context).
+ *
+ * One exception is when creating the cm_id for incoming connection requests.
+ * There are two cases:
+ * 1) in the event upcall, cm_event_handler(), for a listening cm_id.  If
+ *    the backlog is exceeded, then no more connection request events will
+ *    be processed.  cm_event_handler() returns -ENOMEM in this case.  Its up
+ *    to the provider to reject the connectino request.
+ * 2) in the connection request workqueue handler, cm_conn_req_handler().
+ *    If work elements cannot be allocated for the new connect request cm_id,
+ *    then IWCM will call the provider reject method.  This is ok since
+ *    cm_conn_req_handler() runs in the workqueue thread context.
+ */
+
+static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
+{
+	struct iwcm_work *work;
+
+	if (list_empty(&cm_id_priv->work_free_list))
+		return NULL;
+	work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
+			  free_list);
+	list_del_init(&work->free_list);
+	return work;
+}
+
+static void put_work(struct iwcm_work *work)
+{
+	list_add(&work->free_list, &work->cm_id->work_free_list);
+}
+
+static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
+{
+	struct list_head *e, *tmp;
+
+	list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
+		kfree(list_entry(e, struct iwcm_work, free_list));
+}
+
+static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
+{
+	struct iwcm_work *work;
+
+	BUG_ON(!list_empty(&cm_id_priv->work_free_list));
+	while (count--) {
+		work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL);
+		if (!work) {
+			dealloc_work_entries(cm_id_priv);
+			return -ENOMEM;
+		}
+		work->cm_id = cm_id_priv;
+		INIT_LIST_HEAD(&work->list);
+		put_work(work);
+	}
+	return 0;
+}
+
+/*
+ * Save private data from incoming connection requests in the
+ * cm_id_priv so the low level driver doesn't have to.  Adjust
+ * the event ptr to point to the local copy.
+ */
+static int copy_private_data(struct iwcm_id_private *cm_id_priv,
+		       struct iw_cm_event *event)
+{
+	void *p;
+
+	p = kmalloc(event->private_data_len, GFP_ATOMIC);
+	if (!p)
+		return -ENOMEM;
+	memcpy(p, event->private_data, event->private_data_len);
+	event->private_data = p;
+	return 0;
+}
+
+/*
+ * Release a reference on cm_id. If the last reference is being removed
+ * and iw_destroy_cm_id is waiting, wake up the waiting thread.
+ */
+static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
+{
+	int ret = 0;
+
+	BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
+	if (atomic_dec_and_test(&cm_id_priv->refcount)) {
+		BUG_ON(!list_empty(&cm_id_priv->work_list));
+		if (waitqueue_active(&cm_id_priv->destroy_comp.wait)) {
+			BUG_ON(cm_id_priv->state != IW_CM_STATE_DESTROYING);
+			BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY,
+					&cm_id_priv->flags));
+			ret = 1;
+		}
+		complete(&cm_id_priv->destroy_comp);
+	}
+
+	return ret;
+}
+
+static void add_ref(struct iw_cm_id *cm_id)
+{
+	struct iwcm_id_private *cm_id_priv;
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	atomic_inc(&cm_id_priv->refcount);
+}
+
+static void rem_ref(struct iw_cm_id *cm_id)
+{
+	struct iwcm_id_private *cm_id_priv;
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	iwcm_deref_id(cm_id_priv);
+}
+
+static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
+
+struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
+				 iw_cm_handler cm_handler,
+				 void *context)
+{
+	struct iwcm_id_private *cm_id_priv;
+
+	cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL);
+	if (!cm_id_priv)
+		return ERR_PTR(-ENOMEM);
+
+	cm_id_priv->state = IW_CM_STATE_IDLE;
+	cm_id_priv->id.device = device;
+	cm_id_priv->id.cm_handler = cm_handler;
+	cm_id_priv->id.context = context;
+	cm_id_priv->id.event_handler = cm_event_handler;
+	cm_id_priv->id.add_ref = add_ref;
+	cm_id_priv->id.rem_ref = rem_ref;
+	spin_lock_init(&cm_id_priv->lock);
+	atomic_set(&cm_id_priv->refcount, 1);
+	init_waitqueue_head(&cm_id_priv->connect_wait);
+	init_completion(&cm_id_priv->destroy_comp);
+	INIT_LIST_HEAD(&cm_id_priv->work_list);
+	INIT_LIST_HEAD(&cm_id_priv->work_free_list);
+
+	return &cm_id_priv->id;
+}
+EXPORT_SYMBOL(iw_create_cm_id);
+
+
+static int iwcm_modify_qp_err(struct ib_qp *qp)
+{
+	struct ib_qp_attr qp_attr;
+
+	if (!qp)
+		return -EINVAL;
+
+	qp_attr.qp_state = IB_QPS_ERR;
+	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+}
+
+/*
+ * This is really the RDMAC CLOSING state. It is most similar to the
+ * IB SQD QP state.
+ */
+static int iwcm_modify_qp_sqd(struct ib_qp *qp)
+{
+	struct ib_qp_attr qp_attr;
+
+	BUG_ON(qp == NULL);
+	qp_attr.qp_state = IB_QPS_SQD;
+	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+}
+
+/*
+ * CM_ID <-- CLOSING
+ *
+ * Block if a passive or active connection is currenlty being processed. Then
+ * process the event as follows:
+ * - If we are ESTABLISHED, move to CLOSING and modify the QP state
+ *   based on the abrupt flag
+ * - If the connection is already in the CLOSING or IDLE state, the peer is
+ *   disconnecting concurrently with us and we've already seen the
+ *   DISCONNECT event -- ignore the request and return 0
+ * - Disconnect on a listening endpoint returns -EINVAL
+ */
+int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
+{
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret = 0;
+	struct ib_qp *qp = NULL;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	/* Wait if we're currently in a connect or accept downcall */
+	wait_event(cm_id_priv->connect_wait,
+		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_ESTABLISHED:
+		cm_id_priv->state = IW_CM_STATE_CLOSING;
+
+		/* QP could be <nul> for user-mode client */
+		if (cm_id_priv->qp)
+			qp = cm_id_priv->qp;
+		else
+			ret = -EINVAL;
+		break;
+	case IW_CM_STATE_LISTEN:
+		ret = -EINVAL;
+		break;
+	case IW_CM_STATE_CLOSING:
+		/* remote peer closed first */
+	case IW_CM_STATE_IDLE:
+		/* accept or connect returned !0 */
+		break;
+	case IW_CM_STATE_CONN_RECV:
+		/*
+		 * App called disconnect before/without calling accept after
+		 * connect_request event delivered.
+		 */
+		break;
+	case IW_CM_STATE_CONN_SENT:
+		/* Can only get here if wait above fails */
+	default:
+		BUG();
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	if (qp) {
+		if (abrupt)
+			ret = iwcm_modify_qp_err(qp);
+		else
+			ret = iwcm_modify_qp_sqd(qp);
+
+		/*
+		 * If both sides are disconnecting the QP could
+		 * already be in ERR or SQD states
+		 */
+		ret = 0;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_disconnect);
+
+/*
+ * CM_ID <-- DESTROYING
+ *
+ * Clean up all resources associated with the connection and release
+ * the initial reference taken by iw_create_cm_id.
+ */
+static void destroy_cm_id(struct iw_cm_id *cm_id)
+{
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	/*
+	 * Wait if we're currently in a connect or accept downcall. A
+	 * listening endpoint should never block here.
+	 */
+	wait_event(cm_id_priv->connect_wait,
+		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_LISTEN:
+		cm_id_priv->state = IW_CM_STATE_DESTROYING;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		/* destroy the listening endpoint */
+		ret = cm_id->device->iwcm->destroy_listen(cm_id);
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		break;
+	case IW_CM_STATE_ESTABLISHED:
+		cm_id_priv->state = IW_CM_STATE_DESTROYING;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		/* Abrupt close of the connection */
+		(void)iwcm_modify_qp_err(cm_id_priv->qp);
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		break;
+	case IW_CM_STATE_IDLE:
+	case IW_CM_STATE_CLOSING:
+		cm_id_priv->state = IW_CM_STATE_DESTROYING;
+		break;
+	case IW_CM_STATE_CONN_RECV:
+		/*
+		 * App called destroy before/without calling accept after
+		 * receiving connection request event notification.
+		 */
+		cm_id_priv->state = IW_CM_STATE_DESTROYING;
+		break;
+	case IW_CM_STATE_CONN_SENT:
+	case IW_CM_STATE_DESTROYING:
+	default:
+		BUG();
+		break;
+	}
+	if (cm_id_priv->qp) {
+		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+		cm_id_priv->qp = NULL;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	(void)iwcm_deref_id(cm_id_priv);
+}
+
+/*
+ * This function is only called by the application thread and cannot
+ * be called by the event thread. The function will wait for all
+ * references to be released on the cm_id and then kfree the cm_id
+ * object.
+ */
+void iw_destroy_cm_id(struct iw_cm_id *cm_id)
+{
+	struct iwcm_id_private *cm_id_priv;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags));
+
+	destroy_cm_id(cm_id);
+
+	wait_for_completion(&cm_id_priv->destroy_comp);
+
+	dealloc_work_entries(cm_id_priv);
+
+	kfree(cm_id_priv);
+}
+EXPORT_SYMBOL(iw_destroy_cm_id);
+
+/*
+ * CM_ID <-- LISTEN
+ *
+ * Start listening for connect requests. Generates one CONNECT_REQUEST
+ * event for each inbound connect request.
+ */
+int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
+{
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret = 0;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+	ret = alloc_work_entries(cm_id_priv, backlog);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_IDLE:
+		cm_id_priv->state = IW_CM_STATE_LISTEN;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
+		if (ret)
+			cm_id_priv->state = IW_CM_STATE_IDLE;
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_listen);
+
+/*
+ * CM_ID <-- IDLE
+ *
+ * Rejects an inbound connection request. No events are generated.
+ */
+int iw_cm_reject(struct iw_cm_id *cm_id,
+		 const void *private_data,
+		 u8 private_data_len)
+{
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+		wake_up_all(&cm_id_priv->connect_wait);
+		return -EINVAL;
+	}
+	cm_id_priv->state = IW_CM_STATE_IDLE;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	ret = cm_id->device->iwcm->reject(cm_id, private_data,
+					  private_data_len);
+
+	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	wake_up_all(&cm_id_priv->connect_wait);
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_reject);
+
+/*
+ * CM_ID <-- ESTABLISHED
+ *
+ * Accepts an inbound connection request and generates an ESTABLISHED
+ * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block
+ * until the ESTABLISHED event is received from the provider.
+ */
+int iw_cm_accept(struct iw_cm_id *cm_id,
+		 struct iw_cm_conn_param *iw_param)
+{
+	struct iwcm_id_private *cm_id_priv;
+	struct ib_qp *qp;
+	unsigned long flags;
+	int ret;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+		wake_up_all(&cm_id_priv->connect_wait);
+		return -EINVAL;
+	}
+	/* Get the ib_qp given the QPN */
+	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
+	if (!qp) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		return -EINVAL;
+	}
+	cm_id->device->iwcm->add_ref(qp);
+	cm_id_priv->qp = qp;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	ret = cm_id->device->iwcm->accept(cm_id, iw_param);
+	if (ret) {
+		/* An error on accept precludes provider events */
+		BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
+		cm_id_priv->state = IW_CM_STATE_IDLE;
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		if (cm_id_priv->qp) {
+			cm_id->device->iwcm->rem_ref(qp);
+			cm_id_priv->qp = NULL;
+		}
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+		wake_up_all(&cm_id_priv->connect_wait);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_accept);
+
+/*
+ * Active Side: CM_ID <-- CONN_SENT
+ *
+ * If successful, results in the generation of a CONNECT_REPLY
+ * event. iw_cm_disconnect and iw_cm_destroy will block until the
+ * CONNECT_REPLY event is received from the provider.
+ */
+int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	struct iwcm_id_private *cm_id_priv;
+	int ret = 0;
+	unsigned long flags;
+	struct ib_qp *qp;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+	ret = alloc_work_entries(cm_id_priv, 4);
+	if (ret)
+		return ret;
+
+	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+	if (cm_id_priv->state != IW_CM_STATE_IDLE) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+		wake_up_all(&cm_id_priv->connect_wait);
+		return -EINVAL;
+	}
+
+	/* Get the ib_qp given the QPN */
+	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
+	if (!qp) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		return -EINVAL;
+	}
+	cm_id->device->iwcm->add_ref(qp);
+	cm_id_priv->qp = qp;
+	cm_id_priv->state = IW_CM_STATE_CONN_SENT;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	ret = cm_id->device->iwcm->connect(cm_id, iw_param);
+	if (ret) {
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		if (cm_id_priv->qp) {
+			cm_id->device->iwcm->rem_ref(qp);
+			cm_id_priv->qp = NULL;
+		}
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
+		cm_id_priv->state = IW_CM_STATE_IDLE;
+		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+		wake_up_all(&cm_id_priv->connect_wait);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_connect);
+
+/*
+ * Passive Side: new CM_ID <-- CONN_RECV
+ *
+ * Handles an inbound connect request. The function creates a new
+ * iw_cm_id to represent the new connection and inherits the client
+ * callback function and other attributes from the listening parent.
+ *
+ * The work item contains a pointer to the listen_cm_id and the event. The
+ * listen_cm_id contains the client cm_handler, context and
+ * device. These are copied when the device is cloned. The event
+ * contains the new four tuple.
+ *
+ * An error on the child should not affect the parent, so this
+ * function does not return a value.
+ */
+static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
+				struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+	struct iw_cm_id *cm_id;
+	struct iwcm_id_private *cm_id_priv;
+	int ret;
+
+	/*
+	 * The provider should never generate a connection request
+	 * event with a bad status.
+	 */
+	BUG_ON(iw_event->status);
+
+	/*
+	 * We could be destroying the listening id. If so, ignore this
+	 * upcall.
+	 */
+	spin_lock_irqsave(&listen_id_priv->lock, flags);
+	if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
+		spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+		return;
+	}
+	spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+
+	cm_id = iw_create_cm_id(listen_id_priv->id.device,
+				listen_id_priv->id.cm_handler,
+				listen_id_priv->id.context);
+	/* If the cm_id could not be created, ignore the request */
+	if (IS_ERR(cm_id))
+		return;
+
+	cm_id->provider_data = iw_event->provider_data;
+	cm_id->local_addr = iw_event->local_addr;
+	cm_id->remote_addr = iw_event->remote_addr;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	cm_id_priv->state = IW_CM_STATE_CONN_RECV;
+
+	ret = alloc_work_entries(cm_id_priv, 3);
+	if (ret) {
+		iw_cm_reject(cm_id, NULL, 0);
+		iw_destroy_cm_id(cm_id);
+		return;
+	}
+
+	/* Call the client CM handler */
+	ret = cm_id->cm_handler(cm_id, iw_event);
+	if (ret) {
+		set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+		destroy_cm_id(cm_id);
+		if (atomic_read(&cm_id_priv->refcount)==0)
+			kfree(cm_id);
+	}
+
+	if (iw_event->private_data_len)
+		kfree(iw_event->private_data);
+}
+
+/*
+ * Passive Side: CM_ID <-- ESTABLISHED
+ *
+ * The provider generated an ESTABLISHED event which means that
+ * the MPA negotion has completed successfully and we are now in MPA
+ * FPDU mode.
+ *
+ * This event can only be received in the CONN_RECV state. If the
+ * remote peer closed, the ESTABLISHED event would be received followed
+ * by the CLOSE event. If the app closes, it will block until we wake
+ * it up after processing this event.
+ */
+static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
+			       struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+	/*
+	 * We clear the CONNECT_WAIT bit here to allow the callback
+	 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
+	 * from a callback handler is not allowed.
+	 */
+	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
+	cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+	wake_up_all(&cm_id_priv->connect_wait);
+
+	return ret;
+}
+
+/*
+ * Active Side: CM_ID <-- ESTABLISHED
+ *
+ * The app has called connect and is waiting for the established event to
+ * post it's requests to the server. This event will wake up anyone
+ * blocked in iw_cm_disconnect or iw_destroy_id.
+ */
+static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
+			       struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	/*
+	 * Clear the connect wait bit so a callback function calling
+	 * iw_cm_disconnect will not wait and deadlock this thread
+	 */
+	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
+	if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) {
+		cm_id_priv->id.local_addr = iw_event->local_addr;
+		cm_id_priv->id.remote_addr = iw_event->remote_addr;
+		cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
+	} else {
+		/* REJECTED or RESET */
+		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+		cm_id_priv->qp = NULL;
+		cm_id_priv->state = IW_CM_STATE_IDLE;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+
+	if (iw_event->private_data_len)
+		kfree(iw_event->private_data);
+
+	/* Wake up waiters on connect complete */
+	wake_up_all(&cm_id_priv->connect_wait);
+
+	return ret;
+}
+
+/*
+ * CM_ID <-- CLOSING
+ *
+ * If in the ESTABLISHED state, move to CLOSING.
+ */
+static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
+				  struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
+		cm_id_priv->state = IW_CM_STATE_CLOSING;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+}
+
+/*
+ * CM_ID <-- IDLE
+ *
+ * If in the ESTBLISHED or CLOSING states, the QP will have have been
+ * moved by the provider to the ERR state. Disassociate the CM_ID from
+ * the QP,  move to IDLE, and remove the 'connected' reference.
+ *
+ * If in some other state, the cm_id was destroyed asynchronously.
+ * This is the last reference that will result in waking up
+ * the app thread blocked in iw_destroy_cm_id.
+ */
+static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
+				  struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+	int ret = 0;
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+	if (cm_id_priv->qp) {
+		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+		cm_id_priv->qp = NULL;
+	}
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_ESTABLISHED:
+	case IW_CM_STATE_CLOSING:
+		cm_id_priv->state = IW_CM_STATE_IDLE;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		break;
+	case IW_CM_STATE_DESTROYING:
+		break;
+	default:
+		BUG();
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	return ret;
+}
+
+static int process_event(struct iwcm_id_private *cm_id_priv,
+			 struct iw_cm_event *iw_event)
+{
+	int ret = 0;
+
+	switch (iw_event->event) {
+	case IW_CM_EVENT_CONNECT_REQUEST:
+		cm_conn_req_handler(cm_id_priv, iw_event);
+		break;
+	case IW_CM_EVENT_CONNECT_REPLY:
+		ret = cm_conn_rep_handler(cm_id_priv, iw_event);
+		break;
+	case IW_CM_EVENT_ESTABLISHED:
+		ret = cm_conn_est_handler(cm_id_priv, iw_event);
+		break;
+	case IW_CM_EVENT_DISCONNECT:
+		cm_disconnect_handler(cm_id_priv, iw_event);
+		break;
+	case IW_CM_EVENT_CLOSE:
+		ret = cm_close_handler(cm_id_priv, iw_event);
+		break;
+	default:
+		BUG();
+	}
+
+	return ret;
+}
+
+/*
+ * Process events on the work_list for the cm_id. If the callback
+ * function requests that the cm_id be deleted, a flag is set in the
+ * cm_id flags to indicate that when the last reference is
+ * removed, the cm_id is to be destroyed. This is necessary to
+ * distinguish between an object that will be destroyed by the app
+ * thread asleep on the destroy_comp list vs. an object destroyed
+ * here synchronously when the last reference is removed.
+ */
+static void cm_work_handler(void *arg)
+{
+	struct iwcm_work *work = arg, lwork;
+	struct iwcm_id_private *cm_id_priv = work->cm_id;
+	unsigned long flags;
+	int empty;
+	int ret = 0;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	empty = list_empty(&cm_id_priv->work_list);
+	while (!empty) {
+		work = list_entry(cm_id_priv->work_list.next,
+				  struct iwcm_work, list);
+		list_del_init(&work->list);
+		empty = list_empty(&cm_id_priv->work_list);
+		lwork = *work;
+		put_work(work);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+		ret = process_event(cm_id_priv, &work->event);
+		if (ret) {
+			set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+			destroy_cm_id(&cm_id_priv->id);
+		}
+		BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
+		if (iwcm_deref_id(cm_id_priv))
+			return;
+
+		if (atomic_read(&cm_id_priv->refcount)==0 &&
+		    test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
+			dealloc_work_entries(cm_id_priv);
+			kfree(cm_id_priv);
+			return;
+		}
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+}
+
+/*
+ * This function is called on interrupt context. Schedule events on
+ * the iwcm_wq thread to allow callback functions to downcall into
+ * the CM and/or block.  Events are queued to a per-CM_ID
+ * work_list. If this is the first event on the work_list, the work
+ * element is also queued on the iwcm_wq thread.
+ *
+ * Each event holds a reference on the cm_id. Until the last posted
+ * event has been delivered and processed, the cm_id cannot be
+ * deleted.
+ *
+ * Returns:
+ * 	      0	- the event was handled.
+ *	-ENOMEM	- the event was not handled due to lack of resources.
+ */
+static int cm_event_handler(struct iw_cm_id *cm_id,
+			     struct iw_cm_event *iw_event)
+{
+	struct iwcm_work *work;
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret = 0;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	work = get_work(cm_id_priv);
+	if (!work) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	INIT_WORK(&work->work, cm_work_handler, work);
+	work->cm_id = cm_id_priv;
+	work->event = *iw_event;
+
+	if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST ||
+	     work->event.event == IW_CM_EVENT_CONNECT_REPLY) &&
+	    work->event.private_data_len) {
+		ret = copy_private_data(cm_id_priv, &work->event);
+		if (ret) {
+			put_work(work);
+			goto out;
+		}
+	}
+
+	atomic_inc(&cm_id_priv->refcount);
+	if (list_empty(&cm_id_priv->work_list)) {
+		list_add_tail(&work->list, &cm_id_priv->work_list);
+		queue_work(iwcm_wq, &work->work);
+	} else
+		list_add_tail(&work->list, &cm_id_priv->work_list);
+out:
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+
+static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
+				  struct ib_qp_attr *qp_attr,
+				  int *qp_attr_mask)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_IDLE:
+	case IW_CM_STATE_CONN_SENT:
+	case IW_CM_STATE_CONN_RECV:
+	case IW_CM_STATE_ESTABLISHED:
+		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
+		qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
+					   IB_ACCESS_REMOTE_WRITE|
+					   IB_ACCESS_REMOTE_READ;
+		ret = 0;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+
+static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
+				  struct ib_qp_attr *qp_attr,
+				  int *qp_attr_mask)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_IDLE:
+	case IW_CM_STATE_CONN_SENT:
+	case IW_CM_STATE_CONN_RECV:
+	case IW_CM_STATE_ESTABLISHED:
+		*qp_attr_mask = 0;
+		ret = 0;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+
+int iw_cm_init_qp_attr(struct iw_cm_id *cm_id,
+		       struct ib_qp_attr *qp_attr,
+		       int *qp_attr_mask)
+{
+	struct iwcm_id_private *cm_id_priv;
+	int ret;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	switch (qp_attr->qp_state) {
+	case IB_QPS_INIT:
+	case IB_QPS_RTR:
+		ret = iwcm_init_qp_init_attr(cm_id_priv,
+					     qp_attr, qp_attr_mask);
+		break;
+	case IB_QPS_RTS:
+		ret = iwcm_init_qp_rts_attr(cm_id_priv,
+					    qp_attr, qp_attr_mask);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_init_qp_attr);
+
+static int __init iw_cm_init(void)
+{
+	iwcm_wq = create_singlethread_workqueue("iw_cm_wq");
+	if (!iwcm_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void __exit iw_cm_cleanup(void)
+{
+	destroy_workqueue(iwcm_wq);
+}
+
+module_init(iw_cm_init);
+module_exit(iw_cm_cleanup);
diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h
new file mode 100644
index 00000000000..3f6cc82564c
--- /dev/null
+++ b/drivers/infiniband/core/iwcm.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef IWCM_H
+#define IWCM_H
+
+enum iw_cm_state {
+	IW_CM_STATE_IDLE,             /* unbound, inactive */
+	IW_CM_STATE_LISTEN,           /* listen waiting for connect */
+	IW_CM_STATE_CONN_RECV,        /* inbound waiting for user accept */
+	IW_CM_STATE_CONN_SENT,        /* outbound waiting for peer accept */
+	IW_CM_STATE_ESTABLISHED,      /* established */
+	IW_CM_STATE_CLOSING,	      /* disconnect */
+	IW_CM_STATE_DESTROYING        /* object being deleted */
+};
+
+struct iwcm_id_private {
+	struct iw_cm_id	id;
+	enum iw_cm_state state;
+	unsigned long flags;
+	struct ib_qp *qp;
+	struct completion destroy_comp;
+	wait_queue_head_t connect_wait;
+	struct list_head work_list;
+	spinlock_t lock;
+	atomic_t refcount;
+	struct list_head work_free_list;
+};
+
+#define IWCM_F_CALLBACK_DESTROY   1
+#define IWCM_F_CONNECT_WAIT       2
+
+#endif /* IWCM_H */
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 1c3cfbbe6a9..082f03c158f 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1246,8 +1246,8 @@ static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
 	int i;
 
 	for (i = 0; i < MAX_MGMT_OUI; i++)
-                /* Is there matching OUI for this vendor class ? */
-                if (!memcmp(vendor_class->oui[i], oui, 3))
+		/* Is there matching OUI for this vendor class ? */
+		if (!memcmp(vendor_class->oui[i], oui, 3))
 			return i;
 
 	return -1;
@@ -2237,7 +2237,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
 				 &mad_agent_priv->send_list, agent_list) {
 		if (mad_send_wr->status == IB_WC_SUCCESS) {
- 			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
+			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
 			mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
 		}
 	}
@@ -2528,10 +2528,10 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
 			}
 		}
 		sg_list.addr = dma_map_single(qp_info->port_priv->
-					      	device->dma_device,
+					        device->dma_device,
 					      &mad_priv->grh,
 					      sizeof *mad_priv -
-					      	sizeof mad_priv->header,
+					        sizeof mad_priv->header,
 					      DMA_FROM_DEVICE);
 		pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr);
 		recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
@@ -2606,7 +2606,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
 	struct ib_qp *qp;
 
 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
- 	if (!attr) {
+	if (!attr) {
 		printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
 		return -ENOMEM;
 	}
@@ -2876,7 +2876,10 @@ static void ib_mad_init_device(struct ib_device *device)
 {
 	int start, end, i;
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		start = 0;
 		end   = 0;
 	} else {
@@ -2923,7 +2926,7 @@ static void ib_mad_remove_device(struct ib_device *device)
 {
 	int i, num_ports, cur_port;
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		num_ports = 1;
 		cur_port = 0;
 	} else {
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index d147f3bad2c..1da9adbccae 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -39,7 +39,6 @@
 
 #include <linux/completion.h>
 #include <linux/pci.h>
-#include <linux/kthread.h>
 #include <linux/workqueue.h>
 #include <rdma/ib_mad.h>
 #include <rdma/ib_smi.h>
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index ebcd5b18177..1ef79d015a1 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -33,8 +33,6 @@
  * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $
  */
 
-#include <linux/dma-mapping.h>
-
 #include "mad_priv.h"
 #include "mad_rmpp.h"
 
@@ -60,6 +58,7 @@ struct mad_rmpp_recv {
 	int last_ack;
 	int seg_num;
 	int newwin;
+	int repwin;
 
 	__be64 tid;
 	u32 src_qp;
@@ -170,6 +169,32 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
 	return msg;
 }
 
+static void ack_ds_ack(struct ib_mad_agent_private *agent,
+		       struct ib_mad_recv_wc *recv_wc)
+{
+	struct ib_mad_send_buf *msg;
+	struct ib_rmpp_mad *rmpp_mad;
+	int ret;
+
+	msg = alloc_response_msg(&agent->agent, recv_wc);
+	if (IS_ERR(msg))
+		return;
+
+	rmpp_mad = msg->mad;
+	memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len);
+
+	rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
+	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
+	rmpp_mad->rmpp_hdr.seg_num = 0;
+	rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(1);
+
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret) {
+		ib_destroy_ah(msg->ah);
+		ib_free_send_mad(msg);
+	}
+}
+
 void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
 {
 	struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad;
@@ -271,6 +296,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent,
 	rmpp_recv->newwin = 1;
 	rmpp_recv->seg_num = 1;
 	rmpp_recv->last_ack = 0;
+	rmpp_recv->repwin = 1;
 
 	mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr;
 	rmpp_recv->tid = mad_hdr->tid;
@@ -365,7 +391,7 @@ static inline int window_size(struct ib_mad_agent_private *agent)
 static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list,
 						  int seg_num)
 {
-        struct ib_mad_recv_buf *seg_buf;
+	struct ib_mad_recv_buf *seg_buf;
 	int cur_seg_num;
 
 	list_for_each_entry_reverse(seg_buf, rmpp_list, list) {
@@ -591,6 +617,16 @@ static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr,
 			break;
 }
 
+static void process_ds_ack(struct ib_mad_agent_private *agent,
+			   struct ib_mad_recv_wc *mad_recv_wc, int newwin)
+{
+	struct mad_rmpp_recv *rmpp_recv;
+
+	rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
+	if (rmpp_recv && rmpp_recv->state == RMPP_STATE_COMPLETE)
+		rmpp_recv->repwin = newwin;
+}
+
 static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 			     struct ib_mad_recv_wc *mad_recv_wc)
 {
@@ -616,8 +652,18 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 
 	spin_lock_irqsave(&agent->lock, flags);
 	mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
-	if (!mad_send_wr)
-		goto out;	/* Unmatched ACK */
+	if (!mad_send_wr) {
+		if (!seg_num)
+			process_ds_ack(agent, mad_recv_wc, newwin);
+		goto out;	/* Unmatched or DS RMPP ACK */
+	}
+
+	if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) &&
+	    (mad_send_wr->timeout)) {
+		spin_unlock_irqrestore(&agent->lock, flags);
+		ack_ds_ack(agent, mad_recv_wc);
+		return;		/* Repeated ACK for DS RMPP transaction */
+	}
 
 	if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
 	    (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
@@ -656,6 +702,9 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 		if (mad_send_wr->refcount == 1)
 			ib_reset_mad_timeout(mad_send_wr,
 					     mad_send_wr->send_buf.timeout_ms);
+		spin_unlock_irqrestore(&agent->lock, flags);
+		ack_ds_ack(agent, mad_recv_wc);
+		return;
 	} else if (mad_send_wr->refcount == 1 &&
 		   mad_send_wr->seg_num < mad_send_wr->newwin &&
 		   mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
@@ -772,6 +821,39 @@ out:
 	return NULL;
 }
 
+static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
+{
+	struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv;
+	struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad;
+	struct mad_rmpp_recv *rmpp_recv;
+	struct ib_ah_attr ah_attr;
+	unsigned long flags;
+	int newwin = 1;
+
+	if (!(mad_hdr->method & IB_MGMT_METHOD_RESP))
+		goto out;
+
+	spin_lock_irqsave(&agent->lock, flags);
+	list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
+		if (rmpp_recv->tid != mad_hdr->tid ||
+		    rmpp_recv->mgmt_class != mad_hdr->mgmt_class ||
+		    rmpp_recv->class_version != mad_hdr->class_version ||
+		    (rmpp_recv->method & IB_MGMT_METHOD_RESP))
+			continue;
+
+		if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
+			continue;
+
+		if (rmpp_recv->slid == ah_attr.dlid) {
+			newwin = rmpp_recv->repwin;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&agent->lock, flags);
+out:
+	return newwin;
+}
+
 int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
 {
 	struct ib_rmpp_mad *rmpp_mad;
@@ -787,7 +869,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
 		return IB_RMPP_RESULT_INTERNAL;
 	}
 
-	mad_send_wr->newwin = 1;
+	mad_send_wr->newwin = init_newwin(mad_send_wr);
 
 	/* We need to wait for the final ACK even if there isn't a response */
 	mad_send_wr->refcount += (mad_send_wr->timeout == 0);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index d6b84226bba..1706d3c7e95 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -75,6 +76,7 @@ struct ib_sa_device {
 struct ib_sa_query {
 	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
 	void (*release)(struct ib_sa_query *);
+	struct ib_sa_client    *client;
 	struct ib_sa_port      *port;
 	struct ib_mad_send_buf *mad_buf;
 	struct ib_sa_sm_ah     *sm_ah;
@@ -415,6 +417,31 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
 	}
 }
 
+void ib_sa_register_client(struct ib_sa_client *client)
+{
+	atomic_set(&client->users, 1);
+	init_completion(&client->comp);
+}
+EXPORT_SYMBOL(ib_sa_register_client);
+
+static inline void ib_sa_client_get(struct ib_sa_client *client)
+{
+	atomic_inc(&client->users);
+}
+
+static inline void ib_sa_client_put(struct ib_sa_client *client)
+{
+	if (atomic_dec_and_test(&client->users))
+		complete(&client->comp);
+}
+
+void ib_sa_unregister_client(struct ib_sa_client *client)
+{
+	ib_sa_client_put(client);
+	wait_for_completion(&client->comp);
+}
+EXPORT_SYMBOL(ib_sa_unregister_client);
+
 /**
  * ib_sa_cancel_query - try to cancel an SA query
  * @id:ID of query to cancel
@@ -557,6 +584,7 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
 
 /**
  * ib_sa_path_rec_get - Start a Path get query
+ * @client:SA client
  * @device:device to send query on
  * @port_num: port number to send query on
  * @rec:Path Record to send in query
@@ -579,7 +607,8 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
  * error code.  Otherwise it is a query ID that can be used to cancel
  * the query.
  */
-int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
+int ib_sa_path_rec_get(struct ib_sa_client *client,
+		       struct ib_device *device, u8 port_num,
 		       struct ib_sa_path_rec *rec,
 		       ib_sa_comp_mask comp_mask,
 		       int timeout_ms, gfp_t gfp_mask,
@@ -614,8 +643,10 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
 		goto err1;
 	}
 
-	query->callback = callback;
-	query->context  = context;
+	ib_sa_client_get(client);
+	query->sa_query.client = client;
+	query->callback        = callback;
+	query->context         = context;
 
 	mad = query->sa_query.mad_buf->mad;
 	init_mad(mad, agent);
@@ -639,6 +670,7 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
 
 err2:
 	*sa_query = NULL;
+	ib_sa_client_put(query->sa_query.client);
 	ib_free_send_mad(query->sa_query.mad_buf);
 
 err1:
@@ -671,6 +703,7 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
 
 /**
  * ib_sa_service_rec_query - Start Service Record operation
+ * @client:SA client
  * @device:device to send request on
  * @port_num: port number to send request on
  * @method:SA method - should be get, set, or delete
@@ -695,7 +728,8 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
  * error code.  Otherwise it is a request ID that can be used to cancel
  * the query.
  */
-int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
+int ib_sa_service_rec_query(struct ib_sa_client *client,
+			    struct ib_device *device, u8 port_num, u8 method,
 			    struct ib_sa_service_rec *rec,
 			    ib_sa_comp_mask comp_mask,
 			    int timeout_ms, gfp_t gfp_mask,
@@ -735,8 +769,10 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
 		goto err1;
 	}
 
-	query->callback = callback;
-	query->context  = context;
+	ib_sa_client_get(client);
+	query->sa_query.client = client;
+	query->callback        = callback;
+	query->context         = context;
 
 	mad = query->sa_query.mad_buf->mad;
 	init_mad(mad, agent);
@@ -761,6 +797,7 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
 
 err2:
 	*sa_query = NULL;
+	ib_sa_client_put(query->sa_query.client);
 	ib_free_send_mad(query->sa_query.mad_buf);
 
 err1:
@@ -791,7 +828,8 @@ static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
 	kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
 }
 
-int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
+int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
+			     struct ib_device *device, u8 port_num,
 			     u8 method,
 			     struct ib_sa_mcmember_rec *rec,
 			     ib_sa_comp_mask comp_mask,
@@ -827,8 +865,10 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
 		goto err1;
 	}
 
-	query->callback = callback;
-	query->context  = context;
+	ib_sa_client_get(client);
+	query->sa_query.client = client;
+	query->callback        = callback;
+	query->context         = context;
 
 	mad = query->sa_query.mad_buf->mad;
 	init_mad(mad, agent);
@@ -853,6 +893,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
 
 err2:
 	*sa_query = NULL;
+	ib_sa_client_put(query->sa_query.client);
 	ib_free_send_mad(query->sa_query.mad_buf);
 
 err1:
@@ -887,8 +928,9 @@ static void send_handler(struct ib_mad_agent *agent,
 	idr_remove(&query_idr, query->id);
 	spin_unlock_irqrestore(&idr_lock, flags);
 
-        ib_free_send_mad(mad_send_wc->send_buf);
+	ib_free_send_mad(mad_send_wc->send_buf);
 	kref_put(&query->sm_ah->ref, free_sm_ah);
+	ib_sa_client_put(query->client);
 	query->release(query);
 }
 
@@ -919,7 +961,10 @@ static void ib_sa_add_one(struct ib_device *device)
 	struct ib_sa_device *sa_dev;
 	int s, e, i;
 
-	if (device->node_type == IB_NODE_SWITCH)
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH)
 		s = e = 0;
 	else {
 		s = 1;
diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c
index 35852e794e2..54b81e17ad5 100644
--- a/drivers/infiniband/core/smi.c
+++ b/drivers/infiniband/core/smi.c
@@ -64,7 +64,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
 
 		/* C14-9:2 */
 		if (hop_ptr && hop_ptr < hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;
 
 			/* smp->return_path set when received */
@@ -77,7 +77,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
 		if (hop_ptr == hop_cnt) {
 			/* smp->return_path set when received */
 			smp->hop_ptr++;
-			return (node_type == IB_NODE_SWITCH ||
+			return (node_type == RDMA_NODE_IB_SWITCH ||
 				smp->dr_dlid == IB_LID_PERMISSIVE);
 		}
 
@@ -95,7 +95,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
 
 		/* C14-13:2 */
 		if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;
 
 			smp->hop_ptr--;
@@ -107,7 +107,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
 		if (hop_ptr == 1) {
 			smp->hop_ptr--;
 			/* C14-13:3 -- SMPs destined for SM shouldn't be here */
-			return (node_type == IB_NODE_SWITCH ||
+			return (node_type == RDMA_NODE_IB_SWITCH ||
 				smp->dr_slid == IB_LID_PERMISSIVE);
 		}
 
@@ -142,7 +142,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
 
 		/* C14-9:2 -- intermediate hop */
 		if (hop_ptr && hop_ptr < hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;
 
 			smp->return_path[hop_ptr] = port_num;
@@ -156,7 +156,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
 				smp->return_path[hop_ptr] = port_num;
 			/* smp->hop_ptr updated when sending */
 
-			return (node_type == IB_NODE_SWITCH ||
+			return (node_type == RDMA_NODE_IB_SWITCH ||
 				smp->dr_dlid == IB_LID_PERMISSIVE);
 		}
 
@@ -175,7 +175,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
 
 		/* C14-13:2 */
 		if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;
 
 			/* smp->hop_ptr updated when sending */
@@ -190,7 +190,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
 				return 1;
 			}
 			/* smp->hop_ptr updated when sending */
-			return (node_type == IB_NODE_SWITCH);
+			return (node_type == RDMA_NODE_IB_SWITCH);
 		}
 
 		/* C14-13:4 -- hop_ptr = 0 -> give to SM */
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 21f9282c1b2..709323c14c5 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -68,7 +68,7 @@ struct port_table_attribute {
 	int			index;
 };
 
-static inline int ibdev_is_alive(const struct ib_device *dev) 
+static inline int ibdev_is_alive(const struct ib_device *dev)
 {
 	return dev->reg_state == IB_DEV_REGISTERED;
 }
@@ -589,10 +589,11 @@ static ssize_t show_node_type(struct class_device *cdev, char *buf)
 		return -ENODEV;
 
 	switch (dev->node_type) {
-	case IB_NODE_CA:     return sprintf(buf, "%d: CA\n", dev->node_type);
-	case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
-	case IB_NODE_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
-	default:             return sprintf(buf, "%d: <unknown>\n", dev->node_type);
+	case RDMA_NODE_IB_CA:	  return sprintf(buf, "%d: CA\n", dev->node_type);
+	case RDMA_NODE_RNIC:	  return sprintf(buf, "%d: RNIC\n", dev->node_type);
+	case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
+	case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
+	default:		  return sprintf(buf, "%d: <unknown>\n", dev->node_type);
 	}
 }
 
@@ -708,7 +709,7 @@ int ib_device_register_sysfs(struct ib_device *device)
 	if (ret)
 		goto err_put;
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		ret = add_port(device, 0);
 		if (ret)
 			goto err_put;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index c1c6fda9452..ad4f4d5c292 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -309,9 +309,9 @@ static int ib_ucm_event_process(struct ib_cm_event *evt,
 		info	      = evt->param.apr_rcvd.apr_info;
 		break;
 	case IB_CM_SIDR_REQ_RECEIVED:
-		uvt->resp.u.sidr_req_resp.pkey = 
+		uvt->resp.u.sidr_req_resp.pkey =
 					evt->param.sidr_req_rcvd.pkey;
-		uvt->resp.u.sidr_req_resp.port = 
+		uvt->resp.u.sidr_req_resp.port =
 					evt->param.sidr_req_rcvd.port;
 		uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
 		break;
@@ -1237,7 +1237,7 @@ static struct class ucm_class = {
 static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
 {
 	struct ib_ucm_device *dev;
-	
+
 	dev = container_of(class_dev, struct ib_ucm_device, class_dev);
 	return sprintf(buf, "%s\n", dev->ib_dev->name);
 }
@@ -1247,7 +1247,8 @@ static void ib_ucm_add_one(struct ib_device *device)
 {
 	struct ib_ucm_device *ucm_dev;
 
-	if (!device->alloc_ucontext)
+	if (!device->alloc_ucontext ||
+	    rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
 		return;
 
 	ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 1273f8807e8..807fbd6b841 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -1032,7 +1032,10 @@ static void ib_umad_add_one(struct ib_device *device)
 	struct ib_umad_device *umad_dev;
 	int s, e, i;
 
-	if (device->node_type == IB_NODE_SWITCH)
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH)
 		s = e = 0;
 	else {
 		s = 1;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 30923eb68ec..b72c7f69ca9 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -155,7 +155,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
 }
 
 static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
-					struct ib_ucontext *context)
+					struct ib_ucontext *context, int nested)
 {
 	struct ib_uobject *uobj;
 
@@ -163,7 +163,10 @@ static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
 	if (!uobj)
 		return NULL;
 
-	down_read(&uobj->mutex);
+	if (nested)
+		down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
+	else
+		down_read(&uobj->mutex);
 	if (!uobj->live) {
 		put_uobj_read(uobj);
 		return NULL;
@@ -190,17 +193,18 @@ static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
 	return uobj;
 }
 
-static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context)
+static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
+			  int nested)
 {
 	struct ib_uobject *uobj;
 
-	uobj = idr_read_uobj(idr, id, context);
+	uobj = idr_read_uobj(idr, id, context, nested);
 	return uobj ? uobj->object : NULL;
 }
 
 static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context);
+	return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
 }
 
 static void put_pd_read(struct ib_pd *pd)
@@ -208,9 +212,9 @@ static void put_pd_read(struct ib_pd *pd)
 	put_uobj_read(pd->uobject);
 }
 
-static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context)
+static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
 {
-	return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context);
+	return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
 }
 
 static void put_cq_read(struct ib_cq *cq)
@@ -220,7 +224,7 @@ static void put_cq_read(struct ib_cq *cq)
 
 static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context);
+	return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
 }
 
 static void put_ah_read(struct ib_ah *ah)
@@ -230,7 +234,7 @@ static void put_ah_read(struct ib_ah *ah)
 
 static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context);
+	return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
 }
 
 static void put_qp_read(struct ib_qp *qp)
@@ -240,7 +244,7 @@ static void put_qp_read(struct ib_qp *qp)
 
 static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context);
+	return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
 }
 
 static void put_srq_read(struct ib_srq *srq)
@@ -837,7 +841,6 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
 err_copy:
 	idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
 
-
 err_free:
 	ib_destroy_cq(cq);
 
@@ -867,7 +870,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
 		   (unsigned long) cmd.response + sizeof resp,
 		   in_len - sizeof cmd, out_len - sizeof resp);
 
-	cq = idr_read_cq(cmd.cq_handle, file->ucontext);
+	cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
 	if (!cq)
 		return -EINVAL;
 
@@ -875,11 +878,10 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
 	if (ret)
 		goto out;
 
-	memset(&resp, 0, sizeof resp);
 	resp.cqe = cq->cqe;
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
-			 &resp, sizeof resp))
+			 &resp, sizeof resp.cqe))
 		ret = -EFAULT;
 
 out:
@@ -894,7 +896,6 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 {
 	struct ib_uverbs_poll_cq       cmd;
 	struct ib_uverbs_poll_cq_resp *resp;
-	struct ib_uobject	      *uobj;
 	struct ib_cq                  *cq;
 	struct ib_wc                  *wc;
 	int                            ret = 0;
@@ -915,16 +916,15 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 		goto out_wc;
 	}
 
-	uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
-	if (!uobj) {
+	cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+	if (!cq) {
 		ret = -EINVAL;
 		goto out;
 	}
-	cq = uobj->object;
 
 	resp->count = ib_poll_cq(cq, cmd.ne, wc);
 
-	put_uobj_read(uobj);
+	put_cq_read(cq);
 
 	for (i = 0; i < resp->count; i++) {
 		resp->wc[i].wr_id 	   = wc[i].wr_id;
@@ -959,21 +959,19 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
 				int out_len)
 {
 	struct ib_uverbs_req_notify_cq cmd;
-	struct ib_uobject	      *uobj;
 	struct ib_cq                  *cq;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
-	if (!uobj)
+	cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+	if (!cq)
 		return -EINVAL;
-	cq = uobj->object;
 
 	ib_req_notify_cq(cq, cmd.solicited_only ?
 			 IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
 
-	put_uobj_read(uobj);
+	put_cq_read(cq);
 
 	return in_len;
 }
@@ -1064,9 +1062,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 
 	srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
 	pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
-	scq = idr_read_cq(cmd.send_cq_handle, file->ucontext);
+	scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
 	rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
-		scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext);
+		scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
 
 	if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) {
 		ret = -EINVAL;
@@ -1274,6 +1272,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
 			    int out_len)
 {
 	struct ib_uverbs_modify_qp cmd;
+	struct ib_udata            udata;
 	struct ib_qp              *qp;
 	struct ib_qp_attr         *attr;
 	int                        ret;
@@ -1281,6 +1280,9 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
+	INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
+		   out_len);
+
 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
 	if (!attr)
 		return -ENOMEM;
@@ -1337,7 +1339,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
 	attr->alt_ah_attr.ah_flags 	    = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
 	attr->alt_ah_attr.port_num 	    = cmd.alt_dest.port_num;
 
-	ret = ib_modify_qp(qp, attr, cmd.attr_mask);
+	ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata);
 
 	put_qp_read(qp);
 
@@ -1674,7 +1676,6 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
 				break;
 		}
 
-
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
 		ret = -EFAULT;
@@ -1724,7 +1725,6 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
 				break;
 		}
 
-
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
 		ret = -EFAULT;
@@ -2055,6 +2055,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
 			     int out_len)
 {
 	struct ib_uverbs_modify_srq cmd;
+	struct ib_udata             udata;
 	struct ib_srq              *srq;
 	struct ib_srq_attr          attr;
 	int                         ret;
@@ -2062,6 +2063,9 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
+	INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
+		   out_len);
+
 	srq = idr_read_srq(cmd.srq_handle, file->ucontext);
 	if (!srq)
 		return -EINVAL;
@@ -2069,7 +2073,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
 	attr.max_wr    = cmd.max_wr;
 	attr.srq_limit = cmd.srq_limit;
 
-	ret = ib_modify_srq(srq, &attr, cmd.attr_mask);
+	ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
 
 	put_srq_read(srq);
 
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 468999c3880..8b5dd3649bb 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -79,6 +79,23 @@ enum ib_rate mult_to_ib_rate(int mult)
 }
 EXPORT_SYMBOL(mult_to_ib_rate);
 
+enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type)
+{
+	switch (node_type) {
+	case RDMA_NODE_IB_CA:
+	case RDMA_NODE_IB_SWITCH:
+	case RDMA_NODE_IB_ROUTER:
+		return RDMA_TRANSPORT_IB;
+	case RDMA_NODE_RNIC:
+		return RDMA_TRANSPORT_IWARP;
+	default:
+		BUG();
+		return 0;
+	}
+}
+EXPORT_SYMBOL(rdma_node_get_transport);
+
 /* Protection domains */
 
 struct ib_pd *ib_alloc_pd(struct ib_device *device)
@@ -231,7 +248,7 @@ int ib_modify_srq(struct ib_srq *srq,
 		  struct ib_srq_attr *srq_attr,
 		  enum ib_srq_attr_mask srq_attr_mask)
 {
-	return srq->device->modify_srq(srq, srq_attr, srq_attr_mask);
+	return srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_srq);
 
@@ -547,7 +564,7 @@ int ib_modify_qp(struct ib_qp *qp,
 		 struct ib_qp_attr *qp_attr,
 		 int qp_attr_mask)
 {
-	return qp->device->modify_qp(qp, qp_attr, qp_attr_mask);
+	return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_qp);
 
diff --git a/drivers/infiniband/hw/amso1100/Kbuild b/drivers/infiniband/hw/amso1100/Kbuild
new file mode 100644
index 00000000000..06964c4af84
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/Kbuild
@@ -0,0 +1,8 @@
+ifdef CONFIG_INFINIBAND_AMSO1100_DEBUG
+EXTRA_CFLAGS += -DDEBUG
+endif
+
+obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o
+
+iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \
+	c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o
diff --git a/drivers/infiniband/hw/amso1100/Kconfig b/drivers/infiniband/hw/amso1100/Kconfig
new file mode 100644
index 00000000000..809cb14ac6d
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/Kconfig
@@ -0,0 +1,15 @@
+config INFINIBAND_AMSO1100
+	tristate "Ammasso 1100 HCA support"
+	depends on PCI && INET && INFINIBAND
+	---help---
+	  This is a low-level driver for the Ammasso 1100 host
+	  channel adapter (HCA).
+
+config INFINIBAND_AMSO1100_DEBUG
+	bool "Verbose debugging output"
+	depends on INFINIBAND_AMSO1100
+	default n
+	---help---
+	  This option causes the amso1100 driver to produce a bunch of
+	  debug messages.  Select this if you are developing the driver
+	  or trying to diagnose a problem.
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
new file mode 100644
index 00000000000..9e9120f3601
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -0,0 +1,1255 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include <rdma/ib_smi.h>
+#include "c2.h"
+#include "c2_provider.h"
+
+MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
+MODULE_DESCRIPTION("Ammasso AMSO1100 Low-level iWARP Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
+    | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
+
+static int debug = -1;		/* defaults above */
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+static int c2_up(struct net_device *netdev);
+static int c2_down(struct net_device *netdev);
+static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+static void c2_tx_interrupt(struct net_device *netdev);
+static void c2_rx_interrupt(struct net_device *netdev);
+static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+static void c2_tx_timeout(struct net_device *netdev);
+static int c2_change_mtu(struct net_device *netdev, int new_mtu);
+static void c2_reset(struct c2_port *c2_port);
+static struct net_device_stats *c2_get_stats(struct net_device *netdev);
+
+static struct pci_device_id c2_pci_table[] = {
+	{ PCI_DEVICE(0x18b8, 0xb001) },
+	{ 0 }
+};
+
+MODULE_DEVICE_TABLE(pci, c2_pci_table);
+
+static void c2_print_macaddr(struct net_device *netdev)
+{
+	pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, "
+		"IRQ %u\n", netdev->name,
+		netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
+		netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5],
+		netdev->irq);
+}
+
+static void c2_set_rxbufsize(struct c2_port *c2_port)
+{
+	struct net_device *netdev = c2_port->netdev;
+
+	if (netdev->mtu > RX_BUF_SIZE)
+		c2_port->rx_buf_size =
+		    netdev->mtu + ETH_HLEN + sizeof(struct c2_rxp_hdr) +
+		    NET_IP_ALIGN;
+	else
+		c2_port->rx_buf_size = sizeof(struct c2_rxp_hdr) + RX_BUF_SIZE;
+}
+
+/*
+ * Allocate TX ring elements and chain them together.
+ * One-to-one association of adapter descriptors with ring elements.
+ */
+static int c2_tx_ring_alloc(struct c2_ring *tx_ring, void *vaddr,
+			    dma_addr_t base, void __iomem * mmio_txp_ring)
+{
+	struct c2_tx_desc *tx_desc;
+	struct c2_txp_desc __iomem *txp_desc;
+	struct c2_element *elem;
+	int i;
+
+	tx_ring->start = kmalloc(sizeof(*elem) * tx_ring->count, GFP_KERNEL);
+	if (!tx_ring->start)
+		return -ENOMEM;
+
+	elem = tx_ring->start;
+	tx_desc = vaddr;
+	txp_desc = mmio_txp_ring;
+	for (i = 0; i < tx_ring->count; i++, elem++, tx_desc++, txp_desc++) {
+		tx_desc->len = 0;
+		tx_desc->status = 0;
+
+		/* Set TXP_HTXD_UNINIT */
+		__raw_writeq(cpu_to_be64(0x1122334455667788ULL),
+			     (void __iomem *) txp_desc + C2_TXP_ADDR);
+		__raw_writew(0, (void __iomem *) txp_desc + C2_TXP_LEN);
+		__raw_writew(cpu_to_be16(TXP_HTXD_UNINIT),
+			     (void __iomem *) txp_desc + C2_TXP_FLAGS);
+
+		elem->skb = NULL;
+		elem->ht_desc = tx_desc;
+		elem->hw_desc = txp_desc;
+
+		if (i == tx_ring->count - 1) {
+			elem->next = tx_ring->start;
+			tx_desc->next_offset = base;
+		} else {
+			elem->next = elem + 1;
+			tx_desc->next_offset =
+			    base + (i + 1) * sizeof(*tx_desc);
+		}
+	}
+
+	tx_ring->to_use = tx_ring->to_clean = tx_ring->start;
+
+	return 0;
+}
+
+/*
+ * Allocate RX ring elements and chain them together.
+ * One-to-one association of adapter descriptors with ring elements.
+ */
+static int c2_rx_ring_alloc(struct c2_ring *rx_ring, void *vaddr,
+			    dma_addr_t base, void __iomem * mmio_rxp_ring)
+{
+	struct c2_rx_desc *rx_desc;
+	struct c2_rxp_desc __iomem *rxp_desc;
+	struct c2_element *elem;
+	int i;
+
+	rx_ring->start = kmalloc(sizeof(*elem) * rx_ring->count, GFP_KERNEL);
+	if (!rx_ring->start)
+		return -ENOMEM;
+
+	elem = rx_ring->start;
+	rx_desc = vaddr;
+	rxp_desc = mmio_rxp_ring;
+	for (i = 0; i < rx_ring->count; i++, elem++, rx_desc++, rxp_desc++) {
+		rx_desc->len = 0;
+		rx_desc->status = 0;
+
+		/* Set RXP_HRXD_UNINIT */
+		__raw_writew(cpu_to_be16(RXP_HRXD_OK),
+		       (void __iomem *) rxp_desc + C2_RXP_STATUS);
+		__raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_COUNT);
+		__raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_LEN);
+		__raw_writeq(cpu_to_be64(0x99aabbccddeeffULL),
+			     (void __iomem *) rxp_desc + C2_RXP_ADDR);
+		__raw_writew(cpu_to_be16(RXP_HRXD_UNINIT),
+			     (void __iomem *) rxp_desc + C2_RXP_FLAGS);
+
+		elem->skb = NULL;
+		elem->ht_desc = rx_desc;
+		elem->hw_desc = rxp_desc;
+
+		if (i == rx_ring->count - 1) {
+			elem->next = rx_ring->start;
+			rx_desc->next_offset = base;
+		} else {
+			elem->next = elem + 1;
+			rx_desc->next_offset =
+			    base + (i + 1) * sizeof(*rx_desc);
+		}
+	}
+
+	rx_ring->to_use = rx_ring->to_clean = rx_ring->start;
+
+	return 0;
+}
+
+/* Setup buffer for receiving */
+static inline int c2_rx_alloc(struct c2_port *c2_port, struct c2_element *elem)
+{
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_rx_desc *rx_desc = elem->ht_desc;
+	struct sk_buff *skb;
+	dma_addr_t mapaddr;
+	u32 maplen;
+	struct c2_rxp_hdr *rxp_hdr;
+
+	skb = dev_alloc_skb(c2_port->rx_buf_size);
+	if (unlikely(!skb)) {
+		pr_debug("%s: out of memory for receive\n",
+			c2_port->netdev->name);
+		return -ENOMEM;
+	}
+
+	/* Zero out the rxp hdr in the sk_buff */
+	memset(skb->data, 0, sizeof(*rxp_hdr));
+
+	skb->dev = c2_port->netdev;
+
+	maplen = c2_port->rx_buf_size;
+	mapaddr =
+	    pci_map_single(c2dev->pcidev, skb->data, maplen,
+			   PCI_DMA_FROMDEVICE);
+
+	/* Set the sk_buff RXP_header to RXP_HRXD_READY */
+	rxp_hdr = (struct c2_rxp_hdr *) skb->data;
+	rxp_hdr->flags = RXP_HRXD_READY;
+
+	__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+	__raw_writew(cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)),
+		     elem->hw_desc + C2_RXP_LEN);
+	__raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR);
+	__raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS);
+
+	elem->skb = skb;
+	elem->mapaddr = mapaddr;
+	elem->maplen = maplen;
+	rx_desc->len = maplen;
+
+	return 0;
+}
+
+/*
+ * Allocate buffers for the Rx ring
+ * For receive:  rx_ring.to_clean is next received frame
+ */
+static int c2_rx_fill(struct c2_port *c2_port)
+{
+	struct c2_ring *rx_ring = &c2_port->rx_ring;
+	struct c2_element *elem;
+	int ret = 0;
+
+	elem = rx_ring->start;
+	do {
+		if (c2_rx_alloc(c2_port, elem)) {
+			ret = 1;
+			break;
+		}
+	} while ((elem = elem->next) != rx_ring->start);
+
+	rx_ring->to_clean = rx_ring->start;
+	return ret;
+}
+
+/* Free all buffers in RX ring, assumes receiver stopped */
+static void c2_rx_clean(struct c2_port *c2_port)
+{
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_ring *rx_ring = &c2_port->rx_ring;
+	struct c2_element *elem;
+	struct c2_rx_desc *rx_desc;
+
+	elem = rx_ring->start;
+	do {
+		rx_desc = elem->ht_desc;
+		rx_desc->len = 0;
+
+		__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+		__raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
+		__raw_writew(0, elem->hw_desc + C2_RXP_LEN);
+		__raw_writeq(cpu_to_be64(0x99aabbccddeeffULL),
+			     elem->hw_desc + C2_RXP_ADDR);
+		__raw_writew(cpu_to_be16(RXP_HRXD_UNINIT),
+			     elem->hw_desc + C2_RXP_FLAGS);
+
+		if (elem->skb) {
+			pci_unmap_single(c2dev->pcidev, elem->mapaddr,
+					 elem->maplen, PCI_DMA_FROMDEVICE);
+			dev_kfree_skb(elem->skb);
+			elem->skb = NULL;
+		}
+	} while ((elem = elem->next) != rx_ring->start);
+}
+
+static inline int c2_tx_free(struct c2_dev *c2dev, struct c2_element *elem)
+{
+	struct c2_tx_desc *tx_desc = elem->ht_desc;
+
+	tx_desc->len = 0;
+
+	pci_unmap_single(c2dev->pcidev, elem->mapaddr, elem->maplen,
+			 PCI_DMA_TODEVICE);
+
+	if (elem->skb) {
+		dev_kfree_skb_any(elem->skb);
+		elem->skb = NULL;
+	}
+
+	return 0;
+}
+
+/* Free all buffers in TX ring, assumes transmitter stopped */
+static void c2_tx_clean(struct c2_port *c2_port)
+{
+	struct c2_ring *tx_ring = &c2_port->tx_ring;
+	struct c2_element *elem;
+	struct c2_txp_desc txp_htxd;
+	int retry;
+	unsigned long flags;
+
+	spin_lock_irqsave(&c2_port->tx_lock, flags);
+
+	elem = tx_ring->start;
+
+	do {
+		retry = 0;
+		do {
+			txp_htxd.flags =
+			    readw(elem->hw_desc + C2_TXP_FLAGS);
+
+			if (txp_htxd.flags == TXP_HTXD_READY) {
+				retry = 1;
+				__raw_writew(0,
+					     elem->hw_desc + C2_TXP_LEN);
+				__raw_writeq(0,
+					     elem->hw_desc + C2_TXP_ADDR);
+				__raw_writew(cpu_to_be16(TXP_HTXD_DONE),
+					     elem->hw_desc + C2_TXP_FLAGS);
+				c2_port->netstats.tx_dropped++;
+				break;
+			} else {
+				__raw_writew(0,
+					     elem->hw_desc + C2_TXP_LEN);
+				__raw_writeq(cpu_to_be64(0x1122334455667788ULL),
+					     elem->hw_desc + C2_TXP_ADDR);
+				__raw_writew(cpu_to_be16(TXP_HTXD_UNINIT),
+					     elem->hw_desc + C2_TXP_FLAGS);
+			}
+
+			c2_tx_free(c2_port->c2dev, elem);
+
+		} while ((elem = elem->next) != tx_ring->start);
+	} while (retry);
+
+	c2_port->tx_avail = c2_port->tx_ring.count - 1;
+	c2_port->c2dev->cur_tx = tx_ring->to_use - tx_ring->start;
+
+	if (c2_port->tx_avail > MAX_SKB_FRAGS + 1)
+		netif_wake_queue(c2_port->netdev);
+
+	spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+}
+
+/*
+ * Process transmit descriptors marked 'DONE' by the firmware,
+ * freeing up their unneeded sk_buffs.
+ */
+static void c2_tx_interrupt(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_ring *tx_ring = &c2_port->tx_ring;
+	struct c2_element *elem;
+	struct c2_txp_desc txp_htxd;
+
+	spin_lock(&c2_port->tx_lock);
+
+	for (elem = tx_ring->to_clean; elem != tx_ring->to_use;
+	     elem = elem->next) {
+		txp_htxd.flags =
+		    be16_to_cpu(readw(elem->hw_desc + C2_TXP_FLAGS));
+
+		if (txp_htxd.flags != TXP_HTXD_DONE)
+			break;
+
+		if (netif_msg_tx_done(c2_port)) {
+			/* PCI reads are expensive in fast path */
+			txp_htxd.len =
+			    be16_to_cpu(readw(elem->hw_desc + C2_TXP_LEN));
+			pr_debug("%s: tx done slot %3Zu status 0x%x len "
+				"%5u bytes\n",
+				netdev->name, elem - tx_ring->start,
+				txp_htxd.flags, txp_htxd.len);
+		}
+
+		c2_tx_free(c2dev, elem);
+		++(c2_port->tx_avail);
+	}
+
+	tx_ring->to_clean = elem;
+
+	if (netif_queue_stopped(netdev)
+	    && c2_port->tx_avail > MAX_SKB_FRAGS + 1)
+		netif_wake_queue(netdev);
+
+	spin_unlock(&c2_port->tx_lock);
+}
+
+static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
+{
+	struct c2_rx_desc *rx_desc = elem->ht_desc;
+	struct c2_rxp_hdr *rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
+
+	if (rxp_hdr->status != RXP_HRXD_OK ||
+	    rxp_hdr->len > (rx_desc->len - sizeof(*rxp_hdr))) {
+		pr_debug("BAD RXP_HRXD\n");
+		pr_debug("  rx_desc : %p\n", rx_desc);
+		pr_debug("    index : %Zu\n",
+			elem - c2_port->rx_ring.start);
+		pr_debug("    len   : %u\n", rx_desc->len);
+		pr_debug("  rxp_hdr : %p [PA %p]\n", rxp_hdr,
+			(void *) __pa((unsigned long) rxp_hdr));
+		pr_debug("    flags : 0x%x\n", rxp_hdr->flags);
+		pr_debug("    status: 0x%x\n", rxp_hdr->status);
+		pr_debug("    len   : %u\n", rxp_hdr->len);
+		pr_debug("    rsvd  : 0x%x\n", rxp_hdr->rsvd);
+	}
+
+	/* Setup the skb for reuse since we're dropping this pkt */
+	elem->skb->tail = elem->skb->data = elem->skb->head;
+
+	/* Zero out the rxp hdr in the sk_buff */
+	memset(elem->skb->data, 0, sizeof(*rxp_hdr));
+
+	/* Write the descriptor to the adapter's rx ring */
+	__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+	__raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
+	__raw_writew(cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)),
+		     elem->hw_desc + C2_RXP_LEN);
+	__raw_writeq(cpu_to_be64(elem->mapaddr), elem->hw_desc + C2_RXP_ADDR);
+	__raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS);
+
+	pr_debug("packet dropped\n");
+	c2_port->netstats.rx_dropped++;
+}
+
+static void c2_rx_interrupt(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_ring *rx_ring = &c2_port->rx_ring;
+	struct c2_element *elem;
+	struct c2_rx_desc *rx_desc;
+	struct c2_rxp_hdr *rxp_hdr;
+	struct sk_buff *skb;
+	dma_addr_t mapaddr;
+	u32 maplen, buflen;
+	unsigned long flags;
+
+	spin_lock_irqsave(&c2dev->lock, flags);
+
+	/* Begin where we left off */
+	rx_ring->to_clean = rx_ring->start + c2dev->cur_rx;
+
+	for (elem = rx_ring->to_clean; elem->next != rx_ring->to_clean;
+	     elem = elem->next) {
+		rx_desc = elem->ht_desc;
+		mapaddr = elem->mapaddr;
+		maplen = elem->maplen;
+		skb = elem->skb;
+		rxp_hdr = (struct c2_rxp_hdr *) skb->data;
+
+		if (rxp_hdr->flags != RXP_HRXD_DONE)
+			break;
+		buflen = rxp_hdr->len;
+
+		/* Sanity check the RXP header */
+		if (rxp_hdr->status != RXP_HRXD_OK ||
+		    buflen > (rx_desc->len - sizeof(*rxp_hdr))) {
+			c2_rx_error(c2_port, elem);
+			continue;
+		}
+
+		/*
+		 * Allocate and map a new skb for replenishing the host
+		 * RX desc
+		 */
+		if (c2_rx_alloc(c2_port, elem)) {
+			c2_rx_error(c2_port, elem);
+			continue;
+		}
+
+		/* Unmap the old skb */
+		pci_unmap_single(c2dev->pcidev, mapaddr, maplen,
+				 PCI_DMA_FROMDEVICE);
+
+		prefetch(skb->data);
+
+		/*
+		 * Skip past the leading 8 bytes comprising of the
+		 * "struct c2_rxp_hdr", prepended by the adapter
+		 * to the usual Ethernet header ("struct ethhdr"),
+		 * to the start of the raw Ethernet packet.
+		 *
+		 * Fix up the various fields in the sk_buff before
+		 * passing it up to netif_rx(). The transfer size
+		 * (in bytes) specified by the adapter len field of
+		 * the "struct rxp_hdr_t" does NOT include the
+		 * "sizeof(struct c2_rxp_hdr)".
+		 */
+		skb->data += sizeof(*rxp_hdr);
+		skb->tail = skb->data + buflen;
+		skb->len = buflen;
+		skb->dev = netdev;
+		skb->protocol = eth_type_trans(skb, netdev);
+
+		netif_rx(skb);
+
+		netdev->last_rx = jiffies;
+		c2_port->netstats.rx_packets++;
+		c2_port->netstats.rx_bytes += buflen;
+	}
+
+	/* Save where we left off */
+	rx_ring->to_clean = elem;
+	c2dev->cur_rx = elem - rx_ring->start;
+	C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
+
+	spin_unlock_irqrestore(&c2dev->lock, flags);
+}
+
+/*
+ * Handle netisr0 TX & RX interrupts.
+ */
+static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	unsigned int netisr0, dmaisr;
+	int handled = 0;
+	struct c2_dev *c2dev = (struct c2_dev *) dev_id;
+
+	/* Process CCILNET interrupts */
+	netisr0 = readl(c2dev->regs + C2_NISR0);
+	if (netisr0) {
+
+		/*
+		 * There is an issue with the firmware that always
+		 * provides the status of RX for both TX & RX
+		 * interrupts.  So process both queues here.
+		 */
+		c2_rx_interrupt(c2dev->netdev);
+		c2_tx_interrupt(c2dev->netdev);
+
+		/* Clear the interrupt */
+		writel(netisr0, c2dev->regs + C2_NISR0);
+		handled++;
+	}
+
+	/* Process RNIC interrupts */
+	dmaisr = readl(c2dev->regs + C2_DISR);
+	if (dmaisr) {
+		writel(dmaisr, c2dev->regs + C2_DISR);
+		c2_rnic_interrupt(c2dev);
+		handled++;
+	}
+
+	if (handled) {
+		return IRQ_HANDLED;
+	} else {
+		return IRQ_NONE;
+	}
+}
+
+static int c2_up(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_element *elem;
+	struct c2_rxp_hdr *rxp_hdr;
+	struct in_device *in_dev;
+	size_t rx_size, tx_size;
+	int ret, i;
+	unsigned int netimr0;
+
+	if (netif_msg_ifup(c2_port))
+		pr_debug("%s: enabling interface\n", netdev->name);
+
+	/* Set the Rx buffer size based on MTU */
+	c2_set_rxbufsize(c2_port);
+
+	/* Allocate DMA'able memory for Tx/Rx host descriptor rings */
+	rx_size = c2_port->rx_ring.count * sizeof(struct c2_rx_desc);
+	tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc);
+
+	c2_port->mem_size = tx_size + rx_size;
+	c2_port->mem = pci_alloc_consistent(c2dev->pcidev, c2_port->mem_size,
+					    &c2_port->dma);
+	if (c2_port->mem == NULL) {
+		pr_debug("Unable to allocate memory for "
+			"host descriptor rings\n");
+		return -ENOMEM;
+	}
+
+	memset(c2_port->mem, 0, c2_port->mem_size);
+
+	/* Create the Rx host descriptor ring */
+	if ((ret =
+	     c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma,
+			      c2dev->mmio_rxp_ring))) {
+		pr_debug("Unable to create RX ring\n");
+		goto bail0;
+	}
+
+	/* Allocate Rx buffers for the host descriptor ring */
+	if (c2_rx_fill(c2_port)) {
+		pr_debug("Unable to fill RX ring\n");
+		goto bail1;
+	}
+
+	/* Create the Tx host descriptor ring */
+	if ((ret = c2_tx_ring_alloc(&c2_port->tx_ring, c2_port->mem + rx_size,
+				    c2_port->dma + rx_size,
+				    c2dev->mmio_txp_ring))) {
+		pr_debug("Unable to create TX ring\n");
+		goto bail1;
+	}
+
+	/* Set the TX pointer to where we left off */
+	c2_port->tx_avail = c2_port->tx_ring.count - 1;
+	c2_port->tx_ring.to_use = c2_port->tx_ring.to_clean =
+	    c2_port->tx_ring.start + c2dev->cur_tx;
+
+	/* missing: Initialize MAC */
+
+	BUG_ON(c2_port->tx_ring.to_use != c2_port->tx_ring.to_clean);
+
+	/* Reset the adapter, ensures the driver is in sync with the RXP */
+	c2_reset(c2_port);
+
+	/* Reset the READY bit in the sk_buff RXP headers & adapter HRXDQ */
+	for (i = 0, elem = c2_port->rx_ring.start; i < c2_port->rx_ring.count;
+	     i++, elem++) {
+		rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
+		rxp_hdr->flags = 0;
+		__raw_writew(cpu_to_be16(RXP_HRXD_READY),
+			     elem->hw_desc + C2_RXP_FLAGS);
+	}
+
+	/* Enable network packets */
+	netif_start_queue(netdev);
+
+	/* Enable IRQ */
+	writel(0, c2dev->regs + C2_IDIS);
+	netimr0 = readl(c2dev->regs + C2_NIMR0);
+	netimr0 &= ~(C2_PCI_HTX_INT | C2_PCI_HRX_INT);
+	writel(netimr0, c2dev->regs + C2_NIMR0);
+
+	/* Tell the stack to ignore arp requests for ipaddrs bound to
+	 * other interfaces.  This is needed to prevent the host stack
+	 * from responding to arp requests to the ipaddr bound on the
+	 * rdma interface.
+	 */
+	in_dev = in_dev_get(netdev);
+	in_dev->cnf.arp_ignore = 1;
+	in_dev_put(in_dev);
+
+	return 0;
+
+      bail1:
+	c2_rx_clean(c2_port);
+	kfree(c2_port->rx_ring.start);
+
+      bail0:
+	pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
+			    c2_port->dma);
+
+	return ret;
+}
+
+static int c2_down(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+
+	if (netif_msg_ifdown(c2_port))
+		pr_debug("%s: disabling interface\n",
+			netdev->name);
+
+	/* Wait for all the queued packets to get sent */
+	c2_tx_interrupt(netdev);
+
+	/* Disable network packets */
+	netif_stop_queue(netdev);
+
+	/* Disable IRQs by clearing the interrupt mask */
+	writel(1, c2dev->regs + C2_IDIS);
+	writel(0, c2dev->regs + C2_NIMR0);
+
+	/* missing: Stop transmitter */
+
+	/* missing: Stop receiver */
+
+	/* Reset the adapter, ensures the driver is in sync with the RXP */
+	c2_reset(c2_port);
+
+	/* missing: Turn off LEDs here */
+
+	/* Free all buffers in the host descriptor rings */
+	c2_tx_clean(c2_port);
+	c2_rx_clean(c2_port);
+
+	/* Free the host descriptor rings */
+	kfree(c2_port->rx_ring.start);
+	kfree(c2_port->tx_ring.start);
+	pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
+			    c2_port->dma);
+
+	return 0;
+}
+
+static void c2_reset(struct c2_port *c2_port)
+{
+	struct c2_dev *c2dev = c2_port->c2dev;
+	unsigned int cur_rx = c2dev->cur_rx;
+
+	/* Tell the hardware to quiesce */
+	C2_SET_CUR_RX(c2dev, cur_rx | C2_PCI_HRX_QUI);
+
+	/*
+	 * The hardware will reset the C2_PCI_HRX_QUI bit once
+	 * the RXP is quiesced.  Wait 2 seconds for this.
+	 */
+	ssleep(2);
+
+	cur_rx = C2_GET_CUR_RX(c2dev);
+
+	if (cur_rx & C2_PCI_HRX_QUI)
+		pr_debug("c2_reset: failed to quiesce the hardware!\n");
+
+	cur_rx &= ~C2_PCI_HRX_QUI;
+
+	c2dev->cur_rx = cur_rx;
+
+	pr_debug("Current RX: %u\n", c2dev->cur_rx);
+}
+
+static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_ring *tx_ring = &c2_port->tx_ring;
+	struct c2_element *elem;
+	dma_addr_t mapaddr;
+	u32 maplen;
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&c2_port->tx_lock, flags);
+
+	if (unlikely(c2_port->tx_avail < (skb_shinfo(skb)->nr_frags + 1))) {
+		netif_stop_queue(netdev);
+		spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+
+		pr_debug("%s: Tx ring full when queue awake!\n",
+			netdev->name);
+		return NETDEV_TX_BUSY;
+	}
+
+	maplen = skb_headlen(skb);
+	mapaddr =
+	    pci_map_single(c2dev->pcidev, skb->data, maplen, PCI_DMA_TODEVICE);
+
+	elem = tx_ring->to_use;
+	elem->skb = skb;
+	elem->mapaddr = mapaddr;
+	elem->maplen = maplen;
+
+	/* Tell HW to xmit */
+	__raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_TXP_ADDR);
+	__raw_writew(cpu_to_be16(maplen), elem->hw_desc + C2_TXP_LEN);
+	__raw_writew(cpu_to_be16(TXP_HTXD_READY), elem->hw_desc + C2_TXP_FLAGS);
+
+	c2_port->netstats.tx_packets++;
+	c2_port->netstats.tx_bytes += maplen;
+
+	/* Loop thru additional data fragments and queue them */
+	if (skb_shinfo(skb)->nr_frags) {
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			maplen = frag->size;
+			mapaddr =
+			    pci_map_page(c2dev->pcidev, frag->page,
+					 frag->page_offset, maplen,
+					 PCI_DMA_TODEVICE);
+
+			elem = elem->next;
+			elem->skb = NULL;
+			elem->mapaddr = mapaddr;
+			elem->maplen = maplen;
+
+			/* Tell HW to xmit */
+			__raw_writeq(cpu_to_be64(mapaddr),
+				     elem->hw_desc + C2_TXP_ADDR);
+			__raw_writew(cpu_to_be16(maplen),
+				     elem->hw_desc + C2_TXP_LEN);
+			__raw_writew(cpu_to_be16(TXP_HTXD_READY),
+				     elem->hw_desc + C2_TXP_FLAGS);
+
+			c2_port->netstats.tx_packets++;
+			c2_port->netstats.tx_bytes += maplen;
+		}
+	}
+
+	tx_ring->to_use = elem->next;
+	c2_port->tx_avail -= (skb_shinfo(skb)->nr_frags + 1);
+
+	if (c2_port->tx_avail <= MAX_SKB_FRAGS + 1) {
+		netif_stop_queue(netdev);
+		if (netif_msg_tx_queued(c2_port))
+			pr_debug("%s: transmit queue full\n",
+				netdev->name);
+	}
+
+	spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+
+	netdev->trans_start = jiffies;
+
+	return NETDEV_TX_OK;
+}
+
+static struct net_device_stats *c2_get_stats(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+
+	return &c2_port->netstats;
+}
+
+static void c2_tx_timeout(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+
+	if (netif_msg_timer(c2_port))
+		pr_debug("%s: tx timeout\n", netdev->name);
+
+	c2_tx_clean(c2_port);
+}
+
+static int c2_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	int ret = 0;
+
+	if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
+		return -EINVAL;
+
+	netdev->mtu = new_mtu;
+
+	if (netif_running(netdev)) {
+		c2_down(netdev);
+
+		c2_up(netdev);
+	}
+
+	return ret;
+}
+
+/* Initialize network device */
+static struct net_device *c2_devinit(struct c2_dev *c2dev,
+				     void __iomem * mmio_addr)
+{
+	struct c2_port *c2_port = NULL;
+	struct net_device *netdev = alloc_etherdev(sizeof(*c2_port));
+
+	if (!netdev) {
+		pr_debug("c2_port etherdev alloc failed");
+		return NULL;
+	}
+
+	SET_MODULE_OWNER(netdev);
+	SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
+
+	netdev->open = c2_up;
+	netdev->stop = c2_down;
+	netdev->hard_start_xmit = c2_xmit_frame;
+	netdev->get_stats = c2_get_stats;
+	netdev->tx_timeout = c2_tx_timeout;
+	netdev->change_mtu = c2_change_mtu;
+	netdev->watchdog_timeo = C2_TX_TIMEOUT;
+	netdev->irq = c2dev->pcidev->irq;
+
+	c2_port = netdev_priv(netdev);
+	c2_port->netdev = netdev;
+	c2_port->c2dev = c2dev;
+	c2_port->msg_enable = netif_msg_init(debug, default_msg);
+	c2_port->tx_ring.count = C2_NUM_TX_DESC;
+	c2_port->rx_ring.count = C2_NUM_RX_DESC;
+
+	spin_lock_init(&c2_port->tx_lock);
+
+	/* Copy our 48-bit ethernet hardware address */
+	memcpy_fromio(netdev->dev_addr, mmio_addr + C2_REGS_ENADDR, 6);
+
+	/* Validate the MAC address */
+	if (!is_valid_ether_addr(netdev->dev_addr)) {
+		pr_debug("Invalid MAC Address\n");
+		c2_print_macaddr(netdev);
+		free_netdev(netdev);
+		return NULL;
+	}
+
+	c2dev->netdev = netdev;
+
+	return netdev;
+}
+
+static int __devinit c2_probe(struct pci_dev *pcidev,
+			      const struct pci_device_id *ent)
+{
+	int ret = 0, i;
+	unsigned long reg0_start, reg0_flags, reg0_len;
+	unsigned long reg2_start, reg2_flags, reg2_len;
+	unsigned long reg4_start, reg4_flags, reg4_len;
+	unsigned kva_map_size;
+	struct net_device *netdev = NULL;
+	struct c2_dev *c2dev = NULL;
+	void __iomem *mmio_regs = NULL;
+
+	printk(KERN_INFO PFX "AMSO1100 Gigabit Ethernet driver v%s loaded\n",
+		DRV_VERSION);
+
+	/* Enable PCI device */
+	ret = pci_enable_device(pcidev);
+	if (ret) {
+		printk(KERN_ERR PFX "%s: Unable to enable PCI device\n",
+			pci_name(pcidev));
+		goto bail0;
+	}
+
+	reg0_start = pci_resource_start(pcidev, BAR_0);
+	reg0_len = pci_resource_len(pcidev, BAR_0);
+	reg0_flags = pci_resource_flags(pcidev, BAR_0);
+
+	reg2_start = pci_resource_start(pcidev, BAR_2);
+	reg2_len = pci_resource_len(pcidev, BAR_2);
+	reg2_flags = pci_resource_flags(pcidev, BAR_2);
+
+	reg4_start = pci_resource_start(pcidev, BAR_4);
+	reg4_len = pci_resource_len(pcidev, BAR_4);
+	reg4_flags = pci_resource_flags(pcidev, BAR_4);
+
+	pr_debug("BAR0 size = 0x%lX bytes\n", reg0_len);
+	pr_debug("BAR2 size = 0x%lX bytes\n", reg2_len);
+	pr_debug("BAR4 size = 0x%lX bytes\n", reg4_len);
+
+	/* Make sure PCI base addr are MMIO */
+	if (!(reg0_flags & IORESOURCE_MEM) ||
+	    !(reg2_flags & IORESOURCE_MEM) || !(reg4_flags & IORESOURCE_MEM)) {
+		printk(KERN_ERR PFX "PCI regions not an MMIO resource\n");
+		ret = -ENODEV;
+		goto bail1;
+	}
+
+	/* Check for weird/broken PCI region reporting */
+	if ((reg0_len < C2_REG0_SIZE) ||
+	    (reg2_len < C2_REG2_SIZE) || (reg4_len < C2_REG4_SIZE)) {
+		printk(KERN_ERR PFX "Invalid PCI region sizes\n");
+		ret = -ENODEV;
+		goto bail1;
+	}
+
+	/* Reserve PCI I/O and memory resources */
+	ret = pci_request_regions(pcidev, DRV_NAME);
+	if (ret) {
+		printk(KERN_ERR PFX "%s: Unable to request regions\n",
+			pci_name(pcidev));
+		goto bail1;
+	}
+
+	if ((sizeof(dma_addr_t) > 4)) {
+		ret = pci_set_dma_mask(pcidev, DMA_64BIT_MASK);
+		if (ret < 0) {
+			printk(KERN_ERR PFX "64b DMA configuration failed\n");
+			goto bail2;
+		}
+	} else {
+		ret = pci_set_dma_mask(pcidev, DMA_32BIT_MASK);
+		if (ret < 0) {
+			printk(KERN_ERR PFX "32b DMA configuration failed\n");
+			goto bail2;
+		}
+	}
+
+	/* Enables bus-mastering on the device */
+	pci_set_master(pcidev);
+
+	/* Remap the adapter PCI registers in BAR4 */
+	mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
+				    sizeof(struct c2_adapter_pci_regs));
+	if (mmio_regs == 0UL) {
+		printk(KERN_ERR PFX
+			"Unable to remap adapter PCI registers in BAR4\n");
+		ret = -EIO;
+		goto bail2;
+	}
+
+	/* Validate PCI regs magic */
+	for (i = 0; i < sizeof(c2_magic); i++) {
+		if (c2_magic[i] != readb(mmio_regs + C2_REGS_MAGIC + i)) {
+			printk(KERN_ERR PFX "Downlevel Firmware boot loader "
+				"[%d/%Zd: got 0x%x, exp 0x%x]. Use the cc_flash "
+			       "utility to update your boot loader\n",
+				i + 1, sizeof(c2_magic),
+				readb(mmio_regs + C2_REGS_MAGIC + i),
+				c2_magic[i]);
+			printk(KERN_ERR PFX "Adapter not claimed\n");
+			iounmap(mmio_regs);
+			ret = -EIO;
+			goto bail2;
+		}
+	}
+
+	/* Validate the adapter version */
+	if (be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) {
+		printk(KERN_ERR PFX "Version mismatch "
+			"[fw=%u, c2=%u], Adapter not claimed\n",
+			be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)),
+			C2_VERSION);
+		ret = -EINVAL;
+		iounmap(mmio_regs);
+		goto bail2;
+	}
+
+	/* Validate the adapter IVN */
+	if (be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) {
+		printk(KERN_ERR PFX "Downlevel FIrmware level. You should be using "
+		       "the OpenIB device support kit. "
+		       "[fw=0x%x, c2=0x%x], Adapter not claimed\n",
+			be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)),
+			C2_IVN);
+		ret = -EINVAL;
+		iounmap(mmio_regs);
+		goto bail2;
+	}
+
+	/* Allocate hardware structure */
+	c2dev = (struct c2_dev *) ib_alloc_device(sizeof(*c2dev));
+	if (!c2dev) {
+		printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n",
+			pci_name(pcidev));
+		ret = -ENOMEM;
+		iounmap(mmio_regs);
+		goto bail2;
+	}
+
+	memset(c2dev, 0, sizeof(*c2dev));
+	spin_lock_init(&c2dev->lock);
+	c2dev->pcidev = pcidev;
+	c2dev->cur_tx = 0;
+
+	/* Get the last RX index */
+	c2dev->cur_rx =
+	    (be32_to_cpu(readl(mmio_regs + C2_REGS_HRX_CUR)) -
+	     0xffffc000) / sizeof(struct c2_rxp_desc);
+
+	/* Request an interrupt line for the driver */
+	ret = request_irq(pcidev->irq, c2_interrupt, SA_SHIRQ, DRV_NAME, c2dev);
+	if (ret) {
+		printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n",
+			pci_name(pcidev), pcidev->irq);
+		iounmap(mmio_regs);
+		goto bail3;
+	}
+
+	/* Set driver specific data */
+	pci_set_drvdata(pcidev, c2dev);
+
+	/* Initialize network device */
+	if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) {
+		iounmap(mmio_regs);
+		goto bail4;
+	}
+
+	/* Save off the actual size prior to unmapping mmio_regs */
+	kva_map_size = be32_to_cpu(readl(mmio_regs + C2_REGS_PCI_WINSIZE));
+
+	/* Unmap the adapter PCI registers in BAR4 */
+	iounmap(mmio_regs);
+
+	/* Register network device */
+	ret = register_netdev(netdev);
+	if (ret) {
+		printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n",
+			ret);
+		goto bail5;
+	}
+
+	/* Disable network packets */
+	netif_stop_queue(netdev);
+
+	/* Remap the adapter HRXDQ PA space to kernel VA space */
+	c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET,
+					       C2_RXP_HRXDQ_SIZE);
+	if (c2dev->mmio_rxp_ring == 0UL) {
+		printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n");
+		ret = -EIO;
+		goto bail6;
+	}
+
+	/* Remap the adapter HTXDQ PA space to kernel VA space */
+	c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET,
+					       C2_TXP_HTXDQ_SIZE);
+	if (c2dev->mmio_txp_ring == 0UL) {
+		printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n");
+		ret = -EIO;
+		goto bail7;
+	}
+
+	/* Save off the current RX index in the last 4 bytes of the TXP Ring */
+	C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
+
+	/* Remap the PCI registers in adapter BAR0 to kernel VA space */
+	c2dev->regs = ioremap_nocache(reg0_start, reg0_len);
+	if (c2dev->regs == 0UL) {
+		printk(KERN_ERR PFX "Unable to remap BAR0\n");
+		ret = -EIO;
+		goto bail8;
+	}
+
+	/* Remap the PCI registers in adapter BAR4 to kernel VA space */
+	c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET;
+	c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
+				     kva_map_size);
+	if (c2dev->kva == 0UL) {
+		printk(KERN_ERR PFX "Unable to remap BAR4\n");
+		ret = -EIO;
+		goto bail9;
+	}
+
+	/* Print out the MAC address */
+	c2_print_macaddr(netdev);
+
+	ret = c2_rnic_init(c2dev);
+	if (ret) {
+		printk(KERN_ERR PFX "c2_rnic_init failed: %d\n", ret);
+		goto bail10;
+	}
+
+	c2_register_device(c2dev);
+
+	return 0;
+
+ bail10:
+	iounmap(c2dev->kva);
+
+ bail9:
+	iounmap(c2dev->regs);
+
+ bail8:
+	iounmap(c2dev->mmio_txp_ring);
+
+ bail7:
+	iounmap(c2dev->mmio_rxp_ring);
+
+ bail6:
+	unregister_netdev(netdev);
+
+ bail5:
+	free_netdev(netdev);
+
+ bail4:
+	free_irq(pcidev->irq, c2dev);
+
+ bail3:
+	ib_dealloc_device(&c2dev->ibdev);
+
+ bail2:
+	pci_release_regions(pcidev);
+
+ bail1:
+	pci_disable_device(pcidev);
+
+ bail0:
+	return ret;
+}
+
+static void __devexit c2_remove(struct pci_dev *pcidev)
+{
+	struct c2_dev *c2dev = pci_get_drvdata(pcidev);
+	struct net_device *netdev = c2dev->netdev;
+
+	/* Unregister with OpenIB */
+	c2_unregister_device(c2dev);
+
+	/* Clean up the RNIC resources */
+	c2_rnic_term(c2dev);
+
+	/* Remove network device from the kernel */
+	unregister_netdev(netdev);
+
+	/* Free network device */
+	free_netdev(netdev);
+
+	/* Free the interrupt line */
+	free_irq(pcidev->irq, c2dev);
+
+	/* missing: Turn LEDs off here */
+
+	/* Unmap adapter PA space */
+	iounmap(c2dev->kva);
+	iounmap(c2dev->regs);
+	iounmap(c2dev->mmio_txp_ring);
+	iounmap(c2dev->mmio_rxp_ring);
+
+	/* Free the hardware structure */
+	ib_dealloc_device(&c2dev->ibdev);
+
+	/* Release reserved PCI I/O and memory resources */
+	pci_release_regions(pcidev);
+
+	/* Disable PCI device */
+	pci_disable_device(pcidev);
+
+	/* Clear driver specific data */
+	pci_set_drvdata(pcidev, NULL);
+}
+
+static struct pci_driver c2_pci_driver = {
+	.name = DRV_NAME,
+	.id_table = c2_pci_table,
+	.probe = c2_probe,
+	.remove = __devexit_p(c2_remove),
+};
+
+static int __init c2_init_module(void)
+{
+	return pci_module_init(&c2_pci_driver);
+}
+
+static void __exit c2_exit_module(void)
+{
+	pci_unregister_driver(&c2_pci_driver);
+}
+
+module_init(c2_init_module);
+module_exit(c2_exit_module);
diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h
new file mode 100644
index 00000000000..1b17dcdd050
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2.h
@@ -0,0 +1,551 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __C2_H
+#define __C2_H
+
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/idr.h>
+#include <asm/semaphore.h>
+
+#include "c2_provider.h"
+#include "c2_mq.h"
+#include "c2_status.h"
+
+#define DRV_NAME     "c2"
+#define DRV_VERSION  "1.1"
+#define PFX          DRV_NAME ": "
+
+#define BAR_0                0
+#define BAR_2                2
+#define BAR_4                4
+
+#define RX_BUF_SIZE         (1536 + 8)
+#define ETH_JUMBO_MTU        9000
+#define C2_MAGIC            "CEPHEUS"
+#define C2_VERSION           4
+#define C2_IVN              (18 & 0x7fffffff)
+
+#define C2_REG0_SIZE        (16 * 1024)
+#define C2_REG2_SIZE        (2 * 1024 * 1024)
+#define C2_REG4_SIZE        (256 * 1024 * 1024)
+#define C2_NUM_TX_DESC       341
+#define C2_NUM_RX_DESC       256
+#define C2_PCI_REGS_OFFSET  (0x10000)
+#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2))
+#define C2_RXP_HRXDQ_SIZE   (4096)
+#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE)
+#define C2_TXP_HTXDQ_SIZE   (4096)
+#define C2_TX_TIMEOUT	    (6*HZ)
+
+/* CEPHEUS */
+static const u8 c2_magic[] = {
+	0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53
+};
+
+enum adapter_pci_regs {
+	C2_REGS_MAGIC = 0x0000,
+	C2_REGS_VERS = 0x0008,
+	C2_REGS_IVN = 0x000C,
+	C2_REGS_PCI_WINSIZE = 0x0010,
+	C2_REGS_Q0_QSIZE = 0x0014,
+	C2_REGS_Q0_MSGSIZE = 0x0018,
+	C2_REGS_Q0_POOLSTART = 0x001C,
+	C2_REGS_Q0_SHARED = 0x0020,
+	C2_REGS_Q1_QSIZE = 0x0024,
+	C2_REGS_Q1_MSGSIZE = 0x0028,
+	C2_REGS_Q1_SHARED = 0x0030,
+	C2_REGS_Q2_QSIZE = 0x0034,
+	C2_REGS_Q2_MSGSIZE = 0x0038,
+	C2_REGS_Q2_SHARED = 0x0040,
+	C2_REGS_ENADDR = 0x004C,
+	C2_REGS_RDMA_ENADDR = 0x0054,
+	C2_REGS_HRX_CUR = 0x006C,
+};
+
+struct c2_adapter_pci_regs {
+	char reg_magic[8];
+	u32 version;
+	u32 ivn;
+	u32 pci_window_size;
+	u32 q0_q_size;
+	u32 q0_msg_size;
+	u32 q0_pool_start;
+	u32 q0_shared;
+	u32 q1_q_size;
+	u32 q1_msg_size;
+	u32 q1_pool_start;
+	u32 q1_shared;
+	u32 q2_q_size;
+	u32 q2_msg_size;
+	u32 q2_pool_start;
+	u32 q2_shared;
+	u32 log_start;
+	u32 log_size;
+	u8 host_enaddr[8];
+	u8 rdma_enaddr[8];
+	u32 crash_entry;
+	u32 crash_ready[2];
+	u32 fw_txd_cur;
+	u32 fw_hrxd_cur;
+	u32 fw_rxd_cur;
+};
+
+enum pci_regs {
+	C2_HISR = 0x0000,
+	C2_DISR = 0x0004,
+	C2_HIMR = 0x0008,
+	C2_DIMR = 0x000C,
+	C2_NISR0 = 0x0010,
+	C2_NISR1 = 0x0014,
+	C2_NIMR0 = 0x0018,
+	C2_NIMR1 = 0x001C,
+	C2_IDIS = 0x0020,
+};
+
+enum {
+	C2_PCI_HRX_INT = 1 << 8,
+	C2_PCI_HTX_INT = 1 << 17,
+	C2_PCI_HRX_QUI = 1 << 31,
+};
+
+/*
+ * Cepheus registers in BAR0.
+ */
+struct c2_pci_regs {
+	u32 hostisr;
+	u32 dmaisr;
+	u32 hostimr;
+	u32 dmaimr;
+	u32 netisr0;
+	u32 netisr1;
+	u32 netimr0;
+	u32 netimr1;
+	u32 int_disable;
+};
+
+/* TXP flags */
+enum c2_txp_flags {
+	TXP_HTXD_DONE = 0,
+	TXP_HTXD_READY = 1 << 0,
+	TXP_HTXD_UNINIT = 1 << 1,
+};
+
+/* RXP flags */
+enum c2_rxp_flags {
+	RXP_HRXD_UNINIT = 0,
+	RXP_HRXD_READY = 1 << 0,
+	RXP_HRXD_DONE = 1 << 1,
+};
+
+/* RXP status */
+enum c2_rxp_status {
+	RXP_HRXD_ZERO = 0,
+	RXP_HRXD_OK = 1 << 0,
+	RXP_HRXD_BUF_OV = 1 << 1,
+};
+
+/* TXP descriptor fields */
+enum txp_desc {
+	C2_TXP_FLAGS = 0x0000,
+	C2_TXP_LEN = 0x0002,
+	C2_TXP_ADDR = 0x0004,
+};
+
+/* RXP descriptor fields */
+enum rxp_desc {
+	C2_RXP_FLAGS = 0x0000,
+	C2_RXP_STATUS = 0x0002,
+	C2_RXP_COUNT = 0x0004,
+	C2_RXP_LEN = 0x0006,
+	C2_RXP_ADDR = 0x0008,
+};
+
+struct c2_txp_desc {
+	u16 flags;
+	u16 len;
+	u64 addr;
+} __attribute__ ((packed));
+
+struct c2_rxp_desc {
+	u16 flags;
+	u16 status;
+	u16 count;
+	u16 len;
+	u64 addr;
+} __attribute__ ((packed));
+
+struct c2_rxp_hdr {
+	u16 flags;
+	u16 status;
+	u16 len;
+	u16 rsvd;
+} __attribute__ ((packed));
+
+struct c2_tx_desc {
+	u32 len;
+	u32 status;
+	dma_addr_t next_offset;
+};
+
+struct c2_rx_desc {
+	u32 len;
+	u32 status;
+	dma_addr_t next_offset;
+};
+
+struct c2_alloc {
+	u32 last;
+	u32 max;
+	spinlock_t lock;
+	unsigned long *table;
+};
+
+struct c2_array {
+	struct {
+		void **page;
+		int used;
+	} *page_list;
+};
+
+/*
+ * The MQ shared pointer pool is organized as a linked list of
+ * chunks. Each chunk contains a linked list of free shared pointers
+ * that can be allocated to a given user mode client.
+ *
+ */
+struct sp_chunk {
+	struct sp_chunk *next;
+	dma_addr_t dma_addr;
+	DECLARE_PCI_UNMAP_ADDR(mapping);
+	u16 head;
+	u16 shared_ptr[0];
+};
+
+struct c2_pd_table {
+	u32 last;
+	u32 max;
+	spinlock_t lock;
+	unsigned long *table;
+};
+
+struct c2_qp_table {
+	struct idr idr;
+	spinlock_t lock;
+	int last;
+};
+
+struct c2_element {
+	struct c2_element *next;
+	void *ht_desc;		/* host     descriptor */
+	void __iomem *hw_desc;	/* hardware descriptor */
+	struct sk_buff *skb;
+	dma_addr_t mapaddr;
+	u32 maplen;
+};
+
+struct c2_ring {
+	struct c2_element *to_clean;
+	struct c2_element *to_use;
+	struct c2_element *start;
+	unsigned long count;
+};
+
+struct c2_dev {
+	struct ib_device ibdev;
+	void __iomem *regs;
+	void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */
+	void __iomem *mmio_rxp_ring;
+	spinlock_t lock;
+	struct pci_dev *pcidev;
+	struct net_device *netdev;
+	struct net_device *pseudo_netdev;
+	unsigned int cur_tx;
+	unsigned int cur_rx;
+	u32 adapter_handle;
+	int device_cap_flags;
+	void __iomem *kva;	/* KVA device memory */
+	unsigned long pa;	/* PA device memory */
+	void **qptr_array;
+
+	kmem_cache_t *host_msg_cache;
+
+	struct list_head cca_link;		/* adapter list */
+	struct list_head eh_wakeup_list;	/* event wakeup list */
+	wait_queue_head_t req_vq_wo;
+
+	/* Cached RNIC properties */
+	struct ib_device_attr props;
+
+	struct c2_pd_table pd_table;
+	struct c2_qp_table qp_table;
+	int ports;		/* num of GigE ports */
+	int devnum;
+	spinlock_t vqlock;	/* sync vbs req MQ */
+
+	/* Verbs Queues */
+	struct c2_mq req_vq;	/* Verbs Request MQ */
+	struct c2_mq rep_vq;	/* Verbs Reply MQ */
+	struct c2_mq aeq;	/* Async Events MQ */
+
+	/* Kernel client MQs */
+	struct sp_chunk *kern_mqsp_pool;
+
+	/* Device updates these values when posting messages to a host
+	 * target queue */
+	u16 req_vq_shared;
+	u16 rep_vq_shared;
+	u16 aeq_shared;
+	u16 irq_claimed;
+
+	/*
+	 * Shared host target pages for user-accessible MQs.
+	 */
+	int hthead;		/* index of first free entry */
+	void *htpages;		/* kernel vaddr */
+	int htlen;		/* length of htpages memory */
+	void *htuva;		/* user mapped vaddr */
+	spinlock_t htlock;	/* serialize allocation */
+
+	u64 adapter_hint_uva;	/* access to the activity FIFO */
+
+	//	spinlock_t aeq_lock;
+	//	spinlock_t rnic_lock;
+
+	u16 *hint_count;
+	dma_addr_t hint_count_dma;
+	u16 hints_read;
+
+	int init;		/* TRUE if it's ready */
+	char ae_cache_name[16];
+	char vq_cache_name[16];
+};
+
+struct c2_port {
+	u32 msg_enable;
+	struct c2_dev *c2dev;
+	struct net_device *netdev;
+
+	spinlock_t tx_lock;
+	u32 tx_avail;
+	struct c2_ring tx_ring;
+	struct c2_ring rx_ring;
+
+	void *mem;		/* PCI memory for host rings */
+	dma_addr_t dma;
+	unsigned long mem_size;
+
+	u32 rx_buf_size;
+
+	struct net_device_stats netstats;
+};
+
+/*
+ * Activity FIFO registers in BAR0.
+ */
+#define PCI_BAR0_HOST_HINT	0x100
+#define PCI_BAR0_ADAPTER_HINT	0x2000
+
+/*
+ * Ammasso PCI vendor id and Cepheus PCI device id.
+ */
+#define CQ_ARMED 	0x01
+#define CQ_WAIT_FOR_DMA	0x80
+
+/*
+ * The format of a hint is as follows:
+ * Lower 16 bits are the count of hints for the queue.
+ * Next 15 bits are the qp_index
+ * Upper most bit depends on who reads it:
+ *    If read by producer, then it means Full (1) or Not-Full (0)
+ *    If read by consumer, then it means Empty (1) or Not-Empty (0)
+ */
+#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
+#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
+#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
+
+
+/*
+ * The following defines the offset in SDRAM for the c2_adapter_pci_regs_t
+ * struct.
+ */
+#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000
+
+#ifndef readq
+static inline u64 readq(const void __iomem * addr)
+{
+	u64 ret = readl(addr + 4);
+	ret <<= 32;
+	ret |= readl(addr);
+
+	return ret;
+}
+#endif
+
+#ifndef writeq
+static inline void __raw_writeq(u64 val, void __iomem * addr)
+{
+	__raw_writel((u32) (val), addr);
+	__raw_writel((u32) (val >> 32), (addr + 4));
+}
+#endif
+
+#define C2_SET_CUR_RX(c2dev, cur_rx) \
+	__raw_writel(cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092)
+
+#define C2_GET_CUR_RX(c2dev) \
+	be32_to_cpu(readl(c2dev->mmio_txp_ring + 4092))
+
+static inline struct c2_dev *to_c2dev(struct ib_device *ibdev)
+{
+	return container_of(ibdev, struct c2_dev, ibdev);
+}
+
+static inline int c2_errno(void *reply)
+{
+	switch (c2_wr_get_result(reply)) {
+	case C2_OK:
+		return 0;
+	case CCERR_NO_BUFS:
+	case CCERR_INSUFFICIENT_RESOURCES:
+	case CCERR_ZERO_RDMA_READ_RESOURCES:
+		return -ENOMEM;
+	case CCERR_MR_IN_USE:
+	case CCERR_QP_IN_USE:
+		return -EBUSY;
+	case CCERR_ADDR_IN_USE:
+		return -EADDRINUSE;
+	case CCERR_ADDR_NOT_AVAIL:
+		return -EADDRNOTAVAIL;
+	case CCERR_CONN_RESET:
+		return -ECONNRESET;
+	case CCERR_NOT_IMPLEMENTED:
+	case CCERR_INVALID_WQE:
+		return -ENOSYS;
+	case CCERR_QP_NOT_PRIVILEGED:
+		return -EPERM;
+	case CCERR_STACK_ERROR:
+		return -EPROTO;
+	case CCERR_ACCESS_VIOLATION:
+	case CCERR_BASE_AND_BOUNDS_VIOLATION:
+		return -EFAULT;
+	case CCERR_STAG_STATE_NOT_INVALID:
+	case CCERR_INVALID_ADDRESS:
+	case CCERR_INVALID_CQ:
+	case CCERR_INVALID_EP:
+	case CCERR_INVALID_MODIFIER:
+	case CCERR_INVALID_MTU:
+	case CCERR_INVALID_PD_ID:
+	case CCERR_INVALID_QP:
+	case CCERR_INVALID_RNIC:
+	case CCERR_INVALID_STAG:
+		return -EINVAL;
+	default:
+		return -EAGAIN;
+	}
+}
+
+/* Device */
+extern int c2_register_device(struct c2_dev *c2dev);
+extern void c2_unregister_device(struct c2_dev *c2dev);
+extern int c2_rnic_init(struct c2_dev *c2dev);
+extern void c2_rnic_term(struct c2_dev *c2dev);
+extern void c2_rnic_interrupt(struct c2_dev *c2dev);
+extern int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask);
+extern int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask);
+
+/* QPs */
+extern int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd,
+		       struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp);
+extern void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp);
+extern struct ib_qp *c2_get_qp(struct ib_device *device, int qpn);
+extern int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
+			struct ib_qp_attr *attr, int attr_mask);
+extern int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
+				 int ord, int ird);
+extern int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
+			struct ib_send_wr **bad_wr);
+extern int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
+			   struct ib_recv_wr **bad_wr);
+extern void __devinit c2_init_qp_table(struct c2_dev *c2dev);
+extern void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev);
+extern void c2_set_qp_state(struct c2_qp *, int);
+extern struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn);
+
+/* PDs */
+extern int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd);
+extern void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd);
+extern int __devinit c2_init_pd_table(struct c2_dev *c2dev);
+extern void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev);
+
+/* CQs */
+extern int c2_init_cq(struct c2_dev *c2dev, int entries,
+		      struct c2_ucontext *ctx, struct c2_cq *cq);
+extern void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq);
+extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index);
+extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index);
+extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
+extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify);
+
+/* CM */
+extern int c2_llp_connect(struct iw_cm_id *cm_id,
+			  struct iw_cm_conn_param *iw_param);
+extern int c2_llp_accept(struct iw_cm_id *cm_id,
+			 struct iw_cm_conn_param *iw_param);
+extern int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata,
+			 u8 pdata_len);
+extern int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog);
+extern int c2_llp_service_destroy(struct iw_cm_id *cm_id);
+
+/* MM */
+extern int c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
+ 				      int page_size, int pbl_depth, u32 length,
+ 				      u32 off, u64 *va, enum c2_acf acf,
+				      struct c2_mr *mr);
+extern int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index);
+
+/* AE */
+extern void c2_ae_event(struct c2_dev *c2dev, u32 mq_index);
+
+/* MQSP Allocator */
+extern int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
+			     struct sp_chunk **root);
+extern void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root);
+extern u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
+			  dma_addr_t *dma_addr, gfp_t gfp_mask);
+extern void c2_free_mqsp(u16 * mqsp);
+#endif
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c
new file mode 100644
index 00000000000..08f46c83a3a
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_ae.c
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include <rdma/iw_cm.h>
+#include "c2_status.h"
+#include "c2_ae.h"
+
+static int c2_convert_cm_status(u32 c2_status)
+{
+	switch (c2_status) {
+	case C2_CONN_STATUS_SUCCESS:
+		return 0;
+	case C2_CONN_STATUS_REJECTED:
+		return -ENETRESET;
+	case C2_CONN_STATUS_REFUSED:
+		return -ECONNREFUSED;
+	case C2_CONN_STATUS_TIMEDOUT:
+		return -ETIMEDOUT;
+	case C2_CONN_STATUS_NETUNREACH:
+		return -ENETUNREACH;
+	case C2_CONN_STATUS_HOSTUNREACH:
+		return -EHOSTUNREACH;
+	case C2_CONN_STATUS_INVALID_RNIC:
+		return -EINVAL;
+	case C2_CONN_STATUS_INVALID_QP:
+		return -EINVAL;
+	case C2_CONN_STATUS_INVALID_QP_STATE:
+		return -EINVAL;
+	case C2_CONN_STATUS_ADDR_NOT_AVAIL:
+		return -EADDRNOTAVAIL;
+	default:
+		printk(KERN_ERR PFX
+		       "%s - Unable to convert CM status: %d\n",
+		       __FUNCTION__, c2_status);
+		return -EIO;
+	}
+}
+
+#ifdef DEBUG
+static const char* to_event_str(int event)
+{
+	static const char* event_str[] = {
+		"CCAE_REMOTE_SHUTDOWN",
+		"CCAE_ACTIVE_CONNECT_RESULTS",
+		"CCAE_CONNECTION_REQUEST",
+		"CCAE_LLP_CLOSE_COMPLETE",
+		"CCAE_TERMINATE_MESSAGE_RECEIVED",
+		"CCAE_LLP_CONNECTION_RESET",
+		"CCAE_LLP_CONNECTION_LOST",
+		"CCAE_LLP_SEGMENT_SIZE_INVALID",
+		"CCAE_LLP_INVALID_CRC",
+		"CCAE_LLP_BAD_FPDU",
+		"CCAE_INVALID_DDP_VERSION",
+		"CCAE_INVALID_RDMA_VERSION",
+		"CCAE_UNEXPECTED_OPCODE",
+		"CCAE_INVALID_DDP_QUEUE_NUMBER",
+		"CCAE_RDMA_READ_NOT_ENABLED",
+		"CCAE_RDMA_WRITE_NOT_ENABLED",
+		"CCAE_RDMA_READ_TOO_SMALL",
+		"CCAE_NO_L_BIT",
+		"CCAE_TAGGED_INVALID_STAG",
+		"CCAE_TAGGED_BASE_BOUNDS_VIOLATION",
+		"CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION",
+		"CCAE_TAGGED_INVALID_PD",
+		"CCAE_WRAP_ERROR",
+		"CCAE_BAD_CLOSE",
+		"CCAE_BAD_LLP_CLOSE",
+		"CCAE_INVALID_MSN_RANGE",
+		"CCAE_INVALID_MSN_GAP",
+		"CCAE_IRRQ_OVERFLOW",
+		"CCAE_IRRQ_MSN_GAP",
+		"CCAE_IRRQ_MSN_RANGE",
+		"CCAE_IRRQ_INVALID_STAG",
+		"CCAE_IRRQ_BASE_BOUNDS_VIOLATION",
+		"CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION",
+		"CCAE_IRRQ_INVALID_PD",
+		"CCAE_IRRQ_WRAP_ERROR",
+		"CCAE_CQ_SQ_COMPLETION_OVERFLOW",
+		"CCAE_CQ_RQ_COMPLETION_ERROR",
+		"CCAE_QP_SRQ_WQE_ERROR",
+		"CCAE_QP_LOCAL_CATASTROPHIC_ERROR",
+		"CCAE_CQ_OVERFLOW",
+		"CCAE_CQ_OPERATION_ERROR",
+		"CCAE_SRQ_LIMIT_REACHED",
+		"CCAE_QP_RQ_LIMIT_REACHED",
+		"CCAE_SRQ_CATASTROPHIC_ERROR",
+		"CCAE_RNIC_CATASTROPHIC_ERROR"
+	};
+
+	if (event < CCAE_REMOTE_SHUTDOWN ||
+	    event > CCAE_RNIC_CATASTROPHIC_ERROR)
+		return "<invalid event>";
+
+	event -= CCAE_REMOTE_SHUTDOWN;
+	return event_str[event];
+}
+
+static const char *to_qp_state_str(int state)
+{
+	switch (state) {
+	case C2_QP_STATE_IDLE:
+		return "C2_QP_STATE_IDLE";
+	case C2_QP_STATE_CONNECTING:
+		return "C2_QP_STATE_CONNECTING";
+	case C2_QP_STATE_RTS:
+		return "C2_QP_STATE_RTS";
+	case C2_QP_STATE_CLOSING:
+		return "C2_QP_STATE_CLOSING";
+	case C2_QP_STATE_TERMINATE:
+		return "C2_QP_STATE_TERMINATE";
+	case C2_QP_STATE_ERROR:
+		return "C2_QP_STATE_ERROR";
+	default:
+		return "<invalid QP state>";
+	};
+}
+#endif
+
+void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
+{
+	struct c2_mq *mq = c2dev->qptr_array[mq_index];
+	union c2wr *wr;
+	void *resource_user_context;
+	struct iw_cm_event cm_event;
+	struct ib_event ib_event;
+	enum c2_resource_indicator resource_indicator;
+	enum c2_event_id event_id;
+	unsigned long flags;
+	int status;
+
+	/*
+	 * retreive the message
+	 */
+	wr = c2_mq_consume(mq);
+	if (!wr)
+		return;
+
+	memset(&ib_event, 0, sizeof(ib_event));
+	memset(&cm_event, 0, sizeof(cm_event));
+
+	event_id = c2_wr_get_id(wr);
+	resource_indicator = be32_to_cpu(wr->ae.ae_generic.resource_type);
+	resource_user_context =
+	    (void *) (unsigned long) wr->ae.ae_generic.user_context;
+
+	status = cm_event.status = c2_convert_cm_status(c2_wr_get_result(wr));
+
+	pr_debug("event received c2_dev=%p, event_id=%d, "
+		"resource_indicator=%d, user_context=%p, status = %d\n",
+		c2dev, event_id, resource_indicator, resource_user_context,
+		status);
+
+	switch (resource_indicator) {
+	case C2_RES_IND_QP:{
+
+		struct c2_qp *qp = (struct c2_qp *)resource_user_context;
+		struct iw_cm_id *cm_id = qp->cm_id;
+		struct c2wr_ae_active_connect_results *res;
+
+		if (!cm_id) {
+			pr_debug("event received, but cm_id is <nul>, qp=%p!\n",
+				qp);
+			goto ignore_it;
+		}
+		pr_debug("%s: event = %s, user_context=%llx, "
+			"resource_type=%x, "
+			"resource=%x, qp_state=%s\n",
+			__FUNCTION__,
+			to_event_str(event_id),
+			be64_to_cpu(wr->ae.ae_generic.user_context),
+			be32_to_cpu(wr->ae.ae_generic.resource_type),
+			be32_to_cpu(wr->ae.ae_generic.resource),
+			to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state)));
+
+		c2_set_qp_state(qp, be32_to_cpu(wr->ae.ae_generic.qp_state));
+
+		switch (event_id) {
+		case CCAE_ACTIVE_CONNECT_RESULTS:
+			res = &wr->ae.ae_active_connect_results;
+			cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
+			cm_event.local_addr.sin_addr.s_addr = res->laddr;
+			cm_event.remote_addr.sin_addr.s_addr = res->raddr;
+			cm_event.local_addr.sin_port = res->lport;
+			cm_event.remote_addr.sin_port =	res->rport;
+			if (status == 0) {
+				cm_event.private_data_len =
+					be32_to_cpu(res->private_data_length);
+				cm_event.private_data = res->private_data;
+			} else {
+				spin_lock_irqsave(&qp->lock, flags);
+				if (qp->cm_id) {
+					qp->cm_id->rem_ref(qp->cm_id);
+					qp->cm_id = NULL;
+				}
+				spin_unlock_irqrestore(&qp->lock, flags);
+				cm_event.private_data_len = 0;
+				cm_event.private_data = NULL;
+			}
+			if (cm_id->event_handler)
+				cm_id->event_handler(cm_id, &cm_event);
+			break;
+		case CCAE_TERMINATE_MESSAGE_RECEIVED:
+		case CCAE_CQ_SQ_COMPLETION_OVERFLOW:
+			ib_event.device = &c2dev->ibdev;
+			ib_event.element.qp = &qp->ibqp;
+			ib_event.event = IB_EVENT_QP_REQ_ERR;
+
+			if (qp->ibqp.event_handler)
+				qp->ibqp.event_handler(&ib_event,
+						       qp->ibqp.
+						       qp_context);
+			break;
+		case CCAE_BAD_CLOSE:
+		case CCAE_LLP_CLOSE_COMPLETE:
+		case CCAE_LLP_CONNECTION_RESET:
+		case CCAE_LLP_CONNECTION_LOST:
+			BUG_ON(cm_id->event_handler==(void*)0x6b6b6b6b);
+
+			spin_lock_irqsave(&qp->lock, flags);
+			if (qp->cm_id) {
+				qp->cm_id->rem_ref(qp->cm_id);
+				qp->cm_id = NULL;
+			}
+			spin_unlock_irqrestore(&qp->lock, flags);
+			cm_event.event = IW_CM_EVENT_CLOSE;
+			cm_event.status = 0;
+			if (cm_id->event_handler)
+				cm_id->event_handler(cm_id, &cm_event);
+			break;
+		default:
+			BUG_ON(1);
+			pr_debug("%s:%d Unexpected event_id=%d on QP=%p, "
+				"CM_ID=%p\n",
+				__FUNCTION__, __LINE__,
+				event_id, qp, cm_id);
+			break;
+		}
+		break;
+	}
+
+	case C2_RES_IND_EP:{
+
+		struct c2wr_ae_connection_request *req =
+			&wr->ae.ae_connection_request;
+		struct iw_cm_id *cm_id =
+			(struct iw_cm_id *)resource_user_context;
+
+		pr_debug("C2_RES_IND_EP event_id=%d\n", event_id);
+		if (event_id != CCAE_CONNECTION_REQUEST) {
+			pr_debug("%s: Invalid event_id: %d\n",
+				__FUNCTION__, event_id);
+			break;
+		}
+		cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
+		cm_event.provider_data = (void*)(unsigned long)req->cr_handle;
+		cm_event.local_addr.sin_addr.s_addr = req->laddr;
+		cm_event.remote_addr.sin_addr.s_addr = req->raddr;
+		cm_event.local_addr.sin_port = req->lport;
+		cm_event.remote_addr.sin_port = req->rport;
+		cm_event.private_data_len =
+			be32_to_cpu(req->private_data_length);
+		cm_event.private_data = req->private_data;
+
+		if (cm_id->event_handler)
+			cm_id->event_handler(cm_id, &cm_event);
+		break;
+	}
+
+	case C2_RES_IND_CQ:{
+		struct c2_cq *cq =
+		    (struct c2_cq *) resource_user_context;
+
+		pr_debug("IB_EVENT_CQ_ERR\n");
+		ib_event.device = &c2dev->ibdev;
+		ib_event.element.cq = &cq->ibcq;
+		ib_event.event = IB_EVENT_CQ_ERR;
+
+		if (cq->ibcq.event_handler)
+			cq->ibcq.event_handler(&ib_event,
+					       cq->ibcq.cq_context);
+	}
+
+	default:
+		printk("Bad resource indicator = %d\n",
+		       resource_indicator);
+		break;
+	}
+
+ ignore_it:
+	c2_mq_free(mq);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.h b/drivers/infiniband/hw/amso1100/c2_ae.h
new file mode 100644
index 00000000000..3a065c33b83
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_ae.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_AE_H_
+#define _C2_AE_H_
+
+/*
+ * WARNING: If you change this file, also bump C2_IVN_BASE
+ * in common/include/clustercore/c2_ivn.h.
+ */
+
+/*
+ * Asynchronous Event Identifiers
+ *
+ * These start at 0x80 only so it's obvious from inspection that
+ * they are not work-request statuses.  This isn't critical.
+ *
+ * NOTE: these event id's must fit in eight bits.
+ */
+enum c2_event_id {
+	CCAE_REMOTE_SHUTDOWN = 0x80,
+	CCAE_ACTIVE_CONNECT_RESULTS,
+	CCAE_CONNECTION_REQUEST,
+	CCAE_LLP_CLOSE_COMPLETE,
+	CCAE_TERMINATE_MESSAGE_RECEIVED,
+	CCAE_LLP_CONNECTION_RESET,
+	CCAE_LLP_CONNECTION_LOST,
+	CCAE_LLP_SEGMENT_SIZE_INVALID,
+	CCAE_LLP_INVALID_CRC,
+	CCAE_LLP_BAD_FPDU,
+	CCAE_INVALID_DDP_VERSION,
+	CCAE_INVALID_RDMA_VERSION,
+	CCAE_UNEXPECTED_OPCODE,
+	CCAE_INVALID_DDP_QUEUE_NUMBER,
+	CCAE_RDMA_READ_NOT_ENABLED,
+	CCAE_RDMA_WRITE_NOT_ENABLED,
+	CCAE_RDMA_READ_TOO_SMALL,
+	CCAE_NO_L_BIT,
+	CCAE_TAGGED_INVALID_STAG,
+	CCAE_TAGGED_BASE_BOUNDS_VIOLATION,
+	CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION,
+	CCAE_TAGGED_INVALID_PD,
+	CCAE_WRAP_ERROR,
+	CCAE_BAD_CLOSE,
+	CCAE_BAD_LLP_CLOSE,
+	CCAE_INVALID_MSN_RANGE,
+	CCAE_INVALID_MSN_GAP,
+	CCAE_IRRQ_OVERFLOW,
+	CCAE_IRRQ_MSN_GAP,
+	CCAE_IRRQ_MSN_RANGE,
+	CCAE_IRRQ_INVALID_STAG,
+	CCAE_IRRQ_BASE_BOUNDS_VIOLATION,
+	CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION,
+	CCAE_IRRQ_INVALID_PD,
+	CCAE_IRRQ_WRAP_ERROR,
+	CCAE_CQ_SQ_COMPLETION_OVERFLOW,
+	CCAE_CQ_RQ_COMPLETION_ERROR,
+	CCAE_QP_SRQ_WQE_ERROR,
+	CCAE_QP_LOCAL_CATASTROPHIC_ERROR,
+	CCAE_CQ_OVERFLOW,
+	CCAE_CQ_OPERATION_ERROR,
+	CCAE_SRQ_LIMIT_REACHED,
+	CCAE_QP_RQ_LIMIT_REACHED,
+	CCAE_SRQ_CATASTROPHIC_ERROR,
+	CCAE_RNIC_CATASTROPHIC_ERROR
+/* WARNING If you add more id's, make sure their values fit in eight bits. */
+};
+
+/*
+ * Resource Indicators and Identifiers
+ */
+enum c2_resource_indicator {
+	C2_RES_IND_QP = 1,
+	C2_RES_IND_EP,
+	C2_RES_IND_CQ,
+	C2_RES_IND_SRQ,
+};
+
+#endif /* _C2_AE_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_alloc.c b/drivers/infiniband/hw/amso1100/c2_alloc.c
new file mode 100644
index 00000000000..1d2529992c0
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_alloc.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+
+#include "c2.h"
+
+static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask,
+			       struct sp_chunk **head)
+{
+	int i;
+	struct sp_chunk *new_head;
+
+	new_head = (struct sp_chunk *) __get_free_page(gfp_mask);
+	if (new_head == NULL)
+		return -ENOMEM;
+
+	new_head->dma_addr = dma_map_single(c2dev->ibdev.dma_device, new_head,
+					    PAGE_SIZE, DMA_FROM_DEVICE);
+	pci_unmap_addr_set(new_head, mapping, new_head->dma_addr);
+
+	new_head->next = NULL;
+	new_head->head = 0;
+
+	/* build list where each index is the next free slot */
+	for (i = 0;
+	     i < (PAGE_SIZE - sizeof(struct sp_chunk) -
+		  sizeof(u16)) / sizeof(u16) - 1;
+	     i++) {
+		new_head->shared_ptr[i] = i + 1;
+	}
+	/* terminate list */
+	new_head->shared_ptr[i] = 0xFFFF;
+
+	*head = new_head;
+	return 0;
+}
+
+int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
+		      struct sp_chunk **root)
+{
+	return c2_alloc_mqsp_chunk(c2dev, gfp_mask, root);
+}
+
+void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root)
+{
+	struct sp_chunk *next;
+
+	while (root) {
+		next = root->next;
+		dma_unmap_single(c2dev->ibdev.dma_device,
+				 pci_unmap_addr(root, mapping), PAGE_SIZE,
+			         DMA_FROM_DEVICE);
+		__free_page((struct page *) root);
+		root = next;
+	}
+}
+
+u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
+		   dma_addr_t *dma_addr, gfp_t gfp_mask)
+{
+	u16 mqsp;
+
+	while (head) {
+		mqsp = head->head;
+		if (mqsp != 0xFFFF) {
+			head->head = head->shared_ptr[mqsp];
+			break;
+		} else if (head->next == NULL) {
+			if (c2_alloc_mqsp_chunk(c2dev, gfp_mask, &head->next) ==
+			    0) {
+				head = head->next;
+				mqsp = head->head;
+				head->head = head->shared_ptr[mqsp];
+				break;
+			} else
+				return NULL;
+		} else
+			head = head->next;
+	}
+	if (head) {
+		*dma_addr = head->dma_addr +
+			    ((unsigned long) &(head->shared_ptr[mqsp]) -
+			     (unsigned long) head);
+		pr_debug("%s addr %p dma_addr %llx\n", __FUNCTION__,
+			 &(head->shared_ptr[mqsp]), (u64)*dma_addr);
+		return &(head->shared_ptr[mqsp]);
+	}
+	return NULL;
+}
+
+void c2_free_mqsp(u16 * mqsp)
+{
+	struct sp_chunk *head;
+	u16 idx;
+
+	/* The chunk containing this ptr begins at the page boundary */
+	head = (struct sp_chunk *) ((unsigned long) mqsp & PAGE_MASK);
+
+	/* Link head to new mqsp */
+	*mqsp = head->head;
+
+	/* Compute the shared_ptr index */
+	idx = ((unsigned long) mqsp & ~PAGE_MASK) >> 1;
+	idx -= (unsigned long) &(((struct sp_chunk *) 0)->shared_ptr[0]) >> 1;
+
+	/* Point this index at the head */
+	head->shared_ptr[idx] = head->head;
+
+	/* Point head at this index */
+	head->head = idx;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_cm.c b/drivers/infiniband/hw/amso1100/c2_cm.c
new file mode 100644
index 00000000000..485254efdd1
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_cm.c
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include "c2.h"
+#include "c2_wr.h"
+#include "c2_vq.h"
+#include <rdma/iw_cm.h>
+
+int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	struct c2_dev *c2dev = to_c2dev(cm_id->device);
+	struct ib_qp *ibqp;
+	struct c2_qp *qp;
+	struct c2wr_qp_connect_req *wr;	/* variable size needs a malloc. */
+	struct c2_vq_req *vq_req;
+	int err;
+
+	ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
+	if (!ibqp)
+		return -EINVAL;
+	qp = to_c2qp(ibqp);
+
+	/* Associate QP <--> CM_ID */
+	cm_id->provider_data = qp;
+	cm_id->add_ref(cm_id);
+	qp->cm_id = cm_id;
+
+	/*
+	 * only support the max private_data length
+	 */
+	if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
+		err = -EINVAL;
+		goto bail0;
+	}
+	/*
+	 * Set the rdma read limits
+	 */
+	err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Create and send a WR_QP_CONNECT...
+	 */
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	c2_wr_set_id(wr, CCWR_QP_CONNECT);
+	wr->hdr.context = 0;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->qp_handle = qp->adapter_handle;
+
+	wr->remote_addr = cm_id->remote_addr.sin_addr.s_addr;
+	wr->remote_port = cm_id->remote_addr.sin_port;
+
+	/*
+	 * Move any private data from the callers's buf into
+	 * the WR.
+	 */
+	if (iw_param->private_data) {
+		wr->private_data_length =
+			cpu_to_be32(iw_param->private_data_len);
+		memcpy(&wr->private_data[0], iw_param->private_data,
+		       iw_param->private_data_len);
+	} else
+		wr->private_data_length = 0;
+
+	/*
+	 * Send WR to adapter.  NOTE: There is no synch reply from
+	 * the adapter.
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	vq_req_free(c2dev, vq_req);
+
+ bail1:
+	kfree(wr);
+ bail0:
+	if (err) {
+		/*
+		 * If we fail, release reference on QP and
+		 * disassociate QP from CM_ID
+		 */
+		cm_id->provider_data = NULL;
+		qp->cm_id = NULL;
+		cm_id->rem_ref(cm_id);
+	}
+	return err;
+}
+
+int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
+{
+	struct c2_dev *c2dev;
+	struct c2wr_ep_listen_create_req wr;
+	struct c2wr_ep_listen_create_rep *reply;
+	struct c2_vq_req *vq_req;
+	int err;
+
+	c2dev = to_c2dev(cm_id->device);
+	if (c2dev == NULL)
+		return -EINVAL;
+
+	/*
+	 * Allocate verbs request.
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	/*
+	 * Build the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
+	wr.hdr.context = (u64) (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.local_addr = cm_id->local_addr.sin_addr.s_addr;
+	wr.local_port = cm_id->local_addr.sin_port;
+	wr.backlog = cpu_to_be32(backlog);
+	wr.user_context = (u64) (unsigned long) cm_id;
+
+	/*
+	 * Reference the request struct.  Dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Process reply
+	 */
+	reply =
+	    (struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	if ((err = c2_errno(reply)) != 0)
+		goto bail1;
+
+	/*
+	 * Keep the adapter handle. Used in subsequent destroy
+	 */
+	cm_id->provider_data = (void*)(unsigned long) reply->ep_handle;
+
+	/*
+	 * free vq stuff
+	 */
+	vq_repbuf_free(c2dev, reply);
+	vq_req_free(c2dev, vq_req);
+
+	return 0;
+
+ bail1:
+	vq_repbuf_free(c2dev, reply);
+ bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+
+int c2_llp_service_destroy(struct iw_cm_id *cm_id)
+{
+
+	struct c2_dev *c2dev;
+	struct c2wr_ep_listen_destroy_req wr;
+	struct c2wr_ep_listen_destroy_rep *reply;
+	struct c2_vq_req *vq_req;
+	int err;
+
+	c2dev = to_c2dev(cm_id->device);
+	if (c2dev == NULL)
+		return -EINVAL;
+
+	/*
+	 * Allocate verbs request.
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	/*
+	 * Build the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_EP_LISTEN_DESTROY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.ep_handle = (u32)(unsigned long)cm_id->provider_data;
+
+	/*
+	 * reference the request struct.  dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Process reply
+	 */
+	reply=(struct c2wr_ep_listen_destroy_rep *)(unsigned long)vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+	if ((err = c2_errno(reply)) != 0)
+		goto bail1;
+
+ bail1:
+	vq_repbuf_free(c2dev, reply);
+ bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	struct c2_dev *c2dev = to_c2dev(cm_id->device);
+	struct c2_qp *qp;
+	struct ib_qp *ibqp;
+	struct c2wr_cr_accept_req *wr;	/* variable length WR */
+	struct c2_vq_req *vq_req;
+	struct c2wr_cr_accept_rep *reply;	/* VQ Reply msg ptr. */
+	int err;
+
+	ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
+	if (!ibqp)
+		return -EINVAL;
+	qp = to_c2qp(ibqp);
+
+	/* Set the RDMA read limits */
+	err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
+	if (err)
+		goto bail0;
+
+	/* Allocate verbs request. */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+	vq_req->qp = qp;
+	vq_req->cm_id = cm_id;
+	vq_req->event = IW_CM_EVENT_ESTABLISHED;
+
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+
+	/* Build the WR */
+	c2_wr_set_id(wr, CCWR_CR_ACCEPT);
+	wr->hdr.context = (unsigned long) vq_req;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->ep_handle = (u32) (unsigned long) cm_id->provider_data;
+	wr->qp_handle = qp->adapter_handle;
+
+	/* Replace the cr_handle with the QP after accept */
+	cm_id->provider_data = qp;
+	cm_id->add_ref(cm_id);
+	qp->cm_id = cm_id;
+
+	cm_id->provider_data = qp;
+
+	/* Validate private_data length */
+	if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
+		err = -EINVAL;
+		goto bail2;
+	}
+
+	if (iw_param->private_data) {
+		wr->private_data_length = cpu_to_be32(iw_param->private_data_len);
+		memcpy(&wr->private_data[0],
+		       iw_param->private_data, iw_param->private_data_len);
+	} else
+		wr->private_data_length = 0;
+
+	/* Reference the request struct.  Dereferenced in the int handler. */
+	vq_req_get(c2dev, vq_req);
+
+	/* Send WR to adapter */
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail2;
+	}
+
+	/* Wait for reply from adapter */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail2;
+
+	/* Check that reply is present */
+	reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+
+	err = c2_errno(reply);
+	vq_repbuf_free(c2dev, reply);
+
+	if (!err)
+		c2_set_qp_state(qp, C2_QP_STATE_RTS);
+ bail2:
+	kfree(wr);
+ bail1:
+	vq_req_free(c2dev, vq_req);
+ bail0:
+	if (err) {
+		/*
+		 * If we fail, release reference on QP and
+		 * disassociate QP from CM_ID
+		 */
+		cm_id->provider_data = NULL;
+		qp->cm_id = NULL;
+		cm_id->rem_ref(cm_id);
+	}
+	return err;
+}
+
+int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+	struct c2_dev *c2dev;
+	struct c2wr_cr_reject_req wr;
+	struct c2_vq_req *vq_req;
+	struct c2wr_cr_reject_rep *reply;
+	int err;
+
+	c2dev = to_c2dev(cm_id->device);
+
+	/*
+	 * Allocate verbs request.
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	/*
+	 * Build the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_CR_REJECT);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.ep_handle = (u32) (unsigned long) cm_id->provider_data;
+
+	/*
+	 * reference the request struct.  dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Process reply
+	 */
+	reply = (struct c2wr_cr_reject_rep *) (unsigned long)
+		vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+	err = c2_errno(reply);
+	/*
+	 * free vq stuff
+	 */
+	vq_repbuf_free(c2dev, reply);
+
+ bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c
new file mode 100644
index 00000000000..9d7bcc5ade9
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_cq.c
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include "c2.h"
+#include "c2_vq.h"
+#include "c2_status.h"
+
+#define C2_CQ_MSG_SIZE ((sizeof(struct c2wr_ce) + 32-1) & ~(32-1))
+
+static struct c2_cq *c2_cq_get(struct c2_dev *c2dev, int cqn)
+{
+	struct c2_cq *cq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&c2dev->lock, flags);
+	cq = c2dev->qptr_array[cqn];
+	if (!cq) {
+		spin_unlock_irqrestore(&c2dev->lock, flags);
+		return NULL;
+	}
+	atomic_inc(&cq->refcount);
+	spin_unlock_irqrestore(&c2dev->lock, flags);
+	return cq;
+}
+
+static void c2_cq_put(struct c2_cq *cq)
+{
+	if (atomic_dec_and_test(&cq->refcount))
+		wake_up(&cq->wait);
+}
+
+void c2_cq_event(struct c2_dev *c2dev, u32 mq_index)
+{
+	struct c2_cq *cq;
+
+	cq = c2_cq_get(c2dev, mq_index);
+	if (!cq) {
+		printk("discarding events on destroyed CQN=%d\n", mq_index);
+		return;
+	}
+
+	(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
+	c2_cq_put(cq);
+}
+
+void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index)
+{
+	struct c2_cq *cq;
+	struct c2_mq *q;
+
+	cq = c2_cq_get(c2dev, mq_index);
+	if (!cq)
+		return;
+
+	spin_lock_irq(&cq->lock);
+	q = &cq->mq;
+	if (q && !c2_mq_empty(q)) {
+		u16 priv = q->priv;
+		struct c2wr_ce *msg;
+
+		while (priv != be16_to_cpu(*q->shared)) {
+			msg = (struct c2wr_ce *)
+				(q->msg_pool.host + priv * q->msg_size);
+			if (msg->qp_user_context == (u64) (unsigned long) qp) {
+				msg->qp_user_context = (u64) 0;
+			}
+			priv = (priv + 1) % q->q_size;
+		}
+	}
+	spin_unlock_irq(&cq->lock);
+	c2_cq_put(cq);
+}
+
+static inline enum ib_wc_status c2_cqe_status_to_openib(u8 status)
+{
+	switch (status) {
+	case C2_OK:
+		return IB_WC_SUCCESS;
+	case CCERR_FLUSHED:
+		return IB_WC_WR_FLUSH_ERR;
+	case CCERR_BASE_AND_BOUNDS_VIOLATION:
+		return IB_WC_LOC_PROT_ERR;
+	case CCERR_ACCESS_VIOLATION:
+		return IB_WC_LOC_ACCESS_ERR;
+	case CCERR_TOTAL_LENGTH_TOO_BIG:
+		return IB_WC_LOC_LEN_ERR;
+	case CCERR_INVALID_WINDOW:
+		return IB_WC_MW_BIND_ERR;
+	default:
+		return IB_WC_GENERAL_ERR;
+	}
+}
+
+
+static inline int c2_poll_one(struct c2_dev *c2dev,
+			      struct c2_cq *cq, struct ib_wc *entry)
+{
+	struct c2wr_ce *ce;
+	struct c2_qp *qp;
+	int is_recv = 0;
+
+	ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+	if (!ce) {
+		return -EAGAIN;
+	}
+
+	/*
+	 * if the qp returned is null then this qp has already
+	 * been freed and we are unable process the completion.
+	 * try pulling the next message
+	 */
+	while ((qp =
+		(struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) {
+		c2_mq_free(&cq->mq);
+		ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+		if (!ce)
+			return -EAGAIN;
+	}
+
+	entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce));
+	entry->wr_id = ce->hdr.context;
+	entry->qp_num = ce->handle;
+	entry->wc_flags = 0;
+	entry->slid = 0;
+	entry->sl = 0;
+	entry->src_qp = 0;
+	entry->dlid_path_bits = 0;
+	entry->pkey_index = 0;
+
+	switch (c2_wr_get_id(ce)) {
+	case C2_WR_TYPE_SEND:
+		entry->opcode = IB_WC_SEND;
+		break;
+	case C2_WR_TYPE_RDMA_WRITE:
+		entry->opcode = IB_WC_RDMA_WRITE;
+		break;
+	case C2_WR_TYPE_RDMA_READ:
+		entry->opcode = IB_WC_RDMA_READ;
+		break;
+	case C2_WR_TYPE_BIND_MW:
+		entry->opcode = IB_WC_BIND_MW;
+		break;
+	case C2_WR_TYPE_RECV:
+		entry->byte_len = be32_to_cpu(ce->bytes_rcvd);
+		entry->opcode = IB_WC_RECV;
+		is_recv = 1;
+		break;
+	default:
+		break;
+	}
+
+	/* consume the WQEs */
+	if (is_recv)
+		c2_mq_lconsume(&qp->rq_mq, 1);
+	else
+		c2_mq_lconsume(&qp->sq_mq,
+			       be32_to_cpu(c2_wr_get_wqe_count(ce)) + 1);
+
+	/* free the message */
+	c2_mq_free(&cq->mq);
+
+	return 0;
+}
+
+int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
+{
+	struct c2_dev *c2dev = to_c2dev(ibcq->device);
+	struct c2_cq *cq = to_c2cq(ibcq);
+	unsigned long flags;
+	int npolled, err;
+
+	spin_lock_irqsave(&cq->lock, flags);
+
+	for (npolled = 0; npolled < num_entries; ++npolled) {
+
+		err = c2_poll_one(c2dev, cq, entry + npolled);
+		if (err)
+			break;
+	}
+
+	spin_unlock_irqrestore(&cq->lock, flags);
+
+	return npolled;
+}
+
+int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
+{
+	struct c2_mq_shared __iomem *shared;
+	struct c2_cq *cq;
+
+	cq = to_c2cq(ibcq);
+	shared = cq->mq.peer;
+
+	if (notify == IB_CQ_NEXT_COMP)
+		writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type);
+	else if (notify == IB_CQ_SOLICITED)
+		writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type);
+	else
+		return -EINVAL;
+
+	writeb(CQ_WAIT_FOR_DMA | CQ_ARMED, &shared->armed);
+
+	/*
+	 * Now read back shared->armed to make the PCI
+	 * write synchronous.  This is necessary for
+	 * correct cq notification semantics.
+	 */
+	readb(&shared->armed);
+
+	return 0;
+}
+
+static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
+{
+
+	dma_unmap_single(c2dev->ibdev.dma_device, pci_unmap_addr(mq, mapping),
+			 mq->q_size * mq->msg_size, DMA_FROM_DEVICE);
+	free_pages((unsigned long) mq->msg_pool.host,
+		   get_order(mq->q_size * mq->msg_size));
+}
+
+static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size,
+			   int msg_size)
+{
+	unsigned long pool_start;
+
+	pool_start = __get_free_pages(GFP_KERNEL,
+				      get_order(q_size * msg_size));
+	if (!pool_start)
+		return -ENOMEM;
+
+	c2_mq_rep_init(mq,
+		       0,		/* index (currently unknown) */
+		       q_size,
+		       msg_size,
+		       (u8 *) pool_start,
+		       NULL,	/* peer (currently unknown) */
+		       C2_MQ_HOST_TARGET);
+
+	mq->host_dma = dma_map_single(c2dev->ibdev.dma_device,
+				      (void *)pool_start,
+				      q_size * msg_size, DMA_FROM_DEVICE);
+	pci_unmap_addr_set(mq, mapping, mq->host_dma);
+
+	return 0;
+}
+
+int c2_init_cq(struct c2_dev *c2dev, int entries,
+	       struct c2_ucontext *ctx, struct c2_cq *cq)
+{
+	struct c2wr_cq_create_req wr;
+	struct c2wr_cq_create_rep *reply;
+	unsigned long peer_pa;
+	struct c2_vq_req *vq_req;
+	int err;
+
+	might_sleep();
+
+	cq->ibcq.cqe = entries - 1;
+	cq->is_kernel = !ctx;
+
+	/* Allocate a shared pointer */
+	cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+				      &cq->mq.shared_dma, GFP_KERNEL);
+	if (!cq->mq.shared)
+		return -ENOMEM;
+
+	/* Allocate pages for the message pool */
+	err = c2_alloc_cq_buf(c2dev, &cq->mq, entries + 1, C2_CQ_MSG_SIZE);
+	if (err)
+		goto bail0;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_CQ_CREATE);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.msg_size = cpu_to_be32(cq->mq.msg_size);
+	wr.depth = cpu_to_be32(cq->mq.q_size);
+	wr.shared_ht = cpu_to_be64(cq->mq.shared_dma);
+	wr.msg_pool = cpu_to_be64(cq->mq.host_dma);
+	wr.user_context = (u64) (unsigned long) (cq);
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail2;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail2;
+
+	reply = (struct c2wr_cq_create_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+
+	if ((err = c2_errno(reply)) != 0)
+		goto bail3;
+
+	cq->adapter_handle = reply->cq_handle;
+	cq->mq.index = be32_to_cpu(reply->mq_index);
+
+	peer_pa = c2dev->pa + be32_to_cpu(reply->adapter_shared);
+	cq->mq.peer = ioremap_nocache(peer_pa, PAGE_SIZE);
+	if (!cq->mq.peer) {
+		err = -ENOMEM;
+		goto bail3;
+	}
+
+	vq_repbuf_free(c2dev, reply);
+	vq_req_free(c2dev, vq_req);
+
+	spin_lock_init(&cq->lock);
+	atomic_set(&cq->refcount, 1);
+	init_waitqueue_head(&cq->wait);
+
+	/*
+	 * Use the MQ index allocated by the adapter to
+	 * store the CQ in the qptr_array
+	 */
+	cq->cqn = cq->mq.index;
+	c2dev->qptr_array[cq->cqn] = cq;
+
+	return 0;
+
+      bail3:
+	vq_repbuf_free(c2dev, reply);
+      bail2:
+	vq_req_free(c2dev, vq_req);
+      bail1:
+	c2_free_cq_buf(c2dev, &cq->mq);
+      bail0:
+	c2_free_mqsp(cq->mq.shared);
+
+	return err;
+}
+
+void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq)
+{
+	int err;
+	struct c2_vq_req *vq_req;
+	struct c2wr_cq_destroy_req wr;
+	struct c2wr_cq_destroy_rep *reply;
+
+	might_sleep();
+
+	/* Clear CQ from the qptr array */
+	spin_lock_irq(&c2dev->lock);
+	c2dev->qptr_array[cq->mq.index] = NULL;
+	atomic_dec(&cq->refcount);
+	spin_unlock_irq(&c2dev->lock);
+
+	wait_event(cq->wait, !atomic_read(&cq->refcount));
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		goto bail0;
+	}
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_CQ_DESTROY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.cq_handle = cq->adapter_handle;
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg);
+
+	vq_repbuf_free(c2dev, reply);
+      bail1:
+	vq_req_free(c2dev, vq_req);
+      bail0:
+	if (cq->is_kernel) {
+		c2_free_cq_buf(c2dev, &cq->mq);
+	}
+
+	return;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c
new file mode 100644
index 00000000000..0d0bc33ca30
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_intr.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include <rdma/iw_cm.h>
+#include "c2_vq.h"
+
+static void handle_mq(struct c2_dev *c2dev, u32 index);
+static void handle_vq(struct c2_dev *c2dev, u32 mq_index);
+
+/*
+ * Handle RNIC interrupts
+ */
+void c2_rnic_interrupt(struct c2_dev *c2dev)
+{
+	unsigned int mq_index;
+
+	while (c2dev->hints_read != be16_to_cpu(*c2dev->hint_count)) {
+		mq_index = readl(c2dev->regs + PCI_BAR0_HOST_HINT);
+		if (mq_index & 0x80000000) {
+			break;
+		}
+
+		c2dev->hints_read++;
+		handle_mq(c2dev, mq_index);
+	}
+
+}
+
+/*
+ * Top level MQ handler
+ */
+static void handle_mq(struct c2_dev *c2dev, u32 mq_index)
+{
+	if (c2dev->qptr_array[mq_index] == NULL) {
+		pr_debug(KERN_INFO "handle_mq: stray activity for mq_index=%d\n",
+			mq_index);
+		return;
+	}
+
+	switch (mq_index) {
+	case (0):
+		/*
+		 * An index of 0 in the activity queue
+		 * indicates the req vq now has messages
+		 * available...
+		 *
+		 * Wake up any waiters waiting on req VQ
+		 * message availability.
+		 */
+		wake_up(&c2dev->req_vq_wo);
+		break;
+	case (1):
+		handle_vq(c2dev, mq_index);
+		break;
+	case (2):
+		/* We have to purge the VQ in case there are pending
+		 * accept reply requests that would result in the
+		 * generation of an ESTABLISHED event. If we don't
+		 * generate these first, a CLOSE event could end up
+		 * being delivered before the ESTABLISHED event.
+		 */
+		handle_vq(c2dev, 1);
+
+		c2_ae_event(c2dev, mq_index);
+		break;
+	default:
+		/* There is no event synchronization between CQ events
+		 * and AE or CM events. In fact, CQE could be
+		 * delivered for all of the I/O up to and including the
+		 * FLUSH for a peer disconenct prior to the ESTABLISHED
+		 * event being delivered to the app. The reason for this
+		 * is that CM events are delivered on a thread, while AE
+		 * and CM events are delivered on interrupt context.
+		 */
+		c2_cq_event(c2dev, mq_index);
+		break;
+	}
+
+	return;
+}
+
+/*
+ * Handles verbs WR replies.
+ */
+static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
+{
+	void *adapter_msg, *reply_msg;
+	struct c2wr_hdr *host_msg;
+	struct c2wr_hdr tmp;
+	struct c2_mq *reply_vq;
+	struct c2_vq_req *req;
+	struct iw_cm_event cm_event;
+	int err;
+
+	reply_vq = (struct c2_mq *) c2dev->qptr_array[mq_index];
+
+	/*
+	 * get next msg from mq_index into adapter_msg.
+	 * don't free it yet.
+	 */
+	adapter_msg = c2_mq_consume(reply_vq);
+	if (adapter_msg == NULL) {
+		return;
+	}
+
+	host_msg = vq_repbuf_alloc(c2dev);
+
+	/*
+	 * If we can't get a host buffer, then we'll still
+	 * wakeup the waiter, we just won't give him the msg.
+	 * It is assumed the waiter will deal with this...
+	 */
+	if (!host_msg) {
+		pr_debug("handle_vq: no repbufs!\n");
+
+		/*
+		 * just copy the WR header into a local variable.
+		 * this allows us to still demux on the context
+		 */
+		host_msg = &tmp;
+		memcpy(host_msg, adapter_msg, sizeof(tmp));
+		reply_msg = NULL;
+	} else {
+		memcpy(host_msg, adapter_msg, reply_vq->msg_size);
+		reply_msg = host_msg;
+	}
+
+	/*
+	 * consume the msg from the MQ
+	 */
+	c2_mq_free(reply_vq);
+
+	/*
+	 * wakeup the waiter.
+	 */
+	req = (struct c2_vq_req *) (unsigned long) host_msg->context;
+	if (req == NULL) {
+		/*
+		 * We should never get here, as the adapter should
+		 * never send us a reply that we're not expecting.
+		 */
+		vq_repbuf_free(c2dev, host_msg);
+		pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
+		return;
+	}
+
+	err = c2_errno(reply_msg);
+	if (!err) switch (req->event) {
+	case IW_CM_EVENT_ESTABLISHED:
+		c2_set_qp_state(req->qp,
+				C2_QP_STATE_RTS);
+	case IW_CM_EVENT_CLOSE:
+
+		/*
+		 * Move the QP to RTS if this is
+		 * the established event
+		 */
+		cm_event.event = req->event;
+		cm_event.status = 0;
+		cm_event.local_addr = req->cm_id->local_addr;
+		cm_event.remote_addr = req->cm_id->remote_addr;
+		cm_event.private_data = NULL;
+		cm_event.private_data_len = 0;
+		req->cm_id->event_handler(req->cm_id, &cm_event);
+		break;
+	default:
+		break;
+	}
+
+	req->reply_msg = (u64) (unsigned long) (reply_msg);
+	atomic_set(&req->reply_ready, 1);
+	wake_up(&req->wait_object);
+
+	/*
+	 * If the request was cancelled, then this put will
+	 * free the vq_req memory...and reply_msg!!!
+	 */
+	vq_req_put(c2dev, req);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_mm.c b/drivers/infiniband/hw/amso1100/c2_mm.c
new file mode 100644
index 00000000000..1e4f46493fc
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_mm.c
@@ -0,0 +1,375 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include "c2_vq.h"
+
+#define PBL_VIRT 1
+#define PBL_PHYS 2
+
+/*
+ * Send all the PBL messages to convey the remainder of the PBL
+ * Wait for the adapter's reply on the last one.
+ * This is indicated by setting the MEM_PBL_COMPLETE in the flags.
+ *
+ * NOTE:  vq_req is _not_ freed by this function.  The VQ Host
+ *	  Reply buffer _is_ freed by this function.
+ */
+static int
+send_pbl_messages(struct c2_dev *c2dev, u32 stag_index,
+		  unsigned long va, u32 pbl_depth,
+		  struct c2_vq_req *vq_req, int pbl_type)
+{
+	u32 pbe_count;		/* amt that fits in a PBL msg */
+	u32 count;		/* amt in this PBL MSG. */
+	struct c2wr_nsmr_pbl_req *wr;	/* PBL WR ptr */
+	struct c2wr_nsmr_pbl_rep *reply;	/* reply ptr */
+ 	int err, pbl_virt, pbl_index, i;
+
+	switch (pbl_type) {
+	case PBL_VIRT:
+		pbl_virt = 1;
+		break;
+	case PBL_PHYS:
+		pbl_virt = 0;
+		break;
+	default:
+		return -EINVAL;
+		break;
+	}
+
+	pbe_count = (c2dev->req_vq.msg_size -
+		     sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64);
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		return -ENOMEM;
+	}
+	c2_wr_set_id(wr, CCWR_NSMR_PBL);
+
+	/*
+	 * Only the last PBL message will generate a reply from the verbs,
+	 * so we set the context to 0 indicating there is no kernel verbs
+	 * handler blocked awaiting this reply.
+	 */
+	wr->hdr.context = 0;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->stag_index = stag_index;	/* already swapped */
+	wr->flags = 0;
+	pbl_index = 0;
+	while (pbl_depth) {
+		count = min(pbe_count, pbl_depth);
+		wr->addrs_length = cpu_to_be32(count);
+
+		/*
+		 *  If this is the last message, then reference the
+		 *  vq request struct cuz we're gonna wait for a reply.
+		 *  also make this PBL msg as the last one.
+		 */
+		if (count == pbl_depth) {
+			/*
+			 * reference the request struct.  dereferenced in the
+			 * int handler.
+			 */
+			vq_req_get(c2dev, vq_req);
+			wr->flags = cpu_to_be32(MEM_PBL_COMPLETE);
+
+			/*
+			 * This is the last PBL message.
+			 * Set the context to our VQ Request Object so we can
+			 * wait for the reply.
+			 */
+			wr->hdr.context = (unsigned long) vq_req;
+		}
+
+		/*
+		 * If pbl_virt is set then va is a virtual address
+		 * that describes a virtually contiguous memory
+		 * allocation. The wr needs the start of each virtual page
+		 * to be converted to the corresponding physical address
+		 * of the page. If pbl_virt is not set then va is an array
+		 * of physical addresses and there is no conversion to do.
+		 * Just fill in the wr with what is in the array.
+		 */
+		for (i = 0; i < count; i++) {
+			if (pbl_virt) {
+				va += PAGE_SIZE;
+			} else {
+ 				wr->paddrs[i] =
+				    cpu_to_be64(((u64 *)va)[pbl_index + i]);
+			}
+		}
+
+		/*
+		 * Send WR to adapter
+		 */
+		err = vq_send_wr(c2dev, (union c2wr *) wr);
+		if (err) {
+			if (count <= pbe_count) {
+				vq_req_put(c2dev, vq_req);
+			}
+			goto bail0;
+		}
+		pbl_depth -= count;
+		pbl_index += count;
+	}
+
+	/*
+	 *  Now wait for the reply...
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	/*
+	 * Process reply
+	 */
+	reply = (struct c2wr_nsmr_pbl_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	kfree(wr);
+	return err;
+}
+
+#define C2_PBL_MAX_DEPTH 131072
+int
+c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
+ 			   int page_size, int pbl_depth, u32 length,
+ 			   u32 offset, u64 *va, enum c2_acf acf,
+			   struct c2_mr *mr)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_nsmr_register_req *wr;
+	struct c2wr_nsmr_register_rep *reply;
+	u16 flags;
+	int i, pbe_count, count;
+	int err;
+
+	if (!va || !length || !addr_list || !pbl_depth)
+		return -EINTR;
+
+	/*
+	 * Verify PBL depth is within rnic max
+	 */
+	if (pbl_depth > C2_PBL_MAX_DEPTH) {
+		return -EINTR;
+	}
+
+	/*
+	 * allocate verbs request object
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	/*
+	 * build the WR
+	 */
+	c2_wr_set_id(wr, CCWR_NSMR_REGISTER);
+	wr->hdr.context = (unsigned long) vq_req;
+	wr->rnic_handle = c2dev->adapter_handle;
+
+	flags = (acf | MEM_VA_BASED | MEM_REMOTE);
+
+	/*
+	 * compute how many pbes can fit in the message
+	 */
+	pbe_count = (c2dev->req_vq.msg_size -
+		     sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64);
+
+	if (pbl_depth <= pbe_count) {
+		flags |= MEM_PBL_COMPLETE;
+	}
+	wr->flags = cpu_to_be16(flags);
+	wr->stag_key = 0;	//stag_key;
+	wr->va = cpu_to_be64(*va);
+	wr->pd_id = mr->pd->pd_id;
+	wr->pbe_size = cpu_to_be32(page_size);
+	wr->length = cpu_to_be32(length);
+	wr->pbl_depth = cpu_to_be32(pbl_depth);
+	wr->fbo = cpu_to_be32(offset);
+	count = min(pbl_depth, pbe_count);
+	wr->addrs_length = cpu_to_be32(count);
+
+	/*
+	 * fill out the PBL for this message
+	 */
+	for (i = 0; i < count; i++) {
+		wr->paddrs[i] = cpu_to_be64(addr_list[i]);
+	}
+
+	/*
+	 * regerence the request struct
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * send the WR to the adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	/*
+	 * wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail1;
+	}
+
+	/*
+	 * process reply
+	 */
+	reply =
+	    (struct c2wr_nsmr_register_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+	if ((err = c2_errno(reply))) {
+		goto bail2;
+	}
+	//*p_pb_entries = be32_to_cpu(reply->pbl_depth);
+	mr->ibmr.lkey = mr->ibmr.rkey = be32_to_cpu(reply->stag_index);
+	vq_repbuf_free(c2dev, reply);
+
+	/*
+	 * if there are still more PBEs we need to send them to
+	 * the adapter and wait for a reply on the final one.
+	 * reuse vq_req for this purpose.
+	 */
+	pbl_depth -= count;
+	if (pbl_depth) {
+
+		vq_req->reply_msg = (unsigned long) NULL;
+		atomic_set(&vq_req->reply_ready, 0);
+		err = send_pbl_messages(c2dev,
+					cpu_to_be32(mr->ibmr.lkey),
+					(unsigned long) &addr_list[i],
+					pbl_depth, vq_req, PBL_PHYS);
+		if (err) {
+			goto bail1;
+		}
+	}
+
+	vq_req_free(c2dev, vq_req);
+	kfree(wr);
+
+	return err;
+
+      bail2:
+	vq_repbuf_free(c2dev, reply);
+      bail1:
+	kfree(wr);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index)
+{
+	struct c2_vq_req *vq_req;	/* verbs request object */
+	struct c2wr_stag_dealloc_req wr;	/* work request */
+	struct c2wr_stag_dealloc_rep *reply;	/* WR reply  */
+	int err;
+
+
+	/*
+	 * allocate verbs request object
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		return -ENOMEM;
+	}
+
+	/*
+	 * Build the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_STAG_DEALLOC);
+	wr.hdr.context = (u64) (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.stag_index = cpu_to_be32(stag_index);
+
+	/*
+	 * reference the request struct.  dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	/*
+	 * Process reply
+	 */
+	reply = (struct c2wr_stag_dealloc_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_mq.c b/drivers/infiniband/hw/amso1100/c2_mq.c
new file mode 100644
index 00000000000..b88a7559210
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_mq.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include "c2_mq.h"
+
+void *c2_mq_alloc(struct c2_mq *q)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+
+	if (c2_mq_full(q)) {
+		return NULL;
+	} else {
+#ifdef DEBUG
+		struct c2wr_hdr *m =
+		    (struct c2wr_hdr *) (q->msg_pool.host + q->priv * q->msg_size);
+#ifdef CCMSGMAGIC
+		BUG_ON(m->magic != be32_to_cpu(~CCWR_MAGIC));
+		m->magic = cpu_to_be32(CCWR_MAGIC);
+#endif
+		return m;
+#else
+		return q->msg_pool.host + q->priv * q->msg_size;
+#endif
+	}
+}
+
+void c2_mq_produce(struct c2_mq *q)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+
+	if (!c2_mq_full(q)) {
+		q->priv = (q->priv + 1) % q->q_size;
+		q->hint_count++;
+		/* Update peer's offset. */
+		__raw_writew(cpu_to_be16(q->priv), &q->peer->shared);
+	}
+}
+
+void *c2_mq_consume(struct c2_mq *q)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_HOST_TARGET);
+
+	if (c2_mq_empty(q)) {
+		return NULL;
+	} else {
+#ifdef DEBUG
+		struct c2wr_hdr *m = (struct c2wr_hdr *)
+		    (q->msg_pool.host + q->priv * q->msg_size);
+#ifdef CCMSGMAGIC
+		BUG_ON(m->magic != be32_to_cpu(CCWR_MAGIC));
+#endif
+		return m;
+#else
+		return q->msg_pool.host + q->priv * q->msg_size;
+#endif
+	}
+}
+
+void c2_mq_free(struct c2_mq *q)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_HOST_TARGET);
+
+	if (!c2_mq_empty(q)) {
+
+#ifdef CCMSGMAGIC
+		{
+			struct c2wr_hdr __iomem *m = (struct c2wr_hdr __iomem *)
+			    (q->msg_pool.adapter + q->priv * q->msg_size);
+			__raw_writel(cpu_to_be32(~CCWR_MAGIC), &m->magic);
+		}
+#endif
+		q->priv = (q->priv + 1) % q->q_size;
+		/* Update peer's offset. */
+		__raw_writew(cpu_to_be16(q->priv), &q->peer->shared);
+	}
+}
+
+
+void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+
+	while (wqe_count--) {
+		BUG_ON(c2_mq_empty(q));
+		*q->shared = cpu_to_be16((be16_to_cpu(*q->shared)+1) % q->q_size);
+	}
+}
+
+#if 0
+u32 c2_mq_count(struct c2_mq *q)
+{
+	s32 count;
+
+	if (q->type == C2_MQ_HOST_TARGET)
+		count = be16_to_cpu(*q->shared) - q->priv;
+	else
+		count = q->priv - be16_to_cpu(*q->shared);
+
+	if (count < 0)
+		count += q->q_size;
+
+	return (u32) count;
+}
+#endif  /*  0  */
+
+void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+		    u8 __iomem *pool_start, u16 __iomem *peer, u32 type)
+{
+	BUG_ON(!q->shared);
+
+	/* This code assumes the byte swapping has already been done! */
+	q->index = index;
+	q->q_size = q_size;
+	q->msg_size = msg_size;
+	q->msg_pool.adapter = pool_start;
+	q->peer = (struct c2_mq_shared __iomem *) peer;
+	q->magic = C2_MQ_MAGIC;
+	q->type = type;
+	q->priv = 0;
+	q->hint_count = 0;
+	return;
+}
+void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+		    u8 *pool_start, u16 __iomem *peer, u32 type)
+{
+	BUG_ON(!q->shared);
+
+	/* This code assumes the byte swapping has already been done! */
+	q->index = index;
+	q->q_size = q_size;
+	q->msg_size = msg_size;
+	q->msg_pool.host = pool_start;
+	q->peer = (struct c2_mq_shared __iomem *) peer;
+	q->magic = C2_MQ_MAGIC;
+	q->type = type;
+	q->priv = 0;
+	q->hint_count = 0;
+	return;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_mq.h b/drivers/infiniband/hw/amso1100/c2_mq.h
new file mode 100644
index 00000000000..9185bbb2165
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_mq.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _C2_MQ_H_
+#define _C2_MQ_H_
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include "c2_wr.h"
+
+enum c2_shared_regs {
+
+	C2_SHARED_ARMED = 0x10,
+	C2_SHARED_NOTIFY = 0x18,
+	C2_SHARED_SHARED = 0x40,
+};
+
+struct c2_mq_shared {
+	u16 unused1;
+	u8 armed;
+	u8 notification_type;
+	u32 unused2;
+	u16 shared;
+	/* Pad to 64 bytes. */
+	u8 pad[64 - sizeof(u16) - 2 * sizeof(u8) - sizeof(u32) - sizeof(u16)];
+};
+
+enum c2_mq_type {
+	C2_MQ_HOST_TARGET = 1,
+	C2_MQ_ADAPTER_TARGET = 2,
+};
+
+/*
+ * c2_mq_t is for kernel-mode MQs like the VQs Cand the AEQ.
+ * c2_user_mq_t (which is the same format) is for user-mode MQs...
+ */
+#define C2_MQ_MAGIC 0x4d512020	/* 'MQ  ' */
+struct c2_mq {
+	u32 magic;
+	union {
+		u8 *host;
+		u8 __iomem *adapter;
+	} msg_pool;
+	dma_addr_t host_dma;
+	DECLARE_PCI_UNMAP_ADDR(mapping);
+	u16 hint_count;
+	u16 priv;
+	struct c2_mq_shared __iomem *peer;
+	u16 *shared;
+	dma_addr_t shared_dma;
+	u32 q_size;
+	u32 msg_size;
+	u32 index;
+	enum c2_mq_type type;
+};
+
+static __inline__ int c2_mq_empty(struct c2_mq *q)
+{
+	return q->priv == be16_to_cpu(*q->shared);
+}
+
+static __inline__ int c2_mq_full(struct c2_mq *q)
+{
+	return q->priv == (be16_to_cpu(*q->shared) + q->q_size - 1) % q->q_size;
+}
+
+extern void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count);
+extern void *c2_mq_alloc(struct c2_mq *q);
+extern void c2_mq_produce(struct c2_mq *q);
+extern void *c2_mq_consume(struct c2_mq *q);
+extern void c2_mq_free(struct c2_mq *q);
+extern void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+		       u8 __iomem *pool_start, u16 __iomem *peer, u32 type);
+extern void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+			   u8 *pool_start, u16 __iomem *peer, u32 type);
+
+#endif				/* _C2_MQ_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_pd.c b/drivers/infiniband/hw/amso1100/c2_pd.c
new file mode 100644
index 00000000000..00c709926c8
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_pd.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+
+#include "c2.h"
+#include "c2_provider.h"
+
+int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd)
+{
+	u32 obj;
+	int ret = 0;
+
+	spin_lock(&c2dev->pd_table.lock);
+	obj = find_next_zero_bit(c2dev->pd_table.table, c2dev->pd_table.max,
+				 c2dev->pd_table.last);
+	if (obj >= c2dev->pd_table.max)
+		obj = find_first_zero_bit(c2dev->pd_table.table,
+					  c2dev->pd_table.max);
+	if (obj < c2dev->pd_table.max) {
+		pd->pd_id = obj;
+		__set_bit(obj, c2dev->pd_table.table);
+		c2dev->pd_table.last = obj+1;
+		if (c2dev->pd_table.last >= c2dev->pd_table.max)
+			c2dev->pd_table.last = 0;
+	} else
+		ret = -ENOMEM;
+	spin_unlock(&c2dev->pd_table.lock);
+	return ret;
+}
+
+void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd)
+{
+	spin_lock(&c2dev->pd_table.lock);
+	__clear_bit(pd->pd_id, c2dev->pd_table.table);
+	spin_unlock(&c2dev->pd_table.lock);
+}
+
+int __devinit c2_init_pd_table(struct c2_dev *c2dev)
+{
+
+	c2dev->pd_table.last = 0;
+	c2dev->pd_table.max = c2dev->props.max_pd;
+	spin_lock_init(&c2dev->pd_table.lock);
+	c2dev->pd_table.table = kmalloc(BITS_TO_LONGS(c2dev->props.max_pd) *
+					sizeof(long), GFP_KERNEL);
+	if (!c2dev->pd_table.table)
+		return -ENOMEM;
+	bitmap_zero(c2dev->pd_table.table, c2dev->props.max_pd);
+	return 0;
+}
+
+void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev)
+{
+	kfree(c2dev->pd_table.table);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
new file mode 100644
index 00000000000..8fddc8cccdf
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -0,0 +1,869 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/if_arp.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+#include "c2.h"
+#include "c2_provider.h"
+#include "c2_user.h"
+
+static int c2_query_device(struct ib_device *ibdev,
+			   struct ib_device_attr *props)
+{
+	struct c2_dev *c2dev = to_c2dev(ibdev);
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	*props = c2dev->props;
+	return 0;
+}
+
+static int c2_query_port(struct ib_device *ibdev,
+			 u8 port, struct ib_port_attr *props)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	props->max_mtu = IB_MTU_4096;
+	props->lid = 0;
+	props->lmc = 0;
+	props->sm_lid = 0;
+	props->sm_sl = 0;
+	props->state = IB_PORT_ACTIVE;
+	props->phys_state = 0;
+	props->port_cap_flags =
+	    IB_PORT_CM_SUP |
+	    IB_PORT_REINIT_SUP |
+	    IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
+	props->gid_tbl_len = 1;
+	props->pkey_tbl_len = 1;
+	props->qkey_viol_cntr = 0;
+	props->active_width = 1;
+	props->active_speed = 1;
+
+	return 0;
+}
+
+static int c2_modify_port(struct ib_device *ibdev,
+			  u8 port, int port_modify_mask,
+			  struct ib_port_modify *props)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return 0;
+}
+
+static int c2_query_pkey(struct ib_device *ibdev,
+			 u8 port, u16 index, u16 * pkey)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	*pkey = 0;
+	return 0;
+}
+
+static int c2_query_gid(struct ib_device *ibdev, u8 port,
+			int index, union ib_gid *gid)
+{
+	struct c2_dev *c2dev = to_c2dev(ibdev);
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	memset(&(gid->raw[0]), 0, sizeof(gid->raw));
+	memcpy(&(gid->raw[0]), c2dev->pseudo_netdev->dev_addr, 6);
+
+	return 0;
+}
+
+/* Allocate the user context data structure. This keeps track
+ * of all objects associated with a particular user-mode client.
+ */
+static struct ib_ucontext *c2_alloc_ucontext(struct ib_device *ibdev,
+					     struct ib_udata *udata)
+{
+	struct c2_ucontext *context;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	context = kmalloc(sizeof(*context), GFP_KERNEL);
+	if (!context)
+		return ERR_PTR(-ENOMEM);
+
+	return &context->ibucontext;
+}
+
+static int c2_dealloc_ucontext(struct ib_ucontext *context)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	kfree(context);
+	return 0;
+}
+
+static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+static struct ib_pd *c2_alloc_pd(struct ib_device *ibdev,
+				 struct ib_ucontext *context,
+				 struct ib_udata *udata)
+{
+	struct c2_pd *pd;
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return ERR_PTR(-ENOMEM);
+
+	err = c2_pd_alloc(to_c2dev(ibdev), !context, pd);
+	if (err) {
+		kfree(pd);
+		return ERR_PTR(err);
+	}
+
+	if (context) {
+		if (ib_copy_to_udata(udata, &pd->pd_id, sizeof(__u32))) {
+			c2_pd_free(to_c2dev(ibdev), pd);
+			kfree(pd);
+			return ERR_PTR(-EFAULT);
+		}
+	}
+
+	return &pd->ibpd;
+}
+
+static int c2_dealloc_pd(struct ib_pd *pd)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	c2_pd_free(to_c2dev(pd->device), to_c2pd(pd));
+	kfree(pd);
+
+	return 0;
+}
+
+static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return ERR_PTR(-ENOSYS);
+}
+
+static int c2_ah_destroy(struct ib_ah *ah)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+static void c2_add_ref(struct ib_qp *ibqp)
+{
+	struct c2_qp *qp;
+	BUG_ON(!ibqp);
+	qp = to_c2qp(ibqp);
+	atomic_inc(&qp->refcount);
+}
+
+static void c2_rem_ref(struct ib_qp *ibqp)
+{
+	struct c2_qp *qp;
+	BUG_ON(!ibqp);
+	qp = to_c2qp(ibqp);
+	if (atomic_dec_and_test(&qp->refcount))
+		wake_up(&qp->wait);
+}
+
+struct ib_qp *c2_get_qp(struct ib_device *device, int qpn)
+{
+	struct c2_dev* c2dev = to_c2dev(device);
+	struct c2_qp *qp;
+
+	qp = c2_find_qpn(c2dev, qpn);
+	pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n",
+		__FUNCTION__, qp, qpn, device,
+		(qp?atomic_read(&qp->refcount):0));
+
+	return (qp?&qp->ibqp:NULL);
+}
+
+static struct ib_qp *c2_create_qp(struct ib_pd *pd,
+				  struct ib_qp_init_attr *init_attr,
+				  struct ib_udata *udata)
+{
+	struct c2_qp *qp;
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	switch (init_attr->qp_type) {
+	case IB_QPT_RC:
+		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+		if (!qp) {
+			pr_debug("%s: Unable to allocate QP\n", __FUNCTION__);
+			return ERR_PTR(-ENOMEM);
+		}
+		spin_lock_init(&qp->lock);
+		if (pd->uobject) {
+			/* userspace specific */
+		}
+
+		err = c2_alloc_qp(to_c2dev(pd->device),
+				  to_c2pd(pd), init_attr, qp);
+
+		if (err && pd->uobject) {
+			/* userspace specific */
+		}
+
+		break;
+	default:
+		pr_debug("%s: Invalid QP type: %d\n", __FUNCTION__,
+			init_attr->qp_type);
+		return ERR_PTR(-EINVAL);
+		break;
+	}
+
+	if (err) {
+		kfree(qp);
+		return ERR_PTR(err);
+	}
+
+	return &qp->ibqp;
+}
+
+static int c2_destroy_qp(struct ib_qp *ib_qp)
+{
+	struct c2_qp *qp = to_c2qp(ib_qp);
+
+	pr_debug("%s:%u qp=%p,qp->state=%d\n",
+		__FUNCTION__, __LINE__,ib_qp,qp->state);
+	c2_free_qp(to_c2dev(ib_qp->device), qp);
+	kfree(qp);
+	return 0;
+}
+
+static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries,
+				  struct ib_ucontext *context,
+				  struct ib_udata *udata)
+{
+	struct c2_cq *cq;
+	int err;
+
+	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
+	if (!cq) {
+		pr_debug("%s: Unable to allocate CQ\n", __FUNCTION__);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq);
+	if (err) {
+		pr_debug("%s: error initializing CQ\n", __FUNCTION__);
+		kfree(cq);
+		return ERR_PTR(err);
+	}
+
+	return &cq->ibcq;
+}
+
+static int c2_destroy_cq(struct ib_cq *ib_cq)
+{
+	struct c2_cq *cq = to_c2cq(ib_cq);
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	c2_free_cq(to_c2dev(ib_cq->device), cq);
+	kfree(cq);
+
+	return 0;
+}
+
+static inline u32 c2_convert_access(int acc)
+{
+	return (acc & IB_ACCESS_REMOTE_WRITE ? C2_ACF_REMOTE_WRITE : 0) |
+	    (acc & IB_ACCESS_REMOTE_READ ? C2_ACF_REMOTE_READ : 0) |
+	    (acc & IB_ACCESS_LOCAL_WRITE ? C2_ACF_LOCAL_WRITE : 0) |
+	    C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND;
+}
+
+static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
+				    struct ib_phys_buf *buffer_list,
+				    int num_phys_buf, int acc, u64 * iova_start)
+{
+	struct c2_mr *mr;
+	u64 *page_list;
+	u32 total_len;
+	int err, i, j, k, page_shift, pbl_depth;
+
+	pbl_depth = 0;
+	total_len = 0;
+
+	page_shift = PAGE_SHIFT;
+	/*
+	 * If there is only 1 buffer we assume this could
+	 * be a map of all phy mem...use a 32k page_shift.
+	 */
+	if (num_phys_buf == 1)
+		page_shift += 3;
+
+	for (i = 0; i < num_phys_buf; i++) {
+
+		if (buffer_list[i].addr & ~PAGE_MASK) {
+			pr_debug("Unaligned Memory Buffer: 0x%x\n",
+				(unsigned int) buffer_list[i].addr);
+			return ERR_PTR(-EINVAL);
+		}
+
+		if (!buffer_list[i].size) {
+			pr_debug("Invalid Buffer Size\n");
+			return ERR_PTR(-EINVAL);
+		}
+
+		total_len += buffer_list[i].size;
+		pbl_depth += ALIGN(buffer_list[i].size,
+				   (1 << page_shift)) >> page_shift;
+	}
+
+	page_list = vmalloc(sizeof(u64) * pbl_depth);
+	if (!page_list) {
+		pr_debug("couldn't vmalloc page_list of size %zd\n",
+			(sizeof(u64) * pbl_depth));
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (i = 0, j = 0; i < num_phys_buf; i++) {
+
+		int naddrs;
+
+ 		naddrs = ALIGN(buffer_list[i].size,
+			       (1 << page_shift)) >> page_shift;
+		for (k = 0; k < naddrs; k++)
+			page_list[j++] = (buffer_list[i].addr +
+						     (k << page_shift));
+	}
+
+	mr = kmalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	mr->pd = to_c2pd(ib_pd);
+	pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
+		"*iova_start %llx, first pa %llx, last pa %llx\n",
+		__FUNCTION__, page_shift, pbl_depth, total_len,
+		*iova_start, page_list[0], page_list[pbl_depth-1]);
+  	err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list,
+ 					 (1 << page_shift), pbl_depth,
+					 total_len, 0, iova_start,
+					 c2_convert_access(acc), mr);
+	vfree(page_list);
+	if (err) {
+		kfree(mr);
+		return ERR_PTR(err);
+	}
+
+	return &mr->ibmr;
+}
+
+static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
+{
+	struct ib_phys_buf bl;
+	u64 kva = 0;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	/* AMSO1100 limit */
+	bl.size = 0xffffffff;
+	bl.addr = 0;
+	return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
+}
+
+static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
+				    int acc, struct ib_udata *udata)
+{
+	u64 *pages;
+	u64 kva = 0;
+	int shift, n, len;
+	int i, j, k;
+	int err = 0;
+	struct ib_umem_chunk *chunk;
+	struct c2_pd *c2pd = to_c2pd(pd);
+	struct c2_mr *c2mr;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	shift = ffs(region->page_size) - 1;
+
+	c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
+	if (!c2mr)
+		return ERR_PTR(-ENOMEM);
+	c2mr->pd = c2pd;
+
+	n = 0;
+	list_for_each_entry(chunk, &region->chunk_list, list)
+		n += chunk->nents;
+
+	pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
+	if (!pages) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	i = 0;
+	list_for_each_entry(chunk, &region->chunk_list, list) {
+		for (j = 0; j < chunk->nmap; ++j) {
+			len = sg_dma_len(&chunk->page_list[j]) >> shift;
+			for (k = 0; k < len; ++k) {
+				pages[i++] =
+					sg_dma_address(&chunk->page_list[j]) +
+					(region->page_size * k);
+			}
+		}
+	}
+
+	kva = (u64)region->virt_base;
+  	err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
+					 pages,
+ 					 region->page_size,
+					 i,
+					 region->length,
+					 region->offset,
+					 &kva,
+					 c2_convert_access(acc),
+					 c2mr);
+	kfree(pages);
+	if (err) {
+		kfree(c2mr);
+		return ERR_PTR(err);
+	}
+	return &c2mr->ibmr;
+
+err:
+	kfree(c2mr);
+	return ERR_PTR(err);
+}
+
+static int c2_dereg_mr(struct ib_mr *ib_mr)
+{
+	struct c2_mr *mr = to_c2mr(ib_mr);
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
+	if (err)
+		pr_debug("c2_stag_dealloc failed: %d\n", err);
+	else
+		kfree(mr);
+
+	return err;
+}
+
+static ssize_t show_rev(struct class_device *cdev, char *buf)
+{
+	struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev);
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "%x\n", dev->props.hw_ver);
+}
+
+static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
+{
+	struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev);
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "%x.%x.%x\n",
+		       (int) (dev->props.fw_ver >> 32),
+		       (int) (dev->props.fw_ver >> 16) & 0xffff,
+		       (int) (dev->props.fw_ver & 0xffff));
+}
+
+static ssize_t show_hca(struct class_device *cdev, char *buf)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "AMSO1100\n");
+}
+
+static ssize_t show_board(struct class_device *cdev, char *buf)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID");
+}
+
+static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+
+static struct class_device_attribute *c2_class_attributes[] = {
+	&class_device_attr_hw_rev,
+	&class_device_attr_fw_ver,
+	&class_device_attr_hca_type,
+	&class_device_attr_board_id
+};
+
+static int c2_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+			int attr_mask, struct ib_udata *udata)
+{
+	int err;
+
+	err =
+	    c2_qp_modify(to_c2dev(ibqp->device), to_c2qp(ibqp), attr,
+			 attr_mask);
+
+	return err;
+}
+
+static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+static int c2_process_mad(struct ib_device *ibdev,
+			  int mad_flags,
+			  u8 port_num,
+			  struct ib_wc *in_wc,
+			  struct ib_grh *in_grh,
+			  struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	/* Request a connection */
+	return c2_llp_connect(cm_id, iw_param);
+}
+
+static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	/* Accept the new connection */
+	return c2_llp_accept(cm_id, iw_param);
+}
+
+static int c2_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	err = c2_llp_reject(cm_id, pdata, pdata_len);
+	return err;
+}
+
+static int c2_service_create(struct iw_cm_id *cm_id, int backlog)
+{
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	err = c2_llp_service_create(cm_id, backlog);
+	pr_debug("%s:%u err=%d\n",
+		__FUNCTION__, __LINE__,
+		err);
+	return err;
+}
+
+static int c2_service_destroy(struct iw_cm_id *cm_id)
+{
+	int err;
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	err = c2_llp_service_destroy(cm_id);
+
+	return err;
+}
+
+static int c2_pseudo_up(struct net_device *netdev)
+{
+	struct in_device *ind;
+	struct c2_dev *c2dev = netdev->priv;
+
+	ind = in_dev_get(netdev);
+	if (!ind)
+		return 0;
+
+	pr_debug("adding...\n");
+	for_ifa(ind) {
+#ifdef DEBUG
+		u8 *ip = (u8 *) & ifa->ifa_address;
+
+		pr_debug("%s: %d.%d.%d.%d\n",
+		       ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
+#endif
+		c2_add_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
+	}
+	endfor_ifa(ind);
+	in_dev_put(ind);
+
+	return 0;
+}
+
+static int c2_pseudo_down(struct net_device *netdev)
+{
+	struct in_device *ind;
+	struct c2_dev *c2dev = netdev->priv;
+
+	ind = in_dev_get(netdev);
+	if (!ind)
+		return 0;
+
+	pr_debug("deleting...\n");
+	for_ifa(ind) {
+#ifdef DEBUG
+		u8 *ip = (u8 *) & ifa->ifa_address;
+
+		pr_debug("%s: %d.%d.%d.%d\n",
+		       ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
+#endif
+		c2_del_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
+	}
+	endfor_ifa(ind);
+	in_dev_put(ind);
+
+	return 0;
+}
+
+static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	int ret = 0;
+
+	if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
+		return -EINVAL;
+
+	netdev->mtu = new_mtu;
+
+	/* TODO: Tell rnic about new rmda interface mtu */
+	return ret;
+}
+
+static void setup(struct net_device *netdev)
+{
+	SET_MODULE_OWNER(netdev);
+	netdev->open = c2_pseudo_up;
+	netdev->stop = c2_pseudo_down;
+	netdev->hard_start_xmit = c2_pseudo_xmit_frame;
+	netdev->get_stats = NULL;
+	netdev->tx_timeout = NULL;
+	netdev->set_mac_address = NULL;
+	netdev->change_mtu = c2_pseudo_change_mtu;
+	netdev->watchdog_timeo = 0;
+	netdev->type = ARPHRD_ETHER;
+	netdev->mtu = 1500;
+	netdev->hard_header_len = ETH_HLEN;
+	netdev->addr_len = ETH_ALEN;
+	netdev->tx_queue_len = 0;
+	netdev->flags |= IFF_NOARP;
+	return;
+}
+
+static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
+{
+	char name[IFNAMSIZ];
+	struct net_device *netdev;
+
+	/* change ethxxx to iwxxx */
+	strcpy(name, "iw");
+	strcat(name, &c2dev->netdev->name[3]);
+	netdev = alloc_netdev(sizeof(*netdev), name, setup);
+	if (!netdev) {
+		printk(KERN_ERR PFX "%s -  etherdev alloc failed",
+			__FUNCTION__);
+		return NULL;
+	}
+
+	netdev->priv = c2dev;
+
+	SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
+
+	memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
+
+	/* Print out the MAC address */
+	pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X\n",
+		netdev->name,
+		netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
+		netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]);
+
+#if 0
+	/* Disable network packets */
+	netif_stop_queue(netdev);
+#endif
+	return netdev;
+}
+
+int c2_register_device(struct c2_dev *dev)
+{
+	int ret;
+	int i;
+
+	/* Register pseudo network device */
+	dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
+	if (dev->pseudo_netdev) {
+		ret = register_netdev(dev->pseudo_netdev);
+		if (ret) {
+			printk(KERN_ERR PFX
+				"Unable to register netdev, ret = %d\n", ret);
+			free_netdev(dev->pseudo_netdev);
+			return ret;
+		}
+	}
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
+	dev->ibdev.owner = THIS_MODULE;
+	dev->ibdev.uverbs_cmd_mask =
+	    (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+	    (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_REG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+	    (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+	    (1ull << IB_USER_VERBS_CMD_POST_RECV);
+
+	dev->ibdev.node_type = RDMA_NODE_RNIC;
+	memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
+	memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
+	dev->ibdev.phys_port_cnt = 1;
+	dev->ibdev.dma_device = &dev->pcidev->dev;
+	dev->ibdev.class_dev.dev = &dev->pcidev->dev;
+	dev->ibdev.query_device = c2_query_device;
+	dev->ibdev.query_port = c2_query_port;
+	dev->ibdev.modify_port = c2_modify_port;
+	dev->ibdev.query_pkey = c2_query_pkey;
+	dev->ibdev.query_gid = c2_query_gid;
+	dev->ibdev.alloc_ucontext = c2_alloc_ucontext;
+	dev->ibdev.dealloc_ucontext = c2_dealloc_ucontext;
+	dev->ibdev.mmap = c2_mmap_uar;
+	dev->ibdev.alloc_pd = c2_alloc_pd;
+	dev->ibdev.dealloc_pd = c2_dealloc_pd;
+	dev->ibdev.create_ah = c2_ah_create;
+	dev->ibdev.destroy_ah = c2_ah_destroy;
+	dev->ibdev.create_qp = c2_create_qp;
+	dev->ibdev.modify_qp = c2_modify_qp;
+	dev->ibdev.destroy_qp = c2_destroy_qp;
+	dev->ibdev.create_cq = c2_create_cq;
+	dev->ibdev.destroy_cq = c2_destroy_cq;
+	dev->ibdev.poll_cq = c2_poll_cq;
+	dev->ibdev.get_dma_mr = c2_get_dma_mr;
+	dev->ibdev.reg_phys_mr = c2_reg_phys_mr;
+	dev->ibdev.reg_user_mr = c2_reg_user_mr;
+	dev->ibdev.dereg_mr = c2_dereg_mr;
+
+	dev->ibdev.alloc_fmr = NULL;
+	dev->ibdev.unmap_fmr = NULL;
+	dev->ibdev.dealloc_fmr = NULL;
+	dev->ibdev.map_phys_fmr = NULL;
+
+	dev->ibdev.attach_mcast = c2_multicast_attach;
+	dev->ibdev.detach_mcast = c2_multicast_detach;
+	dev->ibdev.process_mad = c2_process_mad;
+
+	dev->ibdev.req_notify_cq = c2_arm_cq;
+	dev->ibdev.post_send = c2_post_send;
+	dev->ibdev.post_recv = c2_post_receive;
+
+	dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
+	dev->ibdev.iwcm->add_ref = c2_add_ref;
+	dev->ibdev.iwcm->rem_ref = c2_rem_ref;
+	dev->ibdev.iwcm->get_qp = c2_get_qp;
+	dev->ibdev.iwcm->connect = c2_connect;
+	dev->ibdev.iwcm->accept = c2_accept;
+	dev->ibdev.iwcm->reject = c2_reject;
+	dev->ibdev.iwcm->create_listen = c2_service_create;
+	dev->ibdev.iwcm->destroy_listen = c2_service_destroy;
+
+	ret = ib_register_device(&dev->ibdev);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(c2_class_attributes); ++i) {
+		ret = class_device_create_file(&dev->ibdev.class_dev,
+					       c2_class_attributes[i]);
+		if (ret) {
+			unregister_netdev(dev->pseudo_netdev);
+			free_netdev(dev->pseudo_netdev);
+			ib_unregister_device(&dev->ibdev);
+			return ret;
+		}
+	}
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return 0;
+}
+
+void c2_unregister_device(struct c2_dev *dev)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	unregister_netdev(dev->pseudo_netdev);
+	free_netdev(dev->pseudo_netdev);
+	ib_unregister_device(&dev->ibdev);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.h b/drivers/infiniband/hw/amso1100/c2_provider.h
new file mode 100644
index 00000000000..fc906223220
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_provider.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef C2_PROVIDER_H
+#define C2_PROVIDER_H
+#include <linux/inetdevice.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+
+#include "c2_mq.h"
+#include <rdma/iw_cm.h>
+
+#define C2_MPT_FLAG_ATOMIC        (1 << 14)
+#define C2_MPT_FLAG_REMOTE_WRITE  (1 << 13)
+#define C2_MPT_FLAG_REMOTE_READ   (1 << 12)
+#define C2_MPT_FLAG_LOCAL_WRITE   (1 << 11)
+#define C2_MPT_FLAG_LOCAL_READ    (1 << 10)
+
+struct c2_buf_list {
+	void *buf;
+	 DECLARE_PCI_UNMAP_ADDR(mapping)
+};
+
+
+/* The user context keeps track of objects allocated for a
+ * particular user-mode client. */
+struct c2_ucontext {
+	struct ib_ucontext ibucontext;
+};
+
+struct c2_mtt;
+
+/* All objects associated with a PD are kept in the
+ * associated user context if present.
+ */
+struct c2_pd {
+	struct ib_pd ibpd;
+	u32 pd_id;
+};
+
+struct c2_mr {
+	struct ib_mr ibmr;
+	struct c2_pd *pd;
+};
+
+struct c2_av;
+
+enum c2_ah_type {
+	C2_AH_ON_HCA,
+	C2_AH_PCI_POOL,
+	C2_AH_KMALLOC
+};
+
+struct c2_ah {
+	struct ib_ah ibah;
+};
+
+struct c2_cq {
+	struct ib_cq ibcq;
+	spinlock_t lock;
+	atomic_t refcount;
+	int cqn;
+	int is_kernel;
+	wait_queue_head_t wait;
+
+	u32 adapter_handle;
+	struct c2_mq mq;
+};
+
+struct c2_wq {
+	spinlock_t lock;
+};
+struct iw_cm_id;
+struct c2_qp {
+	struct ib_qp ibqp;
+	struct iw_cm_id *cm_id;
+	spinlock_t lock;
+	atomic_t refcount;
+	wait_queue_head_t wait;
+	int qpn;
+
+	u32 adapter_handle;
+	u32 send_sgl_depth;
+	u32 recv_sgl_depth;
+	u32 rdma_write_sgl_depth;
+	u8 state;
+
+	struct c2_mq sq_mq;
+	struct c2_mq rq_mq;
+};
+
+struct c2_cr_query_attrs {
+	u32 local_addr;
+	u32 remote_addr;
+	u16 local_port;
+	u16 remote_port;
+};
+
+static inline struct c2_pd *to_c2pd(struct ib_pd *ibpd)
+{
+	return container_of(ibpd, struct c2_pd, ibpd);
+}
+
+static inline struct c2_ucontext *to_c2ucontext(struct ib_ucontext *ibucontext)
+{
+	return container_of(ibucontext, struct c2_ucontext, ibucontext);
+}
+
+static inline struct c2_mr *to_c2mr(struct ib_mr *ibmr)
+{
+	return container_of(ibmr, struct c2_mr, ibmr);
+}
+
+
+static inline struct c2_ah *to_c2ah(struct ib_ah *ibah)
+{
+	return container_of(ibah, struct c2_ah, ibah);
+}
+
+static inline struct c2_cq *to_c2cq(struct ib_cq *ibcq)
+{
+	return container_of(ibcq, struct c2_cq, ibcq);
+}
+
+static inline struct c2_qp *to_c2qp(struct ib_qp *ibqp)
+{
+	return container_of(ibqp, struct c2_qp, ibqp);
+}
+
+static inline int is_rnic_addr(struct net_device *netdev, u32 addr)
+{
+	struct in_device *ind;
+	int ret = 0;
+
+	ind = in_dev_get(netdev);
+	if (!ind)
+		return 0;
+
+	for_ifa(ind) {
+		if (ifa->ifa_address == addr) {
+			ret = 1;
+			break;
+		}
+	}
+	endfor_ifa(ind);
+	in_dev_put(ind);
+	return ret;
+}
+#endif				/* C2_PROVIDER_H */
diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c
new file mode 100644
index 00000000000..12261132b07
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_qp.c
@@ -0,0 +1,975 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include "c2.h"
+#include "c2_vq.h"
+#include "c2_status.h"
+
+#define C2_MAX_ORD_PER_QP 128
+#define C2_MAX_IRD_PER_QP 128
+
+#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
+#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
+#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
+
+#define NO_SUPPORT -1
+static const u8 c2_opcode[] = {
+	[IB_WR_SEND] = C2_WR_TYPE_SEND,
+	[IB_WR_SEND_WITH_IMM] = NO_SUPPORT,
+	[IB_WR_RDMA_WRITE] = C2_WR_TYPE_RDMA_WRITE,
+	[IB_WR_RDMA_WRITE_WITH_IMM] = NO_SUPPORT,
+	[IB_WR_RDMA_READ] = C2_WR_TYPE_RDMA_READ,
+	[IB_WR_ATOMIC_CMP_AND_SWP] = NO_SUPPORT,
+	[IB_WR_ATOMIC_FETCH_AND_ADD] = NO_SUPPORT,
+};
+
+static int to_c2_state(enum ib_qp_state ib_state)
+{
+	switch (ib_state) {
+	case IB_QPS_RESET:
+		return C2_QP_STATE_IDLE;
+	case IB_QPS_RTS:
+		return C2_QP_STATE_RTS;
+	case IB_QPS_SQD:
+		return C2_QP_STATE_CLOSING;
+	case IB_QPS_SQE:
+		return C2_QP_STATE_CLOSING;
+	case IB_QPS_ERR:
+		return C2_QP_STATE_ERROR;
+	default:
+		return -1;
+	}
+}
+
+static int to_ib_state(enum c2_qp_state c2_state)
+{
+	switch (c2_state) {
+	case C2_QP_STATE_IDLE:
+		return IB_QPS_RESET;
+	case C2_QP_STATE_CONNECTING:
+		return IB_QPS_RTR;
+	case C2_QP_STATE_RTS:
+		return IB_QPS_RTS;
+	case C2_QP_STATE_CLOSING:
+		return IB_QPS_SQD;
+	case C2_QP_STATE_ERROR:
+		return IB_QPS_ERR;
+	case C2_QP_STATE_TERMINATE:
+		return IB_QPS_SQE;
+	default:
+		return -1;
+	}
+}
+
+static const char *to_ib_state_str(int ib_state)
+{
+	static const char *state_str[] = {
+		"IB_QPS_RESET",
+		"IB_QPS_INIT",
+		"IB_QPS_RTR",
+		"IB_QPS_RTS",
+		"IB_QPS_SQD",
+		"IB_QPS_SQE",
+		"IB_QPS_ERR"
+	};
+	if (ib_state < IB_QPS_RESET ||
+	    ib_state > IB_QPS_ERR)
+		return "<invalid IB QP state>";
+
+	ib_state -= IB_QPS_RESET;
+	return state_str[ib_state];
+}
+
+void c2_set_qp_state(struct c2_qp *qp, int c2_state)
+{
+	int new_state = to_ib_state(c2_state);
+
+	pr_debug("%s: qp[%p] state modify %s --> %s\n",
+	       __FUNCTION__,
+		qp,
+		to_ib_state_str(qp->state),
+		to_ib_state_str(new_state));
+	qp->state = new_state;
+}
+
+#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
+
+int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
+		 struct ib_qp_attr *attr, int attr_mask)
+{
+	struct c2wr_qp_modify_req wr;
+	struct c2wr_qp_modify_rep *reply;
+	struct c2_vq_req *vq_req;
+	unsigned long flags;
+	u8 next_state;
+	int err;
+
+	pr_debug("%s:%d qp=%p, %s --> %s\n",
+		__FUNCTION__, __LINE__,
+		qp,
+		to_ib_state_str(qp->state),
+		to_ib_state_str(attr->qp_state));
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	c2_wr_set_id(&wr, CCWR_QP_MODIFY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.qp_handle = qp->adapter_handle;
+	wr.ord = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.ird = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+
+	if (attr_mask & IB_QP_STATE) {
+		/* Ensure the state is valid */
+		if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR)
+			return -EINVAL;
+
+		wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state));
+
+		if (attr->qp_state == IB_QPS_ERR) {
+			spin_lock_irqsave(&qp->lock, flags);
+			if (qp->cm_id && qp->state == IB_QPS_RTS) {
+				pr_debug("Generating CLOSE event for QP-->ERR, "
+					"qp=%p, cm_id=%p\n",qp,qp->cm_id);
+				/* Generate an CLOSE event */
+				vq_req->cm_id = qp->cm_id;
+				vq_req->event = IW_CM_EVENT_CLOSE;
+			}
+			spin_unlock_irqrestore(&qp->lock, flags);
+		}
+		next_state =  attr->qp_state;
+
+	} else if (attr_mask & IB_QP_CUR_STATE) {
+
+		if (attr->cur_qp_state != IB_QPS_RTR &&
+		    attr->cur_qp_state != IB_QPS_RTS &&
+		    attr->cur_qp_state != IB_QPS_SQD &&
+		    attr->cur_qp_state != IB_QPS_SQE)
+			return -EINVAL;
+		else
+			wr.next_qp_state =
+			    cpu_to_be32(to_c2_state(attr->cur_qp_state));
+
+		next_state = attr->cur_qp_state;
+
+	} else {
+		err = 0;
+		goto bail0;
+	}
+
+	/* reference the request struct */
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	reply = (struct c2wr_qp_modify_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+	if (!err)
+		qp->state = next_state;
+#ifdef DEBUG
+	else
+		pr_debug("%s: c2_errno=%d\n", __FUNCTION__, err);
+#endif
+	/*
+	 * If we're going to error and generating the event here, then
+	 * we need to remove the reference because there will be no
+	 * close event generated by the adapter
+	*/
+	spin_lock_irqsave(&qp->lock, flags);
+	if (vq_req->event==IW_CM_EVENT_CLOSE && qp->cm_id) {
+		qp->cm_id->rem_ref(qp->cm_id);
+		qp->cm_id = NULL;
+	}
+	spin_unlock_irqrestore(&qp->lock, flags);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+
+	pr_debug("%s:%d qp=%p, cur_state=%s\n",
+		__FUNCTION__, __LINE__,
+		qp,
+		to_ib_state_str(qp->state));
+	return err;
+}
+
+int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
+			  int ord, int ird)
+{
+	struct c2wr_qp_modify_req wr;
+	struct c2wr_qp_modify_rep *reply;
+	struct c2_vq_req *vq_req;
+	int err;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	c2_wr_set_id(&wr, CCWR_QP_MODIFY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.qp_handle = qp->adapter_handle;
+	wr.ord = cpu_to_be32(ord);
+	wr.ird = cpu_to_be32(ird);
+	wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.next_qp_state = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+
+	/* reference the request struct */
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	reply = (struct c2wr_qp_modify_rep *) (unsigned long)
+		vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+static int destroy_qp(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_qp_destroy_req wr;
+	struct c2wr_qp_destroy_rep *reply;
+	unsigned long flags;
+	int err;
+
+	/*
+	 * Allocate a verb request message
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		return -ENOMEM;
+	}
+
+	/*
+	 * Initialize the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_QP_DESTROY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.qp_handle = qp->adapter_handle;
+
+	/*
+	 * reference the request struct.  dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	spin_lock_irqsave(&qp->lock, flags);
+	if (qp->cm_id && qp->state == IB_QPS_RTS) {
+		pr_debug("destroy_qp: generating CLOSE event for QP-->ERR, "
+			"qp=%p, cm_id=%p\n",qp,qp->cm_id);
+		/* Generate an CLOSE event */
+		vq_req->qp = qp;
+		vq_req->cm_id = qp->cm_id;
+		vq_req->event = IW_CM_EVENT_CLOSE;
+	}
+	spin_unlock_irqrestore(&qp->lock, flags);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	/*
+	 * Process reply
+	 */
+	reply = (struct c2wr_qp_destroy_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	spin_lock_irqsave(&qp->lock, flags);
+	if (qp->cm_id) {
+		qp->cm_id->rem_ref(qp->cm_id);
+		qp->cm_id = NULL;
+	}
+	spin_unlock_irqrestore(&qp->lock, flags);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+	int ret;
+
+        do {
+		spin_lock_irq(&c2dev->qp_table.lock);
+		ret = idr_get_new_above(&c2dev->qp_table.idr, qp,
+					c2dev->qp_table.last++, &qp->qpn);
+		spin_unlock_irq(&c2dev->qp_table.lock);
+        } while ((ret == -EAGAIN) &&
+	 	 idr_pre_get(&c2dev->qp_table.idr, GFP_KERNEL));
+	return ret;
+}
+
+static void c2_free_qpn(struct c2_dev *c2dev, int qpn)
+{
+	spin_lock_irq(&c2dev->qp_table.lock);
+	idr_remove(&c2dev->qp_table.idr, qpn);
+	spin_unlock_irq(&c2dev->qp_table.lock);
+}
+
+struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn)
+{
+	unsigned long flags;
+	struct c2_qp *qp;
+
+	spin_lock_irqsave(&c2dev->qp_table.lock, flags);
+	qp = idr_find(&c2dev->qp_table.idr, qpn);
+	spin_unlock_irqrestore(&c2dev->qp_table.lock, flags);
+	return qp;
+}
+
+int c2_alloc_qp(struct c2_dev *c2dev,
+		struct c2_pd *pd,
+		struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp)
+{
+	struct c2wr_qp_create_req wr;
+	struct c2wr_qp_create_rep *reply;
+	struct c2_vq_req *vq_req;
+	struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq);
+	struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq);
+	unsigned long peer_pa;
+	u32 q_size, msg_size, mmap_size;
+	void __iomem *mmap;
+	int err;
+
+	err = c2_alloc_qpn(c2dev, qp);
+	if (err)
+		return err;
+	qp->ibqp.qp_num = qp->qpn;
+	qp->ibqp.qp_type = IB_QPT_RC;
+
+	/* Allocate the SQ and RQ shared pointers */
+	qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					 &qp->sq_mq.shared_dma, GFP_KERNEL);
+	if (!qp->sq_mq.shared) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					 &qp->rq_mq.shared_dma, GFP_KERNEL);
+	if (!qp->rq_mq.shared) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	/* Allocate the verbs request */
+	vq_req = vq_req_alloc(c2dev);
+	if (vq_req == NULL) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+
+	/* Initialize the work request */
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_QP_CREATE);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.sq_cq_handle = send_cq->adapter_handle;
+	wr.rq_cq_handle = recv_cq->adapter_handle;
+	wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1);
+	wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1);
+	wr.srq_handle = 0;
+	wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND |
+			       QP_ZERO_STAG | QP_RDMA_READ_RESPONSE);
+	wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
+	wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge);
+	wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
+	wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma);
+	wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma);
+	wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP);
+	wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP);
+	wr.pd_id = pd->pd_id;
+	wr.user_context = (unsigned long) qp;
+
+	vq_req_get(c2dev, vq_req);
+
+	/* Send the WR to the adapter */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail3;
+	}
+
+	/* Wait for the verb reply  */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail3;
+	}
+
+	/* Process the reply */
+	reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail3;
+	}
+
+	if ((err = c2_wr_get_result(reply)) != 0) {
+		goto bail4;
+	}
+
+	/* Fill in the kernel QP struct */
+	atomic_set(&qp->refcount, 1);
+	qp->adapter_handle = reply->qp_handle;
+	qp->state = IB_QPS_RESET;
+	qp->send_sgl_depth = qp_attrs->cap.max_send_sge;
+	qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge;
+	qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge;
+
+	/* Initialize the SQ MQ */
+	q_size = be32_to_cpu(reply->sq_depth);
+	msg_size = be32_to_cpu(reply->sq_msg_size);
+	peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start);
+	mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
+	mmap = ioremap_nocache(peer_pa, mmap_size);
+	if (!mmap) {
+		err = -ENOMEM;
+		goto bail5;
+	}
+
+	c2_mq_req_init(&qp->sq_mq,
+		       be32_to_cpu(reply->sq_mq_index),
+		       q_size,
+		       msg_size,
+		       mmap + sizeof(struct c2_mq_shared),	/* pool start */
+		       mmap,				/* peer */
+		       C2_MQ_ADAPTER_TARGET);
+
+	/* Initialize the RQ mq */
+	q_size = be32_to_cpu(reply->rq_depth);
+	msg_size = be32_to_cpu(reply->rq_msg_size);
+	peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start);
+	mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
+	mmap = ioremap_nocache(peer_pa, mmap_size);
+	if (!mmap) {
+		err = -ENOMEM;
+		goto bail6;
+	}
+
+	c2_mq_req_init(&qp->rq_mq,
+		       be32_to_cpu(reply->rq_mq_index),
+		       q_size,
+		       msg_size,
+		       mmap + sizeof(struct c2_mq_shared),	/* pool start */
+		       mmap,				/* peer */
+		       C2_MQ_ADAPTER_TARGET);
+
+	vq_repbuf_free(c2dev, reply);
+	vq_req_free(c2dev, vq_req);
+
+	return 0;
+
+      bail6:
+	iounmap(qp->sq_mq.peer);
+      bail5:
+	destroy_qp(c2dev, qp);
+      bail4:
+	vq_repbuf_free(c2dev, reply);
+      bail3:
+	vq_req_free(c2dev, vq_req);
+      bail2:
+	c2_free_mqsp(qp->rq_mq.shared);
+      bail1:
+	c2_free_mqsp(qp->sq_mq.shared);
+      bail0:
+	c2_free_qpn(c2dev, qp->qpn);
+	return err;
+}
+
+void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+	struct c2_cq *send_cq;
+	struct c2_cq *recv_cq;
+
+	send_cq = to_c2cq(qp->ibqp.send_cq);
+	recv_cq = to_c2cq(qp->ibqp.recv_cq);
+
+	/*
+	 * Lock CQs here, so that CQ polling code can do QP lookup
+	 * without taking a lock.
+	 */
+	spin_lock_irq(&send_cq->lock);
+	if (send_cq != recv_cq)
+		spin_lock(&recv_cq->lock);
+
+	c2_free_qpn(c2dev, qp->qpn);
+
+	if (send_cq != recv_cq)
+		spin_unlock(&recv_cq->lock);
+	spin_unlock_irq(&send_cq->lock);
+
+	/*
+	 * Destory qp in the rnic...
+	 */
+	destroy_qp(c2dev, qp);
+
+	/*
+	 * Mark any unreaped CQEs as null and void.
+	 */
+	c2_cq_clean(c2dev, qp, send_cq->cqn);
+	if (send_cq != recv_cq)
+		c2_cq_clean(c2dev, qp, recv_cq->cqn);
+	/*
+	 * Unmap the MQs and return the shared pointers
+	 * to the message pool.
+	 */
+	iounmap(qp->sq_mq.peer);
+	iounmap(qp->rq_mq.peer);
+	c2_free_mqsp(qp->sq_mq.shared);
+	c2_free_mqsp(qp->rq_mq.shared);
+
+	atomic_dec(&qp->refcount);
+	wait_event(qp->wait, !atomic_read(&qp->refcount));
+}
+
+/*
+ * Function: move_sgl
+ *
+ * Description:
+ * Move an SGL from the user's work request struct into a CCIL Work Request
+ * message, swapping to WR byte order and ensure the total length doesn't
+ * overflow.
+ *
+ * IN:
+ * dst		- ptr to CCIL Work Request message SGL memory.
+ * src		- ptr to the consumers SGL memory.
+ *
+ * OUT: none
+ *
+ * Return:
+ * CCIL status codes.
+ */
+static int
+move_sgl(struct c2_data_addr * dst, struct ib_sge *src, int count, u32 * p_len,
+	 u8 * actual_count)
+{
+	u32 tot = 0;		/* running total */
+	u8 acount = 0;		/* running total non-0 len sge's */
+
+	while (count > 0) {
+		/*
+		 * If the addition of this SGE causes the
+		 * total SGL length to exceed 2^32-1, then
+		 * fail-n-bail.
+		 *
+		 * If the current total plus the next element length
+		 * wraps, then it will go negative and be less than the
+		 * current total...
+		 */
+		if ((tot + src->length) < tot) {
+			return -EINVAL;
+		}
+		/*
+		 * Bug: 1456 (as well as 1498 & 1643)
+		 * Skip over any sge's supplied with len=0
+		 */
+		if (src->length) {
+			tot += src->length;
+			dst->stag = cpu_to_be32(src->lkey);
+			dst->to = cpu_to_be64(src->addr);
+			dst->length = cpu_to_be32(src->length);
+			dst++;
+			acount++;
+		}
+		src++;
+		count--;
+	}
+
+	if (acount == 0) {
+		/*
+		 * Bug: 1476 (as well as 1498, 1456 and 1643)
+		 * Setup the SGL in the WR to make it easier for the RNIC.
+		 * This way, the FW doesn't have to deal with special cases.
+		 * Setting length=0 should be sufficient.
+		 */
+		dst->stag = 0;
+		dst->to = 0;
+		dst->length = 0;
+	}
+
+	*p_len = tot;
+	*actual_count = acount;
+	return 0;
+}
+
+/*
+ * Function: c2_activity (private function)
+ *
+ * Description:
+ * Post an mq index to the host->adapter activity fifo.
+ *
+ * IN:
+ * c2dev	- ptr to c2dev structure
+ * mq_index	- mq index to post
+ * shared	- value most recently written to shared
+ *
+ * OUT:
+ *
+ * Return:
+ * none
+ */
+static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared)
+{
+	/*
+	 * First read the register to see if the FIFO is full, and if so,
+	 * spin until it's not.  This isn't perfect -- there is no
+	 * synchronization among the clients of the register, but in
+	 * practice it prevents multiple CPU from hammering the bus
+	 * with PCI RETRY. Note that when this does happen, the card
+	 * cannot get on the bus and the card and system hang in a
+	 * deadlock -- thus the need for this code. [TOT]
+	 */
+	while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(0);
+	}
+
+	__raw_writel(C2_HINT_MAKE(mq_index, shared),
+		     c2dev->regs + PCI_BAR0_ADAPTER_HINT);
+}
+
+/*
+ * Function: qp_wr_post
+ *
+ * Description:
+ * This in-line function allocates a MQ msg, then moves the host-copy of
+ * the completed WR into msg.  Then it posts the message.
+ *
+ * IN:
+ * q		- ptr to user MQ.
+ * wr		- ptr to host-copy of the WR.
+ * qp		- ptr to user qp
+ * size		- Number of bytes to post.  Assumed to be divisible by 4.
+ *
+ * OUT: none
+ *
+ * Return:
+ * CCIL status codes.
+ */
+static int qp_wr_post(struct c2_mq *q, union c2wr * wr, struct c2_qp *qp, u32 size)
+{
+	union c2wr *msg;
+
+	msg = c2_mq_alloc(q);
+	if (msg == NULL) {
+		return -EINVAL;
+	}
+#ifdef CCMSGMAGIC
+	((c2wr_hdr_t *) wr)->magic = cpu_to_be32(CCWR_MAGIC);
+#endif
+
+	/*
+	 * Since all header fields in the WR are the same as the
+	 * CQE, set the following so the adapter need not.
+	 */
+	c2_wr_set_result(wr, CCERR_PENDING);
+
+	/*
+	 * Copy the wr down to the adapter
+	 */
+	memcpy((void *) msg, (void *) wr, size);
+
+	c2_mq_produce(q);
+	return 0;
+}
+
+
+int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
+		 struct ib_send_wr **bad_wr)
+{
+	struct c2_dev *c2dev = to_c2dev(ibqp->device);
+	struct c2_qp *qp = to_c2qp(ibqp);
+	union c2wr wr;
+	int err = 0;
+
+	u32 flags;
+	u32 tot_len;
+	u8 actual_sge_count;
+	u32 msg_size;
+
+	if (qp->state > IB_QPS_RTS)
+		return -EINVAL;
+
+	while (ib_wr) {
+
+		flags = 0;
+		wr.sqwr.sq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
+		if (ib_wr->send_flags & IB_SEND_SIGNALED) {
+			flags |= SQ_SIGNALED;
+		}
+
+		switch (ib_wr->opcode) {
+		case IB_WR_SEND:
+			if (ib_wr->send_flags & IB_SEND_SOLICITED) {
+				c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE);
+				msg_size = sizeof(struct c2wr_send_req);
+			} else {
+				c2_wr_set_id(&wr, C2_WR_TYPE_SEND);
+				msg_size = sizeof(struct c2wr_send_req);
+			}
+
+			wr.sqwr.send.remote_stag = 0;
+			msg_size += sizeof(struct c2_data_addr) * ib_wr->num_sge;
+			if (ib_wr->num_sge > qp->send_sgl_depth) {
+				err = -EINVAL;
+				break;
+			}
+			if (ib_wr->send_flags & IB_SEND_FENCE) {
+				flags |= SQ_READ_FENCE;
+			}
+			err = move_sgl((struct c2_data_addr *) & (wr.sqwr.send.data),
+				       ib_wr->sg_list,
+				       ib_wr->num_sge,
+				       &tot_len, &actual_sge_count);
+			wr.sqwr.send.sge_len = cpu_to_be32(tot_len);
+			c2_wr_set_sge_count(&wr, actual_sge_count);
+			break;
+		case IB_WR_RDMA_WRITE:
+			c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_WRITE);
+			msg_size = sizeof(struct c2wr_rdma_write_req) +
+			    (sizeof(struct c2_data_addr) * ib_wr->num_sge);
+			if (ib_wr->num_sge > qp->rdma_write_sgl_depth) {
+				err = -EINVAL;
+				break;
+			}
+			if (ib_wr->send_flags & IB_SEND_FENCE) {
+				flags |= SQ_READ_FENCE;
+			}
+			wr.sqwr.rdma_write.remote_stag =
+			    cpu_to_be32(ib_wr->wr.rdma.rkey);
+			wr.sqwr.rdma_write.remote_to =
+			    cpu_to_be64(ib_wr->wr.rdma.remote_addr);
+			err = move_sgl((struct c2_data_addr *)
+				       & (wr.sqwr.rdma_write.data),
+				       ib_wr->sg_list,
+				       ib_wr->num_sge,
+				       &tot_len, &actual_sge_count);
+			wr.sqwr.rdma_write.sge_len = cpu_to_be32(tot_len);
+			c2_wr_set_sge_count(&wr, actual_sge_count);
+			break;
+		case IB_WR_RDMA_READ:
+			c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_READ);
+			msg_size = sizeof(struct c2wr_rdma_read_req);
+
+			/* IWarp only suppots 1 sge for RDMA reads */
+			if (ib_wr->num_sge > 1) {
+				err = -EINVAL;
+				break;
+			}
+
+			/*
+			 * Move the local and remote stag/to/len into the WR.
+			 */
+			wr.sqwr.rdma_read.local_stag =
+			    cpu_to_be32(ib_wr->sg_list->lkey);
+			wr.sqwr.rdma_read.local_to =
+			    cpu_to_be64(ib_wr->sg_list->addr);
+			wr.sqwr.rdma_read.remote_stag =
+			    cpu_to_be32(ib_wr->wr.rdma.rkey);
+			wr.sqwr.rdma_read.remote_to =
+			    cpu_to_be64(ib_wr->wr.rdma.remote_addr);
+			wr.sqwr.rdma_read.length =
+			    cpu_to_be32(ib_wr->sg_list->length);
+			break;
+		default:
+			/* error */
+			msg_size = 0;
+			err = -EINVAL;
+			break;
+		}
+
+		/*
+		 * If we had an error on the last wr build, then
+		 * break out.  Possible errors include bogus WR
+		 * type, and a bogus SGL length...
+		 */
+		if (err) {
+			break;
+		}
+
+		/*
+		 * Store flags
+		 */
+		c2_wr_set_flags(&wr, flags);
+
+		/*
+		 * Post the puppy!
+		 */
+		err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size);
+		if (err) {
+			break;
+		}
+
+		/*
+		 * Enqueue mq index to activity FIFO.
+		 */
+		c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count);
+
+		ib_wr = ib_wr->next;
+	}
+
+	if (err)
+		*bad_wr = ib_wr;
+	return err;
+}
+
+int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
+		    struct ib_recv_wr **bad_wr)
+{
+	struct c2_dev *c2dev = to_c2dev(ibqp->device);
+	struct c2_qp *qp = to_c2qp(ibqp);
+	union c2wr wr;
+	int err = 0;
+
+	if (qp->state > IB_QPS_RTS)
+		return -EINVAL;
+
+	/*
+	 * Try and post each work request
+	 */
+	while (ib_wr) {
+		u32 tot_len;
+		u8 actual_sge_count;
+
+		if (ib_wr->num_sge > qp->recv_sgl_depth) {
+			err = -EINVAL;
+			break;
+		}
+
+		/*
+		 * Create local host-copy of the WR
+		 */
+		wr.rqwr.rq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
+		c2_wr_set_id(&wr, CCWR_RECV);
+		c2_wr_set_flags(&wr, 0);
+
+		/* sge_count is limited to eight bits. */
+		BUG_ON(ib_wr->num_sge >= 256);
+		err = move_sgl((struct c2_data_addr *) & (wr.rqwr.data),
+			       ib_wr->sg_list,
+			       ib_wr->num_sge, &tot_len, &actual_sge_count);
+		c2_wr_set_sge_count(&wr, actual_sge_count);
+
+		/*
+		 * If we had an error on the last wr build, then
+		 * break out.  Possible errors include bogus WR
+		 * type, and a bogus SGL length...
+		 */
+		if (err) {
+			break;
+		}
+
+		err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size);
+		if (err) {
+			break;
+		}
+
+		/*
+		 * Enqueue mq index to activity FIFO
+		 */
+		c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count);
+
+		ib_wr = ib_wr->next;
+	}
+
+	if (err)
+		*bad_wr = ib_wr;
+	return err;
+}
+
+void __devinit c2_init_qp_table(struct c2_dev *c2dev)
+{
+	spin_lock_init(&c2dev->qp_table.lock);
+	idr_init(&c2dev->qp_table.idr);
+}
+
+void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev)
+{
+	idr_destroy(&c2dev->qp_table.idr);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c
new file mode 100644
index 00000000000..1c3c9d65ece
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_rnic.c
@@ -0,0 +1,663 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/mm.h>
+#include <linux/inet.h>
+
+#include <linux/route.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+#include <rdma/ib_smi.h>
+#include "c2.h"
+#include "c2_vq.h"
+
+/* Device capabilities */
+#define C2_MIN_PAGESIZE  1024
+
+#define C2_MAX_MRS       32768
+#define C2_MAX_QPS       16000
+#define C2_MAX_WQE_SZ    256
+#define C2_MAX_QP_WR     ((128*1024)/C2_MAX_WQE_SZ)
+#define C2_MAX_SGES      4
+#define C2_MAX_SGE_RD    1
+#define C2_MAX_CQS       32768
+#define C2_MAX_CQES      4096
+#define C2_MAX_PDS       16384
+
+/*
+ * Send the adapter INIT message to the amso1100
+ */
+static int c2_adapter_init(struct c2_dev *c2dev)
+{
+	struct c2wr_init_req wr;
+	int err;
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_INIT);
+	wr.hdr.context = 0;
+	wr.hint_count = cpu_to_be64(c2dev->hint_count_dma);
+	wr.q0_host_shared = cpu_to_be64(c2dev->req_vq.shared_dma);
+	wr.q1_host_shared = cpu_to_be64(c2dev->rep_vq.shared_dma);
+	wr.q1_host_msg_pool = cpu_to_be64(c2dev->rep_vq.host_dma);
+	wr.q2_host_shared = cpu_to_be64(c2dev->aeq.shared_dma);
+	wr.q2_host_msg_pool = cpu_to_be64(c2dev->aeq.host_dma);
+
+	/* Post the init message */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+
+	return err;
+}
+
+/*
+ * Send the adapter TERM message to the amso1100
+ */
+static void c2_adapter_term(struct c2_dev *c2dev)
+{
+	struct c2wr_init_req wr;
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_TERM);
+	wr.hdr.context = 0;
+
+	/* Post the init message */
+	vq_send_wr(c2dev, (union c2wr *) & wr);
+	c2dev->init = 0;
+
+	return;
+}
+
+/*
+ * Query the adapter
+ */
+static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_rnic_query_req wr;
+	struct c2wr_rnic_query_rep *reply;
+	int err;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	c2_wr_set_id(&wr, CCWR_RNIC_QUERY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) &wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	reply =
+	    (struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply)
+		err = -ENOMEM;
+
+	err = c2_errno(reply);
+	if (err)
+		goto bail2;
+
+	props->fw_ver =
+		((u64)be32_to_cpu(reply->fw_ver_major) << 32) |
+		((be32_to_cpu(reply->fw_ver_minor) && 0xFFFF) << 16) |
+		(be32_to_cpu(reply->fw_ver_patch) && 0xFFFF);
+	memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6);
+	props->max_mr_size         = 0xFFFFFFFF;
+	props->page_size_cap       = ~(C2_MIN_PAGESIZE-1);
+	props->vendor_id           = be32_to_cpu(reply->vendor_id);
+	props->vendor_part_id      = be32_to_cpu(reply->part_number);
+	props->hw_ver              = be32_to_cpu(reply->hw_version);
+	props->max_qp              = be32_to_cpu(reply->max_qps);
+	props->max_qp_wr           = be32_to_cpu(reply->max_qp_depth);
+	props->device_cap_flags    = c2dev->device_cap_flags;
+	props->max_sge             = C2_MAX_SGES;
+	props->max_sge_rd          = C2_MAX_SGE_RD;
+	props->max_cq              = be32_to_cpu(reply->max_cqs);
+	props->max_cqe             = be32_to_cpu(reply->max_cq_depth);
+	props->max_mr              = be32_to_cpu(reply->max_mrs);
+	props->max_pd              = be32_to_cpu(reply->max_pds);
+	props->max_qp_rd_atom      = be32_to_cpu(reply->max_qp_ird);
+	props->max_ee_rd_atom      = 0;
+	props->max_res_rd_atom     = be32_to_cpu(reply->max_global_ird);
+	props->max_qp_init_rd_atom = be32_to_cpu(reply->max_qp_ord);
+	props->max_ee_init_rd_atom = 0;
+	props->atomic_cap          = IB_ATOMIC_NONE;
+	props->max_ee              = 0;
+	props->max_rdd             = 0;
+	props->max_mw              = be32_to_cpu(reply->max_mws);
+	props->max_raw_ipv6_qp     = 0;
+	props->max_raw_ethy_qp     = 0;
+	props->max_mcast_grp       = 0;
+	props->max_mcast_qp_attach = 0;
+	props->max_total_mcast_qp_attach = 0;
+	props->max_ah              = 0;
+	props->max_fmr             = 0;
+	props->max_map_per_fmr     = 0;
+	props->max_srq             = 0;
+	props->max_srq_wr          = 0;
+	props->max_srq_sge         = 0;
+	props->max_pkeys           = 0;
+	props->local_ca_ack_delay  = 0;
+
+ bail2:
+	vq_repbuf_free(c2dev, reply);
+
+ bail1:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Add an IP address to the RNIC interface
+ */
+int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_rnic_setconfig_req *wr;
+	struct c2wr_rnic_setconfig_rep *reply;
+	struct c2_netaddr netaddr;
+	int err, len;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	len = sizeof(struct c2_netaddr);
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
+	wr->hdr.context = (unsigned long) vq_req;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->option = cpu_to_be32(C2_CFG_ADD_ADDR);
+
+	netaddr.ip_addr = inaddr;
+	netaddr.netmask = inmask;
+	netaddr.mtu = 0;
+
+	memcpy(wr->data, &netaddr, len);
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	reply =
+	    (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	err = c2_errno(reply);
+	vq_repbuf_free(c2dev, reply);
+
+      bail1:
+	kfree(wr);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Delete an IP address from the RNIC interface
+ */
+int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_rnic_setconfig_req *wr;
+	struct c2wr_rnic_setconfig_rep *reply;
+	struct c2_netaddr netaddr;
+	int err, len;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	len = sizeof(struct c2_netaddr);
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
+	wr->hdr.context = (unsigned long) vq_req;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->option = cpu_to_be32(C2_CFG_DEL_ADDR);
+
+	netaddr.ip_addr = inaddr;
+	netaddr.netmask = inmask;
+	netaddr.mtu = 0;
+
+	memcpy(wr->data, &netaddr, len);
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	reply =
+	    (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	err = c2_errno(reply);
+	vq_repbuf_free(c2dev, reply);
+
+      bail1:
+	kfree(wr);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Open a single RNIC instance to use with all
+ * low level openib calls
+ */
+static int c2_rnic_open(struct c2_dev *c2dev)
+{
+	struct c2_vq_req *vq_req;
+	union c2wr wr;
+	struct c2wr_rnic_open_rep *reply;
+	int err;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (vq_req == NULL) {
+		return -ENOMEM;
+	}
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_RNIC_OPEN);
+	wr.rnic_open.req.hdr.context = (unsigned long) (vq_req);
+	wr.rnic_open.req.flags = cpu_to_be16(RNIC_PRIV_MODE);
+	wr.rnic_open.req.port_num = cpu_to_be16(0);
+	wr.rnic_open.req.user_context = (unsigned long) c2dev;
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, &wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	reply = (struct c2wr_rnic_open_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	if ((err = c2_errno(reply)) != 0) {
+		goto bail1;
+	}
+
+	c2dev->adapter_handle = reply->rnic_handle;
+
+      bail1:
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Close the RNIC instance
+ */
+static int c2_rnic_close(struct c2_dev *c2dev)
+{
+	struct c2_vq_req *vq_req;
+	union c2wr wr;
+	struct c2wr_rnic_close_rep *reply;
+	int err;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (vq_req == NULL) {
+		return -ENOMEM;
+	}
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_RNIC_CLOSE);
+	wr.rnic_close.req.hdr.context = (unsigned long) vq_req;
+	wr.rnic_close.req.rnic_handle = c2dev->adapter_handle;
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, &wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	reply = (struct c2wr_rnic_close_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	if ((err = c2_errno(reply)) != 0) {
+		goto bail1;
+	}
+
+	c2dev->adapter_handle = 0;
+
+      bail1:
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Called by c2_probe to initialize the RNIC. This principally
+ * involves initalizing the various limits and resouce pools that
+ * comprise the RNIC instance.
+ */
+int c2_rnic_init(struct c2_dev *c2dev)
+{
+	int err;
+	u32 qsize, msgsize;
+	void *q1_pages;
+	void *q2_pages;
+	void __iomem *mmio_regs;
+
+	/* Device capabilities */
+	c2dev->device_cap_flags =
+	    (IB_DEVICE_RESIZE_MAX_WR |
+	     IB_DEVICE_CURR_QP_STATE_MOD |
+	     IB_DEVICE_SYS_IMAGE_GUID |
+	     IB_DEVICE_ZERO_STAG |
+	     IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
+
+	/* Allocate the qptr_array */
+	c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *));
+	if (!c2dev->qptr_array) {
+		return -ENOMEM;
+	}
+
+	/* Inialize the qptr_array */
+	memset(c2dev->qptr_array, 0, C2_MAX_CQS * sizeof(void *));
+	c2dev->qptr_array[0] = (void *) &c2dev->req_vq;
+	c2dev->qptr_array[1] = (void *) &c2dev->rep_vq;
+	c2dev->qptr_array[2] = (void *) &c2dev->aeq;
+
+	/* Initialize data structures */
+	init_waitqueue_head(&c2dev->req_vq_wo);
+	spin_lock_init(&c2dev->vqlock);
+	spin_lock_init(&c2dev->lock);
+
+	/* Allocate MQ shared pointer pool for kernel clients. User
+	 * mode client pools are hung off the user context
+	 */
+	err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool);
+	if (err) {
+		goto bail0;
+	}
+
+	/* Allocate shared pointers for Q0, Q1, and Q2 from
+	 * the shared pointer pool.
+	 */
+
+	c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					     &c2dev->hint_count_dma,
+					     GFP_KERNEL);
+	c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					     &c2dev->req_vq.shared_dma,
+					     GFP_KERNEL);
+	c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					     &c2dev->rep_vq.shared_dma,
+					     GFP_KERNEL);
+	c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					  &c2dev->aeq.shared_dma, GFP_KERNEL);
+	if (!c2dev->hint_count || !c2dev->req_vq.shared ||
+	    !c2dev->rep_vq.shared || !c2dev->aeq.shared) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	mmio_regs = c2dev->kva;
+	/* Initialize the Verbs Request Queue */
+	c2_mq_req_init(&c2dev->req_vq, 0,
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_QSIZE)),
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_MSGSIZE)),
+		       mmio_regs +
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_POOLSTART)),
+		       mmio_regs +
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_SHARED)),
+		       C2_MQ_ADAPTER_TARGET);
+
+	/* Initialize the Verbs Reply Queue */
+	qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_QSIZE));
+	msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_MSGSIZE));
+	q1_pages = kmalloc(qsize * msgsize, GFP_KERNEL);
+	if (!q1_pages) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+	c2dev->rep_vq.host_dma = dma_map_single(c2dev->ibdev.dma_device,
+					        (void *)q1_pages, qsize * msgsize,
+				      		DMA_FROM_DEVICE);
+	pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
+	pr_debug("%s rep_vq va %p dma %llx\n", __FUNCTION__, q1_pages,
+		 (u64)c2dev->rep_vq.host_dma);
+	c2_mq_rep_init(&c2dev->rep_vq,
+		   1,
+		   qsize,
+		   msgsize,
+		   q1_pages,
+		   mmio_regs +
+		   be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_SHARED)),
+		   C2_MQ_HOST_TARGET);
+
+	/* Initialize the Asynchronus Event Queue */
+	qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_QSIZE));
+	msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_MSGSIZE));
+	q2_pages = kmalloc(qsize * msgsize, GFP_KERNEL);
+	if (!q2_pages) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+	c2dev->aeq.host_dma = dma_map_single(c2dev->ibdev.dma_device,
+					        (void *)q2_pages, qsize * msgsize,
+				      		DMA_FROM_DEVICE);
+	pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
+	pr_debug("%s aeq va %p dma %llx\n", __FUNCTION__, q1_pages,
+		 (u64)c2dev->rep_vq.host_dma);
+	c2_mq_rep_init(&c2dev->aeq,
+		       2,
+		       qsize,
+		       msgsize,
+		       q2_pages,
+		       mmio_regs +
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_SHARED)),
+		       C2_MQ_HOST_TARGET);
+
+	/* Initialize the verbs request allocator */
+	err = vq_init(c2dev);
+	if (err)
+		goto bail3;
+
+	/* Enable interrupts on the adapter */
+	writel(0, c2dev->regs + C2_IDIS);
+
+	/* create the WR init message */
+	err = c2_adapter_init(c2dev);
+	if (err)
+		goto bail4;
+	c2dev->init++;
+
+	/* open an adapter instance */
+	err = c2_rnic_open(c2dev);
+	if (err)
+		goto bail4;
+
+	/* Initialize cached the adapter limits */
+	if (c2_rnic_query(c2dev, &c2dev->props))
+		goto bail5;
+
+	/* Initialize the PD pool */
+	err = c2_init_pd_table(c2dev);
+	if (err)
+		goto bail5;
+
+	/* Initialize the QP pool */
+	c2_init_qp_table(c2dev);
+	return 0;
+
+      bail5:
+	c2_rnic_close(c2dev);
+      bail4:
+	vq_term(c2dev);
+      bail3:
+	dma_unmap_single(c2dev->ibdev.dma_device,
+			 pci_unmap_addr(&c2dev->aeq, mapping),
+			 c2dev->aeq.q_size * c2dev->aeq.msg_size,
+		  	 DMA_FROM_DEVICE);
+	kfree(q2_pages);
+      bail2:
+	dma_unmap_single(c2dev->ibdev.dma_device,
+			 pci_unmap_addr(&c2dev->rep_vq, mapping),
+			 c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
+		  	 DMA_FROM_DEVICE);
+	kfree(q1_pages);
+      bail1:
+	c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
+      bail0:
+	vfree(c2dev->qptr_array);
+
+	return err;
+}
+
+/*
+ * Called by c2_remove to cleanup the RNIC resources.
+ */
+void c2_rnic_term(struct c2_dev *c2dev)
+{
+
+	/* Close the open adapter instance */
+	c2_rnic_close(c2dev);
+
+	/* Send the TERM message to the adapter */
+	c2_adapter_term(c2dev);
+
+	/* Disable interrupts on the adapter */
+	writel(1, c2dev->regs + C2_IDIS);
+
+	/* Free the QP pool */
+	c2_cleanup_qp_table(c2dev);
+
+	/* Free the PD pool */
+	c2_cleanup_pd_table(c2dev);
+
+	/* Free the verbs request allocator */
+	vq_term(c2dev);
+
+	/* Unmap and free the asynchronus event queue */
+	dma_unmap_single(c2dev->ibdev.dma_device,
+			 pci_unmap_addr(&c2dev->aeq, mapping),
+			 c2dev->aeq.q_size * c2dev->aeq.msg_size,
+		  	 DMA_FROM_DEVICE);
+	kfree(c2dev->aeq.msg_pool.host);
+
+	/* Unmap and free the verbs reply queue */
+	dma_unmap_single(c2dev->ibdev.dma_device,
+			 pci_unmap_addr(&c2dev->rep_vq, mapping),
+			 c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
+		  	 DMA_FROM_DEVICE);
+	kfree(c2dev->rep_vq.msg_pool.host);
+
+	/* Free the MQ shared pointer pool */
+	c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
+
+	/* Free the qptr_array */
+	vfree(c2dev->qptr_array);
+
+	return;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_status.h b/drivers/infiniband/hw/amso1100/c2_status.h
new file mode 100644
index 00000000000..6ee4aa92d87
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_status.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef	_C2_STATUS_H_
+#define _C2_STATUS_H_
+
+/*
+ * Verbs Status Codes
+ */
+enum c2_status {
+	C2_OK = 0,		/* This must be zero */
+	CCERR_INSUFFICIENT_RESOURCES = 1,
+	CCERR_INVALID_MODIFIER = 2,
+	CCERR_INVALID_MODE = 3,
+	CCERR_IN_USE = 4,
+	CCERR_INVALID_RNIC = 5,
+	CCERR_INTERRUPTED_OPERATION = 6,
+	CCERR_INVALID_EH = 7,
+	CCERR_INVALID_CQ = 8,
+	CCERR_CQ_EMPTY = 9,
+	CCERR_NOT_IMPLEMENTED = 10,
+	CCERR_CQ_DEPTH_TOO_SMALL = 11,
+	CCERR_PD_IN_USE = 12,
+	CCERR_INVALID_PD = 13,
+	CCERR_INVALID_SRQ = 14,
+	CCERR_INVALID_ADDRESS = 15,
+	CCERR_INVALID_NETMASK = 16,
+	CCERR_INVALID_QP = 17,
+	CCERR_INVALID_QP_STATE = 18,
+	CCERR_TOO_MANY_WRS_POSTED = 19,
+	CCERR_INVALID_WR_TYPE = 20,
+	CCERR_INVALID_SGL_LENGTH = 21,
+	CCERR_INVALID_SQ_DEPTH = 22,
+	CCERR_INVALID_RQ_DEPTH = 23,
+	CCERR_INVALID_ORD = 24,
+	CCERR_INVALID_IRD = 25,
+	CCERR_QP_ATTR_CANNOT_CHANGE = 26,
+	CCERR_INVALID_STAG = 27,
+	CCERR_QP_IN_USE = 28,
+	CCERR_OUTSTANDING_WRS = 29,
+	CCERR_STAG_IN_USE = 30,
+	CCERR_INVALID_STAG_INDEX = 31,
+	CCERR_INVALID_SGL_FORMAT = 32,
+	CCERR_ADAPTER_TIMEOUT = 33,
+	CCERR_INVALID_CQ_DEPTH = 34,
+	CCERR_INVALID_PRIVATE_DATA_LENGTH = 35,
+	CCERR_INVALID_EP = 36,
+	CCERR_MR_IN_USE = CCERR_STAG_IN_USE,
+	CCERR_FLUSHED = 38,
+	CCERR_INVALID_WQE = 39,
+	CCERR_LOCAL_QP_CATASTROPHIC_ERROR = 40,
+	CCERR_REMOTE_TERMINATION_ERROR = 41,
+	CCERR_BASE_AND_BOUNDS_VIOLATION = 42,
+	CCERR_ACCESS_VIOLATION = 43,
+	CCERR_INVALID_PD_ID = 44,
+	CCERR_WRAP_ERROR = 45,
+	CCERR_INV_STAG_ACCESS_ERROR = 46,
+	CCERR_ZERO_RDMA_READ_RESOURCES = 47,
+	CCERR_QP_NOT_PRIVILEGED = 48,
+	CCERR_STAG_STATE_NOT_INVALID = 49,
+	CCERR_INVALID_PAGE_SIZE = 50,
+	CCERR_INVALID_BUFFER_SIZE = 51,
+	CCERR_INVALID_PBE = 52,
+	CCERR_INVALID_FBO = 53,
+	CCERR_INVALID_LENGTH = 54,
+	CCERR_INVALID_ACCESS_RIGHTS = 55,
+	CCERR_PBL_TOO_BIG = 56,
+	CCERR_INVALID_VA = 57,
+	CCERR_INVALID_REGION = 58,
+	CCERR_INVALID_WINDOW = 59,
+	CCERR_TOTAL_LENGTH_TOO_BIG = 60,
+	CCERR_INVALID_QP_ID = 61,
+	CCERR_ADDR_IN_USE = 62,
+	CCERR_ADDR_NOT_AVAIL = 63,
+	CCERR_NET_DOWN = 64,
+	CCERR_NET_UNREACHABLE = 65,
+	CCERR_CONN_ABORTED = 66,
+	CCERR_CONN_RESET = 67,
+	CCERR_NO_BUFS = 68,
+	CCERR_CONN_TIMEDOUT = 69,
+	CCERR_CONN_REFUSED = 70,
+	CCERR_HOST_UNREACHABLE = 71,
+	CCERR_INVALID_SEND_SGL_DEPTH = 72,
+	CCERR_INVALID_RECV_SGL_DEPTH = 73,
+	CCERR_INVALID_RDMA_WRITE_SGL_DEPTH = 74,
+	CCERR_INSUFFICIENT_PRIVILEGES = 75,
+	CCERR_STACK_ERROR = 76,
+	CCERR_INVALID_VERSION = 77,
+	CCERR_INVALID_MTU = 78,
+	CCERR_INVALID_IMAGE = 79,
+	CCERR_PENDING = 98,	/* not an error; user internally by adapter */
+	CCERR_DEFER = 99,	/* not an error; used internally by adapter */
+	CCERR_FAILED_WRITE = 100,
+	CCERR_FAILED_ERASE = 101,
+	CCERR_FAILED_VERIFICATION = 102,
+	CCERR_NOT_FOUND = 103,
+
+};
+
+/*
+ * CCAE_ACTIVE_CONNECT_RESULTS status result codes.
+ */
+enum c2_connect_status {
+	C2_CONN_STATUS_SUCCESS = C2_OK,
+	C2_CONN_STATUS_NO_MEM = CCERR_INSUFFICIENT_RESOURCES,
+	C2_CONN_STATUS_TIMEDOUT = CCERR_CONN_TIMEDOUT,
+	C2_CONN_STATUS_REFUSED = CCERR_CONN_REFUSED,
+	C2_CONN_STATUS_NETUNREACH = CCERR_NET_UNREACHABLE,
+	C2_CONN_STATUS_HOSTUNREACH = CCERR_HOST_UNREACHABLE,
+	C2_CONN_STATUS_INVALID_RNIC = CCERR_INVALID_RNIC,
+	C2_CONN_STATUS_INVALID_QP = CCERR_INVALID_QP,
+	C2_CONN_STATUS_INVALID_QP_STATE = CCERR_INVALID_QP_STATE,
+	C2_CONN_STATUS_REJECTED = CCERR_CONN_RESET,
+	C2_CONN_STATUS_ADDR_NOT_AVAIL = CCERR_ADDR_NOT_AVAIL,
+};
+
+/*
+ * Flash programming status codes.
+ */
+enum c2_flash_status {
+	C2_FLASH_STATUS_SUCCESS = 0x0000,
+	C2_FLASH_STATUS_VERIFY_ERR = 0x0002,
+	C2_FLASH_STATUS_IMAGE_ERR = 0x0004,
+	C2_FLASH_STATUS_ECLBS = 0x0400,
+	C2_FLASH_STATUS_PSLBS = 0x0800,
+	C2_FLASH_STATUS_VPENS = 0x1000,
+};
+
+#endif				/* _C2_STATUS_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_user.h b/drivers/infiniband/hw/amso1100/c2_user.h
new file mode 100644
index 00000000000..7e9e7ad6546
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_user.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef C2_USER_H
+#define C2_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct c2_alloc_ucontext_resp {
+	__u32 qp_tab_size;
+	__u32 uarc_size;
+};
+
+struct c2_alloc_pd_resp {
+	__u32 pdn;
+	__u32 reserved;
+};
+
+struct c2_create_cq {
+	__u32 lkey;
+	__u32 pdn;
+	__u64 arm_db_page;
+	__u64 set_db_page;
+	__u32 arm_db_index;
+	__u32 set_db_index;
+};
+
+struct c2_create_cq_resp {
+	__u32 cqn;
+	__u32 reserved;
+};
+
+struct c2_create_qp {
+	__u32 lkey;
+	__u32 reserved;
+	__u64 sq_db_page;
+	__u64 rq_db_page;
+	__u32 sq_db_index;
+	__u32 rq_db_index;
+};
+
+#endif				/* C2_USER_H */
diff --git a/drivers/infiniband/hw/amso1100/c2_vq.c b/drivers/infiniband/hw/amso1100/c2_vq.c
new file mode 100644
index 00000000000..40caeb5f41b
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_vq.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "c2_vq.h"
+#include "c2_provider.h"
+
+/*
+ * Verbs Request Objects:
+ *
+ * VQ Request Objects are allocated by the kernel verbs handlers.
+ * They contain a wait object, a refcnt, an atomic bool indicating that the
+ * adapter has replied, and a copy of the verb reply work request.
+ * A pointer to the VQ Request Object is passed down in the context
+ * field of the work request message, and reflected back by the adapter
+ * in the verbs reply message.  The function handle_vq() in the interrupt
+ * path will use this pointer to:
+ * 	1) append a copy of the verbs reply message
+ * 	2) mark that the reply is ready
+ * 	3) wake up the kernel verbs handler blocked awaiting the reply.
+ *
+ *
+ * The kernel verbs handlers do a "get" to put a 2nd reference on the
+ * VQ Request object.  If the kernel verbs handler exits before the adapter
+ * can respond, this extra reference will keep the VQ Request object around
+ * until the adapter's reply can be processed.  The reason we need this is
+ * because a pointer to this object is stuffed into the context field of
+ * the verbs work request message, and reflected back in the reply message.
+ * It is used in the interrupt handler (handle_vq()) to wake up the appropriate
+ * kernel verb handler that is blocked awaiting the verb reply.
+ * So handle_vq() will do a "put" on the object when it's done accessing it.
+ * NOTE:  If we guarantee that the kernel verb handler will never bail before
+ *        getting the reply, then we don't need these refcnts.
+ *
+ *
+ * VQ Request objects are freed by the kernel verbs handlers only
+ * after the verb has been processed, or when the adapter fails and
+ * does not reply.
+ *
+ *
+ * Verbs Reply Buffers:
+ *
+ * VQ Reply bufs are local host memory copies of a
+ * outstanding Verb Request reply
+ * message.  The are always allocated by the kernel verbs handlers, and _may_ be
+ * freed by either the kernel verbs handler -or- the interrupt handler.  The
+ * kernel verbs handler _must_ free the repbuf, then free the vq request object
+ * in that order.
+ */
+
+int vq_init(struct c2_dev *c2dev)
+{
+	sprintf(c2dev->vq_cache_name, "c2-vq:dev%c",
+		(char) ('0' + c2dev->devnum));
+	c2dev->host_msg_cache =
+	    kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0,
+			      SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (c2dev->host_msg_cache == NULL) {
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void vq_term(struct c2_dev *c2dev)
+{
+	kmem_cache_destroy(c2dev->host_msg_cache);
+}
+
+/* vq_req_alloc - allocate a VQ Request Object and initialize it.
+ * The refcnt is set to 1.
+ */
+struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev)
+{
+	struct c2_vq_req *r;
+
+	r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL);
+	if (r) {
+		init_waitqueue_head(&r->wait_object);
+		r->reply_msg = (u64) NULL;
+		r->event = 0;
+		r->cm_id = NULL;
+		r->qp = NULL;
+		atomic_set(&r->refcnt, 1);
+		atomic_set(&r->reply_ready, 0);
+	}
+	return r;
+}
+
+
+/* vq_req_free - free the VQ Request Object.  It is assumed the verbs handler
+ * has already free the VQ Reply Buffer if it existed.
+ */
+void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+	r->reply_msg = (u64) NULL;
+	if (atomic_dec_and_test(&r->refcnt)) {
+		kfree(r);
+	}
+}
+
+/* vq_req_get - reference a VQ Request Object.  Done
+ * only in the kernel verbs handlers.
+ */
+void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+	atomic_inc(&r->refcnt);
+}
+
+
+/* vq_req_put - dereference and potentially free a VQ Request Object.
+ *
+ * This is only called by handle_vq() on the
+ * interrupt when it is done processing
+ * a verb reply message.  If the associated
+ * kernel verbs handler has already bailed,
+ * then this put will actually free the VQ
+ * Request object _and_ the VQ Reply Buffer
+ * if it exists.
+ */
+void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+	if (atomic_dec_and_test(&r->refcnt)) {
+		if (r->reply_msg != (u64) NULL)
+			vq_repbuf_free(c2dev,
+				       (void *) (unsigned long) r->reply_msg);
+		kfree(r);
+	}
+}
+
+
+/*
+ * vq_repbuf_alloc - allocate a VQ Reply Buffer.
+ */
+void *vq_repbuf_alloc(struct c2_dev *c2dev)
+{
+	return kmem_cache_alloc(c2dev->host_msg_cache, SLAB_ATOMIC);
+}
+
+/*
+ * vq_send_wr - post a verbs request message to the Verbs Request Queue.
+ * If a message is not available in the MQ, then block until one is available.
+ * NOTE: handle_mq() on the interrupt context will wake up threads blocked here.
+ * When the adapter drains the Verbs Request Queue,
+ * it inserts MQ index 0 in to the
+ * adapter->host activity fifo and interrupts the host.
+ */
+int vq_send_wr(struct c2_dev *c2dev, union c2wr *wr)
+{
+	void *msg;
+	wait_queue_t __wait;
+
+	/*
+	 * grab adapter vq lock
+	 */
+	spin_lock(&c2dev->vqlock);
+
+	/*
+	 * allocate msg
+	 */
+	msg = c2_mq_alloc(&c2dev->req_vq);
+
+	/*
+	 * If we cannot get a msg, then we'll wait
+	 * When a messages are available, the int handler will wake_up()
+	 * any waiters.
+	 */
+	while (msg == NULL) {
+		pr_debug("%s:%d no available msg in VQ, waiting...\n",
+		       __FUNCTION__, __LINE__);
+		init_waitqueue_entry(&__wait, current);
+		add_wait_queue(&c2dev->req_vq_wo, &__wait);
+		spin_unlock(&c2dev->vqlock);
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (!c2_mq_full(&c2dev->req_vq)) {
+				break;
+			}
+			if (!signal_pending(current)) {
+				schedule_timeout(1 * HZ);	/* 1 second... */
+				continue;
+			}
+			set_current_state(TASK_RUNNING);
+			remove_wait_queue(&c2dev->req_vq_wo, &__wait);
+			return -EINTR;
+		}
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&c2dev->req_vq_wo, &__wait);
+		spin_lock(&c2dev->vqlock);
+		msg = c2_mq_alloc(&c2dev->req_vq);
+	}
+
+	/*
+	 * copy wr into adapter msg
+	 */
+	memcpy(msg, wr, c2dev->req_vq.msg_size);
+
+	/*
+	 * post msg
+	 */
+	c2_mq_produce(&c2dev->req_vq);
+
+	/*
+	 * release adapter vq lock
+	 */
+	spin_unlock(&c2dev->vqlock);
+	return 0;
+}
+
+
+/*
+ * vq_wait_for_reply - block until the adapter posts a Verb Reply Message.
+ */
+int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req)
+{
+	if (!wait_event_timeout(req->wait_object,
+				atomic_read(&req->reply_ready),
+				60*HZ))
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+/*
+ * vq_repbuf_free - Free a Verbs Reply Buffer.
+ */
+void vq_repbuf_free(struct c2_dev *c2dev, void *reply)
+{
+	kmem_cache_free(c2dev->host_msg_cache, reply);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_vq.h b/drivers/infiniband/hw/amso1100/c2_vq.h
new file mode 100644
index 00000000000..33805627a60
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_vq.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_VQ_H_
+#define _C2_VQ_H_
+#include <linux/sched.h>
+#include "c2.h"
+#include "c2_wr.h"
+#include "c2_provider.h"
+
+struct c2_vq_req {
+	u64 reply_msg;		/* ptr to reply msg */
+	wait_queue_head_t wait_object;	/* wait object for vq reqs */
+	atomic_t reply_ready;	/* set when reply is ready */
+	atomic_t refcnt;	/* used to cancel WRs... */
+	int event;
+	struct iw_cm_id *cm_id;
+	struct c2_qp *qp;
+};
+
+extern int vq_init(struct c2_dev *c2dev);
+extern void vq_term(struct c2_dev *c2dev);
+
+extern struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev);
+extern void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern int vq_send_wr(struct c2_dev *c2dev, union c2wr * wr);
+
+extern void *vq_repbuf_alloc(struct c2_dev *c2dev);
+extern void vq_repbuf_free(struct c2_dev *c2dev, void *reply);
+
+extern int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req);
+#endif				/* _C2_VQ_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_wr.h b/drivers/infiniband/hw/amso1100/c2_wr.h
new file mode 100644
index 00000000000..3ec6c43bb0e
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_wr.h
@@ -0,0 +1,1520 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_WR_H_
+#define _C2_WR_H_
+
+#ifdef CCDEBUG
+#define CCWR_MAGIC		0xb07700b0
+#endif
+
+#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
+
+/* Maximum allowed size in bytes of private_data exchange
+ * on connect.
+ */
+#define C2_MAX_PRIVATE_DATA_SIZE 200
+
+/*
+ * These types are shared among the adapter, host, and CCIL consumer.
+ */
+enum c2_cq_notification_type {
+	C2_CQ_NOTIFICATION_TYPE_NONE = 1,
+	C2_CQ_NOTIFICATION_TYPE_NEXT,
+	C2_CQ_NOTIFICATION_TYPE_NEXT_SE
+};
+
+enum c2_setconfig_cmd {
+	C2_CFG_ADD_ADDR = 1,
+	C2_CFG_DEL_ADDR = 2,
+	C2_CFG_ADD_ROUTE = 3,
+	C2_CFG_DEL_ROUTE = 4
+};
+
+enum c2_getconfig_cmd {
+	C2_GETCONFIG_ROUTES = 1,
+	C2_GETCONFIG_ADDRS
+};
+
+/*
+ *  CCIL Work Request Identifiers
+ */
+enum c2wr_ids {
+	CCWR_RNIC_OPEN = 1,
+	CCWR_RNIC_QUERY,
+	CCWR_RNIC_SETCONFIG,
+	CCWR_RNIC_GETCONFIG,
+	CCWR_RNIC_CLOSE,
+	CCWR_CQ_CREATE,
+	CCWR_CQ_QUERY,
+	CCWR_CQ_MODIFY,
+	CCWR_CQ_DESTROY,
+	CCWR_QP_CONNECT,
+	CCWR_PD_ALLOC,
+	CCWR_PD_DEALLOC,
+	CCWR_SRQ_CREATE,
+	CCWR_SRQ_QUERY,
+	CCWR_SRQ_MODIFY,
+	CCWR_SRQ_DESTROY,
+	CCWR_QP_CREATE,
+	CCWR_QP_QUERY,
+	CCWR_QP_MODIFY,
+	CCWR_QP_DESTROY,
+	CCWR_NSMR_STAG_ALLOC,
+	CCWR_NSMR_REGISTER,
+	CCWR_NSMR_PBL,
+	CCWR_STAG_DEALLOC,
+	CCWR_NSMR_REREGISTER,
+	CCWR_SMR_REGISTER,
+	CCWR_MR_QUERY,
+	CCWR_MW_ALLOC,
+	CCWR_MW_QUERY,
+	CCWR_EP_CREATE,
+	CCWR_EP_GETOPT,
+	CCWR_EP_SETOPT,
+	CCWR_EP_DESTROY,
+	CCWR_EP_BIND,
+	CCWR_EP_CONNECT,
+	CCWR_EP_LISTEN,
+	CCWR_EP_SHUTDOWN,
+	CCWR_EP_LISTEN_CREATE,
+	CCWR_EP_LISTEN_DESTROY,
+	CCWR_EP_QUERY,
+	CCWR_CR_ACCEPT,
+	CCWR_CR_REJECT,
+	CCWR_CONSOLE,
+	CCWR_TERM,
+	CCWR_FLASH_INIT,
+	CCWR_FLASH,
+	CCWR_BUF_ALLOC,
+	CCWR_BUF_FREE,
+	CCWR_FLASH_WRITE,
+	CCWR_INIT,		/* WARNING: Don't move this ever again! */
+
+
+
+	/* Add new IDs here */
+
+
+
+	/*
+	 * WARNING: CCWR_LAST must always be the last verbs id defined!
+	 *          All the preceding IDs are fixed, and must not change.
+	 *          You can add new IDs, but must not remove or reorder
+	 *          any IDs. If you do, YOU will ruin any hope of
+	 *          compatability between versions.
+	 */
+	CCWR_LAST,
+
+	/*
+	 * Start over at 1 so that arrays indexed by user wr id's
+	 * begin at 1.  This is OK since the verbs and user wr id's
+	 * are always used on disjoint sets of queues.
+	 */
+	/*
+	 * The order of the CCWR_SEND_XX verbs must
+	 * match the order of the RDMA_OPs
+	 */
+	CCWR_SEND = 1,
+	CCWR_SEND_INV,
+	CCWR_SEND_SE,
+	CCWR_SEND_SE_INV,
+	CCWR_RDMA_WRITE,
+	CCWR_RDMA_READ,
+	CCWR_RDMA_READ_INV,
+	CCWR_MW_BIND,
+	CCWR_NSMR_FASTREG,
+	CCWR_STAG_INVALIDATE,
+	CCWR_RECV,
+	CCWR_NOP,
+	CCWR_UNIMPL,
+/* WARNING: This must always be the last user wr id defined! */
+};
+#define RDMA_SEND_OPCODE_FROM_WR_ID(x)   (x+2)
+
+/*
+ * SQ/RQ Work Request Types
+ */
+enum c2_wr_type {
+	C2_WR_TYPE_SEND = CCWR_SEND,
+	C2_WR_TYPE_SEND_SE = CCWR_SEND_SE,
+	C2_WR_TYPE_SEND_INV = CCWR_SEND_INV,
+	C2_WR_TYPE_SEND_SE_INV = CCWR_SEND_SE_INV,
+	C2_WR_TYPE_RDMA_WRITE = CCWR_RDMA_WRITE,
+	C2_WR_TYPE_RDMA_READ = CCWR_RDMA_READ,
+	C2_WR_TYPE_RDMA_READ_INV_STAG = CCWR_RDMA_READ_INV,
+	C2_WR_TYPE_BIND_MW = CCWR_MW_BIND,
+	C2_WR_TYPE_FASTREG_NSMR = CCWR_NSMR_FASTREG,
+	C2_WR_TYPE_INV_STAG = CCWR_STAG_INVALIDATE,
+	C2_WR_TYPE_RECV = CCWR_RECV,
+	C2_WR_TYPE_NOP = CCWR_NOP,
+};
+
+struct c2_netaddr {
+	u32 ip_addr;
+	u32 netmask;
+	u32 mtu;
+};
+
+struct c2_route {
+	u32 ip_addr;		/* 0 indicates the default route */
+	u32 netmask;		/* netmask associated with dst */
+	u32 flags;
+	union {
+		u32 ipaddr;	/* address of the nexthop interface */
+		u8 enaddr[6];
+	} nexthop;
+};
+
+/*
+ * A Scatter Gather Entry.
+ */
+struct c2_data_addr {
+	u32 stag;
+	u32 length;
+	u64 to;
+};
+
+/*
+ * MR and MW flags used by the consumer, RI, and RNIC.
+ */
+enum c2_mm_flags {
+	MEM_REMOTE = 0x0001,	/* allow mw binds with remote access. */
+	MEM_VA_BASED = 0x0002,	/* Not Zero-based */
+	MEM_PBL_COMPLETE = 0x0004,	/* PBL array is complete in this msg */
+	MEM_LOCAL_READ = 0x0008,	/* allow local reads */
+	MEM_LOCAL_WRITE = 0x0010,	/* allow local writes */
+	MEM_REMOTE_READ = 0x0020,	/* allow remote reads */
+	MEM_REMOTE_WRITE = 0x0040,	/* allow remote writes */
+	MEM_WINDOW_BIND = 0x0080,	/* binds allowed */
+	MEM_SHARED = 0x0100,	/* set if MR is shared */
+	MEM_STAG_VALID = 0x0200	/* set if STAG is in valid state */
+};
+
+/*
+ * CCIL API ACF flags defined in terms of the low level mem flags.
+ * This minimizes translation needed in the user API
+ */
+enum c2_acf {
+	C2_ACF_LOCAL_READ = MEM_LOCAL_READ,
+	C2_ACF_LOCAL_WRITE = MEM_LOCAL_WRITE,
+	C2_ACF_REMOTE_READ = MEM_REMOTE_READ,
+	C2_ACF_REMOTE_WRITE = MEM_REMOTE_WRITE,
+	C2_ACF_WINDOW_BIND = MEM_WINDOW_BIND
+};
+
+/*
+ * Image types of objects written to flash
+ */
+#define C2_FLASH_IMG_BITFILE 1
+#define C2_FLASH_IMG_OPTION_ROM 2
+#define C2_FLASH_IMG_VPD 3
+
+/*
+ *  to fix bug 1815 we define the max size allowable of the
+ *  terminate message (per the IETF spec).Refer to the IETF
+ *  protocal specification, section 12.1.6, page 64)
+ *  The message is prefixed by 20 types of DDP info.
+ *
+ *  Then the message has 6 bytes for the terminate control
+ *  and DDP segment length info plus a DDP header (either
+ *  14 or 18 byts) plus 28 bytes for the RDMA header.
+ *  Thus the max size in:
+ *  20 + (6 + 18 + 28) = 72
+ */
+#define C2_MAX_TERMINATE_MESSAGE_SIZE (72)
+
+/*
+ * Build String Length.  It must be the same as C2_BUILD_STR_LEN in ccil_api.h
+ */
+#define WR_BUILD_STR_LEN 64
+
+/*
+ * WARNING:  All of these structs need to align any 64bit types on
+ * 64 bit boundaries!  64bit types include u64 and u64.
+ */
+
+/*
+ * Clustercore Work Request Header.  Be sensitive to field layout
+ * and alignment.
+ */
+struct c2wr_hdr {
+	/* wqe_count is part of the cqe.  It is put here so the
+	 * adapter can write to it while the wr is pending without
+	 * clobbering part of the wr.  This word need not be dma'd
+	 * from the host to adapter by libccil, but we copy it anyway
+	 * to make the memcpy to the adapter better aligned.
+	 */
+	u32 wqe_count;
+
+	/* Put these fields next so that later 32- and 64-bit
+	 * quantities are naturally aligned.
+	 */
+	u8 id;
+	u8 result;		/* adapter -> host */
+	u8 sge_count;		/* host -> adapter */
+	u8 flags;		/* host -> adapter */
+
+	u64 context;
+#ifdef CCMSGMAGIC
+	u32 magic;
+	u32 pad;
+#endif
+} __attribute__((packed));
+
+/*
+ *------------------------ RNIC ------------------------
+ */
+
+/*
+ * WR_RNIC_OPEN
+ */
+
+/*
+ * Flags for the RNIC WRs
+ */
+enum c2_rnic_flags {
+	RNIC_IRD_STATIC = 0x0001,
+	RNIC_ORD_STATIC = 0x0002,
+	RNIC_QP_STATIC = 0x0004,
+	RNIC_SRQ_SUPPORTED = 0x0008,
+	RNIC_PBL_BLOCK_MODE = 0x0010,
+	RNIC_SRQ_MODEL_ARRIVAL = 0x0020,
+	RNIC_CQ_OVF_DETECTED = 0x0040,
+	RNIC_PRIV_MODE = 0x0080
+};
+
+struct c2wr_rnic_open_req {
+	struct c2wr_hdr hdr;
+	u64 user_context;
+	u16 flags;		/* See enum c2_rnic_flags */
+	u16 port_num;
+} __attribute__((packed));
+
+struct c2wr_rnic_open_rep {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+} __attribute__((packed));
+
+union c2wr_rnic_open {
+	struct c2wr_rnic_open_req req;
+	struct c2wr_rnic_open_rep rep;
+} __attribute__((packed));
+
+struct c2wr_rnic_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+} __attribute__((packed));
+
+/*
+ * WR_RNIC_QUERY
+ */
+struct c2wr_rnic_query_rep {
+	struct c2wr_hdr hdr;
+	u64 user_context;
+	u32 vendor_id;
+	u32 part_number;
+	u32 hw_version;
+	u32 fw_ver_major;
+	u32 fw_ver_minor;
+	u32 fw_ver_patch;
+	char fw_ver_build_str[WR_BUILD_STR_LEN];
+	u32 max_qps;
+	u32 max_qp_depth;
+	u32 max_srq_depth;
+	u32 max_send_sgl_depth;
+	u32 max_rdma_sgl_depth;
+	u32 max_cqs;
+	u32 max_cq_depth;
+	u32 max_cq_event_handlers;
+	u32 max_mrs;
+	u32 max_pbl_depth;
+	u32 max_pds;
+	u32 max_global_ird;
+	u32 max_global_ord;
+	u32 max_qp_ird;
+	u32 max_qp_ord;
+	u32 flags;
+	u32 max_mws;
+	u32 pbe_range_low;
+	u32 pbe_range_high;
+	u32 max_srqs;
+	u32 page_size;
+} __attribute__((packed));
+
+union c2wr_rnic_query {
+	struct c2wr_rnic_query_req req;
+	struct c2wr_rnic_query_rep rep;
+} __attribute__((packed));
+
+/*
+ * WR_RNIC_GETCONFIG
+ */
+
+struct c2wr_rnic_getconfig_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 option;		/* see c2_getconfig_cmd_t */
+	u64 reply_buf;
+	u32 reply_buf_len;
+} __attribute__((packed)) ;
+
+struct c2wr_rnic_getconfig_rep {
+	struct c2wr_hdr hdr;
+	u32 option;		/* see c2_getconfig_cmd_t */
+	u32 count_len;		/* length of the number of addresses configured */
+} __attribute__((packed)) ;
+
+union c2wr_rnic_getconfig {
+	struct c2wr_rnic_getconfig_req req;
+	struct c2wr_rnic_getconfig_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ * WR_RNIC_SETCONFIG
+ */
+struct c2wr_rnic_setconfig_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 option;		/* See c2_setconfig_cmd_t */
+	/* variable data and pad. See c2_netaddr and c2_route */
+	u8 data[0];
+} __attribute__((packed)) ;
+
+struct c2wr_rnic_setconfig_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_rnic_setconfig {
+	struct c2wr_rnic_setconfig_req req;
+	struct c2wr_rnic_setconfig_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ * WR_RNIC_CLOSE
+ */
+struct c2wr_rnic_close_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_rnic_close_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_rnic_close {
+	struct c2wr_rnic_close_req req;
+	struct c2wr_rnic_close_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ CQ ------------------------
+ */
+struct c2wr_cq_create_req {
+	struct c2wr_hdr hdr;
+	u64 shared_ht;
+	u64 user_context;
+	u64 msg_pool;
+	u32 rnic_handle;
+	u32 msg_size;
+	u32 depth;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_create_rep {
+	struct c2wr_hdr hdr;
+	u32 mq_index;
+	u32 adapter_shared;
+	u32 cq_handle;
+} __attribute__((packed)) ;
+
+union c2wr_cq_create {
+	struct c2wr_cq_create_req req;
+	struct c2wr_cq_create_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_modify_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 cq_handle;
+	u32 new_depth;
+	u64 new_msg_pool;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_modify_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_cq_modify {
+	struct c2wr_cq_modify_req req;
+	struct c2wr_cq_modify_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_destroy_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 cq_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_destroy_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_cq_destroy {
+	struct c2wr_cq_destroy_req req;
+	struct c2wr_cq_destroy_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ PD ------------------------
+ */
+struct c2wr_pd_alloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_pd_alloc_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_pd_alloc {
+	struct c2wr_pd_alloc_req req;
+	struct c2wr_pd_alloc_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_pd_dealloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_pd_dealloc_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_pd_dealloc {
+	struct c2wr_pd_dealloc_req req;
+	struct c2wr_pd_dealloc_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ SRQ ------------------------
+ */
+struct c2wr_srq_create_req {
+	struct c2wr_hdr hdr;
+	u64 shared_ht;
+	u64 user_context;
+	u32 rnic_handle;
+	u32 srq_depth;
+	u32 srq_limit;
+	u32 sgl_depth;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_srq_create_rep {
+	struct c2wr_hdr hdr;
+	u32 srq_depth;
+	u32 sgl_depth;
+	u32 msg_size;
+	u32 mq_index;
+	u32 mq_start;
+	u32 srq_handle;
+} __attribute__((packed)) ;
+
+union c2wr_srq_create {
+	struct c2wr_srq_create_req req;
+	struct c2wr_srq_create_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_srq_destroy_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 srq_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_srq_destroy_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_srq_destroy {
+	struct c2wr_srq_destroy_req req;
+	struct c2wr_srq_destroy_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ QP ------------------------
+ */
+enum c2wr_qp_flags {
+	QP_RDMA_READ = 0x00000001,	/* RDMA read enabled? */
+	QP_RDMA_WRITE = 0x00000002,	/* RDMA write enabled? */
+	QP_MW_BIND = 0x00000004,	/* MWs enabled */
+	QP_ZERO_STAG = 0x00000008,	/* enabled? */
+	QP_REMOTE_TERMINATION = 0x00000010,	/* remote end terminated */
+	QP_RDMA_READ_RESPONSE = 0x00000020	/* Remote RDMA read  */
+	    /* enabled? */
+};
+
+struct c2wr_qp_create_req {
+	struct c2wr_hdr hdr;
+	u64 shared_sq_ht;
+	u64 shared_rq_ht;
+	u64 user_context;
+	u32 rnic_handle;
+	u32 sq_cq_handle;
+	u32 rq_cq_handle;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 srq_handle;
+	u32 srq_limit;
+	u32 flags;		/* see enum c2wr_qp_flags */
+	u32 send_sgl_depth;
+	u32 recv_sgl_depth;
+	u32 rdma_write_sgl_depth;
+	u32 ord;
+	u32 ird;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_create_rep {
+	struct c2wr_hdr hdr;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 send_sgl_depth;
+	u32 recv_sgl_depth;
+	u32 rdma_write_sgl_depth;
+	u32 ord;
+	u32 ird;
+	u32 sq_msg_size;
+	u32 sq_mq_index;
+	u32 sq_mq_start;
+	u32 rq_msg_size;
+	u32 rq_mq_index;
+	u32 rq_mq_start;
+	u32 qp_handle;
+} __attribute__((packed)) ;
+
+union c2wr_qp_create {
+	struct c2wr_qp_create_req req;
+	struct c2wr_qp_create_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 qp_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_query_rep {
+	struct c2wr_hdr hdr;
+	u64 user_context;
+	u32 rnic_handle;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 send_sgl_depth;
+	u32 rdma_write_sgl_depth;
+	u32 recv_sgl_depth;
+	u32 ord;
+	u32 ird;
+	u16 qp_state;
+	u16 flags;		/* see c2wr_qp_flags_t */
+	u32 qp_id;
+	u32 local_addr;
+	u32 remote_addr;
+	u16 local_port;
+	u16 remote_port;
+	u32 terminate_msg_length;	/* 0 if not present */
+	u8 data[0];
+	/* Terminate Message in-line here. */
+} __attribute__((packed)) ;
+
+union c2wr_qp_query {
+	struct c2wr_qp_query_req req;
+	struct c2wr_qp_query_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_modify_req {
+	struct c2wr_hdr hdr;
+	u64 stream_msg;
+	u32 stream_msg_length;
+	u32 rnic_handle;
+	u32 qp_handle;
+	u32 next_qp_state;
+	u32 ord;
+	u32 ird;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 llp_ep_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_modify_rep {
+	struct c2wr_hdr hdr;
+	u32 ord;
+	u32 ird;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 sq_msg_size;
+	u32 sq_mq_index;
+	u32 sq_mq_start;
+	u32 rq_msg_size;
+	u32 rq_mq_index;
+	u32 rq_mq_start;
+} __attribute__((packed)) ;
+
+union c2wr_qp_modify {
+	struct c2wr_qp_modify_req req;
+	struct c2wr_qp_modify_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_destroy_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 qp_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_destroy_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_qp_destroy {
+	struct c2wr_qp_destroy_req req;
+	struct c2wr_qp_destroy_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ * The CCWR_QP_CONNECT msg is posted on the verbs request queue.  It can
+ * only be posted when a QP is in IDLE state.  After the connect request is
+ * submitted to the LLP, the adapter moves the QP to CONNECT_PENDING state.
+ * No synchronous reply from adapter to this WR.  The results of
+ * connection are passed back in an async event CCAE_ACTIVE_CONNECT_RESULTS
+ * See c2wr_ae_active_connect_results_t
+ */
+struct c2wr_qp_connect_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 qp_handle;
+	u32 remote_addr;
+	u16 remote_port;
+	u16 pad;
+	u32 private_data_length;
+	u8 private_data[0];	/* Private data in-line. */
+} __attribute__((packed)) ;
+
+struct c2wr_qp_connect {
+	struct c2wr_qp_connect_req req;
+	/* no synchronous reply.         */
+} __attribute__((packed)) ;
+
+
+/*
+ *------------------------ MM ------------------------
+ */
+
+struct c2wr_nsmr_stag_alloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 pbl_depth;
+	u32 pd_id;
+	u32 flags;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_stag_alloc_rep {
+	struct c2wr_hdr hdr;
+	u32 pbl_depth;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_stag_alloc {
+	struct c2wr_nsmr_stag_alloc_req req;
+	struct c2wr_nsmr_stag_alloc_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_register_req {
+	struct c2wr_hdr hdr;
+	u64 va;
+	u32 rnic_handle;
+	u16 flags;
+	u8 stag_key;
+	u8 pad;
+	u32 pd_id;
+	u32 pbl_depth;
+	u32 pbe_size;
+	u32 fbo;
+	u32 length;
+	u32 addrs_length;
+	/* array of paddrs (must be aligned on a 64bit boundary) */
+	u64 paddrs[0];
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_register_rep {
+	struct c2wr_hdr hdr;
+	u32 pbl_depth;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_register {
+	struct c2wr_nsmr_register_req req;
+	struct c2wr_nsmr_register_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_pbl_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 flags;
+	u32 stag_index;
+	u32 addrs_length;
+	/* array of paddrs (must be aligned on a 64bit boundary) */
+	u64 paddrs[0];
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_pbl_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_pbl {
+	struct c2wr_nsmr_pbl_req req;
+	struct c2wr_nsmr_pbl_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_mr_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+struct c2wr_mr_query_rep {
+	struct c2wr_hdr hdr;
+	u8 stag_key;
+	u8 pad[3];
+	u32 pd_id;
+	u32 flags;
+	u32 pbl_depth;
+} __attribute__((packed)) ;
+
+union c2wr_mr_query {
+	struct c2wr_mr_query_req req;
+	struct c2wr_mr_query_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_query_rep {
+	struct c2wr_hdr hdr;
+	u8 stag_key;
+	u8 pad[3];
+	u32 pd_id;
+	u32 flags;
+} __attribute__((packed)) ;
+
+union c2wr_mw_query {
+	struct c2wr_mw_query_req req;
+	struct c2wr_mw_query_rep rep;
+} __attribute__((packed)) ;
+
+
+struct c2wr_stag_dealloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+struct c2wr_stag_dealloc_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_stag_dealloc {
+	struct c2wr_stag_dealloc_req req;
+	struct c2wr_stag_dealloc_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_reregister_req {
+	struct c2wr_hdr hdr;
+	u64 va;
+	u32 rnic_handle;
+	u16 flags;
+	u8 stag_key;
+	u8 pad;
+	u32 stag_index;
+	u32 pd_id;
+	u32 pbl_depth;
+	u32 pbe_size;
+	u32 fbo;
+	u32 length;
+	u32 addrs_length;
+	u32 pad1;
+	/* array of paddrs (must be aligned on a 64bit boundary) */
+	u64 paddrs[0];
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_reregister_rep {
+	struct c2wr_hdr hdr;
+	u32 pbl_depth;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_reregister {
+	struct c2wr_nsmr_reregister_req req;
+	struct c2wr_nsmr_reregister_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_smr_register_req {
+	struct c2wr_hdr hdr;
+	u64 va;
+	u32 rnic_handle;
+	u16 flags;
+	u8 stag_key;
+	u8 pad;
+	u32 stag_index;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_smr_register_rep {
+	struct c2wr_hdr hdr;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_smr_register {
+	struct c2wr_smr_register_req req;
+	struct c2wr_smr_register_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_alloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_alloc_rep {
+	struct c2wr_hdr hdr;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_mw_alloc {
+	struct c2wr_mw_alloc_req req;
+	struct c2wr_mw_alloc_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ WRs -----------------------
+ */
+
+struct c2wr_user_hdr {
+	struct c2wr_hdr hdr;		/* Has status and WR Type */
+} __attribute__((packed)) ;
+
+enum c2_qp_state {
+	C2_QP_STATE_IDLE = 0x01,
+	C2_QP_STATE_CONNECTING = 0x02,
+	C2_QP_STATE_RTS = 0x04,
+	C2_QP_STATE_CLOSING = 0x08,
+	C2_QP_STATE_TERMINATE = 0x10,
+	C2_QP_STATE_ERROR = 0x20,
+};
+
+/* Completion queue entry. */
+struct c2wr_ce {
+	struct c2wr_hdr hdr;		/* Has status and WR Type */
+	u64 qp_user_context;	/* c2_user_qp_t * */
+	u32 qp_state;		/* Current QP State */
+	u32 handle;		/* QPID or EP Handle */
+	u32 bytes_rcvd;		/* valid for RECV WCs */
+	u32 stag;
+} __attribute__((packed)) ;
+
+
+/*
+ * Flags used for all post-sq WRs.  These must fit in the flags
+ * field of the struct c2wr_hdr (eight bits).
+ */
+enum {
+	SQ_SIGNALED = 0x01,
+	SQ_READ_FENCE = 0x02,
+	SQ_FENCE = 0x04,
+};
+
+/*
+ * Common fields for all post-sq WRs.  Namely the standard header and a
+ * secondary header with fields common to all post-sq WRs.
+ */
+struct c2_sq_hdr {
+	struct c2wr_user_hdr user_hdr;
+} __attribute__((packed));
+
+/*
+ * Same as above but for post-rq WRs.
+ */
+struct c2_rq_hdr {
+	struct c2wr_user_hdr user_hdr;
+} __attribute__((packed));
+
+/*
+ * use the same struct for all sends.
+ */
+struct c2wr_send_req {
+	struct c2_sq_hdr sq_hdr;
+	u32 sge_len;
+	u32 remote_stag;
+	u8 data[0];		/* SGE array */
+} __attribute__((packed));
+
+union c2wr_send {
+	struct c2wr_send_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_rdma_write_req {
+	struct c2_sq_hdr sq_hdr;
+	u64 remote_to;
+	u32 remote_stag;
+	u32 sge_len;
+	u8 data[0];		/* SGE array */
+} __attribute__((packed));
+
+union c2wr_rdma_write {
+	struct c2wr_rdma_write_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_rdma_read_req {
+	struct c2_sq_hdr sq_hdr;
+	u64 local_to;
+	u64 remote_to;
+	u32 local_stag;
+	u32 remote_stag;
+	u32 length;
+} __attribute__((packed));
+
+union c2wr_rdma_read {
+	struct c2wr_rdma_read_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_mw_bind_req {
+	struct c2_sq_hdr sq_hdr;
+	u64 va;
+	u8 stag_key;
+	u8 pad[3];
+	u32 mw_stag_index;
+	u32 mr_stag_index;
+	u32 length;
+	u32 flags;
+} __attribute__((packed));
+
+union c2wr_mw_bind {
+	struct c2wr_mw_bind_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_nsmr_fastreg_req {
+	struct c2_sq_hdr sq_hdr;
+	u64 va;
+	u8 stag_key;
+	u8 pad[3];
+	u32 stag_index;
+	u32 pbe_size;
+	u32 fbo;
+	u32 length;
+	u32 addrs_length;
+	/* array of paddrs (must be aligned on a 64bit boundary) */
+	u64 paddrs[0];
+} __attribute__((packed));
+
+union c2wr_nsmr_fastreg {
+	struct c2wr_nsmr_fastreg_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_stag_invalidate_req {
+	struct c2_sq_hdr sq_hdr;
+	u8 stag_key;
+	u8 pad[3];
+	u32 stag_index;
+} __attribute__((packed));
+
+union c2wr_stag_invalidate {
+	struct c2wr_stag_invalidate_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+union c2wr_sqwr {
+	struct c2_sq_hdr sq_hdr;
+	struct c2wr_send_req send;
+	struct c2wr_send_req send_se;
+	struct c2wr_send_req send_inv;
+	struct c2wr_send_req send_se_inv;
+	struct c2wr_rdma_write_req rdma_write;
+	struct c2wr_rdma_read_req rdma_read;
+	struct c2wr_mw_bind_req mw_bind;
+	struct c2wr_nsmr_fastreg_req nsmr_fastreg;
+	struct c2wr_stag_invalidate_req stag_inv;
+} __attribute__((packed));
+
+
+/*
+ * RQ WRs
+ */
+struct c2wr_rqwr {
+	struct c2_rq_hdr rq_hdr;
+	u8 data[0];		/* array of SGEs */
+} __attribute__((packed));
+
+union c2wr_recv {
+	struct c2wr_rqwr req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+/*
+ * All AEs start with this header.  Most AEs only need to convey the
+ * information in the header.  Some, like LLP connection events, need
+ * more info.  The union typdef c2wr_ae_t has all the possible AEs.
+ *
+ * hdr.context is the user_context from the rnic_open WR.  NULL If this
+ * is not affiliated with an rnic
+ *
+ * hdr.id is the AE identifier (eg;  CCAE_REMOTE_SHUTDOWN,
+ * CCAE_LLP_CLOSE_COMPLETE)
+ *
+ * resource_type is one of:  C2_RES_IND_QP, C2_RES_IND_CQ, C2_RES_IND_SRQ
+ *
+ * user_context is the context passed down when the host created the resource.
+ */
+struct c2wr_ae_hdr {
+	struct c2wr_hdr hdr;
+	u64 user_context;	/* user context for this res. */
+	u32 resource_type;	/* see enum c2_resource_indicator */
+	u32 resource;		/* handle for resource */
+	u32 qp_state;		/* current QP State */
+} __attribute__((packed));
+
+/*
+ * After submitting the CCAE_ACTIVE_CONNECT_RESULTS message on the AEQ,
+ * the adapter moves the QP into RTS state
+ */
+struct c2wr_ae_active_connect_results {
+	struct c2wr_ae_hdr ae_hdr;
+	u32 laddr;
+	u32 raddr;
+	u16 lport;
+	u16 rport;
+	u32 private_data_length;
+	u8 private_data[0];	/* data is in-line in the msg. */
+} __attribute__((packed));
+
+/*
+ * When connections are established by the stack (and the private data
+ * MPA frame is received), the adapter will generate an event to the host.
+ * The details of the connection, any private data, and the new connection
+ * request handle is passed up via the CCAE_CONNECTION_REQUEST msg on the
+ * AE queue:
+ */
+struct c2wr_ae_connection_request {
+	struct c2wr_ae_hdr ae_hdr;
+	u32 cr_handle;		/* connreq handle (sock ptr) */
+	u32 laddr;
+	u32 raddr;
+	u16 lport;
+	u16 rport;
+	u32 private_data_length;
+	u8 private_data[0];	/* data is in-line in the msg. */
+} __attribute__((packed));
+
+union c2wr_ae {
+	struct c2wr_ae_hdr ae_generic;
+	struct c2wr_ae_active_connect_results ae_active_connect_results;
+	struct c2wr_ae_connection_request ae_connection_request;
+} __attribute__((packed));
+
+struct c2wr_init_req {
+	struct c2wr_hdr hdr;
+	u64 hint_count;
+	u64 q0_host_shared;
+	u64 q1_host_shared;
+	u64 q1_host_msg_pool;
+	u64 q2_host_shared;
+	u64 q2_host_msg_pool;
+} __attribute__((packed));
+
+struct c2wr_init_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_init {
+	struct c2wr_init_req req;
+	struct c2wr_init_rep rep;
+} __attribute__((packed));
+
+/*
+ * For upgrading flash.
+ */
+
+struct c2wr_flash_init_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+} __attribute__((packed));
+
+struct c2wr_flash_init_rep {
+	struct c2wr_hdr hdr;
+	u32 adapter_flash_buf_offset;
+	u32 adapter_flash_len;
+} __attribute__((packed));
+
+union c2wr_flash_init {
+	struct c2wr_flash_init_req req;
+	struct c2wr_flash_init_rep rep;
+} __attribute__((packed));
+
+struct c2wr_flash_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 len;
+} __attribute__((packed));
+
+struct c2wr_flash_rep {
+	struct c2wr_hdr hdr;
+	u32 status;
+} __attribute__((packed));
+
+union c2wr_flash {
+	struct c2wr_flash_req req;
+	struct c2wr_flash_rep rep;
+} __attribute__((packed));
+
+struct c2wr_buf_alloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 size;
+} __attribute__((packed));
+
+struct c2wr_buf_alloc_rep {
+	struct c2wr_hdr hdr;
+	u32 offset;		/* 0 if mem not available */
+	u32 size;		/* 0 if mem not available */
+} __attribute__((packed));
+
+union c2wr_buf_alloc {
+	struct c2wr_buf_alloc_req req;
+	struct c2wr_buf_alloc_rep rep;
+} __attribute__((packed));
+
+struct c2wr_buf_free_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 offset;		/* Must match value from alloc */
+	u32 size;		/* Must match value from alloc */
+} __attribute__((packed));
+
+struct c2wr_buf_free_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_buf_free {
+	struct c2wr_buf_free_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_flash_write_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 offset;
+	u32 size;
+	u32 type;
+	u32 flags;
+} __attribute__((packed));
+
+struct c2wr_flash_write_rep {
+	struct c2wr_hdr hdr;
+	u32 status;
+} __attribute__((packed));
+
+union c2wr_flash_write {
+	struct c2wr_flash_write_req req;
+	struct c2wr_flash_write_rep rep;
+} __attribute__((packed));
+
+/*
+ * Messages for LLP connection setup.
+ */
+
+/*
+ * Listen Request.  This allocates a listening endpoint to allow passive
+ * connection setup.  Newly established LLP connections are passed up
+ * via an AE.  See c2wr_ae_connection_request_t
+ */
+struct c2wr_ep_listen_create_req {
+	struct c2wr_hdr hdr;
+	u64 user_context;	/* returned in AEs. */
+	u32 rnic_handle;
+	u32 local_addr;		/* local addr, or 0  */
+	u16 local_port;		/* 0 means "pick one" */
+	u16 pad;
+	u32 backlog;		/* tradional tcp listen bl */
+} __attribute__((packed));
+
+struct c2wr_ep_listen_create_rep {
+	struct c2wr_hdr hdr;
+	u32 ep_handle;		/* handle to new listening ep */
+	u16 local_port;		/* resulting port... */
+	u16 pad;
+} __attribute__((packed));
+
+union c2wr_ep_listen_create {
+	struct c2wr_ep_listen_create_req req;
+	struct c2wr_ep_listen_create_rep rep;
+} __attribute__((packed));
+
+struct c2wr_ep_listen_destroy_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 ep_handle;
+} __attribute__((packed));
+
+struct c2wr_ep_listen_destroy_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_ep_listen_destroy {
+	struct c2wr_ep_listen_destroy_req req;
+	struct c2wr_ep_listen_destroy_rep rep;
+} __attribute__((packed));
+
+struct c2wr_ep_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 ep_handle;
+} __attribute__((packed));
+
+struct c2wr_ep_query_rep {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 local_addr;
+	u32 remote_addr;
+	u16 local_port;
+	u16 remote_port;
+} __attribute__((packed));
+
+union c2wr_ep_query {
+	struct c2wr_ep_query_req req;
+	struct c2wr_ep_query_rep rep;
+} __attribute__((packed));
+
+
+/*
+ * The host passes this down to indicate acceptance of a pending iWARP
+ * connection.  The cr_handle was obtained from the CONNECTION_REQUEST
+ * AE passed up by the adapter.  See c2wr_ae_connection_request_t.
+ */
+struct c2wr_cr_accept_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 qp_handle;		/* QP to bind to this LLP conn */
+	u32 ep_handle;		/* LLP  handle to accept */
+	u32 private_data_length;
+	u8 private_data[0];	/* data in-line in msg. */
+} __attribute__((packed));
+
+/*
+ * adapter sends reply when private data is successfully submitted to
+ * the LLP.
+ */
+struct c2wr_cr_accept_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_cr_accept {
+	struct c2wr_cr_accept_req req;
+	struct c2wr_cr_accept_rep rep;
+} __attribute__((packed));
+
+/*
+ * The host sends this down if a given iWARP connection request was
+ * rejected by the consumer.  The cr_handle was obtained from a
+ * previous c2wr_ae_connection_request_t AE sent by the adapter.
+ */
+struct  c2wr_cr_reject_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 ep_handle;		/* LLP handle to reject */
+} __attribute__((packed));
+
+/*
+ * Dunno if this is needed, but we'll add it for now.  The adapter will
+ * send the reject_reply after the LLP endpoint has been destroyed.
+ */
+struct  c2wr_cr_reject_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_cr_reject {
+	struct c2wr_cr_reject_req req;
+	struct c2wr_cr_reject_rep rep;
+} __attribute__((packed));
+
+/*
+ * console command.  Used to implement a debug console over the verbs
+ * request and reply queues.
+ */
+
+/*
+ * Console request message.  It contains:
+ *	- message hdr with id = CCWR_CONSOLE
+ *	- the physaddr/len of host memory to be used for the reply.
+ *	- the command string.  eg:  "netstat -s" or "zoneinfo"
+ */
+struct c2wr_console_req {
+	struct c2wr_hdr hdr;		/* id = CCWR_CONSOLE */
+	u64 reply_buf;		/* pinned host buf for reply */
+	u32 reply_buf_len;	/* length of reply buffer */
+	u8 command[0];		/* NUL terminated ascii string */
+	/* containing the command req */
+} __attribute__((packed));
+
+/*
+ * flags used in the console reply.
+ */
+enum c2_console_flags {
+	CONS_REPLY_TRUNCATED = 0x00000001	/* reply was truncated */
+} __attribute__((packed));
+
+/*
+ * Console reply message.
+ * hdr.result contains the c2_status_t error if the reply was _not_ generated,
+ * or C2_OK if the reply was generated.
+ */
+struct c2wr_console_rep {
+	struct c2wr_hdr hdr;		/* id = CCWR_CONSOLE */
+	u32 flags;
+} __attribute__((packed));
+
+union c2wr_console {
+	struct c2wr_console_req req;
+	struct c2wr_console_rep rep;
+} __attribute__((packed));
+
+
+/*
+ * Giant union with all WRs.  Makes life easier...
+ */
+union c2wr {
+	struct c2wr_hdr hdr;
+	struct c2wr_user_hdr user_hdr;
+	union c2wr_rnic_open rnic_open;
+	union c2wr_rnic_query rnic_query;
+	union c2wr_rnic_getconfig rnic_getconfig;
+	union c2wr_rnic_setconfig rnic_setconfig;
+	union c2wr_rnic_close rnic_close;
+	union c2wr_cq_create cq_create;
+	union c2wr_cq_modify cq_modify;
+	union c2wr_cq_destroy cq_destroy;
+	union c2wr_pd_alloc pd_alloc;
+	union c2wr_pd_dealloc pd_dealloc;
+	union c2wr_srq_create srq_create;
+	union c2wr_srq_destroy srq_destroy;
+	union c2wr_qp_create qp_create;
+	union c2wr_qp_query qp_query;
+	union c2wr_qp_modify qp_modify;
+	union c2wr_qp_destroy qp_destroy;
+	struct c2wr_qp_connect qp_connect;
+	union c2wr_nsmr_stag_alloc nsmr_stag_alloc;
+	union c2wr_nsmr_register nsmr_register;
+	union c2wr_nsmr_pbl nsmr_pbl;
+	union c2wr_mr_query mr_query;
+	union c2wr_mw_query mw_query;
+	union c2wr_stag_dealloc stag_dealloc;
+	union c2wr_sqwr sqwr;
+	struct c2wr_rqwr rqwr;
+	struct c2wr_ce ce;
+	union c2wr_ae ae;
+	union c2wr_init init;
+	union c2wr_ep_listen_create ep_listen_create;
+	union c2wr_ep_listen_destroy ep_listen_destroy;
+	union c2wr_cr_accept cr_accept;
+	union c2wr_cr_reject cr_reject;
+	union c2wr_console console;
+	union c2wr_flash_init flash_init;
+	union c2wr_flash flash;
+	union c2wr_buf_alloc buf_alloc;
+	union c2wr_buf_free buf_free;
+	union c2wr_flash_write flash_write;
+} __attribute__((packed));
+
+
+/*
+ * Accessors for the wr fields that are packed together tightly to
+ * reduce the wr message size.  The wr arguments are void* so that
+ * either a struct c2wr*, a struct c2wr_hdr*, or a pointer to any of the types
+ * in the struct c2wr union can be passed in.
+ */
+static __inline__ u8 c2_wr_get_id(void *wr)
+{
+	return ((struct c2wr_hdr *) wr)->id;
+}
+static __inline__ void c2_wr_set_id(void *wr, u8 id)
+{
+	((struct c2wr_hdr *) wr)->id = id;
+}
+static __inline__ u8 c2_wr_get_result(void *wr)
+{
+	return ((struct c2wr_hdr *) wr)->result;
+}
+static __inline__ void c2_wr_set_result(void *wr, u8 result)
+{
+	((struct c2wr_hdr *) wr)->result = result;
+}
+static __inline__ u8 c2_wr_get_flags(void *wr)
+{
+	return ((struct c2wr_hdr *) wr)->flags;
+}
+static __inline__ void c2_wr_set_flags(void *wr, u8 flags)
+{
+	((struct c2wr_hdr *) wr)->flags = flags;
+}
+static __inline__ u8 c2_wr_get_sge_count(void *wr)
+{
+	return ((struct c2wr_hdr *) wr)->sge_count;
+}
+static __inline__ void c2_wr_set_sge_count(void *wr, u8 sge_count)
+{
+	((struct c2wr_hdr *) wr)->sge_count = sge_count;
+}
+static __inline__ u32 c2_wr_get_wqe_count(void *wr)
+{
+	return ((struct c2wr_hdr *) wr)->wqe_count;
+}
+static __inline__ void c2_wr_set_wqe_count(void *wr, u32 wqe_count)
+{
+	((struct c2wr_hdr *) wr)->wqe_count = wqe_count;
+}
+
+#endif				/* _C2_WR_H_ */
diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig
new file mode 100644
index 00000000000..922389b6439
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/Kconfig
@@ -0,0 +1,16 @@
+config INFINIBAND_EHCA
+	tristate "eHCA support"
+	depends on IBMEBUS && INFINIBAND
+	---help---
+	This driver supports the IBM pSeries eHCA InfiniBand adapter.
+
+	To compile the driver as a module, choose M here. The module
+	will be called ib_ehca.
+
+config INFINIBAND_EHCA_SCALING
+	bool "Scaling support (EXPERIMENTAL)"
+	depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL
+	---help---
+	eHCA scaling support schedules the CQ callbacks to different CPUs.
+
+	To enable this feature choose Y here.
diff --git a/drivers/infiniband/hw/ehca/Makefile b/drivers/infiniband/hw/ehca/Makefile
new file mode 100644
index 00000000000..74d284e46a4
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/Makefile
@@ -0,0 +1,16 @@
+#  Authors: Heiko J Schick <schickhj@de.ibm.com>
+#           Christoph Raisch <raisch@de.ibm.com>
+#           Joachim Fenkes <fenkes@de.ibm.com>
+#
+#  Copyright (c) 2005 IBM Corporation
+#
+#  All rights reserved.
+#
+#  This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD.
+
+obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o
+
+ib_ehca-objs  = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \
+		ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \
+		ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o
+
diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c
new file mode 100644
index 00000000000..3bac197f901
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_av.c
@@ -0,0 +1,271 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  adress vector functions
+ *
+ *  Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Khadija Souissi <souissik@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <asm/current.h>
+
+#include "ehca_tools.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+static struct kmem_cache *av_cache;
+
+struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+	int ret;
+	struct ehca_av *av;
+	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
+					      ib_device);
+
+	av = kmem_cache_alloc(av_cache, SLAB_KERNEL);
+	if (!av) {
+		ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p",
+			 pd, ah_attr);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	av->av.sl = ah_attr->sl;
+	av->av.dlid = ah_attr->dlid;
+	av->av.slid_path_bits = ah_attr->src_path_bits;
+
+	if (ehca_static_rate < 0) {
+		int ah_mult = ib_rate_to_mult(ah_attr->static_rate);
+		int ehca_mult =
+			ib_rate_to_mult(shca->sport[ah_attr->port_num].rate );
+
+		if (ah_mult >= ehca_mult)
+			av->av.ipd = 0;
+		else
+			av->av.ipd = (ah_mult > 0) ?
+				((ehca_mult - 1) / ah_mult) : 0;
+	} else
+	        av->av.ipd = ehca_static_rate;
+
+	av->av.lnh = ah_attr->ah_flags;
+	av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6);
+	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK,
+					    ah_attr->grh.traffic_class);
+	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
+					    ah_attr->grh.flow_label);
+	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
+					    ah_attr->grh.hop_limit);
+	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B);
+	/* set sgid in grh.word_1 */
+	if (ah_attr->ah_flags & IB_AH_GRH) {
+		int rc;
+		struct ib_port_attr port_attr;
+		union ib_gid gid;
+		memset(&port_attr, 0, sizeof(port_attr));
+		rc = ehca_query_port(pd->device, ah_attr->port_num,
+				     &port_attr);
+		if (rc) { /* invalid port number */
+			ret = -EINVAL;
+			ehca_err(pd->device, "Invalid port number "
+				 "ehca_query_port() returned %x "
+				 "pd=%p ah_attr=%p", rc, pd, ah_attr);
+			goto create_ah_exit1;
+		}
+		memset(&gid, 0, sizeof(gid));
+		rc = ehca_query_gid(pd->device,
+				    ah_attr->port_num,
+				    ah_attr->grh.sgid_index, &gid);
+		if (rc) {
+			ret = -EINVAL;
+			ehca_err(pd->device, "Failed to retrieve sgid "
+				 "ehca_query_gid() returned %x "
+				 "pd=%p ah_attr=%p", rc, pd, ah_attr);
+			goto create_ah_exit1;
+		}
+		memcpy(&av->av.grh.word_1, &gid, sizeof(gid));
+	}
+	/* for the time being we use a hard coded PMTU of 2048 Bytes */
+	av->av.pmtu = 4;
+
+	/* dgid comes in grh.word_3 */
+	memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid,
+	       sizeof(ah_attr->grh.dgid));
+
+	return &av->ib_ah;
+
+create_ah_exit1:
+	kmem_cache_free(av_cache, av);
+
+	return ERR_PTR(ret);
+}
+
+int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+{
+	struct ehca_av *av;
+	struct ehca_ud_av new_ehca_av;
+	struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
+	u32 cur_pid = current->tgid;
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != cur_pid) {
+		ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	memset(&new_ehca_av, 0, sizeof(new_ehca_av));
+	new_ehca_av.sl = ah_attr->sl;
+	new_ehca_av.dlid = ah_attr->dlid;
+	new_ehca_av.slid_path_bits = ah_attr->src_path_bits;
+	new_ehca_av.ipd = ah_attr->static_rate;
+	new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK,
+					 (ah_attr->ah_flags & IB_AH_GRH) > 0);
+	new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK,
+						ah_attr->grh.traffic_class);
+	new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
+						 ah_attr->grh.flow_label);
+	new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
+						 ah_attr->grh.hop_limit);
+	new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b);
+
+	/* set sgid in grh.word_1 */
+	if (ah_attr->ah_flags & IB_AH_GRH) {
+		int rc;
+		struct ib_port_attr port_attr;
+		union ib_gid gid;
+		memset(&port_attr, 0, sizeof(port_attr));
+		rc = ehca_query_port(ah->device, ah_attr->port_num,
+				     &port_attr);
+		if (rc) { /* invalid port number */
+			ehca_err(ah->device, "Invalid port number "
+				 "ehca_query_port() returned %x "
+				 "ah=%p ah_attr=%p port_num=%x",
+				 rc, ah, ah_attr, ah_attr->port_num);
+			return -EINVAL;
+		}
+		memset(&gid, 0, sizeof(gid));
+		rc = ehca_query_gid(ah->device,
+				    ah_attr->port_num,
+				    ah_attr->grh.sgid_index, &gid);
+		if (rc) {
+			ehca_err(ah->device, "Failed to retrieve sgid "
+				 "ehca_query_gid() returned %x "
+				 "ah=%p ah_attr=%p port_num=%x "
+				 "sgid_index=%x",
+				 rc, ah, ah_attr, ah_attr->port_num,
+				 ah_attr->grh.sgid_index);
+			return -EINVAL;
+		}
+		memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid));
+	}
+
+	new_ehca_av.pmtu = 4; /* see also comment in create_ah() */
+
+	memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid,
+	       sizeof(ah_attr->grh.dgid));
+
+	av = container_of(ah, struct ehca_av, ib_ah);
+	av->av = new_ehca_av;
+
+	return 0;
+}
+
+int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+{
+	struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah);
+	struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
+	u32 cur_pid = current->tgid;
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != cur_pid) {
+		ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3,
+	       sizeof(ah_attr->grh.dgid));
+	ah_attr->sl = av->av.sl;
+
+	ah_attr->dlid = av->av.dlid;
+
+	ah_attr->src_path_bits = av->av.slid_path_bits;
+	ah_attr->static_rate = av->av.ipd;
+	ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh);
+	ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK,
+						    av->av.grh.word_0);
+	ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK,
+						av->av.grh.word_0);
+	ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK,
+						 av->av.grh.word_0);
+
+	return 0;
+}
+
+int ehca_destroy_ah(struct ib_ah *ah)
+{
+	struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
+	u32 cur_pid = current->tgid;
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != cur_pid) {
+		ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah));
+
+	return 0;
+}
+
+int ehca_init_av_cache(void)
+{
+	av_cache = kmem_cache_create("ehca_cache_av",
+				   sizeof(struct ehca_av), 0,
+				   SLAB_HWCACHE_ALIGN,
+				   NULL, NULL);
+	if (!av_cache)
+		return -ENOMEM;
+	return 0;
+}
+
+void ehca_cleanup_av_cache(void)
+{
+	if (av_cache)
+		kmem_cache_destroy(av_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
new file mode 100644
index 00000000000..1c722032319
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -0,0 +1,346 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Struct definition for eHCA internal structures
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_CLASSES_H__
+#define __EHCA_CLASSES_H__
+
+#include "ehca_classes.h"
+#include "ipz_pt_fn.h"
+
+struct ehca_module;
+struct ehca_qp;
+struct ehca_cq;
+struct ehca_eq;
+struct ehca_mr;
+struct ehca_mw;
+struct ehca_pd;
+struct ehca_av;
+
+#ifdef CONFIG_PPC64
+#include "ehca_classes_pSeries.h"
+#endif
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "ehca_irq.h"
+
+struct ehca_eq {
+	u32 length;
+	struct ipz_queue ipz_queue;
+	struct ipz_eq_handle ipz_eq_handle;
+	struct work_struct work;
+	struct h_galpas galpas;
+	int is_initialized;
+	struct ehca_pfeq pf;
+	spinlock_t spinlock;
+	struct tasklet_struct interrupt_task;
+	u32 ist;
+};
+
+struct ehca_sport {
+	struct ib_cq *ibcq_aqp1;
+	struct ib_qp *ibqp_aqp1;
+	enum ib_rate  rate;
+	enum ib_port_state port_state;
+};
+
+struct ehca_shca {
+	struct ib_device ib_device;
+	struct ibmebus_dev *ibmebus_dev;
+	u8 num_ports;
+	int hw_level;
+	struct list_head shca_list;
+	struct ipz_adapter_handle ipz_hca_handle;
+	struct ehca_sport sport[2];
+	struct ehca_eq eq;
+	struct ehca_eq neq;
+	struct ehca_mr *maxmr;
+	struct ehca_pd *pd;
+	struct h_galpas galpas;
+};
+
+struct ehca_pd {
+	struct ib_pd ib_pd;
+	struct ipz_pd fw_pd;
+	u32 ownpid;
+};
+
+struct ehca_qp {
+	struct ib_qp ib_qp;
+	u32 qp_type;
+	struct ipz_queue ipz_squeue;
+	struct ipz_queue ipz_rqueue;
+	struct h_galpas galpas;
+	u32 qkey;
+	u32 real_qp_num;
+	u32 token;
+	spinlock_t spinlock_s;
+	spinlock_t spinlock_r;
+	u32 sq_max_inline_data_size;
+	struct ipz_qp_handle ipz_qp_handle;
+	struct ehca_pfqp pf;
+	struct ib_qp_init_attr init_attr;
+	u64 uspace_squeue;
+	u64 uspace_rqueue;
+	u64 uspace_fwh;
+	struct ehca_cq *send_cq;
+	struct ehca_cq *recv_cq;
+	unsigned int sqerr_purgeflag;
+	struct hlist_node list_entries;
+};
+
+/* must be power of 2 */
+#define QP_HASHTAB_LEN 8
+
+struct ehca_cq {
+	struct ib_cq ib_cq;
+	struct ipz_queue ipz_queue;
+	struct h_galpas galpas;
+	spinlock_t spinlock;
+	u32 cq_number;
+	u32 token;
+	u32 nr_of_entries;
+	struct ipz_cq_handle ipz_cq_handle;
+	struct ehca_pfcq pf;
+	spinlock_t cb_lock;
+	u64 uspace_queue;
+	u64 uspace_fwh;
+	struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
+	struct list_head entry;
+	u32 nr_callbacks;
+	spinlock_t task_lock;
+	u32 ownpid;
+};
+
+enum ehca_mr_flag {
+	EHCA_MR_FLAG_FMR = 0x80000000,	 /* FMR, created with ehca_alloc_fmr */
+	EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR                           */
+};
+
+struct ehca_mr {
+	union {
+		struct ib_mr ib_mr;	/* must always be first in ehca_mr */
+		struct ib_fmr ib_fmr;	/* must always be first in ehca_mr */
+	} ib;
+	spinlock_t mrlock;
+
+	enum ehca_mr_flag flags;
+	u32 num_pages;		/* number of MR pages */
+	u32 num_4k;		/* number of 4k "page" portions to form MR */
+	int acl;		/* ACL (stored here for usage in reregister) */
+	u64 *start;		/* virtual start address (stored here for */
+	                        /* usage in reregister) */
+	u64 size;		/* size (stored here for usage in reregister) */
+	u32 fmr_page_size;	/* page size for FMR */
+	u32 fmr_max_pages;	/* max pages for FMR */
+	u32 fmr_max_maps;	/* max outstanding maps for FMR */
+	u32 fmr_map_cnt;	/* map counter for FMR */
+	/* fw specific data */
+	struct ipz_mrmw_handle ipz_mr_handle;	/* MR handle for h-calls */
+	struct h_galpas galpas;
+	/* data for userspace bridge */
+	u32 nr_of_pages;
+	void *pagearray;
+};
+
+struct ehca_mw {
+	struct ib_mw ib_mw;	/* gen2 mw, must always be first in ehca_mw */
+	spinlock_t mwlock;
+
+	u8 never_bound;		/* indication MW was never bound */
+	struct ipz_mrmw_handle ipz_mw_handle;	/* MW handle for h-calls */
+	struct h_galpas galpas;
+};
+
+enum ehca_mr_pgi_type {
+	EHCA_MR_PGI_PHYS   = 1,  /* type of ehca_reg_phys_mr,
+				  * ehca_rereg_phys_mr,
+				  * ehca_reg_internal_maxmr */
+	EHCA_MR_PGI_USER   = 2,  /* type of ehca_reg_user_mr */
+	EHCA_MR_PGI_FMR    = 3   /* type of ehca_map_phys_fmr */
+};
+
+struct ehca_mr_pginfo {
+	enum ehca_mr_pgi_type type;
+	u64 num_pages;
+	u64 page_cnt;
+	u64 num_4k;       /* number of 4k "page" portions */
+	u64 page_4k_cnt;  /* counter for 4k "page" portions */
+	u64 next_4k;      /* next 4k "page" portion in buffer/chunk/listelem */
+
+	/* type EHCA_MR_PGI_PHYS section */
+	int num_phys_buf;
+	struct ib_phys_buf *phys_buf_array;
+	u64 next_buf;
+
+	/* type EHCA_MR_PGI_USER section */
+	struct ib_umem *region;
+	struct ib_umem_chunk *next_chunk;
+	u64 next_nmap;
+
+	/* type EHCA_MR_PGI_FMR section */
+	u64 *page_list;
+	u64 next_listelem;
+	/* next_4k also used within EHCA_MR_PGI_FMR */
+};
+
+/* output parameters for MR/FMR hipz calls */
+struct ehca_mr_hipzout_parms {
+	struct ipz_mrmw_handle handle;
+	u32 lkey;
+	u32 rkey;
+	u64 len;
+	u64 vaddr;
+	u32 acl;
+};
+
+/* output parameters for MW hipz calls */
+struct ehca_mw_hipzout_parms {
+	struct ipz_mrmw_handle handle;
+	u32 rkey;
+};
+
+struct ehca_av {
+	struct ib_ah ib_ah;
+	struct ehca_ud_av av;
+};
+
+struct ehca_ucontext {
+	struct ib_ucontext ib_ucontext;
+};
+
+struct ehca_module *ehca_module_new(void);
+
+int ehca_module_delete(struct ehca_module *me);
+
+int ehca_eq_ctor(struct ehca_eq *eq);
+
+int ehca_eq_dtor(struct ehca_eq *eq);
+
+struct ehca_shca *ehca_shca_new(void);
+
+int ehca_shca_delete(struct ehca_shca *me);
+
+struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor);
+
+int ehca_init_pd_cache(void);
+void ehca_cleanup_pd_cache(void);
+int ehca_init_cq_cache(void);
+void ehca_cleanup_cq_cache(void);
+int ehca_init_qp_cache(void);
+void ehca_cleanup_qp_cache(void);
+int ehca_init_av_cache(void);
+void ehca_cleanup_av_cache(void);
+int ehca_init_mrmw_cache(void);
+void ehca_cleanup_mrmw_cache(void);
+
+extern spinlock_t ehca_qp_idr_lock;
+extern spinlock_t ehca_cq_idr_lock;
+extern struct idr ehca_qp_idr;
+extern struct idr ehca_cq_idr;
+
+extern int ehca_static_rate;
+extern int ehca_port_act_time;
+extern int ehca_use_hp_mr;
+
+struct ipzu_queue_resp {
+	u64 queue;        /* points to first queue entry */
+	u32 qe_size;      /* queue entry size */
+	u32 act_nr_of_sg;
+	u32 queue_length; /* queue length allocated in bytes */
+	u32 pagesize;
+	u32 toggle_state;
+	u32 dummy; /* padding for 8 byte alignment */
+};
+
+struct ehca_create_cq_resp {
+	u32 cq_number;
+	u32 token;
+	struct ipzu_queue_resp ipz_queue;
+	struct h_galpas galpas;
+};
+
+struct ehca_create_qp_resp {
+	u32 qp_num;
+	u32 token;
+	u32 qp_type;
+	u32 qkey;
+	/* qp_num assigned by ehca: sqp0/1 may have got different numbers */
+	u32 real_qp_num;
+	u32 dummy; /* padding for 8 byte alignment */
+	struct ipzu_queue_resp ipz_squeue;
+	struct ipzu_queue_resp ipz_rqueue;
+	struct h_galpas galpas;
+};
+
+struct ehca_alloc_cq_parms {
+	u32 nr_cqe;
+	u32 act_nr_of_entries;
+	u32 act_pages;
+	struct ipz_eq_handle eq_handle;
+};
+
+struct ehca_alloc_qp_parms {
+	int servicetype;
+	int sigtype;
+	int daqp_ctrl;
+	int max_send_sge;
+	int max_recv_sge;
+	int ud_av_l_key_ctl;
+
+	u16 act_nr_send_wqes;
+	u16 act_nr_recv_wqes;
+	u8  act_nr_recv_sges;
+	u8  act_nr_send_sges;
+
+	u32 nr_rq_pages;
+	u32 nr_sq_pages;
+
+	struct ipz_eq_handle ipz_eq_handle;
+	struct ipz_pd pd;
+};
+
+int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
+int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num);
+struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int qp_num);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
new file mode 100644
index 00000000000..5665f213b81
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
@@ -0,0 +1,236 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  pSeries interface definitions
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_CLASSES_PSERIES_H__
+#define __EHCA_CLASSES_PSERIES_H__
+
+#include "hcp_phyp.h"
+#include "ipz_pt_fn.h"
+
+
+struct ehca_pfqp {
+	struct ipz_qpt sqpt;
+	struct ipz_qpt rqpt;
+};
+
+struct ehca_pfcq {
+	struct ipz_qpt qpt;
+	u32 cqnr;
+};
+
+struct ehca_pfeq {
+	struct ipz_qpt qpt;
+	struct h_galpa galpa;
+	u32 eqnr;
+};
+
+struct ipz_adapter_handle {
+	u64 handle;
+};
+
+struct ipz_cq_handle {
+	u64 handle;
+};
+
+struct ipz_eq_handle {
+	u64 handle;
+};
+
+struct ipz_qp_handle {
+	u64 handle;
+};
+struct ipz_mrmw_handle {
+	u64 handle;
+};
+
+struct ipz_pd {
+	u32 value;
+};
+
+struct hcp_modify_qp_control_block {
+	u32 qkey;                      /* 00 */
+	u32 rdd;                       /* reliable datagram domain */
+	u32 send_psn;                  /* 02 */
+	u32 receive_psn;               /* 03 */
+	u32 prim_phys_port;            /* 04 */
+	u32 alt_phys_port;             /* 05 */
+	u32 prim_p_key_idx;            /* 06 */
+	u32 alt_p_key_idx;             /* 07 */
+	u32 rdma_atomic_ctrl;          /* 08 */
+	u32 qp_state;                  /* 09 */
+	u32 reserved_10;               /* 10 */
+	u32 rdma_nr_atomic_resp_res;   /* 11 */
+	u32 path_migration_state;      /* 12 */
+	u32 rdma_atomic_outst_dest_qp; /* 13 */
+	u32 dest_qp_nr;                /* 14 */
+	u32 min_rnr_nak_timer_field;   /* 15 */
+	u32 service_level;             /* 16 */
+	u32 send_grh_flag;             /* 17 */
+	u32 retry_count;               /* 18 */
+	u32 timeout;                   /* 19 */
+	u32 path_mtu;                  /* 20 */
+	u32 max_static_rate;           /* 21 */
+	u32 dlid;                      /* 22 */
+	u32 rnr_retry_count;           /* 23 */
+	u32 source_path_bits;          /* 24 */
+	u32 traffic_class;             /* 25 */
+	u32 hop_limit;                 /* 26 */
+	u32 source_gid_idx;            /* 27 */
+	u32 flow_label;                /* 28 */
+	u32 reserved_29;               /* 29 */
+	union {                        /* 30 */
+		u64 dw[2];
+		u8 byte[16];
+	} dest_gid;
+	u32 service_level_al;          /* 34 */
+	u32 send_grh_flag_al;          /* 35 */
+	u32 retry_count_al;            /* 36 */
+	u32 timeout_al;                /* 37 */
+	u32 max_static_rate_al;        /* 38 */
+	u32 dlid_al;                   /* 39 */
+	u32 rnr_retry_count_al;        /* 40 */
+	u32 source_path_bits_al;       /* 41 */
+	u32 traffic_class_al;          /* 42 */
+	u32 hop_limit_al;              /* 43 */
+	u32 source_gid_idx_al;         /* 44 */
+	u32 flow_label_al;             /* 45 */
+	u32 reserved_46;               /* 46 */
+	u32 reserved_47;               /* 47 */
+	union {                        /* 48 */
+		u64 dw[2];
+		u8 byte[16];
+	} dest_gid_al;
+	u32 max_nr_outst_send_wr;      /* 52 */
+	u32 max_nr_outst_recv_wr;      /* 53 */
+	u32 disable_ete_credit_check;  /* 54 */
+	u32 qp_number;                 /* 55 */
+	u64 send_queue_handle;         /* 56 */
+	u64 recv_queue_handle;         /* 58 */
+	u32 actual_nr_sges_in_sq_wqe;  /* 60 */
+	u32 actual_nr_sges_in_rq_wqe;  /* 61 */
+	u32 qp_enable;                 /* 62 */
+	u32 curr_srq_limit;            /* 63 */
+	u64 qp_aff_asyn_ev_log_reg;    /* 64 */
+	u64 shared_rq_hndl;            /* 66 */
+	u64 trigg_doorbell_qp_hndl;    /* 68 */
+	u32 reserved_70_127[58];       /* 70 */
+};
+
+#define MQPCB_MASK_QKEY                         EHCA_BMASK_IBM(0,0)
+#define MQPCB_MASK_SEND_PSN                     EHCA_BMASK_IBM(2,2)
+#define MQPCB_MASK_RECEIVE_PSN                  EHCA_BMASK_IBM(3,3)
+#define MQPCB_MASK_PRIM_PHYS_PORT               EHCA_BMASK_IBM(4,4)
+#define MQPCB_PRIM_PHYS_PORT                    EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_ALT_PHYS_PORT                EHCA_BMASK_IBM(5,5)
+#define MQPCB_MASK_PRIM_P_KEY_IDX               EHCA_BMASK_IBM(6,6)
+#define MQPCB_PRIM_P_KEY_IDX                    EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_ALT_P_KEY_IDX                EHCA_BMASK_IBM(7,7)
+#define MQPCB_MASK_RDMA_ATOMIC_CTRL             EHCA_BMASK_IBM(8,8)
+#define MQPCB_MASK_QP_STATE                     EHCA_BMASK_IBM(9,9)
+#define MQPCB_QP_STATE                          EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES      EHCA_BMASK_IBM(11,11)
+#define MQPCB_MASK_PATH_MIGRATION_STATE         EHCA_BMASK_IBM(12,12)
+#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP    EHCA_BMASK_IBM(13,13)
+#define MQPCB_MASK_DEST_QP_NR                   EHCA_BMASK_IBM(14,14)
+#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD      EHCA_BMASK_IBM(15,15)
+#define MQPCB_MASK_SERVICE_LEVEL                EHCA_BMASK_IBM(16,16)
+#define MQPCB_MASK_SEND_GRH_FLAG                EHCA_BMASK_IBM(17,17)
+#define MQPCB_MASK_RETRY_COUNT                  EHCA_BMASK_IBM(18,18)
+#define MQPCB_MASK_TIMEOUT                      EHCA_BMASK_IBM(19,19)
+#define MQPCB_MASK_PATH_MTU                     EHCA_BMASK_IBM(20,20)
+#define MQPCB_PATH_MTU                          EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_MAX_STATIC_RATE              EHCA_BMASK_IBM(21,21)
+#define MQPCB_MAX_STATIC_RATE                   EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_DLID                         EHCA_BMASK_IBM(22,22)
+#define MQPCB_DLID                              EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_RNR_RETRY_COUNT              EHCA_BMASK_IBM(23,23)
+#define MQPCB_RNR_RETRY_COUNT                   EHCA_BMASK_IBM(29,31)
+#define MQPCB_MASK_SOURCE_PATH_BITS             EHCA_BMASK_IBM(24,24)
+#define MQPCB_SOURCE_PATH_BITS                  EHCA_BMASK_IBM(25,31)
+#define MQPCB_MASK_TRAFFIC_CLASS                EHCA_BMASK_IBM(25,25)
+#define MQPCB_TRAFFIC_CLASS                     EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_HOP_LIMIT                    EHCA_BMASK_IBM(26,26)
+#define MQPCB_HOP_LIMIT                         EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_SOURCE_GID_IDX               EHCA_BMASK_IBM(27,27)
+#define MQPCB_SOURCE_GID_IDX                    EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_FLOW_LABEL                   EHCA_BMASK_IBM(28,28)
+#define MQPCB_FLOW_LABEL                        EHCA_BMASK_IBM(12,31)
+#define MQPCB_MASK_DEST_GID                     EHCA_BMASK_IBM(30,30)
+#define MQPCB_MASK_SERVICE_LEVEL_AL             EHCA_BMASK_IBM(31,31)
+#define MQPCB_SERVICE_LEVEL_AL                  EHCA_BMASK_IBM(28,31)
+#define MQPCB_MASK_SEND_GRH_FLAG_AL             EHCA_BMASK_IBM(32,32)
+#define MQPCB_SEND_GRH_FLAG_AL                  EHCA_BMASK_IBM(31,31)
+#define MQPCB_MASK_RETRY_COUNT_AL               EHCA_BMASK_IBM(33,33)
+#define MQPCB_RETRY_COUNT_AL                    EHCA_BMASK_IBM(29,31)
+#define MQPCB_MASK_TIMEOUT_AL                   EHCA_BMASK_IBM(34,34)
+#define MQPCB_TIMEOUT_AL                        EHCA_BMASK_IBM(27,31)
+#define MQPCB_MASK_MAX_STATIC_RATE_AL           EHCA_BMASK_IBM(35,35)
+#define MQPCB_MAX_STATIC_RATE_AL                EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_DLID_AL                      EHCA_BMASK_IBM(36,36)
+#define MQPCB_DLID_AL                           EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_RNR_RETRY_COUNT_AL           EHCA_BMASK_IBM(37,37)
+#define MQPCB_RNR_RETRY_COUNT_AL                EHCA_BMASK_IBM(29,31)
+#define MQPCB_MASK_SOURCE_PATH_BITS_AL          EHCA_BMASK_IBM(38,38)
+#define MQPCB_SOURCE_PATH_BITS_AL               EHCA_BMASK_IBM(25,31)
+#define MQPCB_MASK_TRAFFIC_CLASS_AL             EHCA_BMASK_IBM(39,39)
+#define MQPCB_TRAFFIC_CLASS_AL                  EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_HOP_LIMIT_AL                 EHCA_BMASK_IBM(40,40)
+#define MQPCB_HOP_LIMIT_AL                      EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_SOURCE_GID_IDX_AL            EHCA_BMASK_IBM(41,41)
+#define MQPCB_SOURCE_GID_IDX_AL                 EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_FLOW_LABEL_AL                EHCA_BMASK_IBM(42,42)
+#define MQPCB_FLOW_LABEL_AL                     EHCA_BMASK_IBM(12,31)
+#define MQPCB_MASK_DEST_GID_AL                  EHCA_BMASK_IBM(44,44)
+#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR         EHCA_BMASK_IBM(45,45)
+#define MQPCB_MAX_NR_OUTST_SEND_WR              EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR         EHCA_BMASK_IBM(46,46)
+#define MQPCB_MAX_NR_OUTST_RECV_WR              EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK     EHCA_BMASK_IBM(47,47)
+#define MQPCB_DISABLE_ETE_CREDIT_CHECK          EHCA_BMASK_IBM(31,31)
+#define MQPCB_QP_NUMBER                         EHCA_BMASK_IBM(8,31)
+#define MQPCB_MASK_QP_ENABLE                    EHCA_BMASK_IBM(48,48)
+#define MQPCB_QP_ENABLE                         EHCA_BMASK_IBM(31,31)
+#define MQPCB_MASK_CURR_SQR_LIMIT               EHCA_BMASK_IBM(49,49)
+#define MQPCB_CURR_SQR_LIMIT                    EHCA_BMASK_IBM(15,31)
+#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG       EHCA_BMASK_IBM(50,50)
+#define MQPCB_MASK_SHARED_RQ_HNDL               EHCA_BMASK_IBM(51,51)
+
+#endif /* __EHCA_CLASSES_PSERIES_H__ */
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
new file mode 100644
index 00000000000..458fe19648a
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -0,0 +1,427 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Completion queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_iverbs.h"
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "hcp_if.h"
+
+static struct kmem_cache *cq_cache;
+
+int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp)
+{
+	unsigned int qp_num = qp->real_qp_num;
+	unsigned int key = qp_num & (QP_HASHTAB_LEN-1);
+	unsigned long spl_flags;
+
+	spin_lock_irqsave(&cq->spinlock, spl_flags);
+	hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]);
+	spin_unlock_irqrestore(&cq->spinlock, spl_flags);
+
+	ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x",
+		 cq->cq_number, qp_num);
+
+	return 0;
+}
+
+int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
+{
+	int ret = -EINVAL;
+	unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
+	struct hlist_node *iter;
+	struct ehca_qp *qp;
+	unsigned long spl_flags;
+
+	spin_lock_irqsave(&cq->spinlock, spl_flags);
+	hlist_for_each(iter, &cq->qp_hashtab[key]) {
+		qp = hlist_entry(iter, struct ehca_qp, list_entries);
+		if (qp->real_qp_num == real_qp_num) {
+			hlist_del(iter);
+			ehca_dbg(cq->ib_cq.device,
+				 "removed qp from cq .cq_num=%x real_qp_num=%x",
+				 cq->cq_number, real_qp_num);
+			ret = 0;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&cq->spinlock, spl_flags);
+	if (ret)
+		ehca_err(cq->ib_cq.device,
+			 "qp not found cq_num=%x real_qp_num=%x",
+			 cq->cq_number, real_qp_num);
+
+	return ret;
+}
+
+struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
+{
+	struct ehca_qp *ret = NULL;
+	unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
+	struct hlist_node *iter;
+	struct ehca_qp *qp;
+	hlist_for_each(iter, &cq->qp_hashtab[key]) {
+		qp = hlist_entry(iter, struct ehca_qp, list_entries);
+		if (qp->real_qp_num == real_qp_num) {
+			ret = qp;
+			break;
+		}
+	}
+	return ret;
+}
+
+struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
+			     struct ib_ucontext *context,
+			     struct ib_udata *udata)
+{
+	static const u32 additional_cqe = 20;
+	struct ib_cq *cq;
+	struct ehca_cq *my_cq;
+	struct ehca_shca *shca =
+		container_of(device, struct ehca_shca, ib_device);
+	struct ipz_adapter_handle adapter_handle;
+	struct ehca_alloc_cq_parms param; /* h_call's out parameters */
+	struct h_galpa gal;
+	void *vpage;
+	u32 counter;
+	u64 rpage, cqx_fec, h_ret;
+	int ipz_rc, ret, i;
+	unsigned long flags;
+
+	if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
+		return ERR_PTR(-EINVAL);
+
+	my_cq = kmem_cache_alloc(cq_cache, SLAB_KERNEL);
+	if (!my_cq) {
+		ehca_err(device, "Out of memory for ehca_cq struct device=%p",
+			 device);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	memset(my_cq, 0, sizeof(struct ehca_cq));
+	memset(&param, 0, sizeof(struct ehca_alloc_cq_parms));
+
+	spin_lock_init(&my_cq->spinlock);
+	spin_lock_init(&my_cq->cb_lock);
+	spin_lock_init(&my_cq->task_lock);
+	my_cq->ownpid = current->tgid;
+
+	cq = &my_cq->ib_cq;
+
+	adapter_handle = shca->ipz_hca_handle;
+	param.eq_handle = shca->eq.ipz_eq_handle;
+
+	do {
+		if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) {
+			cq = ERR_PTR(-ENOMEM);
+			ehca_err(device, "Can't reserve idr nr. device=%p",
+				 device);
+			goto create_cq_exit1;
+		}
+
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token);
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+	} while (ret == -EAGAIN);
+
+	if (ret) {
+		cq = ERR_PTR(-ENOMEM);
+		ehca_err(device, "Can't allocate new idr entry. device=%p",
+			 device);
+		goto create_cq_exit1;
+	}
+
+	/*
+	 * CQs maximum depth is 4GB-64, but we need additional 20 as buffer
+	 * for receiving errors CQEs.
+	 */
+	param.nr_cqe = cqe + additional_cqe;
+	h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, &param);
+
+	if (h_ret != H_SUCCESS) {
+		ehca_err(device, "hipz_h_alloc_resource_cq() failed "
+			 "h_ret=%lx device=%p", h_ret, device);
+		cq = ERR_PTR(ehca2ib_return_code(h_ret));
+		goto create_cq_exit2;
+	}
+
+	ipz_rc = ipz_queue_ctor(&my_cq->ipz_queue, param.act_pages,
+				EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0);
+	if (!ipz_rc) {
+		ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p",
+			 ipz_rc, device);
+		cq = ERR_PTR(-EINVAL);
+		goto create_cq_exit3;
+	}
+
+	for (counter = 0; counter < param.act_pages; counter++) {
+		vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
+		if (!vpage) {
+			ehca_err(device, "ipz_qpageit_get_inc() "
+				 "returns NULL device=%p", device);
+			cq = ERR_PTR(-EAGAIN);
+			goto create_cq_exit4;
+		}
+		rpage = virt_to_abs(vpage);
+
+		h_ret = hipz_h_register_rpage_cq(adapter_handle,
+						 my_cq->ipz_cq_handle,
+						 &my_cq->pf,
+						 0,
+						 0,
+						 rpage,
+						 1,
+						 my_cq->galpas.
+						 kernel);
+
+		if (h_ret < H_SUCCESS) {
+			ehca_err(device, "hipz_h_register_rpage_cq() failed "
+				 "ehca_cq=%p cq_num=%x h_ret=%lx counter=%i "
+				 "act_pages=%i", my_cq, my_cq->cq_number,
+				 h_ret, counter, param.act_pages);
+			cq = ERR_PTR(-EINVAL);
+			goto create_cq_exit4;
+		}
+
+		if (counter == (param.act_pages - 1)) {
+			vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
+			if ((h_ret != H_SUCCESS) || vpage) {
+				ehca_err(device, "Registration of pages not "
+					 "complete ehca_cq=%p cq_num=%x "
+					 "h_ret=%lx", my_cq, my_cq->cq_number,
+					 h_ret);
+				cq = ERR_PTR(-EAGAIN);
+				goto create_cq_exit4;
+			}
+		} else {
+			if (h_ret != H_PAGE_REGISTERED) {
+				ehca_err(device, "Registration of page failed "
+					 "ehca_cq=%p cq_num=%x h_ret=%lx"
+					 "counter=%i act_pages=%i",
+					 my_cq, my_cq->cq_number,
+					 h_ret, counter, param.act_pages);
+				cq = ERR_PTR(-ENOMEM);
+				goto create_cq_exit4;
+			}
+		}
+	}
+
+	ipz_qeit_reset(&my_cq->ipz_queue);
+
+	gal = my_cq->galpas.kernel;
+	cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec));
+	ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%lx",
+		 my_cq, my_cq->cq_number, cqx_fec);
+
+	my_cq->ib_cq.cqe = my_cq->nr_of_entries =
+		param.act_nr_of_entries - additional_cqe;
+	my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff;
+
+	for (i = 0; i < QP_HASHTAB_LEN; i++)
+		INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
+
+	if (context) {
+		struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
+		struct ehca_create_cq_resp resp;
+		struct vm_area_struct *vma;
+		memset(&resp, 0, sizeof(resp));
+		resp.cq_number = my_cq->cq_number;
+		resp.token = my_cq->token;
+		resp.ipz_queue.qe_size = ipz_queue->qe_size;
+		resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg;
+		resp.ipz_queue.queue_length = ipz_queue->queue_length;
+		resp.ipz_queue.pagesize = ipz_queue->pagesize;
+		resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
+		ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x12000000,
+				       ipz_queue->queue_length,
+				       (void**)&resp.ipz_queue.queue,
+				       &vma);
+		if (ret) {
+			ehca_err(device, "Could not mmap queue pages");
+			cq = ERR_PTR(ret);
+			goto create_cq_exit4;
+		}
+		my_cq->uspace_queue = resp.ipz_queue.queue;
+		resp.galpas = my_cq->galpas;
+		ret = ehca_mmap_register(my_cq->galpas.user.fw_handle,
+					 (void**)&resp.galpas.kernel.fw_handle,
+					 &vma);
+		if (ret) {
+			ehca_err(device, "Could not mmap fw_handle");
+			cq = ERR_PTR(ret);
+			goto create_cq_exit5;
+		}
+		my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
+		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
+			ehca_err(device, "Copy to udata failed.");
+			goto create_cq_exit6;
+		}
+	}
+
+	return cq;
+
+create_cq_exit6:
+	ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
+
+create_cq_exit5:
+	ehca_munmap(my_cq->uspace_queue, my_cq->ipz_queue.queue_length);
+
+create_cq_exit4:
+	ipz_queue_dtor(&my_cq->ipz_queue);
+
+create_cq_exit3:
+	h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
+	if (h_ret != H_SUCCESS)
+		ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p "
+			 "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret);
+
+create_cq_exit2:
+	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+	idr_remove(&ehca_cq_idr, my_cq->token);
+	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+create_cq_exit1:
+	kmem_cache_free(cq_cache, my_cq);
+
+	return cq;
+}
+
+int ehca_destroy_cq(struct ib_cq *cq)
+{
+	u64 h_ret;
+	int ret;
+	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+	int cq_num = my_cq->cq_number;
+	struct ib_device *device = cq->device;
+	struct ehca_shca *shca = container_of(device, struct ehca_shca,
+					      ib_device);
+	struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
+	u32 cur_pid = current->tgid;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+	while (my_cq->nr_callbacks)
+		yield();
+
+	idr_remove(&ehca_cq_idr, my_cq->token);
+	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+	if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
+		ehca_err(device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_cq->ownpid);
+		return -EINVAL;
+	}
+
+	/* un-mmap if vma alloc */
+	if (my_cq->uspace_queue ) {
+		ret = ehca_munmap(my_cq->uspace_queue,
+				  my_cq->ipz_queue.queue_length);
+		if (ret)
+			ehca_err(device, "Could not munmap queue ehca_cq=%p "
+				 "cq_num=%x", my_cq, cq_num);
+		ret = ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
+		if (ret)
+			ehca_err(device, "Could not munmap fwh ehca_cq=%p "
+				 "cq_num=%x", my_cq, cq_num);
+	}
+
+	h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
+	if (h_ret == H_R_STATE) {
+		/* cq in err: read err data and destroy it forcibly */
+		ehca_dbg(device, "ehca_cq=%p cq_num=%x ressource=%lx in err "
+			 "state. Try to delete it forcibly.",
+			 my_cq, cq_num, my_cq->ipz_cq_handle.handle);
+		ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle);
+		h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
+		if (h_ret == H_SUCCESS)
+			ehca_dbg(device, "cq_num=%x deleted successfully.",
+				 cq_num);
+	}
+	if (h_ret != H_SUCCESS) {
+		ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lx "
+			 "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
+		return ehca2ib_return_code(h_ret);
+	}
+	ipz_queue_dtor(&my_cq->ipz_queue);
+	kmem_cache_free(cq_cache, my_cq);
+
+	return 0;
+}
+
+int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
+{
+	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+	u32 cur_pid = current->tgid;
+
+	if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
+		ehca_err(cq->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_cq->ownpid);
+		return -EINVAL;
+	}
+
+	/* TODO: proper resize needs to be done */
+	ehca_err(cq->device, "not implemented yet");
+
+	return -EFAULT;
+}
+
+int ehca_init_cq_cache(void)
+{
+	cq_cache = kmem_cache_create("ehca_cache_cq",
+				     sizeof(struct ehca_cq), 0,
+				     SLAB_HWCACHE_ALIGN,
+				     NULL, NULL);
+	if (!cq_cache)
+		return -ENOMEM;
+	return 0;
+}
+
+void ehca_cleanup_cq_cache(void)
+{
+	if (cq_cache)
+		kmem_cache_destroy(cq_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
new file mode 100644
index 00000000000..5281dec66f1
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -0,0 +1,185 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Event queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "ehca_iverbs.h"
+#include "ehca_qes.h"
+#include "hcp_if.h"
+#include "ipz_pt_fn.h"
+
+int ehca_create_eq(struct ehca_shca *shca,
+		   struct ehca_eq *eq,
+		   const enum ehca_eq_type type, const u32 length)
+{
+	u64 ret;
+	u32 nr_pages;
+	u32 i;
+	void *vpage;
+	struct ib_device *ib_dev = &shca->ib_device;
+
+	spin_lock_init(&eq->spinlock);
+	eq->is_initialized = 0;
+
+	if (type != EHCA_EQ && type != EHCA_NEQ) {
+		ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq);
+		return -EINVAL;
+	}
+	if (!length) {
+		ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq);
+		return -EINVAL;
+	}
+
+	ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
+				       &eq->pf,
+				       type,
+				       length,
+				       &eq->ipz_eq_handle,
+				       &eq->length,
+				       &nr_pages, &eq->ist);
+
+	if (ret != H_SUCCESS) {
+		ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq);
+		return -EINVAL;
+	}
+
+	ret = ipz_queue_ctor(&eq->ipz_queue, nr_pages,
+			     EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0);
+	if (!ret) {
+		ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq);
+		goto create_eq_exit1;
+	}
+
+	for (i = 0; i < nr_pages; i++) {
+		u64 rpage;
+
+		if (!(vpage = ipz_qpageit_get_inc(&eq->ipz_queue))) {
+			ret = H_RESOURCE;
+			goto create_eq_exit2;
+		}
+
+		rpage = virt_to_abs(vpage);
+		ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
+					       eq->ipz_eq_handle,
+					       &eq->pf,
+					       0, 0, rpage, 1);
+
+		if (i == (nr_pages - 1)) {
+			/* last page */
+			vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
+			if (ret != H_SUCCESS || vpage)
+				goto create_eq_exit2;
+		} else {
+			if (ret != H_PAGE_REGISTERED || !vpage)
+				goto create_eq_exit2;
+		}
+	}
+
+	ipz_qeit_reset(&eq->ipz_queue);
+
+	/* register interrupt handlers and initialize work queues */
+	if (type == EHCA_EQ) {
+		ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_eq,
+					  SA_INTERRUPT, "ehca_eq",
+					  (void *)shca);
+		if (ret < 0)
+			ehca_err(ib_dev, "Can't map interrupt handler.");
+
+		tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
+	} else if (type == EHCA_NEQ) {
+		ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_neq,
+					  SA_INTERRUPT, "ehca_neq",
+					  (void *)shca);
+		if (ret < 0)
+			ehca_err(ib_dev, "Can't map interrupt handler.");
+
+		tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
+	}
+
+	eq->is_initialized = 1;
+
+	return 0;
+
+create_eq_exit2:
+	ipz_queue_dtor(&eq->ipz_queue);
+
+create_eq_exit1:
+	hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+
+	return -EINVAL;
+}
+
+void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq)
+{
+	unsigned long flags;
+	void *eqe;
+
+	spin_lock_irqsave(&eq->spinlock, flags);
+	eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue);
+	spin_unlock_irqrestore(&eq->spinlock, flags);
+
+	return eqe;
+}
+
+int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
+{
+	unsigned long flags;
+	u64 h_ret;
+
+	spin_lock_irqsave(&eq->spinlock, flags);
+	ibmebus_free_irq(NULL, eq->ist, (void *)shca);
+
+	h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+
+	spin_unlock_irqrestore(&eq->spinlock, flags);
+
+	if (h_ret != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't free EQ resources.");
+		return -EINVAL;
+	}
+	ipz_queue_dtor(&eq->ipz_queue);
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
new file mode 100644
index 00000000000..5eae6ac4842
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -0,0 +1,241 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  HCA query functions
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
+{
+	int ret = 0;
+	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
+					      ib_device);
+	struct hipz_query_hca *rblock;
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query device properties");
+		ret = -EINVAL;
+		goto query_device1;
+	}
+
+	memset(props, 0, sizeof(struct ib_device_attr));
+	props->fw_ver          = rblock->hw_ver;
+	props->max_mr_size     = rblock->max_mr_size;
+	props->vendor_id       = rblock->vendor_id >> 8;
+	props->vendor_part_id  = rblock->vendor_part_id >> 16;
+	props->hw_ver          = rblock->hw_ver;
+	props->max_qp          = min_t(int, rblock->max_qp, INT_MAX);
+	props->max_qp_wr       = min_t(int, rblock->max_wqes_wq, INT_MAX);
+	props->max_sge         = min_t(int, rblock->max_sge, INT_MAX);
+	props->max_sge_rd      = min_t(int, rblock->max_sge_rd, INT_MAX);
+	props->max_cq          = min_t(int, rblock->max_cq, INT_MAX);
+	props->max_cqe         = min_t(int, rblock->max_cqe, INT_MAX);
+	props->max_mr          = min_t(int, rblock->max_mr, INT_MAX);
+	props->max_mw          = min_t(int, rblock->max_mw, INT_MAX);
+	props->max_pd          = min_t(int, rblock->max_pd, INT_MAX);
+	props->max_ah          = min_t(int, rblock->max_ah, INT_MAX);
+	props->max_fmr         = min_t(int, rblock->max_mr, INT_MAX);
+	props->max_srq         = 0;
+	props->max_srq_wr      = 0;
+	props->max_srq_sge     = 0;
+	props->max_pkeys       = 16;
+	props->local_ca_ack_delay
+		= rblock->local_ca_ack_delay;
+	props->max_raw_ipv6_qp
+		= min_t(int, rblock->max_raw_ipv6_qp, INT_MAX);
+	props->max_raw_ethy_qp
+		= min_t(int, rblock->max_raw_ethy_qp, INT_MAX);
+	props->max_mcast_grp
+		= min_t(int, rblock->max_mcast_grp, INT_MAX);
+	props->max_mcast_qp_attach
+		= min_t(int, rblock->max_mcast_qp_attach, INT_MAX);
+	props->max_total_mcast_qp_attach
+		= min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX);
+
+query_device1:
+	kfree(rblock);
+
+	return ret;
+}
+
+int ehca_query_port(struct ib_device *ibdev,
+		    u8 port, struct ib_port_attr *props)
+{
+	int ret = 0;
+	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
+					      ib_device);
+	struct hipz_query_port *rblock;
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query port properties");
+		ret = -EINVAL;
+		goto query_port1;
+	}
+
+	memset(props, 0, sizeof(struct ib_port_attr));
+	props->state = rblock->state;
+
+	switch (rblock->max_mtu) {
+	case 0x1:
+		props->active_mtu = props->max_mtu = IB_MTU_256;
+		break;
+	case 0x2:
+		props->active_mtu = props->max_mtu = IB_MTU_512;
+		break;
+	case 0x3:
+		props->active_mtu = props->max_mtu = IB_MTU_1024;
+		break;
+	case 0x4:
+		props->active_mtu = props->max_mtu = IB_MTU_2048;
+		break;
+	case 0x5:
+		props->active_mtu = props->max_mtu = IB_MTU_4096;
+		break;
+	default:
+		ehca_err(&shca->ib_device, "Unknown MTU size: %x.",
+			 rblock->max_mtu);
+		break;
+	}
+
+	props->gid_tbl_len     = rblock->gid_tbl_len;
+	props->max_msg_sz      = rblock->max_msg_sz;
+	props->bad_pkey_cntr   = rblock->bad_pkey_cntr;
+	props->qkey_viol_cntr  = rblock->qkey_viol_cntr;
+	props->pkey_tbl_len    = rblock->pkey_tbl_len;
+	props->lid             = rblock->lid;
+	props->sm_lid          = rblock->sm_lid;
+	props->lmc             = rblock->lmc;
+	props->sm_sl           = rblock->sm_sl;
+	props->subnet_timeout  = rblock->subnet_timeout;
+	props->init_type_reply = rblock->init_type_reply;
+
+	props->active_width    = IB_WIDTH_12X;
+	props->active_speed    = 0x1;
+
+query_port1:
+	kfree(rblock);
+
+	return ret;
+}
+
+int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+{
+	int ret = 0;
+	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device);
+	struct hipz_query_port *rblock;
+
+	if (index > 16) {
+		ehca_err(&shca->ib_device, "Invalid index: %x.", index);
+		return -EINVAL;
+	}
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query port properties");
+		ret = -EINVAL;
+		goto query_pkey1;
+	}
+
+	memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16));
+
+query_pkey1:
+	kfree(rblock);
+
+	return ret;
+}
+
+int ehca_query_gid(struct ib_device *ibdev, u8 port,
+		   int index, union ib_gid *gid)
+{
+	int ret = 0;
+	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
+					      ib_device);
+	struct hipz_query_port *rblock;
+
+	if (index > 255) {
+		ehca_err(&shca->ib_device, "Invalid index: %x.", index);
+		return -EINVAL;
+	}
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query port properties");
+		ret = -EINVAL;
+		goto query_gid1;
+	}
+
+	memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64));
+	memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64));
+
+query_gid1:
+	kfree(rblock);
+
+	return ret;
+}
+
+int ehca_modify_port(struct ib_device *ibdev,
+		     u8 port, int port_modify_mask,
+		     struct ib_port_modify *props)
+{
+	/* Not implemented yet */
+	return -EFAULT;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
new file mode 100644
index 00000000000..2a65b5be197
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -0,0 +1,762 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Functions for EQs, NEQs and interrupts
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "ehca_iverbs.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+#include "hipz_fns.h"
+
+#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM(1,1)
+#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM(8,31)
+#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM(2,7)
+#define EQE_CQ_NUMBER          EHCA_BMASK_IBM(8,31)
+#define EQE_QP_NUMBER          EHCA_BMASK_IBM(8,31)
+#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32,63)
+#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32,63)
+
+#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM(1,1)
+#define NEQE_EVENT_CODE        EHCA_BMASK_IBM(2,7)
+#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM(8,15)
+#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16)
+
+#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52,63)
+#define ERROR_DATA_TYPE        EHCA_BMASK_IBM(0,7)
+
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+
+static void queue_comp_task(struct ehca_cq *__cq);
+
+static struct ehca_comp_pool* pool;
+static struct notifier_block comp_pool_callback_nb;
+
+#endif
+
+static inline void comp_event_callback(struct ehca_cq *cq)
+{
+	if (!cq->ib_cq.comp_handler)
+		return;
+
+	spin_lock(&cq->cb_lock);
+	cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
+	spin_unlock(&cq->cb_lock);
+
+	return;
+}
+
+static void print_error_data(struct ehca_shca * shca, void* data,
+			     u64* rblock, int length)
+{
+	u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
+	u64 resource = rblock[1];
+
+	switch (type) {
+	case 0x1: /* Queue Pair */
+	{
+		struct ehca_qp *qp = (struct ehca_qp*)data;
+
+		/* only print error data if AER is set */
+		if (rblock[6] == 0)
+			return;
+
+		ehca_err(&shca->ib_device,
+			 "QP 0x%x (resource=%lx) has errors.",
+			 qp->ib_qp.qp_num, resource);
+		break;
+	}
+	case 0x4: /* Completion Queue */
+	{
+		struct ehca_cq *cq = (struct ehca_cq*)data;
+
+		ehca_err(&shca->ib_device,
+			 "CQ 0x%x (resource=%lx) has errors.",
+			 cq->cq_number, resource);
+		break;
+	}
+	default:
+		ehca_err(&shca->ib_device,
+			 "Unknown errror type: %lx on %s.",
+			 type, shca->ib_device.name);
+		break;
+	}
+
+	ehca_err(&shca->ib_device, "Error data is available: %lx.", resource);
+	ehca_err(&shca->ib_device, "EHCA ----- error data begin "
+		 "---------------------------------------------------");
+	ehca_dmp(rblock, length, "resource=%lx", resource);
+	ehca_err(&shca->ib_device, "EHCA ----- error data end "
+		 "----------------------------------------------------");
+
+	return;
+}
+
+int ehca_error_data(struct ehca_shca *shca, void *data,
+		    u64 resource)
+{
+
+	unsigned long ret;
+	u64 *rblock;
+	unsigned long block_count;
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
+		ret = -ENOMEM;
+		goto error_data1;
+	}
+
+	ret = hipz_h_error_data(shca->ipz_hca_handle,
+				resource,
+				rblock,
+				&block_count);
+
+	if (ret == H_R_STATE) {
+		ehca_err(&shca->ib_device,
+			 "No error data is available: %lx.", resource);
+	}
+	else if (ret == H_SUCCESS) {
+		int length;
+
+		length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);
+
+		if (length > PAGE_SIZE)
+			length = PAGE_SIZE;
+
+		print_error_data(shca, data, rblock, length);
+	}
+	else {
+		ehca_err(&shca->ib_device,
+			 "Error data could not be fetched: %lx", resource);
+	}
+
+	kfree(rblock);
+
+error_data1:
+	return ret;
+
+}
+
+static void qp_event_callback(struct ehca_shca *shca,
+			      u64 eqe,
+			      enum ib_event_type event_type)
+{
+	struct ib_event event;
+	struct ehca_qp *qp;
+	unsigned long flags;
+	u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
+
+	spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+	qp = idr_find(&ehca_qp_idr, token);
+	spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+
+	if (!qp)
+		return;
+
+	ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
+
+	if (!qp->ib_qp.event_handler)
+		return;
+
+	event.device     = &shca->ib_device;
+	event.event      = event_type;
+	event.element.qp = &qp->ib_qp;
+
+	qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+
+	return;
+}
+
+static void cq_event_callback(struct ehca_shca *shca,
+					  u64 eqe)
+{
+	struct ehca_cq *cq;
+	unsigned long flags;
+	u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
+
+	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+	cq = idr_find(&ehca_cq_idr, token);
+	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+	if (!cq)
+		return;
+
+	ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
+
+	return;
+}
+
+static void parse_identifier(struct ehca_shca *shca, u64 eqe)
+{
+	u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);
+
+	switch (identifier) {
+	case 0x02: /* path migrated */
+		qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG);
+		break;
+	case 0x03: /* communication established */
+		qp_event_callback(shca, eqe, IB_EVENT_COMM_EST);
+		break;
+	case 0x04: /* send queue drained */
+		qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED);
+		break;
+	case 0x05: /* QP error */
+	case 0x06: /* QP error */
+		qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL);
+		break;
+	case 0x07: /* CQ error */
+	case 0x08: /* CQ error */
+		cq_event_callback(shca, eqe);
+		break;
+	case 0x09: /* MRMWPTE error */
+		ehca_err(&shca->ib_device, "MRMWPTE error.");
+		break;
+	case 0x0A: /* port event */
+		ehca_err(&shca->ib_device, "Port event.");
+		break;
+	case 0x0B: /* MR access error */
+		ehca_err(&shca->ib_device, "MR access error.");
+		break;
+	case 0x0C: /* EQ error */
+		ehca_err(&shca->ib_device, "EQ error.");
+		break;
+	case 0x0D: /* P/Q_Key mismatch */
+		ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
+		break;
+	case 0x10: /* sampling complete */
+		ehca_err(&shca->ib_device, "Sampling complete.");
+		break;
+	case 0x11: /* unaffiliated access error */
+		ehca_err(&shca->ib_device, "Unaffiliated access error.");
+		break;
+	case 0x12: /* path migrating error */
+		ehca_err(&shca->ib_device, "Path migration error.");
+		break;
+	case 0x13: /* interface trace stopped */
+		ehca_err(&shca->ib_device, "Interface trace stopped.");
+		break;
+	case 0x14: /* first error capture info available */
+	default:
+		ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
+			 identifier, shca->ib_device.name);
+		break;
+	}
+
+	return;
+}
+
+static void parse_ec(struct ehca_shca *shca, u64 eqe)
+{
+	struct ib_event event;
+	u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
+	u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
+
+	switch (ec) {
+	case 0x30: /* port availability change */
+		if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
+			ehca_info(&shca->ib_device,
+				  "port %x is active.", port);
+			event.device = &shca->ib_device;
+			event.event = IB_EVENT_PORT_ACTIVE;
+			event.element.port_num = port;
+			shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
+			ib_dispatch_event(&event);
+		} else {
+			ehca_info(&shca->ib_device,
+				  "port %x is inactive.", port);
+			event.device = &shca->ib_device;
+			event.event = IB_EVENT_PORT_ERR;
+			event.element.port_num = port;
+			shca->sport[port - 1].port_state = IB_PORT_DOWN;
+			ib_dispatch_event(&event);
+		}
+		break;
+	case 0x31:
+		/* port configuration change
+		 * disruptive change is caused by
+		 * LID, PKEY or SM change
+		 */
+		ehca_warn(&shca->ib_device,
+			  "disruptive port %x configuration change", port);
+
+		ehca_info(&shca->ib_device,
+			 "port %x is inactive.", port);
+		event.device = &shca->ib_device;
+		event.event = IB_EVENT_PORT_ERR;
+		event.element.port_num = port;
+		shca->sport[port - 1].port_state = IB_PORT_DOWN;
+		ib_dispatch_event(&event);
+
+		ehca_info(&shca->ib_device,
+			 "port %x is active.", port);
+		event.device = &shca->ib_device;
+		event.event = IB_EVENT_PORT_ACTIVE;
+		event.element.port_num = port;
+		shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
+		ib_dispatch_event(&event);
+		break;
+	case 0x32: /* adapter malfunction */
+		ehca_err(&shca->ib_device, "Adapter malfunction.");
+		break;
+	case 0x33:  /* trace stopped */
+		ehca_err(&shca->ib_device, "Traced stopped.");
+		break;
+	default:
+		ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
+			 ec, shca->ib_device.name);
+		break;
+	}
+
+	return;
+}
+
+static inline void reset_eq_pending(struct ehca_cq *cq)
+{
+	u64 CQx_EP;
+	struct h_galpa gal = cq->galpas.kernel;
+
+	hipz_galpa_store_cq(gal, cqx_ep, 0x0);
+	CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));
+
+	return;
+}
+
+irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct ehca_shca *shca = (struct ehca_shca*)dev_id;
+
+	tasklet_hi_schedule(&shca->neq.interrupt_task);
+
+	return IRQ_HANDLED;
+}
+
+void ehca_tasklet_neq(unsigned long data)
+{
+	struct ehca_shca *shca = (struct ehca_shca*)data;
+	struct ehca_eqe *eqe;
+	u64 ret;
+
+	eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
+
+	while (eqe) {
+		if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
+			parse_ec(shca, eqe->entry);
+
+		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
+	}
+
+	ret = hipz_h_reset_event(shca->ipz_hca_handle,
+				 shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);
+
+	if (ret != H_SUCCESS)
+		ehca_err(&shca->ib_device, "Can't clear notification events.");
+
+	return;
+}
+
+irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct ehca_shca *shca = (struct ehca_shca*)dev_id;
+
+	tasklet_hi_schedule(&shca->eq.interrupt_task);
+
+	return IRQ_HANDLED;
+}
+
+void ehca_tasklet_eq(unsigned long data)
+{
+	struct ehca_shca *shca = (struct ehca_shca*)data;
+	struct ehca_eqe *eqe;
+	int int_state;
+	int query_cnt = 0;
+
+	do {
+		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+
+		if ((shca->hw_level >= 2) && eqe)
+			int_state = 1;
+		else
+			int_state = 0;
+
+		while ((int_state == 1) || eqe) {
+			while (eqe) {
+				u64 eqe_value = eqe->entry;
+
+				ehca_dbg(&shca->ib_device,
+					 "eqe_value=%lx", eqe_value);
+
+				/* TODO: better structure */
+				if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT,
+						   eqe_value)) {
+					unsigned long flags;
+					u32 token;
+					struct ehca_cq *cq;
+
+					ehca_dbg(&shca->ib_device,
+						 "... completion event");
+					token =
+						EHCA_BMASK_GET(EQE_CQ_TOKEN,
+							       eqe_value);
+					spin_lock_irqsave(&ehca_cq_idr_lock,
+							  flags);
+					cq = idr_find(&ehca_cq_idr, token);
+
+					if (cq == NULL) {
+						spin_unlock(&ehca_cq_idr_lock);
+						break;
+					}
+
+					reset_eq_pending(cq);
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+					queue_comp_task(cq);
+					spin_unlock_irqrestore(&ehca_cq_idr_lock,
+							       flags);
+#else
+					spin_unlock_irqrestore(&ehca_cq_idr_lock,
+							       flags);
+					comp_event_callback(cq);
+#endif
+				} else {
+					ehca_dbg(&shca->ib_device,
+						 "... non completion event");
+					parse_identifier(shca, eqe_value);
+				}
+				eqe =
+					(struct ehca_eqe *)ehca_poll_eq(shca,
+								    &shca->eq);
+			}
+
+			if (shca->hw_level >= 2) {
+				int_state =
+				    hipz_h_query_int_state(shca->ipz_hca_handle,
+							   shca->eq.ist);
+				query_cnt++;
+				iosync();
+				if (query_cnt >= 100) {
+					query_cnt = 0;
+					int_state = 0;
+				}
+			}
+			eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+
+		}
+	} while (int_state != 0);
+
+	return;
+}
+
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+
+static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
+{
+	unsigned long flags_last_cpu;
+
+	if (ehca_debug_level)
+		ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
+
+	spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
+	pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
+	if (pool->last_cpu == NR_CPUS)
+		pool->last_cpu = first_cpu(cpu_online_map);
+	spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
+
+	return pool->last_cpu;
+}
+
+static void __queue_comp_task(struct ehca_cq *__cq,
+			      struct ehca_cpu_comp_task *cct)
+{
+	unsigned long flags_cct;
+	unsigned long flags_cq;
+
+	spin_lock_irqsave(&cct->task_lock, flags_cct);
+	spin_lock_irqsave(&__cq->task_lock, flags_cq);
+
+	if (__cq->nr_callbacks == 0) {
+		__cq->nr_callbacks++;
+		list_add_tail(&__cq->entry, &cct->cq_list);
+		cct->cq_jobs++;
+		wake_up(&cct->wait_queue);
+	}
+	else
+		__cq->nr_callbacks++;
+
+	spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
+	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+}
+
+static void queue_comp_task(struct ehca_cq *__cq)
+{
+	int cpu;
+	int cpu_id;
+	struct ehca_cpu_comp_task *cct;
+
+	cpu = get_cpu();
+	cpu_id = find_next_online_cpu(pool);
+
+	BUG_ON(!cpu_online(cpu_id));
+
+	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+
+	if (cct->cq_jobs > 0) {
+		cpu_id = find_next_online_cpu(pool);
+		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+	}
+
+	__queue_comp_task(__cq, cct);
+
+	put_cpu();
+
+	return;
+}
+
+static void run_comp_task(struct ehca_cpu_comp_task* cct)
+{
+	struct ehca_cq *cq;
+	unsigned long flags_cct;
+	unsigned long flags_cq;
+
+	spin_lock_irqsave(&cct->task_lock, flags_cct);
+
+	while (!list_empty(&cct->cq_list)) {
+		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
+		spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+		comp_event_callback(cq);
+		spin_lock_irqsave(&cct->task_lock, flags_cct);
+
+		spin_lock_irqsave(&cq->task_lock, flags_cq);
+		cq->nr_callbacks--;
+		if (cq->nr_callbacks == 0) {
+			list_del_init(cct->cq_list.next);
+			cct->cq_jobs--;
+		}
+		spin_unlock_irqrestore(&cq->task_lock, flags_cq);
+
+	}
+
+	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+
+	return;
+}
+
+static int comp_task(void *__cct)
+{
+	struct ehca_cpu_comp_task* cct = __cct;
+	DECLARE_WAITQUEUE(wait, current);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	while(!kthread_should_stop()) {
+		add_wait_queue(&cct->wait_queue, &wait);
+
+		if (list_empty(&cct->cq_list))
+			schedule();
+		else
+			__set_current_state(TASK_RUNNING);
+
+		remove_wait_queue(&cct->wait_queue, &wait);
+
+		if (!list_empty(&cct->cq_list))
+			run_comp_task(__cct);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+
+	return 0;
+}
+
+static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
+					    int cpu)
+{
+	struct ehca_cpu_comp_task *cct;
+
+	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+	spin_lock_init(&cct->task_lock);
+	INIT_LIST_HEAD(&cct->cq_list);
+	init_waitqueue_head(&cct->wait_queue);
+	cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu);
+
+	return cct->task;
+}
+
+static void destroy_comp_task(struct ehca_comp_pool *pool,
+			      int cpu)
+{
+	struct ehca_cpu_comp_task *cct;
+	struct task_struct *task;
+	unsigned long flags_cct;
+
+	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+
+	spin_lock_irqsave(&cct->task_lock, flags_cct);
+
+	task = cct->task;
+	cct->task = NULL;
+	cct->cq_jobs = 0;
+
+	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+
+	if (task)
+		kthread_stop(task);
+
+	return;
+}
+
+static void take_over_work(struct ehca_comp_pool *pool,
+			   int cpu)
+{
+	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+	LIST_HEAD(list);
+	struct ehca_cq *cq;
+	unsigned long flags_cct;
+
+	spin_lock_irqsave(&cct->task_lock, flags_cct);
+
+	list_splice_init(&cct->cq_list, &list);
+
+	while(!list_empty(&list)) {
+	       cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
+
+	       list_del(&cq->entry);
+	       __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
+						 smp_processor_id()));
+	}
+
+	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+
+}
+
+static int comp_pool_callback(struct notifier_block *nfb,
+			      unsigned long action,
+			      void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct ehca_cpu_comp_task *cct;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+		ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
+		if(!create_comp_task(pool, cpu)) {
+			ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
+			return NOTIFY_BAD;
+		}
+		break;
+	case CPU_UP_CANCELED:
+		ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
+		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+		kthread_bind(cct->task, any_online_cpu(cpu_online_map));
+		destroy_comp_task(pool, cpu);
+		break;
+	case CPU_ONLINE:
+		ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
+		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+		kthread_bind(cct->task, cpu);
+		wake_up_process(cct->task);
+		break;
+	case CPU_DOWN_PREPARE:
+		ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
+		break;
+	case CPU_DOWN_FAILED:
+		ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
+		break;
+	case CPU_DEAD:
+		ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
+		destroy_comp_task(pool, cpu);
+		take_over_work(pool, cpu);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+#endif
+
+int ehca_create_comp_pool(void)
+{
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+	int cpu;
+	struct task_struct *task;
+
+	pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
+	if (pool == NULL)
+		return -ENOMEM;
+
+	spin_lock_init(&pool->last_cpu_lock);
+	pool->last_cpu = any_online_cpu(cpu_online_map);
+
+	pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
+	if (pool->cpu_comp_tasks == NULL) {
+		kfree(pool);
+		return -EINVAL;
+	}
+
+	for_each_online_cpu(cpu) {
+		task = create_comp_task(pool, cpu);
+		if (task) {
+			kthread_bind(task, cpu);
+			wake_up_process(task);
+		}
+	}
+
+	comp_pool_callback_nb.notifier_call = comp_pool_callback;
+	comp_pool_callback_nb.priority =0;
+	register_cpu_notifier(&comp_pool_callback_nb);
+#endif
+
+	return 0;
+}
+
+void ehca_destroy_comp_pool(void)
+{
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+	int i;
+
+	unregister_cpu_notifier(&comp_pool_callback_nb);
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (cpu_online(i))
+			destroy_comp_task(pool, i);
+	}
+#endif
+
+	return;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
new file mode 100644
index 00000000000..85bf1fe16fe
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -0,0 +1,77 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Function definitions and structs for EQs, NEQs and interrupts
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_IRQ_H
+#define __EHCA_IRQ_H
+
+
+struct ehca_shca;
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <asm/atomic.h>
+
+int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource);
+
+irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs);
+void ehca_tasklet_neq(unsigned long data);
+
+irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs);
+void ehca_tasklet_eq(unsigned long data);
+
+struct ehca_cpu_comp_task {
+	wait_queue_head_t wait_queue;
+	struct list_head cq_list;
+	struct task_struct *task;
+	spinlock_t task_lock;
+	int cq_jobs;
+};
+
+struct ehca_comp_pool {
+	struct ehca_cpu_comp_task *cpu_comp_tasks;
+	int last_cpu;
+	spinlock_t last_cpu_lock;
+};
+
+int ehca_create_comp_pool(void);
+void ehca_destroy_comp_pool(void);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
new file mode 100644
index 00000000000..319c39d47f3
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -0,0 +1,182 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Function definitions for internal functions
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Dietmar Decker <ddecker@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_IVERBS_H__
+#define __EHCA_IVERBS_H__
+
+#include "ehca_classes.h"
+
+int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props);
+
+int ehca_query_port(struct ib_device *ibdev, u8 port,
+		    struct ib_port_attr *props);
+
+int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey);
+
+int ehca_query_gid(struct ib_device *ibdev, u8 port, int index,
+		   union ib_gid *gid);
+
+int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask,
+		     struct ib_port_modify *props);
+
+struct ib_pd *ehca_alloc_pd(struct ib_device *device,
+			    struct ib_ucontext *context,
+			    struct ib_udata *udata);
+
+int ehca_dealloc_pd(struct ib_pd *pd);
+
+struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+
+int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+
+int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+
+int ehca_destroy_ah(struct ib_ah *ah);
+
+struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
+
+struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
+			       struct ib_phys_buf *phys_buf_array,
+			       int num_phys_buf,
+			       int mr_access_flags, u64 *iova_start);
+
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
+			       struct ib_umem *region,
+			       int mr_access_flags, struct ib_udata *udata);
+
+int ehca_rereg_phys_mr(struct ib_mr *mr,
+		       int mr_rereg_mask,
+		       struct ib_pd *pd,
+		       struct ib_phys_buf *phys_buf_array,
+		       int num_phys_buf, int mr_access_flags, u64 *iova_start);
+
+int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
+
+int ehca_dereg_mr(struct ib_mr *mr);
+
+struct ib_mw *ehca_alloc_mw(struct ib_pd *pd);
+
+int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
+		 struct ib_mw_bind *mw_bind);
+
+int ehca_dealloc_mw(struct ib_mw *mw);
+
+struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
+			      int mr_access_flags,
+			      struct ib_fmr_attr *fmr_attr);
+
+int ehca_map_phys_fmr(struct ib_fmr *fmr,
+		      u64 *page_list, int list_len, u64 iova);
+
+int ehca_unmap_fmr(struct list_head *fmr_list);
+
+int ehca_dealloc_fmr(struct ib_fmr *fmr);
+
+enum ehca_eq_type {
+	EHCA_EQ = 0, /* Event Queue              */
+	EHCA_NEQ     /* Notification Event Queue */
+};
+
+int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq,
+		   enum ehca_eq_type type, const u32 length);
+
+int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq);
+
+void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq);
+
+
+struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
+			     struct ib_ucontext *context,
+			     struct ib_udata *udata);
+
+int ehca_destroy_cq(struct ib_cq *cq);
+
+int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
+
+int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
+
+int ehca_peek_cq(struct ib_cq *cq, int wc_cnt);
+
+int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify);
+
+struct ib_qp *ehca_create_qp(struct ib_pd *pd,
+			     struct ib_qp_init_attr *init_attr,
+			     struct ib_udata *udata);
+
+int ehca_destroy_qp(struct ib_qp *qp);
+
+int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		   struct ib_udata *udata);
+
+int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+		  int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+
+int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
+		   struct ib_send_wr **bad_send_wr);
+
+int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
+		   struct ib_recv_wr **bad_recv_wr);
+
+u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp,
+		    struct ib_qp_init_attr *qp_init_attr);
+
+int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+
+int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+
+struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
+					struct ib_udata *udata);
+
+int ehca_dealloc_ucontext(struct ib_ucontext *context);
+
+int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
+void ehca_poll_eqs(unsigned long data);
+
+int ehca_mmap_nopage(u64 foffset,u64 length,void **mapped,
+		     struct vm_area_struct **vma);
+
+int ehca_mmap_register(u64 physical,void **mapped,
+		       struct vm_area_struct **vma);
+
+int ehca_munmap(unsigned long addr, size_t len);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
new file mode 100644
index 00000000000..2380994418a
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -0,0 +1,818 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  module start stop, hca detection
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Joachim Fenkes <fenkes@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
+MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
+MODULE_VERSION("SVNEHCA_0016");
+
+int ehca_open_aqp1     = 0;
+int ehca_debug_level   = 0;
+int ehca_hw_level      = 0;
+int ehca_nr_ports      = 2;
+int ehca_use_hp_mr     = 0;
+int ehca_port_act_time = 30;
+int ehca_poll_all_eqs  = 1;
+int ehca_static_rate   = -1;
+
+module_param_named(open_aqp1,     ehca_open_aqp1,     int, 0);
+module_param_named(debug_level,   ehca_debug_level,   int, 0);
+module_param_named(hw_level,      ehca_hw_level,      int, 0);
+module_param_named(nr_ports,      ehca_nr_ports,      int, 0);
+module_param_named(use_hp_mr,     ehca_use_hp_mr,     int, 0);
+module_param_named(port_act_time, ehca_port_act_time, int, 0);
+module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  int, 0);
+module_param_named(static_rate,   ehca_static_rate,   int, 0);
+
+MODULE_PARM_DESC(open_aqp1,
+		 "AQP1 on startup (0: no (default), 1: yes)");
+MODULE_PARM_DESC(debug_level,
+		 "debug level"
+		 " (0: no debug traces (default), 1: with debug traces)");
+MODULE_PARM_DESC(hw_level,
+		 "hardware level"
+		 " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)");
+MODULE_PARM_DESC(nr_ports,
+		 "number of connected ports (default: 2)");
+MODULE_PARM_DESC(use_hp_mr,
+		 "high performance MRs (0: no (default), 1: yes)");
+MODULE_PARM_DESC(port_act_time,
+		 "time to wait for port activation (default: 30 sec)");
+MODULE_PARM_DESC(poll_all_eqs,
+		 "polls all event queues periodically"
+		 " (0: no, 1: yes (default))");
+MODULE_PARM_DESC(static_rate,
+		 "set permanent static rate (default: disabled)");
+
+spinlock_t ehca_qp_idr_lock;
+spinlock_t ehca_cq_idr_lock;
+DEFINE_IDR(ehca_qp_idr);
+DEFINE_IDR(ehca_cq_idr);
+
+static struct list_head shca_list; /* list of all registered ehcas */
+static spinlock_t shca_list_lock;
+
+static struct timer_list poll_eqs_timer;
+
+static int ehca_create_slab_caches(void)
+{
+	int ret;
+
+	ret = ehca_init_pd_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create PD SLAB cache.");
+		return ret;
+	}
+
+	ret = ehca_init_cq_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create CQ SLAB cache.");
+		goto create_slab_caches2;
+	}
+
+	ret = ehca_init_qp_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create QP SLAB cache.");
+		goto create_slab_caches3;
+	}
+
+	ret = ehca_init_av_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create AV SLAB cache.");
+		goto create_slab_caches4;
+	}
+
+	ret = ehca_init_mrmw_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create MR&MW SLAB cache.");
+		goto create_slab_caches5;
+	}
+
+	return 0;
+
+create_slab_caches5:
+	ehca_cleanup_av_cache();
+
+create_slab_caches4:
+	ehca_cleanup_qp_cache();
+
+create_slab_caches3:
+	ehca_cleanup_cq_cache();
+
+create_slab_caches2:
+	ehca_cleanup_pd_cache();
+
+	return ret;
+}
+
+static void ehca_destroy_slab_caches(void)
+{
+	ehca_cleanup_mrmw_cache();
+	ehca_cleanup_av_cache();
+	ehca_cleanup_qp_cache();
+	ehca_cleanup_cq_cache();
+	ehca_cleanup_pd_cache();
+}
+
+#define EHCA_HCAAVER  EHCA_BMASK_IBM(32,39)
+#define EHCA_REVID    EHCA_BMASK_IBM(40,63)
+
+int ehca_sense_attributes(struct ehca_shca *shca)
+{
+	int ret = 0;
+	u64 h_ret;
+	struct hipz_query_hca *rblock;
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_gen_err("Cannot allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
+	if (h_ret != H_SUCCESS) {
+		ehca_gen_err("Cannot query device properties. h_ret=%lx",
+			     h_ret);
+		ret = -EPERM;
+		goto num_ports1;
+	}
+
+	if (ehca_nr_ports == 1)
+		shca->num_ports = 1;
+	else
+		shca->num_ports = (u8)rblock->num_ports;
+
+	ehca_gen_dbg(" ... found %x ports", rblock->num_ports);
+
+	if (ehca_hw_level == 0) {
+		u32 hcaaver;
+		u32 revid;
+
+		hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver);
+		revid   = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver);
+
+		ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
+
+		if ((hcaaver == 1) && (revid == 0))
+			shca->hw_level = 0;
+		else if ((hcaaver == 1) && (revid == 1))
+			shca->hw_level = 1;
+		else if ((hcaaver == 1) && (revid == 2))
+			shca->hw_level = 2;
+	}
+	ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
+
+	shca->sport[0].rate = IB_RATE_30_GBPS;
+	shca->sport[1].rate = IB_RATE_30_GBPS;
+
+num_ports1:
+	kfree(rblock);
+	return ret;
+}
+
+static int init_node_guid(struct ehca_shca *shca)
+{
+	int ret = 0;
+	struct hipz_query_hca *rblock;
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query device properties");
+		ret = -EINVAL;
+		goto init_node_guid1;
+	}
+
+	memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64));
+
+init_node_guid1:
+	kfree(rblock);
+	return ret;
+}
+
+int ehca_register_device(struct ehca_shca *shca)
+{
+	int ret;
+
+	ret = init_node_guid(shca);
+	if (ret)
+		return ret;
+
+	strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
+	shca->ib_device.owner               = THIS_MODULE;
+
+	shca->ib_device.uverbs_abi_ver	    = 5;
+	shca->ib_device.uverbs_cmd_mask	    =
+		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
+		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
+		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
+		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
+		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
+		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
+		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
+		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
+		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
+
+	shca->ib_device.node_type           = RDMA_NODE_IB_CA;
+	shca->ib_device.phys_port_cnt       = shca->num_ports;
+	shca->ib_device.dma_device          = &shca->ibmebus_dev->ofdev.dev;
+	shca->ib_device.query_device        = ehca_query_device;
+	shca->ib_device.query_port          = ehca_query_port;
+	shca->ib_device.query_gid           = ehca_query_gid;
+	shca->ib_device.query_pkey          = ehca_query_pkey;
+	/* shca->in_device.modify_device    = ehca_modify_device    */
+	shca->ib_device.modify_port         = ehca_modify_port;
+	shca->ib_device.alloc_ucontext      = ehca_alloc_ucontext;
+	shca->ib_device.dealloc_ucontext    = ehca_dealloc_ucontext;
+	shca->ib_device.alloc_pd            = ehca_alloc_pd;
+	shca->ib_device.dealloc_pd          = ehca_dealloc_pd;
+	shca->ib_device.create_ah	    = ehca_create_ah;
+	/* shca->ib_device.modify_ah	    = ehca_modify_ah;	    */
+	shca->ib_device.query_ah	    = ehca_query_ah;
+	shca->ib_device.destroy_ah	    = ehca_destroy_ah;
+	shca->ib_device.create_qp	    = ehca_create_qp;
+	shca->ib_device.modify_qp	    = ehca_modify_qp;
+	shca->ib_device.query_qp	    = ehca_query_qp;
+	shca->ib_device.destroy_qp	    = ehca_destroy_qp;
+	shca->ib_device.post_send	    = ehca_post_send;
+	shca->ib_device.post_recv	    = ehca_post_recv;
+	shca->ib_device.create_cq	    = ehca_create_cq;
+	shca->ib_device.destroy_cq	    = ehca_destroy_cq;
+	shca->ib_device.resize_cq	    = ehca_resize_cq;
+	shca->ib_device.poll_cq		    = ehca_poll_cq;
+	/* shca->ib_device.peek_cq	    = ehca_peek_cq;	    */
+	shca->ib_device.req_notify_cq	    = ehca_req_notify_cq;
+	/* shca->ib_device.req_ncomp_notif  = ehca_req_ncomp_notif; */
+	shca->ib_device.get_dma_mr	    = ehca_get_dma_mr;
+	shca->ib_device.reg_phys_mr	    = ehca_reg_phys_mr;
+	shca->ib_device.reg_user_mr	    = ehca_reg_user_mr;
+	shca->ib_device.query_mr	    = ehca_query_mr;
+	shca->ib_device.dereg_mr	    = ehca_dereg_mr;
+	shca->ib_device.rereg_phys_mr	    = ehca_rereg_phys_mr;
+	shca->ib_device.alloc_mw	    = ehca_alloc_mw;
+	shca->ib_device.bind_mw		    = ehca_bind_mw;
+	shca->ib_device.dealloc_mw	    = ehca_dealloc_mw;
+	shca->ib_device.alloc_fmr	    = ehca_alloc_fmr;
+	shca->ib_device.map_phys_fmr	    = ehca_map_phys_fmr;
+	shca->ib_device.unmap_fmr	    = ehca_unmap_fmr;
+	shca->ib_device.dealloc_fmr	    = ehca_dealloc_fmr;
+	shca->ib_device.attach_mcast	    = ehca_attach_mcast;
+	shca->ib_device.detach_mcast	    = ehca_detach_mcast;
+	/* shca->ib_device.process_mad	    = ehca_process_mad;	    */
+	shca->ib_device.mmap		    = ehca_mmap;
+
+	ret = ib_register_device(&shca->ib_device);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "ib_register_device() failed ret=%x", ret);
+
+	return ret;
+}
+
+static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
+{
+	struct ehca_sport *sport = &shca->sport[port - 1];
+	struct ib_cq *ibcq;
+	struct ib_qp *ibqp;
+	struct ib_qp_init_attr qp_init_attr;
+	int ret;
+
+	if (sport->ibcq_aqp1) {
+		ehca_err(&shca->ib_device, "AQP1 CQ is already created.");
+		return -EPERM;
+	}
+
+	ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10);
+	if (IS_ERR(ibcq)) {
+		ehca_err(&shca->ib_device, "Cannot create AQP1 CQ.");
+		return PTR_ERR(ibcq);
+	}
+	sport->ibcq_aqp1 = ibcq;
+
+	if (sport->ibqp_aqp1) {
+		ehca_err(&shca->ib_device, "AQP1 QP is already created.");
+		ret = -EPERM;
+		goto create_aqp1;
+	}
+
+	memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr));
+	qp_init_attr.send_cq          = ibcq;
+	qp_init_attr.recv_cq          = ibcq;
+	qp_init_attr.sq_sig_type      = IB_SIGNAL_ALL_WR;
+	qp_init_attr.cap.max_send_wr  = 100;
+	qp_init_attr.cap.max_recv_wr  = 100;
+	qp_init_attr.cap.max_send_sge = 2;
+	qp_init_attr.cap.max_recv_sge = 1;
+	qp_init_attr.qp_type          = IB_QPT_GSI;
+	qp_init_attr.port_num         = port;
+	qp_init_attr.qp_context       = NULL;
+	qp_init_attr.event_handler    = NULL;
+	qp_init_attr.srq              = NULL;
+
+	ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr);
+	if (IS_ERR(ibqp)) {
+		ehca_err(&shca->ib_device, "Cannot create AQP1 QP.");
+		ret = PTR_ERR(ibqp);
+		goto create_aqp1;
+	}
+	sport->ibqp_aqp1 = ibqp;
+
+	return 0;
+
+create_aqp1:
+	ib_destroy_cq(sport->ibcq_aqp1);
+	return ret;
+}
+
+static int ehca_destroy_aqp1(struct ehca_sport *sport)
+{
+	int ret;
+
+	ret = ib_destroy_qp(sport->ibqp_aqp1);
+	if (ret) {
+		ehca_gen_err("Cannot destroy AQP1 QP. ret=%x", ret);
+		return ret;
+	}
+
+	ret = ib_destroy_cq(sport->ibcq_aqp1);
+	if (ret)
+		ehca_gen_err("Cannot destroy AQP1 CQ. ret=%x", ret);
+
+	return ret;
+}
+
+static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
+{
+	return  snprintf(buf, PAGE_SIZE, "%d\n",
+			 ehca_debug_level);
+}
+
+static ssize_t ehca_store_debug_level(struct device_driver *ddp,
+				      const char *buf, size_t count)
+{
+	int value = (*buf) - '0';
+	if (value >= 0 && value <= 9)
+		ehca_debug_level = value;
+	return 1;
+}
+
+DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
+	    ehca_show_debug_level, ehca_store_debug_level);
+
+void ehca_create_driver_sysfs(struct ibmebus_driver *drv)
+{
+	driver_create_file(&drv->driver, &driver_attr_debug_level);
+}
+
+void ehca_remove_driver_sysfs(struct ibmebus_driver *drv)
+{
+	driver_remove_file(&drv->driver, &driver_attr_debug_level);
+}
+
+#define EHCA_RESOURCE_ATTR(name)                                           \
+static ssize_t  ehca_show_##name(struct device *dev,                       \
+				 struct device_attribute *attr,            \
+				 char *buf)                                \
+{									   \
+	struct ehca_shca *shca;						   \
+	struct hipz_query_hca *rblock;				           \
+	int data;                                                          \
+									   \
+	shca = dev->driver_data;					   \
+									   \
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);			   \
+	if (!rblock) {						           \
+		dev_err(dev, "Can't allocate rblock memory.");		   \
+		return 0;						   \
+	}								   \
+									   \
+	if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \
+		dev_err(dev, "Can't query device properties");	   	   \
+		kfree(rblock);					   	   \
+		return 0;					   	   \
+	}								   \
+									   \
+	data = rblock->name;                                               \
+	kfree(rblock);                                                     \
+									   \
+	if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1))	   \
+		return snprintf(buf, 256, "1\n");			   \
+	else								   \
+		return snprintf(buf, 256, "%d\n", data);		   \
+									   \
+}									   \
+static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL);
+
+EHCA_RESOURCE_ATTR(num_ports);
+EHCA_RESOURCE_ATTR(hw_ver);
+EHCA_RESOURCE_ATTR(max_eq);
+EHCA_RESOURCE_ATTR(cur_eq);
+EHCA_RESOURCE_ATTR(max_cq);
+EHCA_RESOURCE_ATTR(cur_cq);
+EHCA_RESOURCE_ATTR(max_qp);
+EHCA_RESOURCE_ATTR(cur_qp);
+EHCA_RESOURCE_ATTR(max_mr);
+EHCA_RESOURCE_ATTR(cur_mr);
+EHCA_RESOURCE_ATTR(max_mw);
+EHCA_RESOURCE_ATTR(cur_mw);
+EHCA_RESOURCE_ATTR(max_pd);
+EHCA_RESOURCE_ATTR(max_ah);
+
+static ssize_t ehca_show_adapter_handle(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct ehca_shca *shca = dev->driver_data;
+
+	return sprintf(buf, "%lx\n", shca->ipz_hca_handle.handle);
+
+}
+static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
+
+
+void ehca_create_device_sysfs(struct ibmebus_dev *dev)
+{
+	device_create_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
+	device_create_file(&dev->ofdev.dev, &dev_attr_num_ports);
+	device_create_file(&dev->ofdev.dev, &dev_attr_hw_ver);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_eq);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_eq);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_cq);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_cq);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_qp);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_qp);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_mr);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_mr);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_mw);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_mw);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_pd);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_ah);
+}
+
+void ehca_remove_device_sysfs(struct ibmebus_dev *dev)
+{
+	device_remove_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_num_ports);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_hw_ver);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_eq);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_eq);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_cq);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_cq);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_qp);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_qp);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_mr);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mr);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_mw);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mw);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_pd);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_ah);
+}
+
+static int __devinit ehca_probe(struct ibmebus_dev *dev,
+				const struct of_device_id *id)
+{
+	struct ehca_shca *shca;
+	u64 *handle;
+	struct ib_pd *ibpd;
+	int ret;
+
+	handle = (u64 *)get_property(dev->ofdev.node, "ibm,hca-handle", NULL);
+	if (!handle) {
+		ehca_gen_err("Cannot get eHCA handle for adapter: %s.",
+			     dev->ofdev.node->full_name);
+		return -ENODEV;
+	}
+
+	if (!(*handle)) {
+		ehca_gen_err("Wrong eHCA handle for adapter: %s.",
+			     dev->ofdev.node->full_name);
+		return -ENODEV;
+	}
+
+	shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca));
+	if (!shca) {
+		ehca_gen_err("Cannot allocate shca memory.");
+		return -ENOMEM;
+	}
+
+	shca->ibmebus_dev = dev;
+	shca->ipz_hca_handle.handle = *handle;
+	dev->ofdev.dev.driver_data = shca;
+
+	ret = ehca_sense_attributes(shca);
+	if (ret < 0) {
+		ehca_gen_err("Cannot sense eHCA attributes.");
+		goto probe1;
+	}
+
+	ret = ehca_register_device(shca);
+	if (ret) {
+		ehca_gen_err("Cannot register Infiniband device");
+		goto probe1;
+	}
+
+	/* create event queues */
+	ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048);
+	if (ret) {
+		ehca_err(&shca->ib_device, "Cannot create EQ.");
+		goto probe2;
+	}
+
+	ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513);
+	if (ret) {
+		ehca_err(&shca->ib_device, "Cannot create NEQ.");
+		goto probe3;
+	}
+
+	/* create internal protection domain */
+	ibpd = ehca_alloc_pd(&shca->ib_device, (void*)(-1), NULL);
+	if (IS_ERR(ibpd)) {
+		ehca_err(&shca->ib_device, "Cannot create internal PD.");
+		ret = PTR_ERR(ibpd);
+		goto probe4;
+	}
+
+	shca->pd = container_of(ibpd, struct ehca_pd, ib_pd);
+	shca->pd->ib_pd.device = &shca->ib_device;
+
+	/* create internal max MR */
+	ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr);
+
+	if (ret) {
+		ehca_err(&shca->ib_device, "Cannot create internal MR ret=%x",
+			 ret);
+		goto probe5;
+	}
+
+	/* create AQP1 for port 1 */
+	if (ehca_open_aqp1 == 1) {
+		shca->sport[0].port_state = IB_PORT_DOWN;
+		ret = ehca_create_aqp1(shca, 1);
+		if (ret) {
+			ehca_err(&shca->ib_device,
+				 "Cannot create AQP1 for port 1.");
+			goto probe6;
+		}
+	}
+
+	/* create AQP1 for port 2 */
+	if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) {
+		shca->sport[1].port_state = IB_PORT_DOWN;
+		ret = ehca_create_aqp1(shca, 2);
+		if (ret) {
+			ehca_err(&shca->ib_device,
+				 "Cannot create AQP1 for port 2.");
+			goto probe7;
+		}
+	}
+
+	ehca_create_device_sysfs(dev);
+
+	spin_lock(&shca_list_lock);
+	list_add(&shca->shca_list, &shca_list);
+	spin_unlock(&shca_list_lock);
+
+	return 0;
+
+probe7:
+	ret = ehca_destroy_aqp1(&shca->sport[0]);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy AQP1 for port 1. ret=%x", ret);
+
+probe6:
+	ret = ehca_dereg_internal_maxmr(shca);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy internal MR. ret=%x", ret);
+
+probe5:
+	ret = ehca_dealloc_pd(&shca->pd->ib_pd);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy internal PD. ret=%x", ret);
+
+probe4:
+	ret = ehca_destroy_eq(shca, &shca->neq);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy NEQ. ret=%x", ret);
+
+probe3:
+	ret = ehca_destroy_eq(shca, &shca->eq);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy EQ. ret=%x", ret);
+
+probe2:
+	ib_unregister_device(&shca->ib_device);
+
+probe1:
+	ib_dealloc_device(&shca->ib_device);
+
+	return -EINVAL;
+}
+
+static int __devexit ehca_remove(struct ibmebus_dev *dev)
+{
+	struct ehca_shca *shca = dev->ofdev.dev.driver_data;
+	int ret;
+
+	ehca_remove_device_sysfs(dev);
+
+	if (ehca_open_aqp1 == 1) {
+		int i;
+		for (i = 0; i < shca->num_ports; i++) {
+			ret = ehca_destroy_aqp1(&shca->sport[i]);
+			if (ret)
+				ehca_err(&shca->ib_device,
+					 "Cannot destroy AQP1 for port %x "
+					 "ret=%x", ret, i);
+		}
+	}
+
+	ib_unregister_device(&shca->ib_device);
+
+	ret = ehca_dereg_internal_maxmr(shca);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy internal MR. ret=%x", ret);
+
+	ret = ehca_dealloc_pd(&shca->pd->ib_pd);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy internal PD. ret=%x", ret);
+
+	ret = ehca_destroy_eq(shca, &shca->eq);
+	if (ret)
+		ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%x", ret);
+
+	ret = ehca_destroy_eq(shca, &shca->neq);
+	if (ret)
+		ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%x", ret);
+
+	ib_dealloc_device(&shca->ib_device);
+
+	spin_lock(&shca_list_lock);
+	list_del(&shca->shca_list);
+	spin_unlock(&shca_list_lock);
+
+	return ret;
+}
+
+static struct of_device_id ehca_device_table[] =
+{
+	{
+		.name       = "lhca",
+		.compatible = "IBM,lhca",
+	},
+	{},
+};
+
+static struct ibmebus_driver ehca_driver = {
+	.name     = "ehca",
+	.id_table = ehca_device_table,
+	.probe    = ehca_probe,
+	.remove   = ehca_remove,
+};
+
+void ehca_poll_eqs(unsigned long data)
+{
+	struct ehca_shca *shca;
+
+	spin_lock(&shca_list_lock);
+	list_for_each_entry(shca, &shca_list, shca_list) {
+		if (shca->eq.is_initialized)
+			ehca_tasklet_eq((unsigned long)(void*)shca);
+	}
+	mod_timer(&poll_eqs_timer, jiffies + HZ);
+	spin_unlock(&shca_list_lock);
+}
+
+int __init ehca_module_init(void)
+{
+	int ret;
+
+	printk(KERN_INFO "eHCA Infiniband Device Driver "
+	                 "(Rel.: SVNEHCA_0016)\n");
+	idr_init(&ehca_qp_idr);
+	idr_init(&ehca_cq_idr);
+	spin_lock_init(&ehca_qp_idr_lock);
+	spin_lock_init(&ehca_cq_idr_lock);
+
+	INIT_LIST_HEAD(&shca_list);
+	spin_lock_init(&shca_list_lock);
+
+	if ((ret = ehca_create_comp_pool())) {
+		ehca_gen_err("Cannot create comp pool.");
+		return ret;
+	}
+
+	if ((ret = ehca_create_slab_caches())) {
+		ehca_gen_err("Cannot create SLAB caches");
+		ret = -ENOMEM;
+		goto module_init1;
+	}
+
+	if ((ret = ibmebus_register_driver(&ehca_driver))) {
+		ehca_gen_err("Cannot register eHCA device driver");
+		ret = -EINVAL;
+		goto module_init2;
+	}
+
+	ehca_create_driver_sysfs(&ehca_driver);
+
+	if (ehca_poll_all_eqs != 1) {
+		ehca_gen_err("WARNING!!!");
+		ehca_gen_err("It is possible to lose interrupts.");
+	} else {
+		init_timer(&poll_eqs_timer);
+		poll_eqs_timer.function = ehca_poll_eqs;
+		poll_eqs_timer.expires = jiffies + HZ;
+		add_timer(&poll_eqs_timer);
+	}
+
+	return 0;
+
+module_init2:
+	ehca_destroy_slab_caches();
+
+module_init1:
+	ehca_destroy_comp_pool();
+	return ret;
+};
+
+void __exit ehca_module_exit(void)
+{
+	if (ehca_poll_all_eqs == 1)
+		del_timer_sync(&poll_eqs_timer);
+
+	ehca_remove_driver_sysfs(&ehca_driver);
+	ibmebus_unregister_driver(&ehca_driver);
+
+	ehca_destroy_slab_caches();
+
+	ehca_destroy_comp_pool();
+
+	idr_destroy(&ehca_cq_idr);
+	idr_destroy(&ehca_qp_idr);
+};
+
+module_init(ehca_module_init);
+module_exit(ehca_module_exit);
diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c
new file mode 100644
index 00000000000..32a870660bf
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_mcast.c
@@ -0,0 +1,131 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  mcast  functions
+ *
+ *  Authors: Khadija Souissi <souissik@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+#define MAX_MC_LID 0xFFFE
+#define MIN_MC_LID 0xC000	/* Multicast limits */
+#define EHCA_VALID_MULTICAST_GID(gid)  ((gid)[0] == 0xFF)
+#define EHCA_VALID_MULTICAST_LID(lid) \
+	(((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID))
+
+int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+					      ib_device);
+	union ib_gid my_gid;
+	u64 subnet_prefix, interface_id, h_ret;
+
+	if (ibqp->qp_type != IB_QPT_UD) {
+		ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type);
+		return -EINVAL;
+	}
+
+	if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
+		ehca_err(ibqp->device, "invalid mulitcast gid");
+		return -EINVAL;
+	} else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
+		ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid);
+		return -EINVAL;
+	}
+
+	memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid));
+
+	subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
+	interface_id = be64_to_cpu(my_gid.global.interface_id);
+	h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle,
+				   my_qp->ipz_qp_handle,
+				   my_qp->galpas.kernel,
+				   lid, subnet_prefix, interface_id);
+	if (h_ret != H_SUCCESS)
+		ehca_err(ibqp->device,
+			 "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed "
+			 "h_ret=%lx", my_qp, ibqp->qp_num, h_ret);
+
+	return ehca2ib_return_code(h_ret);
+}
+
+int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_shca *shca = container_of(ibqp->pd->device,
+					      struct ehca_shca, ib_device);
+	union ib_gid my_gid;
+	u64 subnet_prefix, interface_id, h_ret;
+
+	if (ibqp->qp_type != IB_QPT_UD) {
+		ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type);
+		return -EINVAL;
+	}
+
+	if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
+		ehca_err(ibqp->device, "invalid mulitcast gid");
+		return -EINVAL;
+	} else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
+		ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid);
+		return -EINVAL;
+	}
+
+	memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid));
+
+	subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
+	interface_id = be64_to_cpu(my_gid.global.interface_id);
+	h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle,
+				   my_qp->ipz_qp_handle,
+				   my_qp->galpas.kernel,
+				   lid, subnet_prefix, interface_id);
+	if (h_ret != H_SUCCESS)
+		ehca_err(ibqp->device,
+			 "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed "
+			 "h_ret=%lx", my_qp, ibqp->qp_num, h_ret);
+
+	return ehca2ib_return_code(h_ret);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
new file mode 100644
index 00000000000..5ca65441e1d
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -0,0 +1,2261 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  MR/MW functions
+ *
+ *  Authors: Dietmar Decker <ddecker@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "hcp_if.h"
+#include "hipz_hw.h"
+
+static struct kmem_cache *mr_cache;
+static struct kmem_cache *mw_cache;
+
+static struct ehca_mr *ehca_mr_new(void)
+{
+	struct ehca_mr *me;
+
+	me = kmem_cache_alloc(mr_cache, SLAB_KERNEL);
+	if (me) {
+		memset(me, 0, sizeof(struct ehca_mr));
+		spin_lock_init(&me->mrlock);
+	} else
+		ehca_gen_err("alloc failed");
+
+	return me;
+}
+
+static void ehca_mr_delete(struct ehca_mr *me)
+{
+	kmem_cache_free(mr_cache, me);
+}
+
+static struct ehca_mw *ehca_mw_new(void)
+{
+	struct ehca_mw *me;
+
+	me = kmem_cache_alloc(mw_cache, SLAB_KERNEL);
+	if (me) {
+		memset(me, 0, sizeof(struct ehca_mw));
+		spin_lock_init(&me->mwlock);
+	} else
+		ehca_gen_err("alloc failed");
+
+	return me;
+}
+
+static void ehca_mw_delete(struct ehca_mw *me)
+{
+	kmem_cache_free(mw_cache, me);
+}
+
+/*----------------------------------------------------------------------*/
+
+struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
+{
+	struct ib_mr *ib_mr;
+	int ret;
+	struct ehca_mr *e_maxmr;
+	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+	struct ehca_shca *shca =
+		container_of(pd->device, struct ehca_shca, ib_device);
+
+	if (shca->maxmr) {
+		e_maxmr = ehca_mr_new();
+		if (!e_maxmr) {
+			ehca_err(&shca->ib_device, "out of memory");
+			ib_mr = ERR_PTR(-ENOMEM);
+			goto get_dma_mr_exit0;
+		}
+
+		ret = ehca_reg_maxmr(shca, e_maxmr, (u64*)KERNELBASE,
+				     mr_access_flags, e_pd,
+				     &e_maxmr->ib.ib_mr.lkey,
+				     &e_maxmr->ib.ib_mr.rkey);
+		if (ret) {
+			ib_mr = ERR_PTR(ret);
+			goto get_dma_mr_exit0;
+		}
+		ib_mr = &e_maxmr->ib.ib_mr;
+	} else {
+		ehca_err(&shca->ib_device, "no internal max-MR exist!");
+		ib_mr = ERR_PTR(-EINVAL);
+		goto get_dma_mr_exit0;
+	}
+
+get_dma_mr_exit0:
+	if (IS_ERR(ib_mr))
+		ehca_err(&shca->ib_device, "rc=%lx pd=%p mr_access_flags=%x ",
+			 PTR_ERR(ib_mr), pd, mr_access_flags);
+	return ib_mr;
+} /* end ehca_get_dma_mr() */
+
+/*----------------------------------------------------------------------*/
+
+struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
+			       struct ib_phys_buf *phys_buf_array,
+			       int num_phys_buf,
+			       int mr_access_flags,
+			       u64 *iova_start)
+{
+	struct ib_mr *ib_mr;
+	int ret;
+	struct ehca_mr *e_mr;
+	struct ehca_shca *shca =
+		container_of(pd->device, struct ehca_shca, ib_device);
+	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+
+	u64 size;
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+	u32 num_pages_mr;
+	u32 num_pages_4k; /* 4k portion "pages" */
+
+	if ((num_phys_buf <= 0) || !phys_buf_array) {
+		ehca_err(pd->device, "bad input values: num_phys_buf=%x "
+			 "phys_buf_array=%p", num_phys_buf, phys_buf_array);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_phys_mr_exit0;
+	}
+	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
+		/*
+		 * Remote Write Access requires Local Write Access
+		 * Remote Atomic Access requires Local Write Access
+		 */
+		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+			 mr_access_flags);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_phys_mr_exit0;
+	}
+
+	/* check physical buffer list and calculate size */
+	ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
+					    iova_start, &size);
+	if (ret) {
+		ib_mr = ERR_PTR(ret);
+		goto reg_phys_mr_exit0;
+	}
+	if ((size == 0) ||
+	    (((u64)iova_start + size) < (u64)iova_start)) {
+		ehca_err(pd->device, "bad input values: size=%lx iova_start=%p",
+			 size, iova_start);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_phys_mr_exit0;
+	}
+
+	e_mr = ehca_mr_new();
+	if (!e_mr) {
+		ehca_err(pd->device, "out of memory");
+		ib_mr = ERR_PTR(-ENOMEM);
+		goto reg_phys_mr_exit0;
+	}
+
+	/* determine number of MR pages */
+	num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size +
+			 PAGE_SIZE - 1) / PAGE_SIZE);
+	num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size +
+			 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+
+	/* register MR on HCA */
+	if (ehca_mr_is_maxmr(size, iova_start)) {
+		e_mr->flags |= EHCA_MR_FLAG_MAXMR;
+		ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
+				     e_pd, &e_mr->ib.ib_mr.lkey,
+				     &e_mr->ib.ib_mr.rkey);
+		if (ret) {
+			ib_mr = ERR_PTR(ret);
+			goto reg_phys_mr_exit1;
+		}
+	} else {
+		pginfo.type           = EHCA_MR_PGI_PHYS;
+		pginfo.num_pages      = num_pages_mr;
+		pginfo.num_4k         = num_pages_4k;
+		pginfo.num_phys_buf   = num_phys_buf;
+		pginfo.phys_buf_array = phys_buf_array;
+		pginfo.next_4k        = (((u64)iova_start & ~PAGE_MASK) /
+					 EHCA_PAGESIZE);
+
+		ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
+				  e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
+				  &e_mr->ib.ib_mr.rkey);
+		if (ret) {
+			ib_mr = ERR_PTR(ret);
+			goto reg_phys_mr_exit1;
+		}
+	}
+
+	/* successful registration of all pages */
+	return &e_mr->ib.ib_mr;
+
+reg_phys_mr_exit1:
+	ehca_mr_delete(e_mr);
+reg_phys_mr_exit0:
+	if (IS_ERR(ib_mr))
+		ehca_err(pd->device, "rc=%lx pd=%p phys_buf_array=%p "
+			 "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
+			 PTR_ERR(ib_mr), pd, phys_buf_array,
+			 num_phys_buf, mr_access_flags, iova_start);
+	return ib_mr;
+} /* end ehca_reg_phys_mr() */
+
+/*----------------------------------------------------------------------*/
+
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
+			       struct ib_umem *region,
+			       int mr_access_flags,
+			       struct ib_udata *udata)
+{
+	struct ib_mr *ib_mr;
+	struct ehca_mr *e_mr;
+	struct ehca_shca *shca =
+		container_of(pd->device, struct ehca_shca, ib_device);
+	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+	int ret;
+	u32 num_pages_mr;
+	u32 num_pages_4k; /* 4k portion "pages" */
+
+	if (!pd) {
+		ehca_gen_err("bad pd=%p", pd);
+		return ERR_PTR(-EFAULT);
+	}
+	if (!region) {
+		ehca_err(pd->device, "bad input values: region=%p", region);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_user_mr_exit0;
+	}
+	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
+		/*
+		 * Remote Write Access requires Local Write Access
+		 * Remote Atomic Access requires Local Write Access
+		 */
+		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+			 mr_access_flags);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_user_mr_exit0;
+	}
+	if (region->page_size != PAGE_SIZE) {
+		ehca_err(pd->device, "page size not supported, "
+			 "region->page_size=%x", region->page_size);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_user_mr_exit0;
+	}
+
+	if ((region->length == 0) ||
+	    ((region->virt_base + region->length) < region->virt_base)) {
+		ehca_err(pd->device, "bad input values: length=%lx "
+			 "virt_base=%lx", region->length, region->virt_base);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_user_mr_exit0;
+	}
+
+	e_mr = ehca_mr_new();
+	if (!e_mr) {
+		ehca_err(pd->device, "out of memory");
+		ib_mr = ERR_PTR(-ENOMEM);
+		goto reg_user_mr_exit0;
+	}
+
+	/* determine number of MR pages */
+	num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length +
+			 PAGE_SIZE - 1) / PAGE_SIZE);
+	num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length +
+			 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+
+	/* register MR on HCA */
+	pginfo.type       = EHCA_MR_PGI_USER;
+	pginfo.num_pages  = num_pages_mr;
+	pginfo.num_4k     = num_pages_4k;
+	pginfo.region     = region;
+	pginfo.next_4k	  = region->offset / EHCA_PAGESIZE;
+	pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk,
+					       (&region->chunk_list),
+					       list);
+
+	ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base,
+			  region->length, mr_access_flags, e_pd, &pginfo,
+			  &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
+	if (ret) {
+		ib_mr = ERR_PTR(ret);
+		goto reg_user_mr_exit1;
+	}
+
+	/* successful registration of all pages */
+	return &e_mr->ib.ib_mr;
+
+reg_user_mr_exit1:
+	ehca_mr_delete(e_mr);
+reg_user_mr_exit0:
+	if (IS_ERR(ib_mr))
+		ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x"
+			 " udata=%p",
+			 PTR_ERR(ib_mr), pd, region, mr_access_flags, udata);
+	return ib_mr;
+} /* end ehca_reg_user_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_rereg_phys_mr(struct ib_mr *mr,
+		       int mr_rereg_mask,
+		       struct ib_pd *pd,
+		       struct ib_phys_buf *phys_buf_array,
+		       int num_phys_buf,
+		       int mr_access_flags,
+		       u64 *iova_start)
+{
+	int ret;
+
+	struct ehca_shca *shca =
+		container_of(mr->device, struct ehca_shca, ib_device);
+	struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
+	struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
+	u64 new_size;
+	u64 *new_start;
+	u32 new_acl;
+	struct ehca_pd *new_pd;
+	u32 tmp_lkey, tmp_rkey;
+	unsigned long sl_flags;
+	u32 num_pages_mr = 0;
+	u32 num_pages_4k = 0; /* 4k portion "pages" */
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+	u32 cur_pid = current->tgid;
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    (my_pd->ownpid != cur_pid)) {
+		ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		ret = -EINVAL;
+		goto rereg_phys_mr_exit0;
+	}
+
+	if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
+		/* TODO not supported, because PHYP rereg hCall needs pages */
+		ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
+			 "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
+		ret = -EINVAL;
+		goto rereg_phys_mr_exit0;
+	}
+
+	if (mr_rereg_mask & IB_MR_REREG_PD) {
+		if (!pd) {
+			ehca_err(mr->device, "rereg with bad pd, pd=%p "
+				 "mr_rereg_mask=%x", pd, mr_rereg_mask);
+			ret = -EINVAL;
+			goto rereg_phys_mr_exit0;
+		}
+	}
+
+	if ((mr_rereg_mask &
+	     ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
+	    (mr_rereg_mask == 0)) {
+		ret = -EINVAL;
+		goto rereg_phys_mr_exit0;
+	}
+
+	/* check other parameters */
+	if (e_mr == shca->maxmr) {
+		/* should be impossible, however reject to be sure */
+		ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
+			 "shca->maxmr=%p mr->lkey=%x",
+			 mr, shca->maxmr, mr->lkey);
+		ret = -EINVAL;
+		goto rereg_phys_mr_exit0;
+	}
+	if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
+		if (e_mr->flags & EHCA_MR_FLAG_FMR) {
+			ehca_err(mr->device, "not supported for FMR, mr=%p "
+				 "flags=%x", mr, e_mr->flags);
+			ret = -EINVAL;
+			goto rereg_phys_mr_exit0;
+		}
+		if (!phys_buf_array || num_phys_buf <= 0) {
+			ehca_err(mr->device, "bad input values: mr_rereg_mask=%x"
+				 " phys_buf_array=%p num_phys_buf=%x",
+				 mr_rereg_mask, phys_buf_array, num_phys_buf);
+			ret = -EINVAL;
+			goto rereg_phys_mr_exit0;
+		}
+	}
+	if ((mr_rereg_mask & IB_MR_REREG_ACCESS) &&	/* change ACL */
+	    (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+	      !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+	     ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+	      !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
+		/*
+		 * Remote Write Access requires Local Write Access
+		 * Remote Atomic Access requires Local Write Access
+		 */
+		ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
+			 "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
+		ret = -EINVAL;
+		goto rereg_phys_mr_exit0;
+	}
+
+	/* set requested values dependent on rereg request */
+	spin_lock_irqsave(&e_mr->mrlock, sl_flags);
+	new_start = e_mr->start;  /* new == old address */
+	new_size  = e_mr->size;	  /* new == old length */
+	new_acl   = e_mr->acl;	  /* new == old access control */
+	new_pd    = container_of(mr->pd,struct ehca_pd,ib_pd); /*new == old PD*/
+
+	if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+		new_start = iova_start;	/* change address */
+		/* check physical buffer list and calculate size */
+		ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
+						    num_phys_buf, iova_start,
+						    &new_size);
+		if (ret)
+			goto rereg_phys_mr_exit1;
+		if ((new_size == 0) ||
+		    (((u64)iova_start + new_size) < (u64)iova_start)) {
+			ehca_err(mr->device, "bad input values: new_size=%lx "
+				 "iova_start=%p", new_size, iova_start);
+			ret = -EINVAL;
+			goto rereg_phys_mr_exit1;
+		}
+		num_pages_mr = ((((u64)new_start % PAGE_SIZE) + new_size +
+				 PAGE_SIZE - 1) / PAGE_SIZE);
+		num_pages_4k = ((((u64)new_start % EHCA_PAGESIZE) + new_size +
+				 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+		pginfo.type           = EHCA_MR_PGI_PHYS;
+		pginfo.num_pages      = num_pages_mr;
+		pginfo.num_4k         = num_pages_4k;
+		pginfo.num_phys_buf   = num_phys_buf;
+		pginfo.phys_buf_array = phys_buf_array;
+		pginfo.next_4k        = (((u64)iova_start & ~PAGE_MASK) /
+					 EHCA_PAGESIZE);
+	}
+	if (mr_rereg_mask & IB_MR_REREG_ACCESS)
+		new_acl = mr_access_flags;
+	if (mr_rereg_mask & IB_MR_REREG_PD)
+		new_pd = container_of(pd, struct ehca_pd, ib_pd);
+
+	ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
+			    new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
+	if (ret)
+		goto rereg_phys_mr_exit1;
+
+	/* successful reregistration */
+	if (mr_rereg_mask & IB_MR_REREG_PD)
+		mr->pd = pd;
+	mr->lkey = tmp_lkey;
+	mr->rkey = tmp_rkey;
+
+rereg_phys_mr_exit1:
+	spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
+rereg_phys_mr_exit0:
+	if (ret)
+		ehca_err(mr->device, "ret=%x mr=%p mr_rereg_mask=%x pd=%p "
+			 "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
+			 "iova_start=%p",
+			 ret, mr, mr_rereg_mask, pd, phys_buf_array,
+			 num_phys_buf, mr_access_flags, iova_start);
+	return ret;
+} /* end ehca_rereg_phys_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
+{
+	int ret = 0;
+	u64 h_ret;
+	struct ehca_shca *shca =
+		container_of(mr->device, struct ehca_shca, ib_device);
+	struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
+	struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
+	u32 cur_pid = current->tgid;
+	unsigned long sl_flags;
+	struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    (my_pd->ownpid != cur_pid)) {
+		ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		ret = -EINVAL;
+		goto query_mr_exit0;
+	}
+
+	if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
+		ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
+			 "e_mr->flags=%x", mr, e_mr, e_mr->flags);
+		ret = -EINVAL;
+		goto query_mr_exit0;
+	}
+
+	memset(mr_attr, 0, sizeof(struct ib_mr_attr));
+	spin_lock_irqsave(&e_mr->mrlock, sl_flags);
+
+	h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lx mr=%p "
+			 "hca_hndl=%lx mr_hndl=%lx lkey=%x",
+			 h_ret, mr, shca->ipz_hca_handle.handle,
+			 e_mr->ipz_mr_handle.handle, mr->lkey);
+		ret = ehca_mrmw_map_hrc_query_mr(h_ret);
+		goto query_mr_exit1;
+	}
+	mr_attr->pd               = mr->pd;
+	mr_attr->device_virt_addr = hipzout.vaddr;
+	mr_attr->size             = hipzout.len;
+	mr_attr->lkey             = hipzout.lkey;
+	mr_attr->rkey             = hipzout.rkey;
+	ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);
+
+query_mr_exit1:
+	spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
+query_mr_exit0:
+	if (ret)
+		ehca_err(mr->device, "ret=%x mr=%p mr_attr=%p",
+			 ret, mr, mr_attr);
+	return ret;
+} /* end ehca_query_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_dereg_mr(struct ib_mr *mr)
+{
+	int ret = 0;
+	u64 h_ret;
+	struct ehca_shca *shca =
+		container_of(mr->device, struct ehca_shca, ib_device);
+	struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
+	struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
+	u32 cur_pid = current->tgid;
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    (my_pd->ownpid != cur_pid)) {
+		ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		ret = -EINVAL;
+		goto dereg_mr_exit0;
+	}
+
+	if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
+		ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
+			 "e_mr->flags=%x", mr, e_mr, e_mr->flags);
+		ret = -EINVAL;
+		goto dereg_mr_exit0;
+	} else if (e_mr == shca->maxmr) {
+		/* should be impossible, however reject to be sure */
+		ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p "
+			 "shca->maxmr=%p mr->lkey=%x",
+			 mr, shca->maxmr, mr->lkey);
+		ret = -EINVAL;
+		goto dereg_mr_exit0;
+	}
+
+	/* TODO: BUSY: MR still has bound window(s) */
+	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lx shca=%p "
+			 "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x",
+			 h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
+			 e_mr->ipz_mr_handle.handle, mr->lkey);
+		ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+		goto dereg_mr_exit0;
+	}
+
+	/* successful deregistration */
+	ehca_mr_delete(e_mr);
+
+dereg_mr_exit0:
+	if (ret)
+		ehca_err(mr->device, "ret=%x mr=%p", ret, mr);
+	return ret;
+} /* end ehca_dereg_mr() */
+
+/*----------------------------------------------------------------------*/
+
+struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
+{
+	struct ib_mw *ib_mw;
+	u64 h_ret;
+	struct ehca_mw *e_mw;
+	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+	struct ehca_shca *shca =
+		container_of(pd->device, struct ehca_shca, ib_device);
+	struct ehca_mw_hipzout_parms hipzout = {{0},0};
+
+	e_mw = ehca_mw_new();
+	if (!e_mw) {
+		ib_mw = ERR_PTR(-ENOMEM);
+		goto alloc_mw_exit0;
+	}
+
+	h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
+					 e_pd->fw_pd, &hipzout);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx "
+			 "shca=%p hca_hndl=%lx mw=%p",
+			 h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
+		ib_mw = ERR_PTR(ehca_mrmw_map_hrc_alloc(h_ret));
+		goto alloc_mw_exit1;
+	}
+	/* successful MW allocation */
+	e_mw->ipz_mw_handle = hipzout.handle;
+	e_mw->ib_mw.rkey    = hipzout.rkey;
+	return &e_mw->ib_mw;
+
+alloc_mw_exit1:
+	ehca_mw_delete(e_mw);
+alloc_mw_exit0:
+	if (IS_ERR(ib_mw))
+		ehca_err(pd->device, "rc=%lx pd=%p", PTR_ERR(ib_mw), pd);
+	return ib_mw;
+} /* end ehca_alloc_mw() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_bind_mw(struct ib_qp *qp,
+		 struct ib_mw *mw,
+		 struct ib_mw_bind *mw_bind)
+{
+	/* TODO: not supported up to now */
+	ehca_gen_err("bind MW currently not supported by HCAD");
+
+	return -EPERM;
+} /* end ehca_bind_mw() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_dealloc_mw(struct ib_mw *mw)
+{
+	u64 h_ret;
+	struct ehca_shca *shca =
+		container_of(mw->device, struct ehca_shca, ib_device);
+	struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw);
+
+	h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lx shca=%p "
+			 "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx",
+			 h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
+			 e_mw->ipz_mw_handle.handle);
+		return ehca_mrmw_map_hrc_free_mw(h_ret);
+	}
+	/* successful deallocation */
+	ehca_mw_delete(e_mw);
+	return 0;
+} /* end ehca_dealloc_mw() */
+
+/*----------------------------------------------------------------------*/
+
+struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
+			      int mr_access_flags,
+			      struct ib_fmr_attr *fmr_attr)
+{
+	struct ib_fmr *ib_fmr;
+	struct ehca_shca *shca =
+		container_of(pd->device, struct ehca_shca, ib_device);
+	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+	struct ehca_mr *e_fmr;
+	int ret;
+	u32 tmp_lkey, tmp_rkey;
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+
+	/* check other parameters */
+	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
+		/*
+		 * Remote Write Access requires Local Write Access
+		 * Remote Atomic Access requires Local Write Access
+		 */
+		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+			 mr_access_flags);
+		ib_fmr = ERR_PTR(-EINVAL);
+		goto alloc_fmr_exit0;
+	}
+	if (mr_access_flags & IB_ACCESS_MW_BIND) {
+		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+			 mr_access_flags);
+		ib_fmr = ERR_PTR(-EINVAL);
+		goto alloc_fmr_exit0;
+	}
+	if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) {
+		ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x "
+			 "fmr_attr->max_maps=%x fmr_attr->page_shift=%x",
+			 fmr_attr->max_pages, fmr_attr->max_maps,
+			 fmr_attr->page_shift);
+		ib_fmr = ERR_PTR(-EINVAL);
+		goto alloc_fmr_exit0;
+	}
+	if (((1 << fmr_attr->page_shift) != EHCA_PAGESIZE) &&
+	    ((1 << fmr_attr->page_shift) != PAGE_SIZE)) {
+		ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
+			 fmr_attr->page_shift);
+		ib_fmr = ERR_PTR(-EINVAL);
+		goto alloc_fmr_exit0;
+	}
+
+	e_fmr = ehca_mr_new();
+	if (!e_fmr) {
+		ib_fmr = ERR_PTR(-ENOMEM);
+		goto alloc_fmr_exit0;
+	}
+	e_fmr->flags |= EHCA_MR_FLAG_FMR;
+
+	/* register MR on HCA */
+	ret = ehca_reg_mr(shca, e_fmr, NULL,
+			  fmr_attr->max_pages * (1 << fmr_attr->page_shift),
+			  mr_access_flags, e_pd, &pginfo,
+			  &tmp_lkey, &tmp_rkey);
+	if (ret) {
+		ib_fmr = ERR_PTR(ret);
+		goto alloc_fmr_exit1;
+	}
+
+	/* successful */
+	e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
+	e_fmr->fmr_max_pages = fmr_attr->max_pages;
+	e_fmr->fmr_max_maps = fmr_attr->max_maps;
+	e_fmr->fmr_map_cnt = 0;
+	return &e_fmr->ib.ib_fmr;
+
+alloc_fmr_exit1:
+	ehca_mr_delete(e_fmr);
+alloc_fmr_exit0:
+	if (IS_ERR(ib_fmr))
+		ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x "
+			 "fmr_attr=%p", PTR_ERR(ib_fmr), pd,
+			 mr_access_flags, fmr_attr);
+	return ib_fmr;
+} /* end ehca_alloc_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_map_phys_fmr(struct ib_fmr *fmr,
+		      u64 *page_list,
+		      int list_len,
+		      u64 iova)
+{
+	int ret;
+	struct ehca_shca *shca =
+		container_of(fmr->device, struct ehca_shca, ib_device);
+	struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
+	struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+	u32 tmp_lkey, tmp_rkey;
+
+	if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
+		ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
+			 e_fmr, e_fmr->flags);
+		ret = -EINVAL;
+		goto map_phys_fmr_exit0;
+	}
+	ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len);
+	if (ret)
+		goto map_phys_fmr_exit0;
+	if (iova % e_fmr->fmr_page_size) {
+		/* only whole-numbered pages */
+		ehca_err(fmr->device, "bad iova, iova=%lx fmr_page_size=%x",
+			 iova, e_fmr->fmr_page_size);
+		ret = -EINVAL;
+		goto map_phys_fmr_exit0;
+	}
+	if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) {
+		/* HCAD does not limit the maps, however trace this anyway */
+		ehca_info(fmr->device, "map limit exceeded, fmr=%p "
+			  "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x",
+			  fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
+	}
+
+	pginfo.type      = EHCA_MR_PGI_FMR;
+	pginfo.num_pages = list_len;
+	pginfo.num_4k    = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE);
+	pginfo.page_list = page_list;
+	pginfo.next_4k   = ((iova & (e_fmr->fmr_page_size-1)) /
+			    EHCA_PAGESIZE);
+
+	ret = ehca_rereg_mr(shca, e_fmr, (u64*)iova,
+			    list_len * e_fmr->fmr_page_size,
+			    e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
+	if (ret)
+		goto map_phys_fmr_exit0;
+
+	/* successful reregistration */
+	e_fmr->fmr_map_cnt++;
+	e_fmr->ib.ib_fmr.lkey = tmp_lkey;
+	e_fmr->ib.ib_fmr.rkey = tmp_rkey;
+	return 0;
+
+map_phys_fmr_exit0:
+	if (ret)
+		ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x "
+			 "iova=%lx",
+			 ret, fmr, page_list, list_len, iova);
+	return ret;
+} /* end ehca_map_phys_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_unmap_fmr(struct list_head *fmr_list)
+{
+	int ret = 0;
+	struct ib_fmr *ib_fmr;
+	struct ehca_shca *shca = NULL;
+	struct ehca_shca *prev_shca;
+	struct ehca_mr *e_fmr;
+	u32 num_fmr = 0;
+	u32 unmap_fmr_cnt = 0;
+
+	/* check all FMR belong to same SHCA, and check internal flag */
+	list_for_each_entry(ib_fmr, fmr_list, list) {
+		prev_shca = shca;
+		if (!ib_fmr) {
+			ehca_gen_err("bad fmr=%p in list", ib_fmr);
+			ret = -EINVAL;
+			goto unmap_fmr_exit0;
+		}
+		shca = container_of(ib_fmr->device, struct ehca_shca,
+				    ib_device);
+		e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
+		if ((shca != prev_shca) && prev_shca) {
+			ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p "
+				 "prev_shca=%p e_fmr=%p",
+				 shca, prev_shca, e_fmr);
+			ret = -EINVAL;
+			goto unmap_fmr_exit0;
+		}
+		if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
+			ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p "
+				 "e_fmr->flags=%x", e_fmr, e_fmr->flags);
+			ret = -EINVAL;
+			goto unmap_fmr_exit0;
+		}
+		num_fmr++;
+	}
+
+	/* loop over all FMRs to unmap */
+	list_for_each_entry(ib_fmr, fmr_list, list) {
+		unmap_fmr_cnt++;
+		e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
+		shca = container_of(ib_fmr->device, struct ehca_shca,
+				    ib_device);
+		ret = ehca_unmap_one_fmr(shca, e_fmr);
+		if (ret) {
+			/* unmap failed, stop unmapping of rest of FMRs */
+			ehca_err(&shca->ib_device, "unmap of one FMR failed, "
+				 "stop rest, e_fmr=%p num_fmr=%x "
+				 "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr,
+				 unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey);
+			goto unmap_fmr_exit0;
+		}
+	}
+
+unmap_fmr_exit0:
+	if (ret)
+		ehca_gen_err("ret=%x fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
+			     ret, fmr_list, num_fmr, unmap_fmr_cnt);
+	return ret;
+} /* end ehca_unmap_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_dealloc_fmr(struct ib_fmr *fmr)
+{
+	int ret;
+	u64 h_ret;
+	struct ehca_shca *shca =
+		container_of(fmr->device, struct ehca_shca, ib_device);
+	struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
+
+	if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
+		ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
+			 e_fmr, e_fmr->flags);
+		ret = -EINVAL;
+		goto free_fmr_exit0;
+	}
+
+	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lx e_fmr=%p "
+			 "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x",
+			 h_ret, e_fmr, shca->ipz_hca_handle.handle,
+			 e_fmr->ipz_mr_handle.handle, fmr->lkey);
+		ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+		goto free_fmr_exit0;
+	}
+	/* successful deregistration */
+	ehca_mr_delete(e_fmr);
+	return 0;
+
+free_fmr_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x fmr=%p", ret, fmr);
+	return ret;
+} /* end ehca_dealloc_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_reg_mr(struct ehca_shca *shca,
+		struct ehca_mr *e_mr,
+		u64 *iova_start,
+		u64 size,
+		int acl,
+		struct ehca_pd *e_pd,
+		struct ehca_mr_pginfo *pginfo,
+		u32 *lkey, /*OUT*/
+		u32 *rkey) /*OUT*/
+{
+	int ret;
+	u64 h_ret;
+	u32 hipz_acl;
+	struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+
+	ehca_mrmw_map_acl(acl, &hipz_acl);
+	ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+	if (ehca_use_hp_mr == 1)
+	        hipz_acl |= 0x00000001;
+
+	h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
+					 (u64)iova_start, size, hipz_acl,
+					 e_pd->fw_pd, &hipzout);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx "
+			 "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle);
+		ret = ehca_mrmw_map_hrc_alloc(h_ret);
+		goto ehca_reg_mr_exit0;
+	}
+
+	e_mr->ipz_mr_handle = hipzout.handle;
+
+	ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
+	if (ret)
+		goto ehca_reg_mr_exit1;
+
+	/* successful registration */
+	e_mr->num_pages = pginfo->num_pages;
+	e_mr->num_4k    = pginfo->num_4k;
+	e_mr->start     = iova_start;
+	e_mr->size      = size;
+	e_mr->acl       = acl;
+	*lkey = hipzout.lkey;
+	*rkey = hipzout.rkey;
+	return 0;
+
+ehca_reg_mr_exit1:
+	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p "
+			 "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x "
+			 "pginfo=%p num_pages=%lx num_4k=%lx ret=%x",
+			 h_ret, shca, e_mr, iova_start, size, acl, e_pd,
+			 hipzout.lkey, pginfo, pginfo->num_pages,
+			 pginfo->num_4k, ret);
+		ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
+			 "not recoverable");
+	}
+ehca_reg_mr_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
+			 "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
+			 "num_pages=%lx num_4k=%lx",
+			 ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
+			 pginfo->num_pages, pginfo->num_4k);
+	return ret;
+} /* end ehca_reg_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_reg_mr_rpages(struct ehca_shca *shca,
+		       struct ehca_mr *e_mr,
+		       struct ehca_mr_pginfo *pginfo)
+{
+	int ret = 0;
+	u64 h_ret;
+	u32 rnum;
+	u64 rpage;
+	u32 i;
+	u64 *kpage;
+
+	kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!kpage) {
+		ehca_err(&shca->ib_device, "kpage alloc failed");
+		ret = -ENOMEM;
+		goto ehca_reg_mr_rpages_exit0;
+	}
+
+	/* max 512 pages per shot */
+	for (i = 0; i < ((pginfo->num_4k + 512 - 1) / 512); i++) {
+
+		if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) {
+			rnum = pginfo->num_4k % 512; /* last shot */
+			if (rnum == 0)
+				rnum = 512;      /* last shot is full */
+		} else
+			rnum = 512;
+
+		if (rnum > 1) {
+			ret = ehca_set_pagebuf(e_mr, pginfo, rnum, kpage);
+			if (ret) {
+				ehca_err(&shca->ib_device, "ehca_set_pagebuf "
+					 "bad rc, ret=%x rnum=%x kpage=%p",
+					 ret, rnum, kpage);
+				ret = -EFAULT;
+				goto ehca_reg_mr_rpages_exit1;
+			}
+			rpage = virt_to_abs(kpage);
+			if (!rpage) {
+				ehca_err(&shca->ib_device, "kpage=%p i=%x",
+					 kpage, i);
+				ret = -EFAULT;
+				goto ehca_reg_mr_rpages_exit1;
+			}
+		} else {  /* rnum==1 */
+			ret = ehca_set_pagebuf_1(e_mr, pginfo, &rpage);
+			if (ret) {
+				ehca_err(&shca->ib_device, "ehca_set_pagebuf_1 "
+					 "bad rc, ret=%x i=%x", ret, i);
+				ret = -EFAULT;
+				goto ehca_reg_mr_rpages_exit1;
+			}
+		}
+
+		h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, e_mr,
+						 0, /* pagesize 4k */
+						 0, rpage, rnum);
+
+		if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) {
+			/*
+			 * check for 'registration complete'==H_SUCCESS
+			 * and for 'page registered'==H_PAGE_REGISTERED
+			 */
+			if (h_ret != H_SUCCESS) {
+				ehca_err(&shca->ib_device, "last "
+					 "hipz_reg_rpage_mr failed, h_ret=%lx "
+					 "e_mr=%p i=%x hca_hndl=%lx mr_hndl=%lx"
+					 " lkey=%x", h_ret, e_mr, i,
+					 shca->ipz_hca_handle.handle,
+					 e_mr->ipz_mr_handle.handle,
+					 e_mr->ib.ib_mr.lkey);
+				ret = ehca_mrmw_map_hrc_rrpg_last(h_ret);
+				break;
+			} else
+				ret = 0;
+		} else if (h_ret != H_PAGE_REGISTERED) {
+			ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
+				 "h_ret=%lx e_mr=%p i=%x lkey=%x hca_hndl=%lx "
+				 "mr_hndl=%lx", h_ret, e_mr, i,
+				 e_mr->ib.ib_mr.lkey,
+				 shca->ipz_hca_handle.handle,
+				 e_mr->ipz_mr_handle.handle);
+			ret = ehca_mrmw_map_hrc_rrpg_notlast(h_ret);
+			break;
+		} else
+			ret = 0;
+	} /* end for(i) */
+
+
+ehca_reg_mr_rpages_exit1:
+	kfree(kpage);
+ehca_reg_mr_rpages_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p "
+			 "num_pages=%lx num_4k=%lx", ret, shca, e_mr, pginfo,
+			 pginfo->num_pages, pginfo->num_4k);
+	return ret;
+} /* end ehca_reg_mr_rpages() */
+
+/*----------------------------------------------------------------------*/
+
+inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
+				struct ehca_mr *e_mr,
+				u64 *iova_start,
+				u64 size,
+				u32 acl,
+				struct ehca_pd *e_pd,
+				struct ehca_mr_pginfo *pginfo,
+				u32 *lkey, /*OUT*/
+				u32 *rkey) /*OUT*/
+{
+	int ret;
+	u64 h_ret;
+	u32 hipz_acl;
+	u64 *kpage;
+	u64 rpage;
+	struct ehca_mr_pginfo pginfo_save;
+	struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+
+	ehca_mrmw_map_acl(acl, &hipz_acl);
+	ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+
+	kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!kpage) {
+		ehca_err(&shca->ib_device, "kpage alloc failed");
+		ret = -ENOMEM;
+		goto ehca_rereg_mr_rereg1_exit0;
+	}
+
+	pginfo_save = *pginfo;
+	ret = ehca_set_pagebuf(e_mr, pginfo, pginfo->num_4k, kpage);
+	if (ret) {
+		ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
+			 "pginfo=%p type=%x num_pages=%lx num_4k=%lx kpage=%p",
+			 e_mr, pginfo, pginfo->type, pginfo->num_pages,
+			 pginfo->num_4k,kpage);
+		goto ehca_rereg_mr_rereg1_exit1;
+	}
+	rpage = virt_to_abs(kpage);
+	if (!rpage) {
+		ehca_err(&shca->ib_device, "kpage=%p", kpage);
+		ret = -EFAULT;
+		goto ehca_rereg_mr_rereg1_exit1;
+	}
+	h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr,
+				      (u64)iova_start, size, hipz_acl,
+				      e_pd->fw_pd, rpage, &hipzout);
+	if (h_ret != H_SUCCESS) {
+		/*
+		 * reregistration unsuccessful, try it again with the 3 hCalls,
+		 * e.g. this is required in case H_MR_CONDITION
+		 * (MW bound or MR is shared)
+		 */
+		ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
+			  "(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr);
+		*pginfo = pginfo_save;
+		ret = -EAGAIN;
+	} else if ((u64*)hipzout.vaddr != iova_start) {
+		ehca_err(&shca->ib_device, "PHYP changed iova_start in "
+			 "rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p "
+			 "mr_handle=%lx lkey=%x lkey_out=%x", iova_start,
+			 hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
+			 e_mr->ib.ib_mr.lkey, hipzout.lkey);
+		ret = -EFAULT;
+	} else {
+		/*
+		 * successful reregistration
+		 * note: start and start_out are identical for eServer HCAs
+		 */
+		e_mr->num_pages = pginfo->num_pages;
+		e_mr->num_4k    = pginfo->num_4k;
+		e_mr->start     = iova_start;
+		e_mr->size      = size;
+		e_mr->acl       = acl;
+		*lkey = hipzout.lkey;
+		*rkey = hipzout.rkey;
+	}
+
+ehca_rereg_mr_rereg1_exit1:
+	kfree(kpage);
+ehca_rereg_mr_rereg1_exit0:
+	if ( ret && (ret != -EAGAIN) )
+		ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x "
+			 "pginfo=%p num_pages=%lx num_4k=%lx",
+			 ret, *lkey, *rkey, pginfo, pginfo->num_pages,
+			 pginfo->num_4k);
+	return ret;
+} /* end ehca_rereg_mr_rereg1() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_rereg_mr(struct ehca_shca *shca,
+		  struct ehca_mr *e_mr,
+		  u64 *iova_start,
+		  u64 size,
+		  int acl,
+		  struct ehca_pd *e_pd,
+		  struct ehca_mr_pginfo *pginfo,
+		  u32 *lkey,
+		  u32 *rkey)
+{
+	int ret = 0;
+	u64 h_ret;
+	int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */
+	int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */
+
+	/* first determine reregistration hCall(s) */
+	if ((pginfo->num_4k > 512) || (e_mr->num_4k > 512) ||
+	    (pginfo->num_4k > e_mr->num_4k)) {
+		ehca_dbg(&shca->ib_device, "Rereg3 case, pginfo->num_4k=%lx "
+			 "e_mr->num_4k=%x", pginfo->num_4k, e_mr->num_4k);
+		rereg_1_hcall = 0;
+		rereg_3_hcall = 1;
+	}
+
+	if (e_mr->flags & EHCA_MR_FLAG_MAXMR) {	/* check for max-MR */
+		rereg_1_hcall = 0;
+		rereg_3_hcall = 1;
+		e_mr->flags &= ~EHCA_MR_FLAG_MAXMR;
+		ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p",
+			 e_mr);
+	}
+
+	if (rereg_1_hcall) {
+		ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size,
+					   acl, e_pd, pginfo, lkey, rkey);
+		if (ret) {
+			if (ret == -EAGAIN)
+				rereg_3_hcall = 1;
+			else
+				goto ehca_rereg_mr_exit0;
+		}
+	}
+
+	if (rereg_3_hcall) {
+		struct ehca_mr save_mr;
+
+		/* first deregister old MR */
+		h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
+		if (h_ret != H_SUCCESS) {
+			ehca_err(&shca->ib_device, "hipz_free_mr failed, "
+				 "h_ret=%lx e_mr=%p hca_hndl=%lx mr_hndl=%lx "
+				 "mr->lkey=%x",
+				 h_ret, e_mr, shca->ipz_hca_handle.handle,
+				 e_mr->ipz_mr_handle.handle,
+				 e_mr->ib.ib_mr.lkey);
+			ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+			goto ehca_rereg_mr_exit0;
+		}
+		/* clean ehca_mr_t, without changing struct ib_mr and lock */
+		save_mr = *e_mr;
+		ehca_mr_deletenew(e_mr);
+
+		/* set some MR values */
+		e_mr->flags = save_mr.flags;
+		e_mr->fmr_page_size = save_mr.fmr_page_size;
+		e_mr->fmr_max_pages = save_mr.fmr_max_pages;
+		e_mr->fmr_max_maps = save_mr.fmr_max_maps;
+		e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;
+
+		ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
+				      e_pd, pginfo, lkey, rkey);
+		if (ret) {
+			u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
+			memcpy(&e_mr->flags, &(save_mr.flags),
+			       sizeof(struct ehca_mr) - offset);
+			goto ehca_rereg_mr_exit0;
+		}
+	}
+
+ehca_rereg_mr_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
+			 "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
+			 "num_pages=%lx lkey=%x rkey=%x rereg_1_hcall=%x "
+			 "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
+			 acl, e_pd, pginfo, pginfo->num_pages, *lkey, *rkey,
+			 rereg_1_hcall, rereg_3_hcall);
+	return ret;
+} /* end ehca_rereg_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_unmap_one_fmr(struct ehca_shca *shca,
+		       struct ehca_mr *e_fmr)
+{
+	int ret = 0;
+	u64 h_ret;
+	int rereg_1_hcall = 1; /* 1: use hipz_mr_reregister directly */
+	int rereg_3_hcall = 0; /* 1: use 3 hipz calls for unmapping */
+	struct ehca_pd *e_pd =
+		container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
+	struct ehca_mr save_fmr;
+	u32 tmp_lkey, tmp_rkey;
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+	struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+
+	/* first check if reregistration hCall can be used for unmap */
+	if (e_fmr->fmr_max_pages > 512) {
+		rereg_1_hcall = 0;
+		rereg_3_hcall = 1;
+	}
+
+	if (rereg_1_hcall) {
+		/*
+		 * note: after using rereg hcall with len=0,
+		 * rereg hcall must be used again for registering pages
+		 */
+		h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
+					      0, 0, e_pd->fw_pd, 0, &hipzout);
+		if (h_ret != H_SUCCESS) {
+			/*
+			 * should not happen, because length checked above,
+			 * FMRs are not shared and no MW bound to FMRs
+			 */
+			ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
+				 "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx "
+				 "mr_hndl=%lx lkey=%x lkey_out=%x",
+				 h_ret, e_fmr, shca->ipz_hca_handle.handle,
+				 e_fmr->ipz_mr_handle.handle,
+				 e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
+			rereg_3_hcall = 1;
+		} else {
+			/* successful reregistration */
+			e_fmr->start = NULL;
+			e_fmr->size = 0;
+			tmp_lkey = hipzout.lkey;
+			tmp_rkey = hipzout.rkey;
+		}
+	}
+
+	if (rereg_3_hcall) {
+		struct ehca_mr save_mr;
+
+		/* first free old FMR */
+		h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
+		if (h_ret != H_SUCCESS) {
+			ehca_err(&shca->ib_device, "hipz_free_mr failed, "
+				 "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx "
+				 "lkey=%x",
+				 h_ret, e_fmr, shca->ipz_hca_handle.handle,
+				 e_fmr->ipz_mr_handle.handle,
+				 e_fmr->ib.ib_fmr.lkey);
+			ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+			goto ehca_unmap_one_fmr_exit0;
+		}
+		/* clean ehca_mr_t, without changing lock */
+		save_fmr = *e_fmr;
+		ehca_mr_deletenew(e_fmr);
+
+		/* set some MR values */
+		e_fmr->flags = save_fmr.flags;
+		e_fmr->fmr_page_size = save_fmr.fmr_page_size;
+		e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
+		e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
+		e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
+		e_fmr->acl = save_fmr.acl;
+
+		pginfo.type      = EHCA_MR_PGI_FMR;
+		pginfo.num_pages = 0;
+		pginfo.num_4k    = 0;
+		ret = ehca_reg_mr(shca, e_fmr, NULL,
+				  (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
+				  e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
+				  &tmp_rkey);
+		if (ret) {
+			u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
+			memcpy(&e_fmr->flags, &(save_mr.flags),
+			       sizeof(struct ehca_mr) - offset);
+			goto ehca_unmap_one_fmr_exit0;
+		}
+	}
+
+ehca_unmap_one_fmr_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x "
+			 "fmr_max_pages=%x rereg_1_hcall=%x rereg_3_hcall=%x",
+			 ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages,
+			 rereg_1_hcall, rereg_3_hcall);
+	return ret;
+} /* end ehca_unmap_one_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_reg_smr(struct ehca_shca *shca,
+		 struct ehca_mr *e_origmr,
+		 struct ehca_mr *e_newmr,
+		 u64 *iova_start,
+		 int acl,
+		 struct ehca_pd *e_pd,
+		 u32 *lkey, /*OUT*/
+		 u32 *rkey) /*OUT*/
+{
+	int ret = 0;
+	u64 h_ret;
+	u32 hipz_acl;
+	struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+
+	ehca_mrmw_map_acl(acl, &hipz_acl);
+	ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+
+	h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
+				    (u64)iova_start, hipz_acl, e_pd->fw_pd,
+				    &hipzout);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx "
+			 "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
+			 "e_pd=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
+			 h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
+			 shca->ipz_hca_handle.handle,
+			 e_origmr->ipz_mr_handle.handle,
+			 e_origmr->ib.ib_mr.lkey);
+		ret = ehca_mrmw_map_hrc_reg_smr(h_ret);
+		goto ehca_reg_smr_exit0;
+	}
+	/* successful registration */
+	e_newmr->num_pages     = e_origmr->num_pages;
+	e_newmr->num_4k        = e_origmr->num_4k;
+	e_newmr->start         = iova_start;
+	e_newmr->size          = e_origmr->size;
+	e_newmr->acl           = acl;
+	e_newmr->ipz_mr_handle = hipzout.handle;
+	*lkey = hipzout.lkey;
+	*rkey = hipzout.rkey;
+	return 0;
+
+ehca_reg_smr_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x shca=%p e_origmr=%p "
+			 "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
+			 ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
+	return ret;
+} /* end ehca_reg_smr() */
+
+/*----------------------------------------------------------------------*/
+
+/* register internal max-MR to internal SHCA */
+int ehca_reg_internal_maxmr(
+	struct ehca_shca *shca,
+	struct ehca_pd *e_pd,
+	struct ehca_mr **e_maxmr)  /*OUT*/
+{
+	int ret;
+	struct ehca_mr *e_mr;
+	u64 *iova_start;
+	u64 size_maxmr;
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+	struct ib_phys_buf ib_pbuf;
+	u32 num_pages_mr;
+	u32 num_pages_4k; /* 4k portion "pages" */
+
+	e_mr = ehca_mr_new();
+	if (!e_mr) {
+		ehca_err(&shca->ib_device, "out of memory");
+		ret = -ENOMEM;
+		goto ehca_reg_internal_maxmr_exit0;
+	}
+	e_mr->flags |= EHCA_MR_FLAG_MAXMR;
+
+	/* register internal max-MR on HCA */
+	size_maxmr = (u64)high_memory - PAGE_OFFSET;
+	iova_start = (u64*)KERNELBASE;
+	ib_pbuf.addr = 0;
+	ib_pbuf.size = size_maxmr;
+	num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size_maxmr +
+			 PAGE_SIZE - 1) / PAGE_SIZE);
+	num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size_maxmr +
+			 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+
+	pginfo.type           = EHCA_MR_PGI_PHYS;
+	pginfo.num_pages      = num_pages_mr;
+	pginfo.num_4k         = num_pages_4k;
+	pginfo.num_phys_buf   = 1;
+	pginfo.phys_buf_array = &ib_pbuf;
+
+	ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
+			  &pginfo, &e_mr->ib.ib_mr.lkey,
+			  &e_mr->ib.ib_mr.rkey);
+	if (ret) {
+		ehca_err(&shca->ib_device, "reg of internal max MR failed, "
+			 "e_mr=%p iova_start=%p size_maxmr=%lx num_pages_mr=%x "
+			 "num_pages_4k=%x", e_mr, iova_start, size_maxmr,
+			 num_pages_mr, num_pages_4k);
+		goto ehca_reg_internal_maxmr_exit1;
+	}
+
+	/* successful registration of all pages */
+	e_mr->ib.ib_mr.device = e_pd->ib_pd.device;
+	e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
+	e_mr->ib.ib_mr.uobject = NULL;
+	atomic_inc(&(e_pd->ib_pd.usecnt));
+	atomic_set(&(e_mr->ib.ib_mr.usecnt), 0);
+	*e_maxmr = e_mr;
+	return 0;
+
+ehca_reg_internal_maxmr_exit1:
+	ehca_mr_delete(e_mr);
+ehca_reg_internal_maxmr_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x shca=%p e_pd=%p e_maxmr=%p",
+			 ret, shca, e_pd, e_maxmr);
+	return ret;
+} /* end ehca_reg_internal_maxmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_reg_maxmr(struct ehca_shca *shca,
+		   struct ehca_mr *e_newmr,
+		   u64 *iova_start,
+		   int acl,
+		   struct ehca_pd *e_pd,
+		   u32 *lkey,
+		   u32 *rkey)
+{
+	u64 h_ret;
+	struct ehca_mr *e_origmr = shca->maxmr;
+	u32 hipz_acl;
+	struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+
+	ehca_mrmw_map_acl(acl, &hipz_acl);
+	ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+
+	h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
+				    (u64)iova_start, hipz_acl, e_pd->fw_pd,
+				    &hipzout);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx "
+			 "e_origmr=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
+			 h_ret, e_origmr, shca->ipz_hca_handle.handle,
+			 e_origmr->ipz_mr_handle.handle,
+			 e_origmr->ib.ib_mr.lkey);
+		return ehca_mrmw_map_hrc_reg_smr(h_ret);
+	}
+	/* successful registration */
+	e_newmr->num_pages     = e_origmr->num_pages;
+	e_newmr->num_4k        = e_origmr->num_4k;
+	e_newmr->start         = iova_start;
+	e_newmr->size          = e_origmr->size;
+	e_newmr->acl           = acl;
+	e_newmr->ipz_mr_handle = hipzout.handle;
+	*lkey = hipzout.lkey;
+	*rkey = hipzout.rkey;
+	return 0;
+} /* end ehca_reg_maxmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
+{
+	int ret;
+	struct ehca_mr *e_maxmr;
+	struct ib_pd *ib_pd;
+
+	if (!shca->maxmr) {
+		ehca_err(&shca->ib_device, "bad call, shca=%p", shca);
+		ret = -EINVAL;
+		goto ehca_dereg_internal_maxmr_exit0;
+	}
+
+	e_maxmr = shca->maxmr;
+	ib_pd = e_maxmr->ib.ib_mr.pd;
+	shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */
+
+	ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
+	if (ret) {
+		ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
+			 "ret=%x e_maxmr=%p shca=%p lkey=%x",
+			 ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
+		shca->maxmr = e_maxmr;
+		goto ehca_dereg_internal_maxmr_exit0;
+	}
+
+	atomic_dec(&ib_pd->usecnt);
+
+ehca_dereg_internal_maxmr_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x shca=%p shca->maxmr=%p",
+			 ret, shca, shca->maxmr);
+	return ret;
+} /* end ehca_dereg_internal_maxmr() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * check physical buffer array of MR verbs for validness and
+ * calculates MR size
+ */
+int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
+				  int num_phys_buf,
+				  u64 *iova_start,
+				  u64 *size)
+{
+	struct ib_phys_buf *pbuf = phys_buf_array;
+	u64 size_count = 0;
+	u32 i;
+
+	if (num_phys_buf == 0) {
+		ehca_gen_err("bad phys buf array len, num_phys_buf=0");
+		return -EINVAL;
+	}
+	/* check first buffer */
+	if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
+		ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
+			     "pbuf->addr=%lx pbuf->size=%lx",
+			     iova_start, pbuf->addr, pbuf->size);
+		return -EINVAL;
+	}
+	if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
+	    (num_phys_buf > 1)) {
+		ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%lx "
+			     "pbuf->size=%lx", pbuf->addr, pbuf->size);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < num_phys_buf; i++) {
+		if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
+			ehca_gen_err("bad address, i=%x pbuf->addr=%lx "
+				     "pbuf->size=%lx",
+				     i, pbuf->addr, pbuf->size);
+			return -EINVAL;
+		}
+		if (((i > 0) &&	/* not 1st */
+		     (i < (num_phys_buf - 1)) &&	/* not last */
+		     (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
+			ehca_gen_err("bad size, i=%x pbuf->size=%lx",
+				     i, pbuf->size);
+			return -EINVAL;
+		}
+		size_count += pbuf->size;
+		pbuf++;
+	}
+
+	*size = size_count;
+	return 0;
+} /* end ehca_mr_chk_buf_and_calc_size() */
+
+/*----------------------------------------------------------------------*/
+
+/* check page list of map FMR verb for validness */
+int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
+			     u64 *page_list,
+			     int list_len)
+{
+	u32 i;
+	u64 *page;
+
+	if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) {
+		ehca_gen_err("bad list_len, list_len=%x "
+			     "e_fmr->fmr_max_pages=%x fmr=%p",
+			     list_len, e_fmr->fmr_max_pages, e_fmr);
+		return -EINVAL;
+	}
+
+	/* each page must be aligned */
+	page = page_list;
+	for (i = 0; i < list_len; i++) {
+		if (*page % e_fmr->fmr_page_size) {
+			ehca_gen_err("bad page, i=%x *page=%lx page=%p fmr=%p "
+				     "fmr_page_size=%x", i, *page, page, e_fmr,
+				     e_fmr->fmr_page_size);
+			return -EINVAL;
+		}
+		page++;
+	}
+
+	return 0;
+} /* end ehca_fmr_check_page_list() */
+
+/*----------------------------------------------------------------------*/
+
+/* setup page buffer from page info */
+int ehca_set_pagebuf(struct ehca_mr *e_mr,
+		     struct ehca_mr_pginfo *pginfo,
+		     u32 number,
+		     u64 *kpage)
+{
+	int ret = 0;
+	struct ib_umem_chunk *prev_chunk;
+	struct ib_umem_chunk *chunk;
+	struct ib_phys_buf *pbuf;
+	u64 *fmrlist;
+	u64 num4k, pgaddr, offs4k;
+	u32 i = 0;
+	u32 j = 0;
+
+	if (pginfo->type == EHCA_MR_PGI_PHYS) {
+		/* loop over desired phys_buf_array entries */
+		while (i < number) {
+			pbuf   = pginfo->phys_buf_array + pginfo->next_buf;
+			num4k  = ((pbuf->addr % EHCA_PAGESIZE) + pbuf->size +
+				  EHCA_PAGESIZE - 1) / EHCA_PAGESIZE;
+			offs4k = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE;
+			while (pginfo->next_4k < offs4k + num4k) {
+				/* sanity check */
+				if ((pginfo->page_cnt >= pginfo->num_pages) ||
+				    (pginfo->page_4k_cnt >= pginfo->num_4k)) {
+					ehca_gen_err("page_cnt >= num_pages, "
+						     "page_cnt=%lx "
+						     "num_pages=%lx "
+						     "page_4k_cnt=%lx "
+						     "num_4k=%lx i=%x",
+						     pginfo->page_cnt,
+						     pginfo->num_pages,
+						     pginfo->page_4k_cnt,
+						     pginfo->num_4k, i);
+					ret = -EFAULT;
+					goto ehca_set_pagebuf_exit0;
+				}
+				*kpage = phys_to_abs(
+					(pbuf->addr & EHCA_PAGEMASK)
+					+ (pginfo->next_4k * EHCA_PAGESIZE));
+				if ( !(*kpage) && pbuf->addr ) {
+					ehca_gen_err("pbuf->addr=%lx "
+						     "pbuf->size=%lx "
+						     "next_4k=%lx", pbuf->addr,
+						     pbuf->size,
+						     pginfo->next_4k);
+					ret = -EFAULT;
+					goto ehca_set_pagebuf_exit0;
+				}
+				(pginfo->page_4k_cnt)++;
+				(pginfo->next_4k)++;
+				if (pginfo->next_4k %
+				    (PAGE_SIZE / EHCA_PAGESIZE) == 0)
+					(pginfo->page_cnt)++;
+				kpage++;
+				i++;
+				if (i >= number) break;
+			}
+			if (pginfo->next_4k >= offs4k + num4k) {
+				(pginfo->next_buf)++;
+				pginfo->next_4k = 0;
+			}
+		}
+	} else if (pginfo->type == EHCA_MR_PGI_USER) {
+		/* loop over desired chunk entries */
+		chunk      = pginfo->next_chunk;
+		prev_chunk = pginfo->next_chunk;
+		list_for_each_entry_continue(chunk,
+					     (&(pginfo->region->chunk_list)),
+					     list) {
+			for (i = pginfo->next_nmap; i < chunk->nmap; ) {
+				pgaddr = ( page_to_pfn(chunk->page_list[i].page)
+					   << PAGE_SHIFT );
+				*kpage = phys_to_abs(pgaddr +
+						     (pginfo->next_4k *
+						      EHCA_PAGESIZE));
+				if ( !(*kpage) ) {
+					ehca_gen_err("pgaddr=%lx "
+						     "chunk->page_list[i]=%lx "
+						     "i=%x next_4k=%lx mr=%p",
+						     pgaddr,
+						     (u64)sg_dma_address(
+							     &chunk->
+							     page_list[i]),
+						     i, pginfo->next_4k, e_mr);
+					ret = -EFAULT;
+					goto ehca_set_pagebuf_exit0;
+				}
+				(pginfo->page_4k_cnt)++;
+				(pginfo->next_4k)++;
+				kpage++;
+				if (pginfo->next_4k %
+				    (PAGE_SIZE / EHCA_PAGESIZE) == 0) {
+					(pginfo->page_cnt)++;
+					(pginfo->next_nmap)++;
+					pginfo->next_4k = 0;
+					i++;
+				}
+				j++;
+				if (j >= number) break;
+			}
+			if ((pginfo->next_nmap >= chunk->nmap) &&
+			    (j >= number)) {
+				pginfo->next_nmap = 0;
+				prev_chunk = chunk;
+				break;
+			} else if (pginfo->next_nmap >= chunk->nmap) {
+				pginfo->next_nmap = 0;
+				prev_chunk = chunk;
+			} else if (j >= number)
+				break;
+			else
+				prev_chunk = chunk;
+		}
+		pginfo->next_chunk =
+			list_prepare_entry(prev_chunk,
+					   (&(pginfo->region->chunk_list)),
+					   list);
+	} else if (pginfo->type == EHCA_MR_PGI_FMR) {
+		/* loop over desired page_list entries */
+		fmrlist = pginfo->page_list + pginfo->next_listelem;
+		for (i = 0; i < number; i++) {
+			*kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) +
+					     pginfo->next_4k * EHCA_PAGESIZE);
+			if ( !(*kpage) ) {
+				ehca_gen_err("*fmrlist=%lx fmrlist=%p "
+					     "next_listelem=%lx next_4k=%lx",
+					     *fmrlist, fmrlist,
+					     pginfo->next_listelem,
+					     pginfo->next_4k);
+				ret = -EFAULT;
+				goto ehca_set_pagebuf_exit0;
+			}
+			(pginfo->page_4k_cnt)++;
+			(pginfo->next_4k)++;
+			kpage++;
+			if (pginfo->next_4k %
+			    (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) {
+				(pginfo->page_cnt)++;
+				(pginfo->next_listelem)++;
+				fmrlist++;
+				pginfo->next_4k = 0;
+			}
+		}
+	} else {
+		ehca_gen_err("bad pginfo->type=%x", pginfo->type);
+		ret = -EFAULT;
+		goto ehca_set_pagebuf_exit0;
+	}
+
+ehca_set_pagebuf_exit0:
+	if (ret)
+		ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx "
+			     "num_4k=%lx next_buf=%lx next_4k=%lx number=%x "
+			     "kpage=%p page_cnt=%lx page_4k_cnt=%lx i=%x "
+			     "next_listelem=%lx region=%p next_chunk=%p "
+			     "next_nmap=%lx", ret, e_mr, pginfo, pginfo->type,
+			     pginfo->num_pages, pginfo->num_4k,
+			     pginfo->next_buf, pginfo->next_4k, number, kpage,
+			     pginfo->page_cnt, pginfo->page_4k_cnt, i,
+			     pginfo->next_listelem, pginfo->region,
+			     pginfo->next_chunk, pginfo->next_nmap);
+	return ret;
+} /* end ehca_set_pagebuf() */
+
+/*----------------------------------------------------------------------*/
+
+/* setup 1 page from page info page buffer */
+int ehca_set_pagebuf_1(struct ehca_mr *e_mr,
+		       struct ehca_mr_pginfo *pginfo,
+		       u64 *rpage)
+{
+	int ret = 0;
+	struct ib_phys_buf *tmp_pbuf;
+	u64 *fmrlist;
+	struct ib_umem_chunk *chunk;
+	struct ib_umem_chunk *prev_chunk;
+	u64 pgaddr, num4k, offs4k;
+
+	if (pginfo->type == EHCA_MR_PGI_PHYS) {
+		/* sanity check */
+		if ((pginfo->page_cnt >= pginfo->num_pages) ||
+		    (pginfo->page_4k_cnt >= pginfo->num_4k)) {
+			ehca_gen_err("page_cnt >= num_pages, page_cnt=%lx "
+				     "num_pages=%lx page_4k_cnt=%lx num_4k=%lx",
+				     pginfo->page_cnt, pginfo->num_pages,
+				     pginfo->page_4k_cnt, pginfo->num_4k);
+			ret = -EFAULT;
+			goto ehca_set_pagebuf_1_exit0;
+		}
+		tmp_pbuf = pginfo->phys_buf_array + pginfo->next_buf;
+		num4k  = ((tmp_pbuf->addr % EHCA_PAGESIZE) + tmp_pbuf->size +
+			  EHCA_PAGESIZE - 1) / EHCA_PAGESIZE;
+		offs4k = (tmp_pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE;
+		*rpage = phys_to_abs((tmp_pbuf->addr & EHCA_PAGEMASK) +
+				     (pginfo->next_4k * EHCA_PAGESIZE));
+		if ( !(*rpage) && tmp_pbuf->addr ) {
+			ehca_gen_err("tmp_pbuf->addr=%lx"
+				     " tmp_pbuf->size=%lx next_4k=%lx",
+				     tmp_pbuf->addr, tmp_pbuf->size,
+				     pginfo->next_4k);
+			ret = -EFAULT;
+			goto ehca_set_pagebuf_1_exit0;
+		}
+		(pginfo->page_4k_cnt)++;
+		(pginfo->next_4k)++;
+		if (pginfo->next_4k % (PAGE_SIZE / EHCA_PAGESIZE) == 0)
+			(pginfo->page_cnt)++;
+		if (pginfo->next_4k >= offs4k + num4k) {
+			(pginfo->next_buf)++;
+			pginfo->next_4k = 0;
+		}
+	} else if (pginfo->type == EHCA_MR_PGI_USER) {
+		chunk      = pginfo->next_chunk;
+		prev_chunk = pginfo->next_chunk;
+		list_for_each_entry_continue(chunk,
+					     (&(pginfo->region->chunk_list)),
+					     list) {
+			pgaddr = ( page_to_pfn(chunk->page_list[
+						       pginfo->next_nmap].page)
+				   << PAGE_SHIFT);
+			*rpage = phys_to_abs(pgaddr +
+					     (pginfo->next_4k * EHCA_PAGESIZE));
+			if ( !(*rpage) ) {
+				ehca_gen_err("pgaddr=%lx chunk->page_list[]=%lx"
+					     " next_nmap=%lx next_4k=%lx mr=%p",
+					     pgaddr, (u64)sg_dma_address(
+						     &chunk->page_list[
+							     pginfo->
+							     next_nmap]),
+					     pginfo->next_nmap, pginfo->next_4k,
+					     e_mr);
+				ret = -EFAULT;
+				goto ehca_set_pagebuf_1_exit0;
+			}
+			(pginfo->page_4k_cnt)++;
+			(pginfo->next_4k)++;
+			if (pginfo->next_4k %
+			    (PAGE_SIZE / EHCA_PAGESIZE) == 0) {
+				(pginfo->page_cnt)++;
+				(pginfo->next_nmap)++;
+				pginfo->next_4k = 0;
+			}
+			if (pginfo->next_nmap >= chunk->nmap) {
+				pginfo->next_nmap = 0;
+				prev_chunk = chunk;
+			}
+			break;
+		}
+		pginfo->next_chunk =
+			list_prepare_entry(prev_chunk,
+					   (&(pginfo->region->chunk_list)),
+					   list);
+	} else if (pginfo->type == EHCA_MR_PGI_FMR) {
+		fmrlist = pginfo->page_list + pginfo->next_listelem;
+		*rpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) +
+				     pginfo->next_4k * EHCA_PAGESIZE);
+		if ( !(*rpage) ) {
+			ehca_gen_err("*fmrlist=%lx fmrlist=%p "
+				     "next_listelem=%lx next_4k=%lx",
+				     *fmrlist, fmrlist, pginfo->next_listelem,
+				     pginfo->next_4k);
+			ret = -EFAULT;
+			goto ehca_set_pagebuf_1_exit0;
+		}
+		(pginfo->page_4k_cnt)++;
+		(pginfo->next_4k)++;
+		if (pginfo->next_4k %
+		    (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) {
+			(pginfo->page_cnt)++;
+			(pginfo->next_listelem)++;
+			pginfo->next_4k = 0;
+		}
+	} else {
+		ehca_gen_err("bad pginfo->type=%x", pginfo->type);
+		ret = -EFAULT;
+		goto ehca_set_pagebuf_1_exit0;
+	}
+
+ehca_set_pagebuf_1_exit0:
+	if (ret)
+		ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx "
+			     "num_4k=%lx next_buf=%lx next_4k=%lx rpage=%p "
+			     "page_cnt=%lx page_4k_cnt=%lx next_listelem=%lx "
+			     "region=%p next_chunk=%p next_nmap=%lx", ret, e_mr,
+			     pginfo, pginfo->type, pginfo->num_pages,
+			     pginfo->num_4k, pginfo->next_buf, pginfo->next_4k,
+			     rpage, pginfo->page_cnt, pginfo->page_4k_cnt,
+			     pginfo->next_listelem, pginfo->region,
+			     pginfo->next_chunk, pginfo->next_nmap);
+	return ret;
+} /* end ehca_set_pagebuf_1() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * check MR if it is a max-MR, i.e. uses whole memory
+ * in case it's a max-MR 1 is returned, else 0
+ */
+int ehca_mr_is_maxmr(u64 size,
+		     u64 *iova_start)
+{
+	/* a MR is treated as max-MR only if it fits following: */
+	if ((size == ((u64)high_memory - PAGE_OFFSET)) &&
+	    (iova_start == (void*)KERNELBASE)) {
+		ehca_gen_dbg("this is a max-MR");
+		return 1;
+	} else
+		return 0;
+} /* end ehca_mr_is_maxmr() */
+
+/*----------------------------------------------------------------------*/
+
+/* map access control for MR/MW. This routine is used for MR and MW. */
+void ehca_mrmw_map_acl(int ib_acl,
+		       u32 *hipz_acl)
+{
+	*hipz_acl = 0;
+	if (ib_acl & IB_ACCESS_REMOTE_READ)
+		*hipz_acl |= HIPZ_ACCESSCTRL_R_READ;
+	if (ib_acl & IB_ACCESS_REMOTE_WRITE)
+		*hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE;
+	if (ib_acl & IB_ACCESS_REMOTE_ATOMIC)
+		*hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC;
+	if (ib_acl & IB_ACCESS_LOCAL_WRITE)
+		*hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE;
+	if (ib_acl & IB_ACCESS_MW_BIND)
+		*hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND;
+} /* end ehca_mrmw_map_acl() */
+
+/*----------------------------------------------------------------------*/
+
+/* sets page size in hipz access control for MR/MW. */
+void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl) /*INOUT*/
+{
+	return; /* HCA supports only 4k */
+} /* end ehca_mrmw_set_pgsize_hipz_acl() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * reverse map access control for MR/MW.
+ * This routine is used for MR and MW.
+ */
+void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
+			       int *ib_acl) /*OUT*/
+{
+	*ib_acl = 0;
+	if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ)
+		*ib_acl |= IB_ACCESS_REMOTE_READ;
+	if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE)
+		*ib_acl |= IB_ACCESS_REMOTE_WRITE;
+	if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC)
+		*ib_acl |= IB_ACCESS_REMOTE_ATOMIC;
+	if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE)
+		*ib_acl |= IB_ACCESS_LOCAL_WRITE;
+	if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND)
+		*ib_acl |= IB_ACCESS_MW_BIND;
+} /* end ehca_mrmw_reverse_map_acl() */
+
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * map HIPZ rc to IB retcodes for MR/MW allocations
+ * Used for hipz_mr_reg_alloc and hipz_mw_alloc.
+ */
+int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc)
+{
+	switch (hipz_rc) {
+	case H_SUCCESS:	             /* successful completion */
+		return 0;
+	case H_ADAPTER_PARM:         /* invalid adapter handle */
+	case H_RT_PARM:              /* invalid resource type */
+	case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
+	case H_MLENGTH_PARM:         /* invalid memory length */
+	case H_MEM_ACCESS_PARM:      /* invalid access controls */
+	case H_CONSTRAINED:          /* resource constraint */
+		return -EINVAL;
+	case H_BUSY:                 /* long busy */
+		return -EBUSY;
+	default:
+		return -EINVAL;
+	}
+} /* end ehca_mrmw_map_hrc_alloc() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * map HIPZ rc to IB retcodes for MR register rpage
+ * Used for hipz_h_register_rpage_mr at registering last page
+ */
+int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc)
+{
+	switch (hipz_rc) {
+	case H_SUCCESS:         /* registration complete */
+		return 0;
+	case H_PAGE_REGISTERED:	/* page registered */
+	case H_ADAPTER_PARM:    /* invalid adapter handle */
+	case H_RH_PARM:         /* invalid resource handle */
+/*	case H_QT_PARM:            invalid queue type */
+	case H_PARAMETER:       /*
+				 * invalid logical address,
+				 * or count zero or greater 512
+				 */
+	case H_TABLE_FULL:      /* page table full */
+	case H_HARDWARE:        /* HCA not operational */
+		return -EINVAL;
+	case H_BUSY:            /* long busy */
+		return -EBUSY;
+	default:
+		return -EINVAL;
+	}
+} /* end ehca_mrmw_map_hrc_rrpg_last() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * map HIPZ rc to IB retcodes for MR register rpage
+ * Used for hipz_h_register_rpage_mr at registering one page, but not last page
+ */
+int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc)
+{
+	switch (hipz_rc) {
+	case H_PAGE_REGISTERED:	/* page registered */
+		return 0;
+	case H_SUCCESS:         /* registration complete */
+	case H_ADAPTER_PARM:    /* invalid adapter handle */
+	case H_RH_PARM:         /* invalid resource handle */
+/*	case H_QT_PARM:            invalid queue type */
+	case H_PARAMETER:       /*
+				 * invalid logical address,
+				 * or count zero or greater 512
+				 */
+	case H_TABLE_FULL:      /* page table full */
+	case H_HARDWARE:        /* HCA not operational */
+		return -EINVAL;
+	case H_BUSY:            /* long busy */
+		return -EBUSY;
+	default:
+		return -EINVAL;
+	}
+} /* end ehca_mrmw_map_hrc_rrpg_notlast() */
+
+/*----------------------------------------------------------------------*/
+
+/* map HIPZ rc to IB retcodes for MR query. Used for hipz_mr_query. */
+int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc)
+{
+	switch (hipz_rc) {
+	case H_SUCCESS:	             /* successful completion */
+		return 0;
+	case H_ADAPTER_PARM:         /* invalid adapter handle */
+	case H_RH_PARM:              /* invalid resource handle */
+		return -EINVAL;
+	case H_BUSY:                 /* long busy */
+		return -EBUSY;
+	default:
+		return -EINVAL;
+	}
+} /* end ehca_mrmw_map_hrc_query_mr() */
+
+/*----------------------------------------------------------------------*/
+/*----------------------------------------------------------------------*/
+
+/*
+ * map HIPZ rc to IB retcodes for freeing MR resource
+ * Used for hipz_h_free_resource_mr
+ */
+int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc)
+{
+	switch (hipz_rc) {
+	case H_SUCCESS:      /* resource freed */
+		return 0;
+	case H_ADAPTER_PARM: /* invalid adapter handle */
+	case H_RH_PARM:      /* invalid resource handle */
+	case H_R_STATE:      /* invalid resource state */
+	case H_HARDWARE:     /* HCA not operational */
+		return -EINVAL;
+	case H_RESOURCE:     /* Resource in use */
+	case H_BUSY:         /* long busy */
+		return -EBUSY;
+	default:
+		return -EINVAL;
+	}
+} /* end ehca_mrmw_map_hrc_free_mr() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * map HIPZ rc to IB retcodes for freeing MW resource
+ * Used for hipz_h_free_resource_mw
+ */
+int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc)
+{
+	switch (hipz_rc) {
+	case H_SUCCESS:	     /* resource freed */
+		return 0;
+	case H_ADAPTER_PARM: /* invalid adapter handle */
+	case H_RH_PARM:      /* invalid resource handle */
+	case H_R_STATE:      /* invalid resource state */
+	case H_HARDWARE:     /* HCA not operational */
+		return -EINVAL;
+	case H_RESOURCE:     /* Resource in use */
+	case H_BUSY:         /* long busy */
+		return -EBUSY;
+	default:
+		return -EINVAL;
+	}
+} /* end ehca_mrmw_map_hrc_free_mw() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * map HIPZ rc to IB retcodes for SMR registrations
+ * Used for hipz_h_register_smr.
+ */
+int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc)
+{
+	switch (hipz_rc) {
+	case H_SUCCESS:	             /* successful completion */
+		return 0;
+	case H_ADAPTER_PARM:         /* invalid adapter handle */
+	case H_RH_PARM:              /* invalid resource handle */
+	case H_MEM_PARM:             /* invalid MR virtual address */
+	case H_MEM_ACCESS_PARM:      /* invalid access controls */
+	case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
+		return -EINVAL;
+	case H_BUSY:                 /* long busy */
+		return -EBUSY;
+	default:
+		return -EINVAL;
+	}
+} /* end ehca_mrmw_map_hrc_reg_smr() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * MR destructor and constructor
+ * used in Reregister MR verb, sets all fields in ehca_mr_t to 0,
+ * except struct ib_mr and spinlock
+ */
+void ehca_mr_deletenew(struct ehca_mr *mr)
+{
+	mr->flags         = 0;
+	mr->num_pages     = 0;
+	mr->num_4k        = 0;
+	mr->acl           = 0;
+	mr->start         = NULL;
+	mr->fmr_page_size = 0;
+	mr->fmr_max_pages = 0;
+	mr->fmr_max_maps  = 0;
+	mr->fmr_map_cnt   = 0;
+	memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
+	memset(&mr->galpas, 0, sizeof(mr->galpas));
+	mr->nr_of_pages   = 0;
+	mr->pagearray     = NULL;
+} /* end ehca_mr_deletenew() */
+
+int ehca_init_mrmw_cache(void)
+{
+	mr_cache = kmem_cache_create("ehca_cache_mr",
+				     sizeof(struct ehca_mr), 0,
+				     SLAB_HWCACHE_ALIGN,
+				     NULL, NULL);
+	if (!mr_cache)
+		return -ENOMEM;
+	mw_cache = kmem_cache_create("ehca_cache_mw",
+				     sizeof(struct ehca_mw), 0,
+				     SLAB_HWCACHE_ALIGN,
+				     NULL, NULL);
+	if (!mw_cache) {
+		kmem_cache_destroy(mr_cache);
+		mr_cache = NULL;
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void ehca_cleanup_mrmw_cache(void)
+{
+	if (mr_cache)
+		kmem_cache_destroy(mr_cache);
+	if (mw_cache)
+		kmem_cache_destroy(mw_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h
new file mode 100644
index 00000000000..d936e40a574
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h
@@ -0,0 +1,140 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  MR/MW declarations and inline functions
+ *
+ *  Authors: Dietmar Decker <ddecker@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _EHCA_MRMW_H_
+#define _EHCA_MRMW_H_
+
+int ehca_reg_mr(struct ehca_shca *shca,
+		struct ehca_mr *e_mr,
+		u64 *iova_start,
+		u64 size,
+		int acl,
+		struct ehca_pd *e_pd,
+		struct ehca_mr_pginfo *pginfo,
+		u32 *lkey,
+		u32 *rkey);
+
+int ehca_reg_mr_rpages(struct ehca_shca *shca,
+		       struct ehca_mr *e_mr,
+		       struct ehca_mr_pginfo *pginfo);
+
+int ehca_rereg_mr(struct ehca_shca *shca,
+		  struct ehca_mr *e_mr,
+		  u64 *iova_start,
+		  u64 size,
+		  int mr_access_flags,
+		  struct ehca_pd *e_pd,
+		  struct ehca_mr_pginfo *pginfo,
+		  u32 *lkey,
+		  u32 *rkey);
+
+int ehca_unmap_one_fmr(struct ehca_shca *shca,
+		       struct ehca_mr *e_fmr);
+
+int ehca_reg_smr(struct ehca_shca *shca,
+		 struct ehca_mr *e_origmr,
+		 struct ehca_mr *e_newmr,
+		 u64 *iova_start,
+		 int acl,
+		 struct ehca_pd *e_pd,
+		 u32 *lkey,
+		 u32 *rkey);
+
+int ehca_reg_internal_maxmr(struct ehca_shca *shca,
+			    struct ehca_pd *e_pd,
+			    struct ehca_mr **maxmr);
+
+int ehca_reg_maxmr(struct ehca_shca *shca,
+		   struct ehca_mr *e_newmr,
+		   u64 *iova_start,
+		   int acl,
+		   struct ehca_pd *e_pd,
+		   u32 *lkey,
+		   u32 *rkey);
+
+int ehca_dereg_internal_maxmr(struct ehca_shca *shca);
+
+int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
+				  int num_phys_buf,
+				  u64 *iova_start,
+				  u64 *size);
+
+int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
+			     u64 *page_list,
+			     int list_len);
+
+int ehca_set_pagebuf(struct ehca_mr *e_mr,
+		     struct ehca_mr_pginfo *pginfo,
+		     u32 number,
+		     u64 *kpage);
+
+int ehca_set_pagebuf_1(struct ehca_mr *e_mr,
+		       struct ehca_mr_pginfo *pginfo,
+		       u64 *rpage);
+
+int ehca_mr_is_maxmr(u64 size,
+		     u64 *iova_start);
+
+void ehca_mrmw_map_acl(int ib_acl,
+		       u32 *hipz_acl);
+
+void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl);
+
+void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
+			       int *ib_acl);
+
+int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc);
+
+void ehca_mr_deletenew(struct ehca_mr *mr);
+
+#endif  /*_EHCA_MRMW_H_*/
diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c
new file mode 100644
index 00000000000..2c3cdc6f7b3
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_pd.c
@@ -0,0 +1,114 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  PD functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_tools.h"
+#include "ehca_iverbs.h"
+
+static struct kmem_cache *pd_cache;
+
+struct ib_pd *ehca_alloc_pd(struct ib_device *device,
+			    struct ib_ucontext *context, struct ib_udata *udata)
+{
+	struct ehca_pd *pd;
+
+	pd = kmem_cache_alloc(pd_cache, SLAB_KERNEL);
+	if (!pd) {
+		ehca_err(device, "device=%p context=%p out of memory",
+			 device, context);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	memset(pd, 0, sizeof(struct ehca_pd));
+	pd->ownpid = current->tgid;
+
+	/*
+	 * Kernel PD: when device = -1, 0
+	 * User   PD: when context != -1
+	 */
+	if (!context) {
+		/*
+		 * Kernel PDs after init reuses always
+		 * the one created in ehca_shca_reopen()
+		 */
+		struct ehca_shca *shca = container_of(device, struct ehca_shca,
+						      ib_device);
+		pd->fw_pd.value = shca->pd->fw_pd.value;
+	} else
+		pd->fw_pd.value = (u64)pd;
+
+	return &pd->ib_pd;
+}
+
+int ehca_dealloc_pd(struct ib_pd *pd)
+{
+	u32 cur_pid = current->tgid;
+	struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != cur_pid) {
+		ehca_err(pd->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	kmem_cache_free(pd_cache,
+			container_of(pd, struct ehca_pd, ib_pd));
+
+	return 0;
+}
+
+int ehca_init_pd_cache(void)
+{
+	pd_cache = kmem_cache_create("ehca_cache_pd",
+				     sizeof(struct ehca_pd), 0,
+				     SLAB_HWCACHE_ALIGN,
+				     NULL, NULL);
+	if (!pd_cache)
+		return -ENOMEM;
+	return 0;
+}
+
+void ehca_cleanup_pd_cache(void)
+{
+	if (pd_cache)
+		kmem_cache_destroy(pd_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h
new file mode 100644
index 00000000000..8707d297ce4
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_qes.h
@@ -0,0 +1,259 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Hardware request structures
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef _EHCA_QES_H_
+#define _EHCA_QES_H_
+
+#include "ehca_tools.h"
+
+/* virtual scatter gather entry to specify remote adresses with length */
+struct ehca_vsgentry {
+	u64 vaddr;
+	u32 lkey;
+	u32 length;
+};
+
+#define GRH_FLAG_MASK        EHCA_BMASK_IBM(7,7)
+#define GRH_IPVERSION_MASK   EHCA_BMASK_IBM(0,3)
+#define GRH_TCLASS_MASK      EHCA_BMASK_IBM(4,12)
+#define GRH_FLOWLABEL_MASK   EHCA_BMASK_IBM(13,31)
+#define GRH_PAYLEN_MASK      EHCA_BMASK_IBM(32,47)
+#define GRH_NEXTHEADER_MASK  EHCA_BMASK_IBM(48,55)
+#define GRH_HOPLIMIT_MASK    EHCA_BMASK_IBM(56,63)
+
+/*
+ * Unreliable Datagram Address Vector Format
+ * see IBTA Vol1 chapter 8.3 Global Routing Header
+ */
+struct ehca_ud_av {
+	u8 sl;
+	u8 lnh;
+	u16 dlid;
+	u8 reserved1;
+	u8 reserved2;
+	u8 reserved3;
+	u8 slid_path_bits;
+	u8 reserved4;
+	u8 ipd;
+	u8 reserved5;
+	u8 pmtu;
+	u32 reserved6;
+	u64 reserved7;
+	union {
+		struct {
+			u64 word_0; /* always set to 6  */
+			/*should be 0x1B for IB transport */
+			u64 word_1;
+			u64 word_2;
+			u64 word_3;
+			u64 word_4;
+		} grh;
+		struct {
+			u32 wd_0;
+			u32 wd_1;
+			/* DWord_1 --> SGID */
+
+			u32 sgid_wd3;
+			u32 sgid_wd2;
+
+			u32 sgid_wd1;
+			u32 sgid_wd0;
+			/* DWord_3 --> DGID */
+
+			u32 dgid_wd3;
+			u32 dgid_wd2;
+
+			u32 dgid_wd1;
+			u32 dgid_wd0;
+		} grh_l;
+	};
+};
+
+/* maximum number of sg entries allowed in a WQE */
+#define MAX_WQE_SG_ENTRIES 252
+
+#define WQE_OPTYPE_SEND             0x80
+#define WQE_OPTYPE_RDMAREAD         0x40
+#define WQE_OPTYPE_RDMAWRITE        0x20
+#define WQE_OPTYPE_CMPSWAP          0x10
+#define WQE_OPTYPE_FETCHADD         0x08
+#define WQE_OPTYPE_BIND             0x04
+
+#define WQE_WRFLAG_REQ_SIGNAL_COM   0x80
+#define WQE_WRFLAG_FENCE            0x40
+#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20
+#define WQE_WRFLAG_SOLIC_EVENT      0x10
+
+#define WQEF_CACHE_HINT             0x80
+#define WQEF_CACHE_HINT_RD_WR       0x40
+#define WQEF_TIMED_WQE              0x20
+#define WQEF_PURGE                  0x08
+#define WQEF_HIGH_NIBBLE            0xF0
+
+#define MW_BIND_ACCESSCTRL_R_WRITE   0x40
+#define MW_BIND_ACCESSCTRL_R_READ    0x20
+#define MW_BIND_ACCESSCTRL_R_ATOMIC  0x10
+
+struct ehca_wqe {
+	u64 work_request_id;
+	u8 optype;
+	u8 wr_flag;
+	u16 pkeyi;
+	u8 wqef;
+	u8 nr_of_data_seg;
+	u16 wqe_provided_slid;
+	u32 destination_qp_number;
+	u32 resync_psn_sqp;
+	u32 local_ee_context_qkey;
+	u32 immediate_data;
+	union {
+		struct {
+			u64 remote_virtual_adress;
+			u32 rkey;
+			u32 reserved;
+			u64 atomic_1st_op_dma_len;
+			u64 atomic_2nd_op;
+			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
+
+		} nud;
+		struct {
+			u64 ehca_ud_av_ptr;
+			u64 reserved1;
+			u64 reserved2;
+			u64 reserved3;
+			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
+		} ud_avp;
+		struct {
+			struct ehca_ud_av ud_av;
+			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES -
+						     2];
+		} ud_av;
+		struct {
+			u64 reserved0;
+			u64 reserved1;
+			u64 reserved2;
+			u64 reserved3;
+			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
+		} all_rcv;
+
+		struct {
+			u64 reserved;
+			u32 rkey;
+			u32 old_rkey;
+			u64 reserved1;
+			u64 reserved2;
+			u64 virtual_address;
+			u32 reserved3;
+			u32 length;
+			u32 reserved4;
+			u16 reserved5;
+			u8 reserved6;
+			u8 lr_ctl;
+			u32 lkey;
+			u32 reserved7;
+			u64 reserved8;
+			u64 reserved9;
+			u64 reserved10;
+			u64 reserved11;
+		} bind;
+		struct {
+			u64 reserved12;
+			u64 reserved13;
+			u32 size;
+			u32 start;
+		} inline_data;
+	} u;
+
+};
+
+#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0,0)
+#define WC_IMM_DATA     EHCA_BMASK_IBM(1,1)
+#define WC_GRH_PRESENT  EHCA_BMASK_IBM(2,2)
+#define WC_SE_BIT       EHCA_BMASK_IBM(3,3)
+#define WC_STATUS_ERROR_BIT 0x80000000
+#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800
+#define WC_STATUS_PURGE_BIT 0x10
+
+struct ehca_cqe {
+	u64 work_request_id;
+	u8 optype;
+	u8 w_completion_flags;
+	u16 reserved1;
+	u32 nr_bytes_transferred;
+	u32 immediate_data;
+	u32 local_qp_number;
+	u8 freed_resource_count;
+	u8 service_level;
+	u16 wqe_count;
+	u32 qp_token;
+	u32 qkey_ee_token;
+	u32 remote_qp_number;
+	u16 dlid;
+	u16 rlid;
+	u16 reserved2;
+	u16 pkey_index;
+	u32 cqe_timestamp;
+	u32 wqe_timestamp;
+	u8 wqe_timestamp_valid;
+	u8 reserved3;
+	u8 reserved4;
+	u8 cqe_flags;
+	u32 status;
+};
+
+struct ehca_eqe {
+	u64 entry;
+};
+
+struct ehca_mrte {
+	u64 starting_va;
+	u64 length; /* length of memory region in bytes*/
+	u32 pd;
+	u8 key_instance;
+	u8 pagesize;
+	u8 mr_control;
+	u8 local_remote_access_ctrl;
+	u8 reserved[0x20 - 0x18];
+	u64 at_pointer[4];
+};
+#endif /*_EHCA_QES_H_*/
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
new file mode 100644
index 00000000000..4394123cdbd
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -0,0 +1,1507 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  QP functions
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <asm/current.h>
+
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+#include "hipz_fns.h"
+
+static struct kmem_cache *qp_cache;
+
+/*
+ * attributes not supported by query qp
+ */
+#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \
+				     IB_QP_MAX_QP_RD_ATOMIC   | \
+				     IB_QP_ACCESS_FLAGS       | \
+				     IB_QP_EN_SQD_ASYNC_NOTIFY)
+
+/*
+ * ehca (internal) qp state values
+ */
+enum ehca_qp_state {
+	EHCA_QPS_RESET = 1,
+	EHCA_QPS_INIT = 2,
+	EHCA_QPS_RTR = 3,
+	EHCA_QPS_RTS = 5,
+	EHCA_QPS_SQD = 6,
+	EHCA_QPS_SQE = 8,
+	EHCA_QPS_ERR = 128
+};
+
+/*
+ * qp state transitions as defined by IB Arch Rel 1.1 page 431
+ */
+enum ib_qp_statetrans {
+	IB_QPST_ANY2RESET,
+	IB_QPST_ANY2ERR,
+	IB_QPST_RESET2INIT,
+	IB_QPST_INIT2RTR,
+	IB_QPST_INIT2INIT,
+	IB_QPST_RTR2RTS,
+	IB_QPST_RTS2SQD,
+	IB_QPST_RTS2RTS,
+	IB_QPST_SQD2RTS,
+	IB_QPST_SQE2RTS,
+	IB_QPST_SQD2SQD,
+	IB_QPST_MAX	/* nr of transitions, this must be last!!! */
+};
+
+/*
+ * ib2ehca_qp_state maps IB to ehca qp_state
+ * returns ehca qp state corresponding to given ib qp state
+ */
+static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state)
+{
+	switch (ib_qp_state) {
+	case IB_QPS_RESET:
+		return EHCA_QPS_RESET;
+	case IB_QPS_INIT:
+		return EHCA_QPS_INIT;
+	case IB_QPS_RTR:
+		return EHCA_QPS_RTR;
+	case IB_QPS_RTS:
+		return EHCA_QPS_RTS;
+	case IB_QPS_SQD:
+		return EHCA_QPS_SQD;
+	case IB_QPS_SQE:
+		return EHCA_QPS_SQE;
+	case IB_QPS_ERR:
+		return EHCA_QPS_ERR;
+	default:
+		ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state);
+		return -EINVAL;
+	}
+}
+
+/*
+ * ehca2ib_qp_state maps ehca to IB qp_state
+ * returns ib qp state corresponding to given ehca qp state
+ */
+static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state
+						ehca_qp_state)
+{
+	switch (ehca_qp_state) {
+	case EHCA_QPS_RESET:
+		return IB_QPS_RESET;
+	case EHCA_QPS_INIT:
+		return IB_QPS_INIT;
+	case EHCA_QPS_RTR:
+		return IB_QPS_RTR;
+	case EHCA_QPS_RTS:
+		return IB_QPS_RTS;
+	case EHCA_QPS_SQD:
+		return IB_QPS_SQD;
+	case EHCA_QPS_SQE:
+		return IB_QPS_SQE;
+	case EHCA_QPS_ERR:
+		return IB_QPS_ERR;
+	default:
+		ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state);
+		return -EINVAL;
+	}
+}
+
+/*
+ * ehca_qp_type used as index for req_attr and opt_attr of
+ * struct ehca_modqp_statetrans
+ */
+enum ehca_qp_type {
+	QPT_RC = 0,
+	QPT_UC = 1,
+	QPT_UD = 2,
+	QPT_SQP = 3,
+	QPT_MAX
+};
+
+/*
+ * ib2ehcaqptype maps Ib to ehca qp_type
+ * returns ehca qp type corresponding to ib qp type
+ */
+static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype)
+{
+	switch (ibqptype) {
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+		return QPT_SQP;
+	case IB_QPT_RC:
+		return QPT_RC;
+	case IB_QPT_UC:
+		return QPT_UC;
+	case IB_QPT_UD:
+		return QPT_UD;
+	default:
+		ehca_gen_err("Invalid ibqptype=%x", ibqptype);
+		return -EINVAL;
+	}
+}
+
+static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate,
+							 int ib_tostate)
+{
+	int index = -EINVAL;
+	switch (ib_tostate) {
+	case IB_QPS_RESET:
+		index = IB_QPST_ANY2RESET;
+		break;
+	case IB_QPS_INIT:
+		switch (ib_fromstate) {
+		case IB_QPS_RESET:
+			index = IB_QPST_RESET2INIT;
+			break;
+		case IB_QPS_INIT:
+			index = IB_QPST_INIT2INIT;
+			break;
+		}
+		break;
+	case IB_QPS_RTR:
+		if (ib_fromstate == IB_QPS_INIT)
+			index = IB_QPST_INIT2RTR;
+		break;
+	case IB_QPS_RTS:
+		switch (ib_fromstate) {
+		case IB_QPS_RTR:
+			index = IB_QPST_RTR2RTS;
+			break;
+		case IB_QPS_RTS:
+			index = IB_QPST_RTS2RTS;
+			break;
+		case IB_QPS_SQD:
+			index = IB_QPST_SQD2RTS;
+			break;
+		case IB_QPS_SQE:
+			index = IB_QPST_SQE2RTS;
+			break;
+		}
+		break;
+	case IB_QPS_SQD:
+		if (ib_fromstate == IB_QPS_RTS)
+			index = IB_QPST_RTS2SQD;
+		break;
+	case IB_QPS_SQE:
+		break;
+	case IB_QPS_ERR:
+		index = IB_QPST_ANY2ERR;
+		break;
+	default:
+		break;
+	}
+	return index;
+}
+
+enum ehca_service_type {
+	ST_RC = 0,
+	ST_UC = 1,
+	ST_RD = 2,
+	ST_UD = 3
+};
+
+/*
+ * ibqptype2servicetype returns hcp service type corresponding to given
+ * ib qp type used by create_qp()
+ */
+static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
+{
+	switch (ibqptype) {
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+		return ST_UD;
+	case IB_QPT_RC:
+		return ST_RC;
+	case IB_QPT_UC:
+		return ST_UC;
+	case IB_QPT_UD:
+		return ST_UD;
+	case IB_QPT_RAW_IPV6:
+		return -EINVAL;
+	case IB_QPT_RAW_ETY:
+		return -EINVAL;
+	default:
+		ehca_gen_err("Invalid ibqptype=%x", ibqptype);
+		return -EINVAL;
+	}
+}
+
+/*
+ * init_qp_queues initializes/constructs r/squeue and registers queue pages.
+ */
+static inline int init_qp_queues(struct ehca_shca *shca,
+				 struct ehca_qp *my_qp,
+				 int nr_sq_pages,
+				 int nr_rq_pages,
+				 int swqe_size,
+				 int rwqe_size,
+				 int nr_send_sges, int nr_receive_sges)
+{
+	int ret, cnt, ipz_rc;
+	void *vpage;
+	u64 rpage, h_ret;
+	struct ib_device *ib_dev = &shca->ib_device;
+	struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
+
+	ipz_rc = ipz_queue_ctor(&my_qp->ipz_squeue,
+				nr_sq_pages,
+				EHCA_PAGESIZE, swqe_size, nr_send_sges);
+	if (!ipz_rc) {
+		ehca_err(ib_dev,"Cannot allocate page for squeue. ipz_rc=%x",
+			 ipz_rc);
+		return -EBUSY;
+	}
+
+	ipz_rc = ipz_queue_ctor(&my_qp->ipz_rqueue,
+				nr_rq_pages,
+				EHCA_PAGESIZE, rwqe_size, nr_receive_sges);
+	if (!ipz_rc) {
+		ehca_err(ib_dev, "Cannot allocate page for rqueue. ipz_rc=%x",
+			 ipz_rc);
+		ret = -EBUSY;
+		goto init_qp_queues0;
+	}
+	/* register SQ pages */
+	for (cnt = 0; cnt < nr_sq_pages; cnt++) {
+		vpage = ipz_qpageit_get_inc(&my_qp->ipz_squeue);
+		if (!vpage) {
+			ehca_err(ib_dev, "SQ ipz_qpageit_get_inc() "
+				 "failed p_vpage= %p", vpage);
+			ret = -EINVAL;
+			goto init_qp_queues1;
+		}
+		rpage = virt_to_abs(vpage);
+
+		h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
+						 my_qp->ipz_qp_handle,
+						 &my_qp->pf, 0, 0,
+						 rpage, 1,
+						 my_qp->galpas.kernel);
+		if (h_ret < H_SUCCESS) {
+			ehca_err(ib_dev, "SQ hipz_qp_register_rpage()"
+				 " failed rc=%lx", h_ret);
+			ret = ehca2ib_return_code(h_ret);
+			goto init_qp_queues1;
+		}
+	}
+
+	ipz_qeit_reset(&my_qp->ipz_squeue);
+
+	/* register RQ pages */
+	for (cnt = 0; cnt < nr_rq_pages; cnt++) {
+		vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
+		if (!vpage) {
+			ehca_err(ib_dev, "RQ ipz_qpageit_get_inc() "
+				 "failed p_vpage = %p", vpage);
+			ret = -EINVAL;
+			goto init_qp_queues1;
+		}
+
+		rpage = virt_to_abs(vpage);
+
+		h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
+						 my_qp->ipz_qp_handle,
+						 &my_qp->pf, 0, 1,
+						 rpage, 1,my_qp->galpas.kernel);
+		if (h_ret < H_SUCCESS) {
+			ehca_err(ib_dev, "RQ hipz_qp_register_rpage() failed "
+				 "rc=%lx", h_ret);
+			ret = ehca2ib_return_code(h_ret);
+			goto init_qp_queues1;
+		}
+		if (cnt == (nr_rq_pages - 1)) {	/* last page! */
+			if (h_ret != H_SUCCESS) {
+				ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
+					 "h_ret= %lx ", h_ret);
+				ret = ehca2ib_return_code(h_ret);
+				goto init_qp_queues1;
+			}
+			vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
+			if (vpage) {
+				ehca_err(ib_dev, "ipz_qpageit_get_inc() "
+					 "should not succeed vpage=%p", vpage);
+				ret = -EINVAL;
+				goto init_qp_queues1;
+			}
+		} else {
+			if (h_ret != H_PAGE_REGISTERED) {
+				ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
+					 "h_ret= %lx ", h_ret);
+				ret = ehca2ib_return_code(h_ret);
+				goto init_qp_queues1;
+			}
+		}
+	}
+
+	ipz_qeit_reset(&my_qp->ipz_rqueue);
+
+	return 0;
+
+init_qp_queues1:
+	ipz_queue_dtor(&my_qp->ipz_rqueue);
+init_qp_queues0:
+	ipz_queue_dtor(&my_qp->ipz_squeue);
+	return ret;
+}
+
+struct ib_qp *ehca_create_qp(struct ib_pd *pd,
+			     struct ib_qp_init_attr *init_attr,
+			     struct ib_udata *udata)
+{
+	static int da_rc_msg_size[]={ 128, 256, 512, 1024, 2048, 4096 };
+	static int da_ud_sq_msg_size[]={ 128, 384, 896, 1920, 3968 };
+	struct ehca_qp *my_qp;
+	struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
+	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
+					      ib_device);
+	struct ib_ucontext *context = NULL;
+	u64 h_ret;
+	int max_send_sge, max_recv_sge, ret;
+
+	/* h_call's out parameters */
+	struct ehca_alloc_qp_parms parms;
+	u32 swqe_size = 0, rwqe_size = 0;
+	u8 daqp_completion, isdaqp;
+	unsigned long flags;
+
+	if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR &&
+		init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
+		ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed",
+			 init_attr->sq_sig_type);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* save daqp completion bits */
+	daqp_completion = init_attr->qp_type & 0x60;
+	/* save daqp bit */
+	isdaqp = (init_attr->qp_type & 0x80) ? 1 : 0;
+	init_attr->qp_type = init_attr->qp_type & 0x1F;
+
+	if (init_attr->qp_type != IB_QPT_UD &&
+	    init_attr->qp_type != IB_QPT_SMI &&
+	    init_attr->qp_type != IB_QPT_GSI &&
+	    init_attr->qp_type != IB_QPT_UC &&
+	    init_attr->qp_type != IB_QPT_RC) {
+		ehca_err(pd->device, "wrong QP Type=%x", init_attr->qp_type);
+		return ERR_PTR(-EINVAL);
+	}
+	if ((init_attr->qp_type != IB_QPT_RC && init_attr->qp_type != IB_QPT_UD)
+	    && isdaqp) {
+		ehca_err(pd->device, "unsupported LL QP Type=%x",
+			 init_attr->qp_type);
+		return ERR_PTR(-EINVAL);
+	} else if (init_attr->qp_type == IB_QPT_RC && isdaqp &&
+		   (init_attr->cap.max_send_wr > 255 ||
+		    init_attr->cap.max_recv_wr > 255 )) {
+		       ehca_err(pd->device, "Invalid Number of max_sq_wr =%x "
+				"or max_rq_wr=%x for QP Type=%x",
+				init_attr->cap.max_send_wr,
+				init_attr->cap.max_recv_wr,init_attr->qp_type);
+		       return ERR_PTR(-EINVAL);
+	} else if (init_attr->qp_type == IB_QPT_UD && isdaqp &&
+		  init_attr->cap.max_send_wr > 255) {
+		ehca_err(pd->device,
+			 "Invalid Number of max_send_wr=%x for UD QP_TYPE=%x",
+			 init_attr->cap.max_send_wr, init_attr->qp_type);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (pd->uobject && udata)
+		context = pd->uobject->context;
+
+	my_qp = kmem_cache_alloc(qp_cache, SLAB_KERNEL);
+	if (!my_qp) {
+		ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	memset(my_qp, 0, sizeof(struct ehca_qp));
+	memset (&parms, 0, sizeof(struct ehca_alloc_qp_parms));
+	spin_lock_init(&my_qp->spinlock_s);
+	spin_lock_init(&my_qp->spinlock_r);
+
+	my_qp->recv_cq =
+		container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
+	my_qp->send_cq =
+		container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
+
+	my_qp->init_attr = *init_attr;
+
+	do {
+		if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) {
+			ret = -ENOMEM;
+			ehca_err(pd->device, "Can't reserve idr resources.");
+			goto create_qp_exit0;
+		}
+
+		spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+		ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token);
+		spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+	} while (ret == -EAGAIN);
+
+	if (ret) {
+		ret = -ENOMEM;
+		ehca_err(pd->device, "Can't allocate new idr entry.");
+		goto create_qp_exit0;
+	}
+
+	parms.servicetype = ibqptype2servicetype(init_attr->qp_type);
+	if (parms.servicetype < 0) {
+		ret = -EINVAL;
+		ehca_err(pd->device, "Invalid qp_type=%x", init_attr->qp_type);
+		goto create_qp_exit0;
+	}
+
+	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+		parms.sigtype = HCALL_SIGT_EVERY;
+	else
+		parms.sigtype = HCALL_SIGT_BY_WQE;
+
+	/* UD_AV CIRCUMVENTION */
+	max_send_sge = init_attr->cap.max_send_sge;
+	max_recv_sge = init_attr->cap.max_recv_sge;
+	if (IB_QPT_UD == init_attr->qp_type ||
+	    IB_QPT_GSI == init_attr->qp_type ||
+	    IB_QPT_SMI == init_attr->qp_type) {
+		max_send_sge += 2;
+		max_recv_sge += 2;
+	}
+
+	parms.ipz_eq_handle = shca->eq.ipz_eq_handle;
+	parms.daqp_ctrl = isdaqp | daqp_completion;
+	parms.pd = my_pd->fw_pd;
+	parms.max_recv_sge = max_recv_sge;
+	parms.max_send_sge = max_send_sge;
+
+	h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, my_qp, &parms);
+
+	if (h_ret != H_SUCCESS) {
+		ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx",
+			 h_ret);
+		ret = ehca2ib_return_code(h_ret);
+		goto create_qp_exit1;
+	}
+
+	switch (init_attr->qp_type) {
+	case IB_QPT_RC:
+	        if (isdaqp == 0) {
+			swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
+					     (parms.act_nr_send_sges)]);
+			rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
+					     (parms.act_nr_recv_sges)]);
+		} else { /* for daqp we need to use msg size, not wqe size */
+		        swqe_size = da_rc_msg_size[max_send_sge];
+			rwqe_size = da_rc_msg_size[max_recv_sge];
+			parms.act_nr_send_sges = 1;
+			parms.act_nr_recv_sges = 1;
+		}
+		break;
+	case IB_QPT_UC:
+		swqe_size = offsetof(struct ehca_wqe,
+				     u.nud.sg_list[parms.act_nr_send_sges]);
+		rwqe_size = offsetof(struct ehca_wqe,
+				     u.nud.sg_list[parms.act_nr_recv_sges]);
+		break;
+
+	case IB_QPT_UD:
+	case IB_QPT_GSI:
+	case IB_QPT_SMI:
+		/* UD circumvention */
+		parms.act_nr_recv_sges -= 2;
+		parms.act_nr_send_sges -= 2;
+		if (isdaqp) {
+		        swqe_size = da_ud_sq_msg_size[max_send_sge];
+			rwqe_size = da_rc_msg_size[max_recv_sge];
+			parms.act_nr_send_sges = 1;
+			parms.act_nr_recv_sges = 1;
+		} else {
+			swqe_size = offsetof(struct ehca_wqe,
+					     u.ud_av.sg_list[parms.act_nr_send_sges]);
+			rwqe_size = offsetof(struct ehca_wqe,
+					     u.ud_av.sg_list[parms.act_nr_recv_sges]);
+		}
+
+		if (IB_QPT_GSI == init_attr->qp_type ||
+		    IB_QPT_SMI == init_attr->qp_type) {
+			parms.act_nr_send_wqes = init_attr->cap.max_send_wr;
+			parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr;
+			parms.act_nr_send_sges = init_attr->cap.max_send_sge;
+			parms.act_nr_recv_sges = init_attr->cap.max_recv_sge;
+			my_qp->real_qp_num =
+				(init_attr->qp_type == IB_QPT_SMI) ? 0 : 1;
+		}
+
+		break;
+
+	default:
+		break;
+	}
+
+	/* initializes r/squeue and registers queue pages */
+	ret = init_qp_queues(shca, my_qp,
+			     parms.nr_sq_pages, parms.nr_rq_pages,
+			     swqe_size, rwqe_size,
+			     parms.act_nr_send_sges, parms.act_nr_recv_sges);
+	if (ret) {
+		ehca_err(pd->device,
+			 "Couldn't initialize r/squeue and pages ret=%x", ret);
+		goto create_qp_exit2;
+	}
+
+	my_qp->ib_qp.pd = &my_pd->ib_pd;
+	my_qp->ib_qp.device = my_pd->ib_pd.device;
+
+	my_qp->ib_qp.recv_cq = init_attr->recv_cq;
+	my_qp->ib_qp.send_cq = init_attr->send_cq;
+
+	my_qp->ib_qp.qp_num = my_qp->real_qp_num;
+	my_qp->ib_qp.qp_type = init_attr->qp_type;
+
+	my_qp->qp_type = init_attr->qp_type;
+	my_qp->ib_qp.srq = init_attr->srq;
+
+	my_qp->ib_qp.qp_context = init_attr->qp_context;
+	my_qp->ib_qp.event_handler = init_attr->event_handler;
+
+	init_attr->cap.max_inline_data = 0; /* not supported yet */
+	init_attr->cap.max_recv_sge = parms.act_nr_recv_sges;
+	init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes;
+	init_attr->cap.max_send_sge = parms.act_nr_send_sges;
+	init_attr->cap.max_send_wr = parms.act_nr_send_wqes;
+
+	/* NOTE: define_apq0() not supported yet */
+	if (init_attr->qp_type == IB_QPT_GSI) {
+		h_ret = ehca_define_sqp(shca, my_qp, init_attr);
+		if (h_ret != H_SUCCESS) {
+			ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx",
+				 h_ret);
+			ret = ehca2ib_return_code(h_ret);
+			goto create_qp_exit3;
+		}
+	}
+	if (init_attr->send_cq) {
+		struct ehca_cq *cq = container_of(init_attr->send_cq,
+						  struct ehca_cq, ib_cq);
+		ret = ehca_cq_assign_qp(cq, my_qp);
+		if (ret) {
+			ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x",
+				 ret);
+			goto create_qp_exit3;
+		}
+		my_qp->send_cq = cq;
+	}
+	/* copy queues, galpa data to user space */
+	if (context && udata) {
+		struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue;
+		struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue;
+		struct ehca_create_qp_resp resp;
+		struct vm_area_struct * vma;
+		memset(&resp, 0, sizeof(resp));
+
+		resp.qp_num = my_qp->real_qp_num;
+		resp.token = my_qp->token;
+		resp.qp_type = my_qp->qp_type;
+		resp.qkey = my_qp->qkey;
+		resp.real_qp_num = my_qp->real_qp_num;
+		/* rqueue properties */
+		resp.ipz_rqueue.qe_size = ipz_rqueue->qe_size;
+		resp.ipz_rqueue.act_nr_of_sg = ipz_rqueue->act_nr_of_sg;
+		resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length;
+		resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize;
+		resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state;
+		ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x22000000,
+				       ipz_rqueue->queue_length,
+				       (void**)&resp.ipz_rqueue.queue,
+				       &vma);
+		if (ret) {
+			ehca_err(pd->device, "Could not mmap rqueue pages");
+			goto create_qp_exit3;
+		}
+		my_qp->uspace_rqueue = resp.ipz_rqueue.queue;
+		/* squeue properties */
+		resp.ipz_squeue.qe_size = ipz_squeue->qe_size;
+		resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg;
+		resp.ipz_squeue.queue_length = ipz_squeue->queue_length;
+		resp.ipz_squeue.pagesize = ipz_squeue->pagesize;
+		resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state;
+		ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x23000000,
+				       ipz_squeue->queue_length,
+				       (void**)&resp.ipz_squeue.queue,
+				       &vma);
+		if (ret) {
+			ehca_err(pd->device, "Could not mmap squeue pages");
+			goto create_qp_exit4;
+		}
+		my_qp->uspace_squeue = resp.ipz_squeue.queue;
+		/* fw_handle */
+		resp.galpas = my_qp->galpas;
+		ret = ehca_mmap_register(my_qp->galpas.user.fw_handle,
+					 (void**)&resp.galpas.kernel.fw_handle,
+					 &vma);
+		if (ret) {
+			ehca_err(pd->device, "Could not mmap fw_handle");
+			goto create_qp_exit5;
+		}
+		my_qp->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
+
+		if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
+			ehca_err(pd->device, "Copy to udata failed");
+			ret = -EINVAL;
+			goto create_qp_exit6;
+		}
+	}
+
+	return &my_qp->ib_qp;
+
+create_qp_exit6:
+	ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE);
+
+create_qp_exit5:
+	ehca_munmap(my_qp->uspace_squeue, my_qp->ipz_squeue.queue_length);
+
+create_qp_exit4:
+	ehca_munmap(my_qp->uspace_rqueue, my_qp->ipz_rqueue.queue_length);
+
+create_qp_exit3:
+	ipz_queue_dtor(&my_qp->ipz_rqueue);
+	ipz_queue_dtor(&my_qp->ipz_squeue);
+
+create_qp_exit2:
+	hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
+
+create_qp_exit1:
+	spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+	idr_remove(&ehca_qp_idr, my_qp->token);
+	spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+create_qp_exit0:
+	kmem_cache_free(qp_cache, my_qp);
+	return ERR_PTR(ret);
+}
+
+/*
+ * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts
+ * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe
+ * returns total number of bad wqes in bad_wqe_cnt
+ */
+static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
+			   int *bad_wqe_cnt)
+{
+	u64 h_ret;
+	struct ipz_queue *squeue;
+	void *bad_send_wqe_p, *bad_send_wqe_v;
+	void *squeue_start_p, *squeue_end_p;
+	void *squeue_start_v, *squeue_end_v;
+	struct ehca_wqe *wqe;
+	int qp_num = my_qp->ib_qp.qp_num;
+
+	/* get send wqe pointer */
+	h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
+					   my_qp->ipz_qp_handle, &my_qp->pf,
+					   &bad_send_wqe_p, NULL, 2);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed"
+			 " ehca_qp=%p qp_num=%x h_ret=%lx",
+			 my_qp, qp_num, h_ret);
+		return ehca2ib_return_code(h_ret);
+	}
+	bad_send_wqe_p = (void*)((u64)bad_send_wqe_p & (~(1L<<63)));
+	ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p",
+		 qp_num, bad_send_wqe_p);
+	/* convert wqe pointer to vadr */
+	bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p);
+	if (ehca_debug_level)
+		ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num);
+	squeue = &my_qp->ipz_squeue;
+	squeue_start_p = (void*)virt_to_abs(ipz_qeit_calc(squeue, 0L));
+	squeue_end_p = squeue_start_p+squeue->queue_length;
+	squeue_start_v = abs_to_virt((u64)squeue_start_p);
+	squeue_end_v = abs_to_virt((u64)squeue_end_p);
+	ehca_dbg(&shca->ib_device, "qp_num=%x squeue_start_v=%p squeue_end_v=%p",
+		 qp_num, squeue_start_v, squeue_end_v);
+
+	/* loop sets wqe's purge bit */
+	wqe = (struct ehca_wqe*)bad_send_wqe_v;
+	*bad_wqe_cnt = 0;
+	while (wqe->optype != 0xff && wqe->wqef != 0xff) {
+		if (ehca_debug_level)
+			ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num);
+		wqe->nr_of_data_seg = 0; /* suppress data access */
+		wqe->wqef = WQEF_PURGE; /* WQE to be purged */
+		wqe = (struct ehca_wqe*)((u8*)wqe+squeue->qe_size);
+		*bad_wqe_cnt = (*bad_wqe_cnt)+1;
+		if ((void*)wqe >= squeue_end_v) {
+			wqe = squeue_start_v;
+		}
+	}
+	/*
+	 * bad wqe will be reprocessed and ignored when pol_cq() is called,
+	 *  i.e. nr of wqes with flush error status is one less
+	 */
+	ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x",
+		 qp_num, (*bad_wqe_cnt)-1);
+	wqe->wqef = 0;
+
+	return 0;
+}
+
+/*
+ * internal_modify_qp with circumvention to handle aqp0 properly
+ * smi_reset2init indicates if this is an internal reset-to-init-call for
+ * smi. This flag must always be zero if called from ehca_modify_qp()!
+ * This internal func was intorduced to avoid recursion of ehca_modify_qp()!
+ */
+static int internal_modify_qp(struct ib_qp *ibqp,
+			      struct ib_qp_attr *attr,
+			      int attr_mask, int smi_reset2init)
+{
+	enum ib_qp_state qp_cur_state, qp_new_state;
+	int cnt, qp_attr_idx, ret = 0;
+	enum ib_qp_statetrans statetrans;
+	struct hcp_modify_qp_control_block *mqpcb;
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_shca *shca =
+		container_of(ibqp->pd->device, struct ehca_shca, ib_device);
+	u64 update_mask;
+	u64 h_ret;
+	int bad_wqe_cnt = 0;
+	int squeue_locked = 0;
+	unsigned long spl_flags = 0;
+
+	/* do query_qp to obtain current attr values */
+	mqpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (mqpcb == NULL) {
+		ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
+			 "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
+		return -ENOMEM;
+	}
+
+	h_ret = hipz_h_query_qp(shca->ipz_hca_handle,
+				my_qp->ipz_qp_handle,
+				&my_qp->pf,
+				mqpcb, my_qp->galpas.kernel);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(ibqp->device, "hipz_h_query_qp() failed "
+			 "ehca_qp=%p qp_num=%x h_ret=%lx",
+			 my_qp, ibqp->qp_num, h_ret);
+		ret = ehca2ib_return_code(h_ret);
+		goto modify_qp_exit1;
+	}
+
+	qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
+
+	if (qp_cur_state == -EINVAL) {	/* invalid qp state */
+		ret = -EINVAL;
+		ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x "
+			 "ehca_qp=%p qp_num=%x",
+			 mqpcb->qp_state, my_qp, ibqp->qp_num);
+		goto modify_qp_exit1;
+	}
+	/*
+	 * circumvention to set aqp0 initial state to init
+	 * as expected by IB spec
+	 */
+	if (smi_reset2init == 0 &&
+	    ibqp->qp_type == IB_QPT_SMI &&
+	    qp_cur_state == IB_QPS_RESET &&
+	    (attr_mask & IB_QP_STATE) &&
+	    attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */
+		struct ib_qp_attr smiqp_attr = {
+			.qp_state = IB_QPS_INIT,
+			.port_num = my_qp->init_attr.port_num,
+			.pkey_index = 0,
+			.qkey = 0
+		};
+		int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT |
+			IB_QP_PKEY_INDEX | IB_QP_QKEY;
+		int smirc = internal_modify_qp(
+			ibqp, &smiqp_attr, smiqp_attr_mask, 1);
+		if (smirc) {
+			ehca_err(ibqp->device, "SMI RESET -> INIT failed. "
+				 "ehca_modify_qp() rc=%x", smirc);
+			ret = H_PARAMETER;
+			goto modify_qp_exit1;
+		}
+		qp_cur_state = IB_QPS_INIT;
+		ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded");
+	}
+	/* is transmitted current state  equal to "real" current state */
+	if ((attr_mask & IB_QP_CUR_STATE) &&
+	    qp_cur_state != attr->cur_qp_state) {
+		ret = -EINVAL;
+		ehca_err(ibqp->device,
+			 "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>"
+			 " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x",
+			 attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num);
+		goto modify_qp_exit1;
+	}
+
+	ehca_dbg(ibqp->device,"ehca_qp=%p qp_num=%x current qp_state=%x "
+		 "new qp_state=%x attribute_mask=%x",
+		 my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask);
+
+	qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state;
+	if (!smi_reset2init &&
+	    !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type,
+				attr_mask)) {
+		ret = -EINVAL;
+		ehca_err(ibqp->device,
+			 "Invalid qp transition new_state=%x cur_state=%x "
+			 "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state,
+			 qp_cur_state, my_qp, ibqp->qp_num, attr_mask);
+		goto modify_qp_exit1;
+	}
+
+	if ((mqpcb->qp_state = ib2ehca_qp_state(qp_new_state)))
+		update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
+	else {
+		ret = -EINVAL;
+		ehca_err(ibqp->device, "Invalid new qp state=%x "
+			 "ehca_qp=%p qp_num=%x",
+			 qp_new_state, my_qp, ibqp->qp_num);
+		goto modify_qp_exit1;
+	}
+
+	/* retrieve state transition struct to get req and opt attrs */
+	statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state);
+	if (statetrans < 0) {
+		ret = -EINVAL;
+		ehca_err(ibqp->device, "<INVALID STATE CHANGE> qp_cur_state=%x "
+			 "new_qp_state=%x State_xsition=%x ehca_qp=%p "
+			 "qp_num=%x", qp_cur_state, qp_new_state,
+			 statetrans, my_qp, ibqp->qp_num);
+		goto modify_qp_exit1;
+	}
+
+	qp_attr_idx = ib2ehcaqptype(ibqp->qp_type);
+
+	if (qp_attr_idx < 0) {
+		ret = qp_attr_idx;
+		ehca_err(ibqp->device,
+			 "Invalid QP type=%x ehca_qp=%p qp_num=%x",
+			 ibqp->qp_type, my_qp, ibqp->qp_num);
+		goto modify_qp_exit1;
+	}
+
+	ehca_dbg(ibqp->device,
+		 "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x",
+		 my_qp, ibqp->qp_num, statetrans);
+
+	/* sqe -> rts: set purge bit of bad wqe before actual trans */
+	if ((my_qp->qp_type == IB_QPT_UD ||
+	     my_qp->qp_type == IB_QPT_GSI ||
+	     my_qp->qp_type == IB_QPT_SMI) &&
+	    statetrans == IB_QPST_SQE2RTS) {
+		/* mark next free wqe if kernel */
+		if (my_qp->uspace_squeue == 0) {
+			struct ehca_wqe *wqe;
+			/* lock send queue */
+			spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
+			squeue_locked = 1;
+			/* mark next free wqe */
+			wqe = (struct ehca_wqe*)
+				ipz_qeit_get(&my_qp->ipz_squeue);
+			wqe->optype = wqe->wqef = 0xff;
+			ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p",
+				 ibqp->qp_num, wqe);
+		}
+		ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt);
+		if (ret) {
+			ehca_err(ibqp->device, "prepare_sqe_rts() failed "
+				 "ehca_qp=%p qp_num=%x ret=%x",
+				 my_qp, ibqp->qp_num, ret);
+			goto modify_qp_exit2;
+		}
+	}
+
+	/*
+	 * enable RDMA_Atomic_Control if reset->init und reliable con
+	 * this is necessary since gen2 does not provide that flag,
+	 * but pHyp requires it
+	 */
+	if (statetrans == IB_QPST_RESET2INIT &&
+	    (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) {
+		mqpcb->rdma_atomic_ctrl = 3;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1);
+	}
+	/* circ. pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */
+	if (statetrans == IB_QPST_INIT2RTR &&
+	    (ibqp->qp_type == IB_QPT_UC) &&
+	    !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) {
+		mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
+	}
+
+	if (attr_mask & IB_QP_PKEY_INDEX) {
+		mqpcb->prim_p_key_idx = attr->pkey_index;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
+	}
+	if (attr_mask & IB_QP_PORT) {
+		if (attr->port_num < 1 || attr->port_num > shca->num_ports) {
+			ret = -EINVAL;
+			ehca_err(ibqp->device, "Invalid port=%x. "
+				 "ehca_qp=%p qp_num=%x num_ports=%x",
+				 attr->port_num, my_qp, ibqp->qp_num,
+				 shca->num_ports);
+			goto modify_qp_exit2;
+		}
+		mqpcb->prim_phys_port = attr->port_num;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1);
+	}
+	if (attr_mask & IB_QP_QKEY) {
+		mqpcb->qkey = attr->qkey;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1);
+	}
+	if (attr_mask & IB_QP_AV) {
+		int ah_mult = ib_rate_to_mult(attr->ah_attr.static_rate);
+		int ehca_mult = ib_rate_to_mult(shca->sport[my_qp->
+						init_attr.port_num].rate);
+
+		mqpcb->dlid = attr->ah_attr.dlid;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1);
+		mqpcb->source_path_bits = attr->ah_attr.src_path_bits;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1);
+		mqpcb->service_level = attr->ah_attr.sl;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1);
+
+		if (ah_mult < ehca_mult)
+			mqpcb->max_static_rate = (ah_mult > 0) ?
+			((ehca_mult - 1) / ah_mult) : 0;
+		else
+			mqpcb->max_static_rate = 0;
+
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1);
+
+		/*
+		 * only if GRH is TRUE we might consider SOURCE_GID_IDX
+		 * and DEST_GID otherwise phype will return H_ATTR_PARM!!!
+		 */
+		if (attr->ah_attr.ah_flags == IB_AH_GRH) {
+			mqpcb->send_grh_flag = 1 << 31;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
+			mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1);
+
+			for (cnt = 0; cnt < 16; cnt++)
+				mqpcb->dest_gid.byte[cnt] =
+					attr->ah_attr.grh.dgid.raw[cnt];
+
+			update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1);
+			mqpcb->flow_label = attr->ah_attr.grh.flow_label;
+			update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1);
+			mqpcb->hop_limit = attr->ah_attr.grh.hop_limit;
+			update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1);
+			mqpcb->traffic_class = attr->ah_attr.grh.traffic_class;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1);
+		}
+	}
+
+	if (attr_mask & IB_QP_PATH_MTU) {
+		mqpcb->path_mtu = attr->path_mtu;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1);
+	}
+	if (attr_mask & IB_QP_TIMEOUT) {
+		mqpcb->timeout = attr->timeout;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1);
+	}
+	if (attr_mask & IB_QP_RETRY_CNT) {
+		mqpcb->retry_count = attr->retry_cnt;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1);
+	}
+	if (attr_mask & IB_QP_RNR_RETRY) {
+		mqpcb->rnr_retry_count = attr->rnr_retry;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1);
+	}
+	if (attr_mask & IB_QP_RQ_PSN) {
+		mqpcb->receive_psn = attr->rq_psn;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1);
+	}
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+		mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ?
+			attr->max_dest_rd_atomic : 2;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
+	}
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+		mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ?
+			attr->max_rd_atomic : 2;
+		update_mask |=
+			EHCA_BMASK_SET
+			(MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1);
+	}
+	if (attr_mask & IB_QP_ALT_PATH) {
+		int ah_mult = ib_rate_to_mult(attr->alt_ah_attr.static_rate);
+		int ehca_mult = ib_rate_to_mult(
+			shca->sport[my_qp->init_attr.port_num].rate);
+
+		mqpcb->dlid_al = attr->alt_ah_attr.dlid;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1);
+		mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1);
+		mqpcb->service_level_al = attr->alt_ah_attr.sl;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1);
+
+		if (ah_mult < ehca_mult)
+			mqpcb->max_static_rate = (ah_mult > 0) ?
+			((ehca_mult - 1) / ah_mult) : 0;
+		else
+			mqpcb->max_static_rate_al = 0;
+
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1);
+
+		/*
+		 * only if GRH is TRUE we might consider SOURCE_GID_IDX
+		 * and DEST_GID otherwise phype will return H_ATTR_PARM!!!
+		 */
+		if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) {
+			mqpcb->send_grh_flag_al = 1 << 31;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
+			mqpcb->source_gid_idx_al =
+				attr->alt_ah_attr.grh.sgid_index;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1);
+
+			for (cnt = 0; cnt < 16; cnt++)
+				mqpcb->dest_gid_al.byte[cnt] =
+					attr->alt_ah_attr.grh.dgid.raw[cnt];
+
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1);
+			mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1);
+			mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1);
+			mqpcb->traffic_class_al =
+				attr->alt_ah_attr.grh.traffic_class;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1);
+		}
+	}
+
+	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
+		mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1);
+	}
+
+	if (attr_mask & IB_QP_SQ_PSN) {
+		mqpcb->send_psn = attr->sq_psn;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1);
+	}
+
+	if (attr_mask & IB_QP_DEST_QPN) {
+		mqpcb->dest_qp_nr = attr->dest_qp_num;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1);
+	}
+
+	if (attr_mask & IB_QP_PATH_MIG_STATE) {
+		mqpcb->path_migration_state = attr->path_mig_state;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
+	}
+
+	if (attr_mask & IB_QP_CAP) {
+		mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1);
+		mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1);
+		/* no support for max_send/recv_sge yet */
+	}
+
+	if (ehca_debug_level)
+		ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num);
+
+	h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
+				 my_qp->ipz_qp_handle,
+				 &my_qp->pf,
+				 update_mask,
+				 mqpcb, my_qp->galpas.kernel);
+
+	if (h_ret != H_SUCCESS) {
+		ret = ehca2ib_return_code(h_ret);
+		ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx "
+			 "ehca_qp=%p qp_num=%x",h_ret, my_qp, ibqp->qp_num);
+		goto modify_qp_exit2;
+	}
+
+	if ((my_qp->qp_type == IB_QPT_UD ||
+	     my_qp->qp_type == IB_QPT_GSI ||
+	     my_qp->qp_type == IB_QPT_SMI) &&
+	    statetrans == IB_QPST_SQE2RTS) {
+		/* doorbell to reprocessing wqes */
+		iosync(); /* serialize GAL register access */
+		hipz_update_sqa(my_qp, bad_wqe_cnt-1);
+		ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt);
+	}
+
+	if (statetrans == IB_QPST_RESET2INIT ||
+	    statetrans == IB_QPST_INIT2INIT) {
+		mqpcb->qp_enable = 1;
+		mqpcb->qp_state = EHCA_QPS_INIT;
+		update_mask = 0;
+		update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
+
+		h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
+					 my_qp->ipz_qp_handle,
+					 &my_qp->pf,
+					 update_mask,
+					 mqpcb,
+					 my_qp->galpas.kernel);
+
+		if (h_ret != H_SUCCESS) {
+			ret = ehca2ib_return_code(h_ret);
+			ehca_err(ibqp->device, "ENABLE in context of "
+				 "RESET_2_INIT failed! Maybe you didn't get "
+				 "a LID h_ret=%lx ehca_qp=%p qp_num=%x",
+				 h_ret, my_qp, ibqp->qp_num);
+			goto modify_qp_exit2;
+		}
+	}
+
+	if (statetrans == IB_QPST_ANY2RESET) {
+		ipz_qeit_reset(&my_qp->ipz_rqueue);
+		ipz_qeit_reset(&my_qp->ipz_squeue);
+	}
+
+	if (attr_mask & IB_QP_QKEY)
+		my_qp->qkey = attr->qkey;
+
+modify_qp_exit2:
+	if (squeue_locked) { /* this means: sqe -> rts */
+		spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
+		my_qp->sqerr_purgeflag = 1;
+	}
+
+modify_qp_exit1:
+	kfree(mqpcb);
+
+	return ret;
+}
+
+int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		   struct ib_udata *udata)
+{
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
+					     ib_pd);
+	u32 cur_pid = current->tgid;
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != cur_pid) {
+		ehca_err(ibqp->pd->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	return internal_modify_qp(ibqp, attr, attr_mask, 0);
+}
+
+int ehca_query_qp(struct ib_qp *qp,
+		  struct ib_qp_attr *qp_attr,
+		  int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+	struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
+					     ib_pd);
+	struct ehca_shca *shca = container_of(qp->device, struct ehca_shca,
+					      ib_device);
+	struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
+	struct hcp_modify_qp_control_block *qpcb;
+	u32 cur_pid = current->tgid;
+	int cnt, ret = 0;
+	u64 h_ret;
+
+	if (my_pd->ib_pd.uobject  && my_pd->ib_pd.uobject->context  &&
+	    my_pd->ownpid != cur_pid) {
+		ehca_err(qp->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) {
+		ehca_err(qp->device,"Invalid attribute mask "
+			 "ehca_qp=%p qp_num=%x qp_attr_mask=%x ",
+			 my_qp, qp->qp_num, qp_attr_mask);
+		return -EINVAL;
+	}
+
+	qpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL );
+	if (!qpcb) {
+		ehca_err(qp->device,"Out of memory for qpcb "
+			 "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num);
+		return -ENOMEM;
+	}
+
+	h_ret = hipz_h_query_qp(adapter_handle,
+				my_qp->ipz_qp_handle,
+				&my_qp->pf,
+				qpcb, my_qp->galpas.kernel);
+
+	if (h_ret != H_SUCCESS) {
+		ret = ehca2ib_return_code(h_ret);
+		ehca_err(qp->device,"hipz_h_query_qp() failed "
+			 "ehca_qp=%p qp_num=%x h_ret=%lx",
+			 my_qp, qp->qp_num, h_ret);
+		goto query_qp_exit1;
+	}
+
+	qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state);
+	qp_attr->qp_state = qp_attr->cur_qp_state;
+
+	if (qp_attr->cur_qp_state == -EINVAL) {
+		ret = -EINVAL;
+		ehca_err(qp->device,"Got invalid ehca_qp_state=%x "
+			 "ehca_qp=%p qp_num=%x",
+			 qpcb->qp_state, my_qp, qp->qp_num);
+		goto query_qp_exit1;
+	}
+
+	if (qp_attr->qp_state == IB_QPS_SQD)
+		qp_attr->sq_draining = 1;
+
+	qp_attr->qkey = qpcb->qkey;
+	qp_attr->path_mtu = qpcb->path_mtu;
+	qp_attr->path_mig_state = qpcb->path_migration_state;
+	qp_attr->rq_psn = qpcb->receive_psn;
+	qp_attr->sq_psn = qpcb->send_psn;
+	qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field;
+	qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1;
+	qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1;
+	/* UD_AV CIRCUMVENTION */
+	if (my_qp->qp_type == IB_QPT_UD) {
+		qp_attr->cap.max_send_sge =
+			qpcb->actual_nr_sges_in_sq_wqe - 2;
+		qp_attr->cap.max_recv_sge =
+			qpcb->actual_nr_sges_in_rq_wqe - 2;
+	} else {
+		qp_attr->cap.max_send_sge =
+			qpcb->actual_nr_sges_in_sq_wqe;
+		qp_attr->cap.max_recv_sge =
+			qpcb->actual_nr_sges_in_rq_wqe;
+	}
+
+	qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size;
+	qp_attr->dest_qp_num = qpcb->dest_qp_nr;
+
+	qp_attr->pkey_index =
+		EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->prim_p_key_idx);
+
+	qp_attr->port_num =
+		EHCA_BMASK_GET(MQPCB_PRIM_PHYS_PORT, qpcb->prim_phys_port);
+
+	qp_attr->timeout = qpcb->timeout;
+	qp_attr->retry_cnt = qpcb->retry_count;
+	qp_attr->rnr_retry = qpcb->rnr_retry_count;
+
+	qp_attr->alt_pkey_index =
+		EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->alt_p_key_idx);
+
+	qp_attr->alt_port_num = qpcb->alt_phys_port;
+	qp_attr->alt_timeout = qpcb->timeout_al;
+
+	/* primary av */
+	qp_attr->ah_attr.sl = qpcb->service_level;
+
+	if (qpcb->send_grh_flag) {
+		qp_attr->ah_attr.ah_flags = IB_AH_GRH;
+	}
+
+	qp_attr->ah_attr.static_rate = qpcb->max_static_rate;
+	qp_attr->ah_attr.dlid = qpcb->dlid;
+	qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits;
+	qp_attr->ah_attr.port_num = qp_attr->port_num;
+
+	/* primary GRH */
+	qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class;
+	qp_attr->ah_attr.grh.hop_limit = qpcb->hop_limit;
+	qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx;
+	qp_attr->ah_attr.grh.flow_label = qpcb->flow_label;
+
+	for (cnt = 0; cnt < 16; cnt++)
+		qp_attr->ah_attr.grh.dgid.raw[cnt] =
+			qpcb->dest_gid.byte[cnt];
+
+	/* alternate AV */
+	qp_attr->alt_ah_attr.sl = qpcb->service_level_al;
+	if (qpcb->send_grh_flag_al) {
+		qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH;
+	}
+
+	qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al;
+	qp_attr->alt_ah_attr.dlid = qpcb->dlid_al;
+	qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al;
+
+	/* alternate GRH */
+	qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al;
+	qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al;
+	qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al;
+	qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al;
+
+	for (cnt = 0; cnt < 16; cnt++)
+		qp_attr->alt_ah_attr.grh.dgid.raw[cnt] =
+			qpcb->dest_gid_al.byte[cnt];
+
+	/* return init attributes given in ehca_create_qp */
+	if (qp_init_attr)
+		*qp_init_attr = my_qp->init_attr;
+
+	if (ehca_debug_level)
+		ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num);
+
+query_qp_exit1:
+	kfree(qpcb);
+
+	return ret;
+}
+
+int ehca_destroy_qp(struct ib_qp *ibqp)
+{
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+					      ib_device);
+	struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
+					     ib_pd);
+	u32 cur_pid = current->tgid;
+	u32 qp_num = ibqp->qp_num;
+	int ret;
+	u64 h_ret;
+	u8 port_num;
+	enum ib_qp_type	qp_type;
+	unsigned long flags;
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != cur_pid) {
+		ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	if (my_qp->send_cq) {
+		ret = ehca_cq_unassign_qp(my_qp->send_cq,
+					      my_qp->real_qp_num);
+		if (ret) {
+			ehca_err(ibqp->device, "Couldn't unassign qp from "
+				 "send_cq ret=%x qp_num=%x cq_num=%x", ret,
+				 my_qp->ib_qp.qp_num, my_qp->send_cq->cq_number);
+			return ret;
+		}
+	}
+
+	spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+	idr_remove(&ehca_qp_idr, my_qp->token);
+	spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+	/* un-mmap if vma alloc */
+	if (my_qp->uspace_rqueue) {
+		ret = ehca_munmap(my_qp->uspace_rqueue,
+				  my_qp->ipz_rqueue.queue_length);
+		if (ret)
+			ehca_err(ibqp->device, "Could not munmap rqueue "
+				 "qp_num=%x", qp_num);
+		ret = ehca_munmap(my_qp->uspace_squeue,
+				  my_qp->ipz_squeue.queue_length);
+		if (ret)
+			ehca_err(ibqp->device, "Could not munmap squeue "
+				 "qp_num=%x", qp_num);
+		ret = ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE);
+		if (ret)
+			ehca_err(ibqp->device, "Could not munmap fwh qp_num=%x",
+				 qp_num);
+	}
+
+	h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx "
+			 "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
+		return ehca2ib_return_code(h_ret);
+	}
+
+	port_num = my_qp->init_attr.port_num;
+	qp_type  = my_qp->init_attr.qp_type;
+
+	/* no support for IB_QPT_SMI yet */
+	if (qp_type == IB_QPT_GSI) {
+		struct ib_event event;
+		ehca_info(ibqp->device, "device %s: port %x is inactive.",
+			  shca->ib_device.name, port_num);
+		event.device = &shca->ib_device;
+		event.event = IB_EVENT_PORT_ERR;
+		event.element.port_num = port_num;
+		shca->sport[port_num - 1].port_state = IB_PORT_DOWN;
+		ib_dispatch_event(&event);
+	}
+
+	ipz_queue_dtor(&my_qp->ipz_rqueue);
+	ipz_queue_dtor(&my_qp->ipz_squeue);
+	kmem_cache_free(qp_cache, my_qp);
+	return 0;
+}
+
+int ehca_init_qp_cache(void)
+{
+	qp_cache = kmem_cache_create("ehca_cache_qp",
+				     sizeof(struct ehca_qp), 0,
+				     SLAB_HWCACHE_ALIGN,
+				     NULL, NULL);
+	if (!qp_cache)
+		return -ENOMEM;
+	return 0;
+}
+
+void ehca_cleanup_qp_cache(void)
+{
+	if (qp_cache)
+		kmem_cache_destroy(qp_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
new file mode 100644
index 00000000000..b46bda1bf85
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -0,0 +1,653 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  post_send/recv, poll_cq, req_notify
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <asm-powerpc/system.h>
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+#include "hipz_fns.h"
+
+static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
+				  struct ehca_wqe *wqe_p,
+				  struct ib_recv_wr *recv_wr)
+{
+	u8 cnt_ds;
+	if (unlikely((recv_wr->num_sge < 0) ||
+		     (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) {
+		ehca_gen_err("Invalid number of WQE SGE. "
+			 "num_sqe=%x max_nr_of_sg=%x",
+			 recv_wr->num_sge, ipz_rqueue->act_nr_of_sg);
+		return -EINVAL; /* invalid SG list length */
+	}
+
+	/* clear wqe header until sglist */
+	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
+
+	wqe_p->work_request_id = recv_wr->wr_id;
+	wqe_p->nr_of_data_seg = recv_wr->num_sge;
+
+	for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
+		wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr =
+			recv_wr->sg_list[cnt_ds].addr;
+		wqe_p->u.all_rcv.sg_list[cnt_ds].lkey =
+			recv_wr->sg_list[cnt_ds].lkey;
+		wqe_p->u.all_rcv.sg_list[cnt_ds].length =
+			recv_wr->sg_list[cnt_ds].length;
+	}
+
+	if (ehca_debug_level) {
+		ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue);
+		ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
+	}
+
+	return 0;
+}
+
+#if defined(DEBUG_GSI_SEND_WR)
+
+/* need ib_mad struct */
+#include <rdma/ib_mad.h>
+
+static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
+{
+	int idx;
+	int j;
+	while (send_wr) {
+		struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr;
+		struct ib_sge *sge = send_wr->sg_list;
+		ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x "
+			     "send_flags=%x opcode=%x",idx, send_wr->wr_id,
+			     send_wr->num_sge, send_wr->send_flags,
+			     send_wr->opcode);
+		if (mad_hdr) {
+			ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x "
+				     "mgmt_class=%x class_version=%x method=%x "
+				     "status=%x class_specific=%x tid=%lx "
+				     "attr_id=%x resv=%x attr_mod=%x",
+				     idx, mad_hdr->base_version,
+				     mad_hdr->mgmt_class,
+				     mad_hdr->class_version, mad_hdr->method,
+				     mad_hdr->status, mad_hdr->class_specific,
+				     mad_hdr->tid, mad_hdr->attr_id,
+				     mad_hdr->resv,
+				     mad_hdr->attr_mod);
+		}
+		for (j = 0; j < send_wr->num_sge; j++) {
+			u8 *data = (u8 *) abs_to_virt(sge->addr);
+			ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "
+				     "lkey=%x",
+				     idx, j, data, sge->length, sge->lkey);
+			/* assume length is n*16 */
+			ehca_dmp(data, sge->length, "send_wr#%x sge#%x",
+				 idx, j);
+			sge++;
+		} /* eof for j */
+		idx++;
+		send_wr = send_wr->next;
+	} /* eof while send_wr */
+}
+
+#endif /* DEBUG_GSI_SEND_WR */
+
+static inline int ehca_write_swqe(struct ehca_qp *qp,
+				  struct ehca_wqe *wqe_p,
+				  const struct ib_send_wr *send_wr)
+{
+	u32 idx;
+	u64 dma_length;
+	struct ehca_av *my_av;
+	u32 remote_qkey = send_wr->wr.ud.remote_qkey;
+
+	if (unlikely((send_wr->num_sge < 0) ||
+		     (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
+		ehca_gen_err("Invalid number of WQE SGE. "
+			 "num_sqe=%x max_nr_of_sg=%x",
+			 send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg);
+		return -EINVAL; /* invalid SG list length */
+	}
+
+	/* clear wqe header until sglist */
+	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
+
+	wqe_p->work_request_id = send_wr->wr_id;
+
+	switch (send_wr->opcode) {
+	case IB_WR_SEND:
+	case IB_WR_SEND_WITH_IMM:
+		wqe_p->optype = WQE_OPTYPE_SEND;
+		break;
+	case IB_WR_RDMA_WRITE:
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		wqe_p->optype = WQE_OPTYPE_RDMAWRITE;
+		break;
+	case IB_WR_RDMA_READ:
+		wqe_p->optype = WQE_OPTYPE_RDMAREAD;
+		break;
+	default:
+		ehca_gen_err("Invalid opcode=%x", send_wr->opcode);
+		return -EINVAL; /* invalid opcode */
+	}
+
+	wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE;
+
+	wqe_p->wr_flag = 0;
+
+	if (send_wr->send_flags & IB_SEND_SIGNALED)
+		wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
+
+	if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
+	    send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
+		/* this might not work as long as HW does not support it */
+		wqe_p->immediate_data = be32_to_cpu(send_wr->imm_data);
+		wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
+	}
+
+	wqe_p->nr_of_data_seg = send_wr->num_sge;
+
+	switch (qp->qp_type) {
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+		/* no break is intential here */
+	case IB_QPT_UD:
+		/* IB 1.2 spec C10-15 compliance */
+		if (send_wr->wr.ud.remote_qkey & 0x80000000)
+			remote_qkey = qp->qkey;
+
+		wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8;
+		wqe_p->local_ee_context_qkey = remote_qkey;
+		if (!send_wr->wr.ud.ah) {
+			ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);
+			return -EINVAL;
+		}
+		my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah);
+		wqe_p->u.ud_av.ud_av = my_av->av;
+
+		/*
+		 * omitted check of IB_SEND_INLINE
+		 * since HW does not support it
+		 */
+		for (idx = 0; idx < send_wr->num_sge; idx++) {
+			wqe_p->u.ud_av.sg_list[idx].vaddr =
+				send_wr->sg_list[idx].addr;
+			wqe_p->u.ud_av.sg_list[idx].lkey =
+				send_wr->sg_list[idx].lkey;
+			wqe_p->u.ud_av.sg_list[idx].length =
+				send_wr->sg_list[idx].length;
+		} /* eof for idx */
+		if (qp->qp_type == IB_QPT_SMI ||
+		    qp->qp_type == IB_QPT_GSI)
+			wqe_p->u.ud_av.ud_av.pmtu = 1;
+		if (qp->qp_type == IB_QPT_GSI) {
+			wqe_p->pkeyi = send_wr->wr.ud.pkey_index;
+#ifdef DEBUG_GSI_SEND_WR
+			trace_send_wr_ud(send_wr);
+#endif /* DEBUG_GSI_SEND_WR */
+		}
+		break;
+
+	case IB_QPT_UC:
+		if (send_wr->send_flags & IB_SEND_FENCE)
+			wqe_p->wr_flag |= WQE_WRFLAG_FENCE;
+		/* no break is intentional here */
+	case IB_QPT_RC:
+		/* TODO: atomic not implemented */
+		wqe_p->u.nud.remote_virtual_adress =
+			send_wr->wr.rdma.remote_addr;
+		wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey;
+
+		/*
+		 * omitted checking of IB_SEND_INLINE
+		 * since HW does not support it
+		 */
+		dma_length = 0;
+		for (idx = 0; idx < send_wr->num_sge; idx++) {
+			wqe_p->u.nud.sg_list[idx].vaddr =
+				send_wr->sg_list[idx].addr;
+			wqe_p->u.nud.sg_list[idx].lkey =
+				send_wr->sg_list[idx].lkey;
+			wqe_p->u.nud.sg_list[idx].length =
+				send_wr->sg_list[idx].length;
+			dma_length += send_wr->sg_list[idx].length;
+		} /* eof idx */
+		wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;
+
+		break;
+
+	default:
+		ehca_gen_err("Invalid qptype=%x", qp->qp_type);
+		return -EINVAL;
+	}
+
+	if (ehca_debug_level) {
+		ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp);
+		ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe");
+	}
+	return 0;
+}
+
+/* map_ib_wc_status converts raw cqe_status to ib_wc_status */
+static inline void map_ib_wc_status(u32 cqe_status,
+				    enum ib_wc_status *wc_status)
+{
+	if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) {
+		switch (cqe_status & 0x3F) {
+		case 0x01:
+		case 0x21:
+			*wc_status = IB_WC_LOC_LEN_ERR;
+			break;
+		case 0x02:
+		case 0x22:
+			*wc_status = IB_WC_LOC_QP_OP_ERR;
+			break;
+		case 0x03:
+		case 0x23:
+			*wc_status = IB_WC_LOC_EEC_OP_ERR;
+			break;
+		case 0x04:
+		case 0x24:
+			*wc_status = IB_WC_LOC_PROT_ERR;
+			break;
+		case 0x05:
+		case 0x25:
+			*wc_status = IB_WC_WR_FLUSH_ERR;
+			break;
+		case 0x06:
+			*wc_status = IB_WC_MW_BIND_ERR;
+			break;
+		case 0x07: /* remote error - look into bits 20:24 */
+			switch ((cqe_status
+				 & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) {
+			case 0x0:
+				/*
+				 * PSN Sequence Error!
+				 * couldn't find a matching status!
+				 */
+				*wc_status = IB_WC_GENERAL_ERR;
+				break;
+			case 0x1:
+				*wc_status = IB_WC_REM_INV_REQ_ERR;
+				break;
+			case 0x2:
+				*wc_status = IB_WC_REM_ACCESS_ERR;
+				break;
+			case 0x3:
+				*wc_status = IB_WC_REM_OP_ERR;
+				break;
+			case 0x4:
+				*wc_status = IB_WC_REM_INV_RD_REQ_ERR;
+				break;
+			}
+			break;
+		case 0x08:
+			*wc_status = IB_WC_RETRY_EXC_ERR;
+			break;
+		case 0x09:
+			*wc_status = IB_WC_RNR_RETRY_EXC_ERR;
+			break;
+		case 0x0A:
+		case 0x2D:
+			*wc_status = IB_WC_REM_ABORT_ERR;
+			break;
+		case 0x0B:
+		case 0x2E:
+			*wc_status = IB_WC_INV_EECN_ERR;
+			break;
+		case 0x0C:
+		case 0x2F:
+			*wc_status = IB_WC_INV_EEC_STATE_ERR;
+			break;
+		case 0x0D:
+			*wc_status = IB_WC_BAD_RESP_ERR;
+			break;
+		case 0x10:
+			/* WQE purged */
+			*wc_status = IB_WC_WR_FLUSH_ERR;
+			break;
+		default:
+			*wc_status = IB_WC_FATAL_ERR;
+
+		}
+	} else
+		*wc_status = IB_WC_SUCCESS;
+}
+
+int ehca_post_send(struct ib_qp *qp,
+		   struct ib_send_wr *send_wr,
+		   struct ib_send_wr **bad_send_wr)
+{
+	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+	struct ib_send_wr *cur_send_wr;
+	struct ehca_wqe *wqe_p;
+	int wqe_cnt = 0;
+	int ret = 0;
+	unsigned long spl_flags;
+
+	/* LOCK the QUEUE */
+	spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
+
+	/* loop processes list of send reqs */
+	for (cur_send_wr = send_wr; cur_send_wr != NULL;
+	     cur_send_wr = cur_send_wr->next) {
+		u64 start_offset = my_qp->ipz_squeue.current_q_offset;
+		/* get pointer next to free WQE */
+		wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
+		if (unlikely(!wqe_p)) {
+			/* too many posted work requests: queue overflow */
+			if (bad_send_wr)
+				*bad_send_wr = cur_send_wr;
+			if (wqe_cnt == 0) {
+				ret = -ENOMEM;
+				ehca_err(qp->device, "Too many posted WQEs "
+					 "qp_num=%x", qp->qp_num);
+			}
+			goto post_send_exit0;
+		}
+		/* write a SEND WQE into the QUEUE */
+		ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr);
+		/*
+		 * if something failed,
+		 * reset the free entry pointer to the start value
+		 */
+		if (unlikely(ret)) {
+			my_qp->ipz_squeue.current_q_offset = start_offset;
+			*bad_send_wr = cur_send_wr;
+			if (wqe_cnt == 0) {
+				ret = -EINVAL;
+				ehca_err(qp->device, "Could not write WQE "
+					 "qp_num=%x", qp->qp_num);
+			}
+			goto post_send_exit0;
+		}
+		wqe_cnt++;
+		ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d",
+			 my_qp, qp->qp_num, wqe_cnt);
+	} /* eof for cur_send_wr */
+
+post_send_exit0:
+	/* UNLOCK the QUEUE */
+	spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
+	iosync(); /* serialize GAL register access */
+	hipz_update_sqa(my_qp, wqe_cnt);
+	return ret;
+}
+
+int ehca_post_recv(struct ib_qp *qp,
+		   struct ib_recv_wr *recv_wr,
+		   struct ib_recv_wr **bad_recv_wr)
+{
+	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+	struct ib_recv_wr *cur_recv_wr;
+	struct ehca_wqe *wqe_p;
+	int wqe_cnt = 0;
+	int ret = 0;
+	unsigned long spl_flags;
+
+	/* LOCK the QUEUE */
+	spin_lock_irqsave(&my_qp->spinlock_r, spl_flags);
+
+	/* loop processes list of send reqs */
+	for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
+	     cur_recv_wr = cur_recv_wr->next) {
+		u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
+		/* get pointer next to free WQE */
+		wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
+		if (unlikely(!wqe_p)) {
+			/* too many posted work requests: queue overflow */
+			if (bad_recv_wr)
+				*bad_recv_wr = cur_recv_wr;
+			if (wqe_cnt == 0) {
+				ret = -ENOMEM;
+				ehca_err(qp->device, "Too many posted WQEs "
+					 "qp_num=%x", qp->qp_num);
+			}
+			goto post_recv_exit0;
+		}
+		/* write a RECV WQE into the QUEUE */
+		ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr);
+		/*
+		 * if something failed,
+		 * reset the free entry pointer to the start value
+		 */
+		if (unlikely(ret)) {
+			my_qp->ipz_rqueue.current_q_offset = start_offset;
+			*bad_recv_wr = cur_recv_wr;
+			if (wqe_cnt == 0) {
+				ret = -EINVAL;
+				ehca_err(qp->device, "Could not write WQE "
+					 "qp_num=%x", qp->qp_num);
+			}
+			goto post_recv_exit0;
+		}
+		wqe_cnt++;
+		ehca_gen_dbg("ehca_qp=%p qp_num=%x wqe_cnt=%d",
+		     my_qp, qp->qp_num, wqe_cnt);
+	} /* eof for cur_recv_wr */
+
+post_recv_exit0:
+	spin_unlock_irqrestore(&my_qp->spinlock_r, spl_flags);
+	iosync(); /* serialize GAL register access */
+	hipz_update_rqa(my_qp, wqe_cnt);
+	return ret;
+}
+
+/*
+ * ib_wc_opcode table converts ehca wc opcode to ib
+ * Since we use zero to indicate invalid opcode, the actual ib opcode must
+ * be decremented!!!
+ */
+static const u8 ib_wc_opcode[255] = {
+	[0x01] = IB_WC_RECV+1,
+	[0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,
+	[0x04] = IB_WC_BIND_MW+1,
+	[0x08] = IB_WC_FETCH_ADD+1,
+	[0x10] = IB_WC_COMP_SWAP+1,
+	[0x20] = IB_WC_RDMA_WRITE+1,
+	[0x40] = IB_WC_RDMA_READ+1,
+	[0x80] = IB_WC_SEND+1
+};
+
+/* internal function to poll one entry of cq */
+static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
+{
+	int ret = 0;
+	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+	struct ehca_cqe *cqe;
+	int cqe_count = 0;
+
+poll_cq_one_read_cqe:
+	cqe = (struct ehca_cqe *)
+		ipz_qeit_get_inc_valid(&my_cq->ipz_queue);
+	if (!cqe) {
+		ret = -EAGAIN;
+		ehca_dbg(cq->device, "Completion queue is empty ehca_cq=%p "
+			 "cq_num=%x ret=%x", my_cq, my_cq->cq_number, ret);
+		goto  poll_cq_one_exit0;
+	}
+
+	/* prevents loads being reordered across this point */
+	rmb();
+
+	cqe_count++;
+	if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
+		struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number);
+		int purgeflag;
+		unsigned long spl_flags;
+		if (!qp) {
+			ehca_err(cq->device, "cq_num=%x qp_num=%x "
+				 "could not find qp -> ignore cqe",
+				 my_cq->cq_number, cqe->local_qp_number);
+			ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x",
+				 my_cq->cq_number, cqe->local_qp_number);
+			/* ignore this purged cqe */
+			goto poll_cq_one_read_cqe;
+		}
+		spin_lock_irqsave(&qp->spinlock_s, spl_flags);
+		purgeflag = qp->sqerr_purgeflag;
+		spin_unlock_irqrestore(&qp->spinlock_s, spl_flags);
+
+		if (purgeflag) {
+			ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x "
+				 "src_qp=%x",
+				 cqe->local_qp_number, cqe->remote_qp_number);
+			if (ehca_debug_level)
+				ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",
+					 cqe->local_qp_number,
+					 cqe->remote_qp_number);
+			/*
+			 * ignore this to avoid double cqes of bad wqe
+			 * that caused sqe and turn off purge flag
+			 */
+			qp->sqerr_purgeflag = 0;
+			goto poll_cq_one_read_cqe;
+		}
+	}
+
+	/* tracing cqe */
+	if (ehca_debug_level) {
+		ehca_dbg(cq->device,
+			 "Received COMPLETION ehca_cq=%p cq_num=%x -----",
+			 my_cq, my_cq->cq_number);
+		ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
+			 my_cq, my_cq->cq_number);
+		ehca_dbg(cq->device,
+			 "ehca_cq=%p cq_num=%x -------------------------",
+			 my_cq, my_cq->cq_number);
+	}
+
+	/* we got a completion! */
+	wc->wr_id = cqe->work_request_id;
+
+	/* eval ib_wc_opcode */
+	wc->opcode = ib_wc_opcode[cqe->optype]-1;
+	if (unlikely(wc->opcode == -1)) {
+		ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x "
+			 "ehca_cq=%p cq_num=%x",
+			 cqe->optype, cqe->status, my_cq, my_cq->cq_number);
+		/* dump cqe for other infos */
+		ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
+			 my_cq, my_cq->cq_number);
+		/* update also queue adder to throw away this entry!!! */
+		goto poll_cq_one_exit0;
+	}
+	/* eval ib_wc_status */
+	if (unlikely(cqe->status & WC_STATUS_ERROR_BIT)) {
+		/* complete with errors */
+		map_ib_wc_status(cqe->status, &wc->status);
+		wc->vendor_err = wc->status;
+	} else
+		wc->status = IB_WC_SUCCESS;
+
+	wc->qp_num = cqe->local_qp_number;
+	wc->byte_len = cqe->nr_bytes_transferred;
+	wc->pkey_index = cqe->pkey_index;
+	wc->slid = cqe->rlid;
+	wc->dlid_path_bits = cqe->dlid;
+	wc->src_qp = cqe->remote_qp_number;
+	wc->wc_flags = cqe->w_completion_flags;
+	wc->imm_data = cpu_to_be32(cqe->immediate_data);
+	wc->sl = cqe->service_level;
+
+	if (wc->status != IB_WC_SUCCESS)
+		ehca_dbg(cq->device,
+			 "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe "
+			 "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx "
+			 "cqe=%p", my_cq, my_cq->cq_number, cqe->optype,
+			 cqe->status, cqe->local_qp_number,
+			 cqe->remote_qp_number, cqe->work_request_id, cqe);
+
+poll_cq_one_exit0:
+	if (cqe_count > 0)
+		hipz_update_feca(my_cq, cqe_count);
+
+	return ret;
+}
+
+int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
+{
+	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+	int nr;
+	struct ib_wc *current_wc = wc;
+	int ret = 0;
+	unsigned long spl_flags;
+
+	if (num_entries < 1) {
+		ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
+			 "cq_num=%x", num_entries, my_cq, my_cq->cq_number);
+		ret = -EINVAL;
+		goto poll_cq_exit0;
+	}
+
+	spin_lock_irqsave(&my_cq->spinlock, spl_flags);
+	for (nr = 0; nr < num_entries; nr++) {
+		ret = ehca_poll_cq_one(cq, current_wc);
+		if (ret)
+			break;
+		current_wc++;
+	} /* eof for nr */
+	spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
+	if (ret == -EAGAIN  || !ret)
+		ret = nr;
+
+poll_cq_exit0:
+	return ret;
+}
+
+int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify)
+{
+	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+
+	switch (cq_notify) {
+	case IB_CQ_SOLICITED:
+		hipz_set_cqx_n0(my_cq, 1);
+		break;
+	case IB_CQ_NEXT_COMP:
+		hipz_set_cqx_n1(my_cq, 1);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c
new file mode 100644
index 00000000000..9f16e9c7939
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_sqp.c
@@ -0,0 +1,111 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  SQP functions
+ *
+ *  Authors: Khadija Souissi <souissi@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+
+/**
+ * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue
+ * pair is created successfully, the corresponding port gets active.
+ *
+ * Define Special Queue pair 0 (SMI QP) is still not supported.
+ *
+ * @qp_init_attr: Queue pair init attributes with port and queue pair type
+ */
+
+u64 ehca_define_sqp(struct ehca_shca *shca,
+		    struct ehca_qp *ehca_qp,
+		    struct ib_qp_init_attr *qp_init_attr)
+{
+	u32 pma_qp_nr, bma_qp_nr;
+	u64 ret;
+	u8 port = qp_init_attr->port_num;
+	int counter;
+
+	shca->sport[port - 1].port_state = IB_PORT_DOWN;
+
+	switch (qp_init_attr->qp_type) {
+	case IB_QPT_SMI:
+		/* function not supported yet */
+		break;
+	case IB_QPT_GSI:
+		ret = hipz_h_define_aqp1(shca->ipz_hca_handle,
+					 ehca_qp->ipz_qp_handle,
+					 ehca_qp->galpas.kernel,
+					 (u32) qp_init_attr->port_num,
+					 &pma_qp_nr, &bma_qp_nr);
+
+		if (ret != H_SUCCESS) {
+			ehca_err(&shca->ib_device,
+				 "Can't define AQP1 for port %x. rc=%lx",
+				 port, ret);
+			return ret;
+		}
+		break;
+	default:
+		ehca_err(&shca->ib_device, "invalid qp_type=%x",
+			 qp_init_attr->qp_type);
+		return H_PARAMETER;
+	}
+
+	for (counter = 0;
+	     shca->sport[port - 1].port_state != IB_PORT_ACTIVE &&
+		     counter < ehca_port_act_time;
+	     counter++) {
+		ehca_dbg(&shca->ib_device, "... wait until port %x is active",
+			 port);
+		msleep_interruptible(1000);
+	}
+
+	if (counter == ehca_port_act_time) {
+		ehca_err(&shca->ib_device, "Port %x is not active.", port);
+		return H_HARDWARE;
+	}
+
+	return H_SUCCESS;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h
new file mode 100644
index 00000000000..9f56bb846d9
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_tools.h
@@ -0,0 +1,172 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  auxiliary functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Khadija Souissi <souissik@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef EHCA_TOOLS_H
+#define EHCA_TOOLS_H
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
+#include <linux/version.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/device.h>
+
+#include <asm/abs_addr.h>
+#include <asm/ibmebus.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+
+extern int ehca_debug_level;
+
+#define ehca_dbg(ib_dev, format, arg...) \
+	do { \
+		if (unlikely(ehca_debug_level)) \
+			dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \
+				   "PU%04x EHCA_DBG:%s " format "\n", \
+				   get_paca()->paca_index, __FUNCTION__, \
+				   ## arg); \
+	} while (0)
+
+#define ehca_info(ib_dev, format, arg...) \
+	dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \
+		 get_paca()->paca_index, __FUNCTION__, ## arg)
+
+#define ehca_warn(ib_dev, format, arg...) \
+	dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \
+		 get_paca()->paca_index, __FUNCTION__, ## arg)
+
+#define ehca_err(ib_dev, format, arg...) \
+	dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \
+		get_paca()->paca_index, __FUNCTION__, ## arg)
+
+/* use this one only if no ib_dev available */
+#define ehca_gen_dbg(format, arg...) \
+	do { \
+		if (unlikely(ehca_debug_level)) \
+			printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n",\
+			       get_paca()->paca_index, __FUNCTION__, ## arg); \
+	} while (0)
+
+#define ehca_gen_warn(format, arg...) \
+	do { \
+		if (unlikely(ehca_debug_level)) \
+			printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n",\
+			       get_paca()->paca_index, __FUNCTION__, ## arg); \
+	} while (0)
+
+#define ehca_gen_err(format, arg...) \
+	printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \
+		get_paca()->paca_index, __FUNCTION__, ## arg)
+
+/**
+ * ehca_dmp - printk a memory block, whose length is n*8 bytes.
+ * Each line has the following layout:
+ * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex>
+ */
+#define ehca_dmp(adr, len, format, args...) \
+	do {				       \
+		unsigned int x;			      \
+		unsigned int l = (unsigned int)(len); \
+		unsigned char *deb = (unsigned char*)(adr);	\
+		for (x = 0; x < l; x += 16) { \
+			printk("EHCA_DMP:%s" format \
+			       " adr=%p ofs=%04x %016lx %016lx\n", \
+			       __FUNCTION__, ##args, deb, x, \
+			       *((u64 *)&deb[0]), *((u64 *)&deb[8])); \
+			deb += 16; \
+		} \
+	} while (0)
+
+/* define a bitmask, little endian version */
+#define EHCA_BMASK(pos,length) (((pos)<<16)+(length))
+
+/* define a bitmask, the ibm way... */
+#define EHCA_BMASK_IBM(from,to) (((63-to)<<16)+((to)-(from)+1))
+
+/* internal function, don't use */
+#define EHCA_BMASK_SHIFTPOS(mask) (((mask)>>16)&0xffff)
+
+/* internal function, don't use */
+#define EHCA_BMASK_MASK(mask) (0xffffffffffffffffULL >> ((64-(mask))&0xffff))
+
+/**
+ * EHCA_BMASK_SET - return value shifted and masked by mask
+ * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable
+ * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask
+ * in variable
+ */
+#define EHCA_BMASK_SET(mask,value) \
+	((EHCA_BMASK_MASK(mask) & ((u64)(value)))<<EHCA_BMASK_SHIFTPOS(mask))
+
+/**
+ * EHCA_BMASK_GET - extract a parameter from value by mask
+ */
+#define EHCA_BMASK_GET(mask,value) \
+	(EHCA_BMASK_MASK(mask)& (((u64)(value))>>EHCA_BMASK_SHIFTPOS(mask)))
+
+
+/* Converts ehca to ib return code */
+static inline int ehca2ib_return_code(u64 ehca_rc)
+{
+	switch (ehca_rc) {
+	case H_SUCCESS:
+		return 0;
+	case H_BUSY:
+		return -EBUSY;
+	case H_NO_MEM:
+		return -ENOMEM;
+	default:
+		return -EINVAL;
+	}
+}
+
+
+#endif /* EHCA_TOOLS_H */
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
new file mode 100644
index 00000000000..e08764e4aef
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -0,0 +1,392 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  userspace support verbs
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_classes.h"
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
+					struct ib_udata *udata)
+{
+	struct ehca_ucontext *my_context;
+
+	my_context = kzalloc(sizeof *my_context, GFP_KERNEL);
+	if (!my_context) {
+		ehca_err(device, "Out of memory device=%p", device);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return &my_context->ib_ucontext;
+}
+
+int ehca_dealloc_ucontext(struct ib_ucontext *context)
+{
+	kfree(container_of(context, struct ehca_ucontext, ib_ucontext));
+	return 0;
+}
+
+struct page *ehca_nopage(struct vm_area_struct *vma,
+			 unsigned long address, int *type)
+{
+	struct page *mypage = NULL;
+	u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
+	u32 idr_handle = fileoffset >> 32;
+	u32 q_type = (fileoffset >> 28) & 0xF;	  /* CQ, QP,...        */
+	u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
+	u32 cur_pid = current->tgid;
+	unsigned long flags;
+	struct ehca_cq *cq;
+	struct ehca_qp *qp;
+	struct ehca_pd *pd;
+	u64 offset;
+	void *vaddr;
+
+	switch (q_type) {
+	case 1: /* CQ */
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		cq = idr_find(&ehca_cq_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (!cq) {
+			ehca_gen_err("cq is NULL ret=NOPAGE_SIGBUS");
+			return NOPAGE_SIGBUS;
+		}
+
+		if (cq->ownpid != cur_pid) {
+			ehca_err(cq->ib_cq.device,
+				 "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, cq->ownpid);
+			return NOPAGE_SIGBUS;
+		}
+
+		if (rsrc_type == 2) {
+			ehca_dbg(cq->ib_cq.device, "cq=%p cq queuearea", cq);
+			offset = address - vma->vm_start;
+			vaddr = ipz_qeit_calc(&cq->ipz_queue, offset);
+			ehca_dbg(cq->ib_cq.device, "offset=%lx vaddr=%p",
+				 offset, vaddr);
+			mypage = virt_to_page(vaddr);
+		}
+		break;
+
+	case 2: /* QP */
+		spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+		qp = idr_find(&ehca_qp_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (!qp) {
+			ehca_gen_err("qp is NULL ret=NOPAGE_SIGBUS");
+			return NOPAGE_SIGBUS;
+		}
+
+		pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
+		if (pd->ownpid != cur_pid) {
+			ehca_err(qp->ib_qp.device,
+				 "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, pd->ownpid);
+			return NOPAGE_SIGBUS;
+		}
+
+		if (rsrc_type == 2) {	/* rqueue */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueuearea", qp);
+			offset = address - vma->vm_start;
+			vaddr = ipz_qeit_calc(&qp->ipz_rqueue, offset);
+			ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
+				 offset, vaddr);
+			mypage = virt_to_page(vaddr);
+		} else if (rsrc_type == 3) {	/* squeue */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp squeuearea", qp);
+			offset = address - vma->vm_start;
+			vaddr = ipz_qeit_calc(&qp->ipz_squeue, offset);
+			ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
+				 offset, vaddr);
+			mypage = virt_to_page(vaddr);
+		}
+		break;
+
+	default:
+		ehca_gen_err("bad queue type %x", q_type);
+		return NOPAGE_SIGBUS;
+	}
+
+	if (!mypage) {
+		ehca_gen_err("Invalid page adr==NULL ret=NOPAGE_SIGBUS");
+		return NOPAGE_SIGBUS;
+	}
+	get_page(mypage);
+
+	return mypage;
+}
+
+static struct vm_operations_struct ehcau_vm_ops = {
+	.nopage = ehca_nopage,
+};
+
+int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
+	u32 idr_handle = fileoffset >> 32;
+	u32 q_type = (fileoffset >> 28) & 0xF;	  /* CQ, QP,...        */
+	u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
+	u32 cur_pid = current->tgid;
+	u32 ret;
+	u64 vsize, physical;
+	unsigned long flags;
+	struct ehca_cq *cq;
+	struct ehca_qp *qp;
+	struct ehca_pd *pd;
+
+	switch (q_type) {
+	case  1: /* CQ */
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		cq = idr_find(&ehca_cq_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (!cq)
+			return -EINVAL;
+
+		if (cq->ownpid != cur_pid) {
+			ehca_err(cq->ib_cq.device,
+				 "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, cq->ownpid);
+			return -ENOMEM;
+		}
+
+		if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context)
+			return -EINVAL;
+
+		switch (rsrc_type) {
+		case 1: /* galpa fw handle */
+			ehca_dbg(cq->ib_cq.device, "cq=%p cq triggerarea", cq);
+			vma->vm_flags |= VM_RESERVED;
+			vsize = vma->vm_end - vma->vm_start;
+			if (vsize != EHCA_PAGESIZE) {
+				ehca_err(cq->ib_cq.device, "invalid vsize=%lx",
+					 vma->vm_end - vma->vm_start);
+				return -EINVAL;
+			}
+
+			physical = cq->galpas.user.fw_handle;
+			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+			vma->vm_flags |= VM_IO | VM_RESERVED;
+
+			ehca_dbg(cq->ib_cq.device,
+				 "vsize=%lx physical=%lx", vsize, physical);
+			ret = remap_pfn_range(vma, vma->vm_start,
+					      physical >> PAGE_SHIFT, vsize,
+					      vma->vm_page_prot);
+			if (ret) {
+				ehca_err(cq->ib_cq.device,
+					 "remap_pfn_range() failed ret=%x",
+					 ret);
+				return -ENOMEM;
+			}
+			break;
+
+		case 2: /* cq queue_addr */
+			ehca_dbg(cq->ib_cq.device, "cq=%p cq q_addr", cq);
+			vma->vm_flags |= VM_RESERVED;
+			vma->vm_ops = &ehcau_vm_ops;
+			break;
+
+		default:
+			ehca_err(cq->ib_cq.device, "bad resource type %x",
+				 rsrc_type);
+			return -EINVAL;
+		}
+		break;
+
+	case 2: /* QP */
+		spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+		qp = idr_find(&ehca_qp_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (!qp)
+			return -EINVAL;
+
+		pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
+		if (pd->ownpid != cur_pid) {
+			ehca_err(qp->ib_qp.device,
+				 "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, pd->ownpid);
+			return -ENOMEM;
+		}
+
+		if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context)
+			return -EINVAL;
+
+		switch (rsrc_type) {
+		case 1: /* galpa fw handle */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp triggerarea", qp);
+			vma->vm_flags |= VM_RESERVED;
+			vsize = vma->vm_end - vma->vm_start;
+			if (vsize != EHCA_PAGESIZE) {
+				ehca_err(qp->ib_qp.device, "invalid vsize=%lx",
+					 vma->vm_end - vma->vm_start);
+				return -EINVAL;
+			}
+
+			physical = qp->galpas.user.fw_handle;
+			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+			vma->vm_flags |= VM_IO | VM_RESERVED;
+
+			ehca_dbg(qp->ib_qp.device, "vsize=%lx physical=%lx",
+				 vsize, physical);
+			ret = remap_pfn_range(vma, vma->vm_start,
+					      physical >> PAGE_SHIFT, vsize,
+					      vma->vm_page_prot);
+			if (ret) {
+				ehca_err(qp->ib_qp.device,
+					 "remap_pfn_range() failed ret=%x",
+					 ret);
+				return -ENOMEM;
+			}
+			break;
+
+		case 2: /* qp rqueue_addr */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueue_addr", qp);
+			vma->vm_flags |= VM_RESERVED;
+			vma->vm_ops = &ehcau_vm_ops;
+			break;
+
+		case 3: /* qp squeue_addr */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp squeue_addr", qp);
+			vma->vm_flags |= VM_RESERVED;
+			vma->vm_ops = &ehcau_vm_ops;
+			break;
+
+		default:
+			ehca_err(qp->ib_qp.device, "bad resource type %x",
+				 rsrc_type);
+			return -EINVAL;
+		}
+		break;
+
+	default:
+		ehca_gen_err("bad queue type %x", q_type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int ehca_mmap_nopage(u64 foffset, u64 length, void **mapped,
+		     struct vm_area_struct **vma)
+{
+	down_write(&current->mm->mmap_sem);
+	*mapped = (void*)do_mmap(NULL,0, length, PROT_WRITE,
+				 MAP_SHARED | MAP_ANONYMOUS,
+				 foffset);
+	up_write(&current->mm->mmap_sem);
+	if (!(*mapped)) {
+		ehca_gen_err("couldn't mmap foffset=%lx length=%lx",
+			     foffset, length);
+		return -EINVAL;
+	}
+
+	*vma = find_vma(current->mm, (u64)*mapped);
+	if (!(*vma)) {
+		down_write(&current->mm->mmap_sem);
+		do_munmap(current->mm, 0, length);
+		up_write(&current->mm->mmap_sem);
+		ehca_gen_err("couldn't find vma queue=%p", *mapped);
+		return -EINVAL;
+	}
+	(*vma)->vm_flags |= VM_RESERVED;
+	(*vma)->vm_ops = &ehcau_vm_ops;
+
+	return 0;
+}
+
+int ehca_mmap_register(u64 physical, void **mapped,
+		       struct vm_area_struct **vma)
+{
+	int ret;
+	unsigned long vsize;
+	/* ehca hw supports only 4k page */
+	ret = ehca_mmap_nopage(0, EHCA_PAGESIZE, mapped, vma);
+	if (ret) {
+		ehca_gen_err("could'nt mmap physical=%lx", physical);
+		return ret;
+	}
+
+	(*vma)->vm_flags |= VM_RESERVED;
+	vsize = (*vma)->vm_end - (*vma)->vm_start;
+	if (vsize != EHCA_PAGESIZE) {
+		ehca_gen_err("invalid vsize=%lx",
+			     (*vma)->vm_end - (*vma)->vm_start);
+		return -EINVAL;
+	}
+
+	(*vma)->vm_page_prot = pgprot_noncached((*vma)->vm_page_prot);
+	(*vma)->vm_flags |= VM_IO | VM_RESERVED;
+
+	ret = remap_pfn_range((*vma), (*vma)->vm_start,
+			      physical >> PAGE_SHIFT, vsize,
+			      (*vma)->vm_page_prot);
+	if (ret) {
+		ehca_gen_err("remap_pfn_range() failed ret=%x", ret);
+		return -ENOMEM;
+	}
+
+	return 0;
+
+}
+
+int ehca_munmap(unsigned long addr, size_t len) {
+	int ret = 0;
+	struct mm_struct *mm = current->mm;
+	if (mm) {
+		down_write(&mm->mmap_sem);
+		ret = do_munmap(mm, addr, len);
+		up_write(&mm->mmap_sem);
+	}
+	return ret;
+}
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
new file mode 100644
index 00000000000..3fb46e67df8
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -0,0 +1,874 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Firmware Infiniband Interface code for POWER
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Gerd Bayer <gerd.bayer@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/hvcall.h>
+#include "ehca_tools.h"
+#include "hcp_if.h"
+#include "hcp_phyp.h"
+#include "hipz_fns.h"
+#include "ipz_pt_fn.h"
+
+#define H_ALL_RES_QP_ENHANCED_OPS       EHCA_BMASK_IBM(9, 11)
+#define H_ALL_RES_QP_PTE_PIN            EHCA_BMASK_IBM(12, 12)
+#define H_ALL_RES_QP_SERVICE_TYPE       EHCA_BMASK_IBM(13, 15)
+#define H_ALL_RES_QP_LL_RQ_CQE_POSTING  EHCA_BMASK_IBM(18, 18)
+#define H_ALL_RES_QP_LL_SQ_CQE_POSTING  EHCA_BMASK_IBM(19, 21)
+#define H_ALL_RES_QP_SIGNALING_TYPE     EHCA_BMASK_IBM(22, 23)
+#define H_ALL_RES_QP_UD_AV_LKEY_CTRL    EHCA_BMASK_IBM(31, 31)
+#define H_ALL_RES_QP_RESOURCE_TYPE      EHCA_BMASK_IBM(56, 63)
+
+#define H_ALL_RES_QP_MAX_OUTST_SEND_WR  EHCA_BMASK_IBM(0, 15)
+#define H_ALL_RES_QP_MAX_OUTST_RECV_WR  EHCA_BMASK_IBM(16, 31)
+#define H_ALL_RES_QP_MAX_SEND_SGE       EHCA_BMASK_IBM(32, 39)
+#define H_ALL_RES_QP_MAX_RECV_SGE       EHCA_BMASK_IBM(40, 47)
+
+#define H_ALL_RES_QP_ACT_OUTST_SEND_WR  EHCA_BMASK_IBM(16, 31)
+#define H_ALL_RES_QP_ACT_OUTST_RECV_WR  EHCA_BMASK_IBM(48, 63)
+#define H_ALL_RES_QP_ACT_SEND_SGE       EHCA_BMASK_IBM(8, 15)
+#define H_ALL_RES_QP_ACT_RECV_SGE       EHCA_BMASK_IBM(24, 31)
+
+#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(0, 31)
+#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(32, 63)
+
+/* direct access qp controls */
+#define DAQP_CTRL_ENABLE    0x01
+#define DAQP_CTRL_SEND_COMP 0x20
+#define DAQP_CTRL_RECV_COMP 0x40
+
+static u32 get_longbusy_msecs(int longbusy_rc)
+{
+	switch (longbusy_rc) {
+	case H_LONG_BUSY_ORDER_1_MSEC:
+		return 1;
+	case H_LONG_BUSY_ORDER_10_MSEC:
+		return 10;
+	case H_LONG_BUSY_ORDER_100_MSEC:
+		return 100;
+	case H_LONG_BUSY_ORDER_1_SEC:
+		return 1000;
+	case H_LONG_BUSY_ORDER_10_SEC:
+		return 10000;
+	case H_LONG_BUSY_ORDER_100_SEC:
+		return 100000;
+	default:
+		return 1;
+	}
+}
+
+static long ehca_plpar_hcall_norets(unsigned long opcode,
+				    unsigned long arg1,
+				    unsigned long arg2,
+				    unsigned long arg3,
+				    unsigned long arg4,
+				    unsigned long arg5,
+				    unsigned long arg6,
+				    unsigned long arg7)
+{
+	long ret;
+	int i, sleep_msecs;
+
+	ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
+		     "arg5=%lx arg6=%lx arg7=%lx",
+		     opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
+
+	for (i = 0; i < 5; i++) {
+		ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
+					 arg5, arg6, arg7);
+
+		if (H_IS_LONG_BUSY(ret)) {
+			sleep_msecs = get_longbusy_msecs(ret);
+			msleep_interruptible(sleep_msecs);
+			continue;
+		}
+
+		if (ret < H_SUCCESS)
+			ehca_gen_err("opcode=%lx ret=%lx"
+				     " arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
+				     " arg5=%lx arg6=%lx arg7=%lx ",
+				     opcode, ret,
+				     arg1, arg2, arg3, arg4, arg5,
+				     arg6, arg7);
+
+		ehca_gen_dbg("opcode=%lx ret=%lx", opcode, ret);
+		return ret;
+
+	}
+
+	return H_BUSY;
+}
+
+static long ehca_plpar_hcall9(unsigned long opcode,
+			      unsigned long *outs, /* array of 9 outputs */
+			      unsigned long arg1,
+			      unsigned long arg2,
+			      unsigned long arg3,
+			      unsigned long arg4,
+			      unsigned long arg5,
+			      unsigned long arg6,
+			      unsigned long arg7,
+			      unsigned long arg8,
+			      unsigned long arg9)
+{
+	long ret;
+	int i, sleep_msecs;
+
+	ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
+		     "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx",
+		     opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7,
+		     arg8, arg9);
+
+	for (i = 0; i < 5; i++) {
+		ret = plpar_hcall9(opcode, outs,
+				   arg1, arg2, arg3, arg4, arg5,
+				   arg6, arg7, arg8, arg9);
+
+		if (H_IS_LONG_BUSY(ret)) {
+			sleep_msecs = get_longbusy_msecs(ret);
+			msleep_interruptible(sleep_msecs);
+			continue;
+		}
+
+		if (ret < H_SUCCESS)
+			ehca_gen_err("opcode=%lx ret=%lx"
+				     " arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
+				     " arg5=%lx arg6=%lx arg7=%lx arg8=%lx"
+				     " arg9=%lx"
+				     " out1=%lx out2=%lx out3=%lx out4=%lx"
+				     " out5=%lx out6=%lx out7=%lx out8=%lx"
+				     " out9=%lx",
+				     opcode, ret,
+				     arg1, arg2, arg3, arg4, arg5,
+				     arg6, arg7, arg8, arg9,
+				     outs[0], outs[1], outs[2], outs[3],
+				     outs[4], outs[5], outs[6], outs[7],
+				     outs[8]);
+
+		ehca_gen_dbg("opcode=%lx ret=%lx out1=%lx out2=%lx out3=%lx "
+			     "out4=%lx out5=%lx out6=%lx out7=%lx out8=%lx "
+			     "out9=%lx",
+			     opcode, ret, outs[0], outs[1], outs[2], outs[3],
+			     outs[4], outs[5], outs[6], outs[7], outs[8]);
+		return ret;
+
+	}
+
+	return H_BUSY;
+}
+u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u32 neq_control,
+			     const u32 number_of_entries,
+			     struct ipz_eq_handle *eq_handle,
+			     u32 *act_nr_of_entries,
+			     u32 *act_pages,
+			     u32 *eq_ist)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	u64 allocate_controls;
+
+	/* resource type */
+	allocate_controls = 3ULL;
+
+	/* ISN is associated */
+	if (neq_control != 1)
+		allocate_controls = (1ULL << (63 - 7)) | allocate_controls;
+	else /* notification event queue */
+		allocate_controls = (1ULL << 63) | allocate_controls;
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,  /* r4 */
+				allocate_controls,      /* r5 */
+				number_of_entries,      /* r6 */
+				0, 0, 0, 0, 0, 0);
+	eq_handle->handle = outs[0];
+	*act_nr_of_entries = (u32)outs[3];
+	*act_pages = (u32)outs[4];
+	*eq_ist = (u32)outs[5];
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resource - ret=%lx ", ret);
+
+	return ret;
+}
+
+u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
+		       struct ipz_eq_handle eq_handle,
+		       const u64 event_mask)
+{
+	return ehca_plpar_hcall_norets(H_RESET_EVENTS,
+				       adapter_handle.handle, /* r4 */
+				       eq_handle.handle,      /* r5 */
+				       event_mask,	      /* r6 */
+				       0, 0, 0, 0);
+}
+
+u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_cq *cq,
+			     struct ehca_alloc_cq_parms *param)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,   /* r4  */
+				2,	                 /* r5  */
+				param->eq_handle.handle, /* r6  */
+				cq->token,	         /* r7  */
+				param->nr_cqe,           /* r8  */
+				0, 0, 0, 0);
+	cq->ipz_cq_handle.handle = outs[0];
+	param->act_nr_of_entries = (u32)outs[3];
+	param->act_pages = (u32)outs[4];
+
+	if (ret == H_SUCCESS)
+		hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]);
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resources. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_qp *qp,
+			     struct ehca_alloc_qp_parms *parms)
+{
+	u64 ret;
+	u64 allocate_controls;
+	u64 max_r10_reg;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	u16 max_nr_receive_wqes = qp->init_attr.cap.max_recv_wr + 1;
+	u16 max_nr_send_wqes = qp->init_attr.cap.max_send_wr + 1;
+	int daqp_ctrl = parms->daqp_ctrl;
+
+	allocate_controls =
+		EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS,
+			       (daqp_ctrl & DAQP_CTRL_ENABLE) ? 1 : 0)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
+				 (daqp_ctrl & DAQP_CTRL_RECV_COMP) ? 1 : 0)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
+				 (daqp_ctrl & DAQP_CTRL_SEND_COMP) ? 1 : 0)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL,
+				 parms->ud_av_l_key_ctl)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1);
+
+	max_r10_reg =
+		EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
+			       max_nr_send_wqes)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
+				 max_nr_receive_wqes)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
+				 parms->max_send_sge)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
+				 parms->max_recv_sge);
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,	           /* r4  */
+				allocate_controls,	           /* r5  */
+				qp->send_cq->ipz_cq_handle.handle,
+				qp->recv_cq->ipz_cq_handle.handle,
+				parms->ipz_eq_handle.handle,
+				((u64)qp->token << 32) | parms->pd.value,
+				max_r10_reg,	                   /* r10 */
+				parms->ud_av_l_key_ctl,            /* r11 */
+				0);
+	qp->ipz_qp_handle.handle = outs[0];
+	qp->real_qp_num = (u32)outs[1];
+	parms->act_nr_send_sges =
+		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
+	parms->act_nr_recv_wqes =
+		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
+	parms->act_nr_send_sges =
+		(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]);
+	parms->act_nr_recv_sges =
+		(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]);
+	parms->nr_sq_pages =
+		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]);
+	parms->nr_rq_pages =
+		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
+
+	if (ret == H_SUCCESS)
+		hcp_galpas_ctor(&qp->galpas, outs[6], outs[6]);
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resources. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
+		      const u8 port_id,
+		      struct hipz_query_port *query_port_response_block)
+{
+	u64 ret;
+	u64 r_cb = virt_to_abs(query_port_response_block);
+
+	if (r_cb & (EHCA_PAGESIZE-1)) {
+		ehca_gen_err("response block not page aligned");
+		return H_PARAMETER;
+	}
+
+	ret = ehca_plpar_hcall_norets(H_QUERY_PORT,
+				      adapter_handle.handle, /* r4 */
+				      port_id,	             /* r5 */
+				      r_cb,	             /* r6 */
+				      0, 0, 0, 0);
+
+	if (ehca_debug_level)
+		ehca_dmp(query_port_response_block, 64, "response_block");
+
+	return ret;
+}
+
+u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
+		     struct hipz_query_hca *query_hca_rblock)
+{
+	u64 r_cb = virt_to_abs(query_hca_rblock);
+
+	if (r_cb & (EHCA_PAGESIZE-1)) {
+		ehca_gen_err("response_block=%p not page aligned",
+			     query_hca_rblock);
+		return H_PARAMETER;
+	}
+
+	return ehca_plpar_hcall_norets(H_QUERY_HCA,
+				       adapter_handle.handle, /* r4 */
+				       r_cb,                  /* r5 */
+				       0, 0, 0, 0, 0);
+}
+
+u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
+			  const u8 pagesize,
+			  const u8 queue_type,
+			  const u64 resource_handle,
+			  const u64 logical_address_of_page,
+			  u64 count)
+{
+	return ehca_plpar_hcall_norets(H_REGISTER_RPAGES,
+				       adapter_handle.handle,      /* r4  */
+				       queue_type | pagesize << 8, /* r5  */
+				       resource_handle,	           /* r6  */
+				       logical_address_of_page,    /* r7  */
+				       count,	                   /* r8  */
+				       0, 0);
+}
+
+u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_eq_handle eq_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count)
+{
+	if (count != 1) {
+		ehca_gen_err("Ppage counter=%lx", count);
+		return H_PARAMETER;
+	}
+	return hipz_h_register_rpage(adapter_handle,
+				     pagesize,
+				     queue_type,
+				     eq_handle.handle,
+				     logical_address_of_page, count);
+}
+
+u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle,
+			   u32 ist)
+{
+	u64 ret;
+	ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE,
+				      adapter_handle.handle, /* r4 */
+				      ist,                   /* r5 */
+				      0, 0, 0, 0, 0);
+
+	if (ret != H_SUCCESS && ret != H_BUSY)
+		ehca_gen_err("Could not query interrupt state.");
+
+	return ret;
+}
+
+u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_cq_handle cq_handle,
+			     struct ehca_pfcq *pfcq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa gal)
+{
+	if (count != 1) {
+		ehca_gen_err("Page counter=%lx", count);
+		return H_PARAMETER;
+	}
+
+	return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
+				     cq_handle.handle, logical_address_of_page,
+				     count);
+}
+
+u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_qp_handle qp_handle,
+			     struct ehca_pfqp *pfqp,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa galpa)
+{
+	if (count != 1) {
+		ehca_gen_err("Page counter=%lx", count);
+		return H_PARAMETER;
+	}
+
+	return hipz_h_register_rpage(adapter_handle,pagesize,queue_type,
+				     qp_handle.handle,logical_address_of_page,
+				     count);
+}
+
+u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
+			       const struct ipz_qp_handle qp_handle,
+			       struct ehca_pfqp *pfqp,
+			       void **log_addr_next_sq_wqe2processed,
+			       void **log_addr_next_rq_wqe2processed,
+			       int dis_and_get_function_code)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
+				adapter_handle.handle,     /* r4 */
+				dis_and_get_function_code, /* r5 */
+				qp_handle.handle,	   /* r6 */
+				0, 0, 0, 0, 0, 0);
+	if (log_addr_next_sq_wqe2processed)
+		*log_addr_next_sq_wqe2processed = (void*)outs[0];
+	if (log_addr_next_rq_wqe2processed)
+		*log_addr_next_rq_wqe2processed = (void*)outs[1];
+
+	return ret;
+}
+
+u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
+		     const struct ipz_qp_handle qp_handle,
+		     struct ehca_pfqp *pfqp,
+		     const u64 update_mask,
+		     struct hcp_modify_qp_control_block *mqpcb,
+		     struct h_galpa gal)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	ret = ehca_plpar_hcall9(H_MODIFY_QP, outs,
+				adapter_handle.handle, /* r4 */
+				qp_handle.handle,      /* r5 */
+				update_mask,	       /* r6 */
+				virt_to_abs(mqpcb),    /* r7 */
+				0, 0, 0, 0, 0);
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Insufficient resources ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
+		    const struct ipz_qp_handle qp_handle,
+		    struct ehca_pfqp *pfqp,
+		    struct hcp_modify_qp_control_block *qqpcb,
+		    struct h_galpa gal)
+{
+	return ehca_plpar_hcall_norets(H_QUERY_QP,
+				       adapter_handle.handle, /* r4 */
+				       qp_handle.handle,      /* r5 */
+				       virt_to_abs(qqpcb),    /* r6 */
+				       0, 0, 0, 0);
+}
+
+u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_qp *qp)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = hcp_galpas_dtor(&qp->galpas);
+	if (ret) {
+		ehca_gen_err("Could not destruct qp->galpas");
+		return H_RESOURCE;
+	}
+	ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
+				adapter_handle.handle,     /* r4 */
+				/* function code */
+				1,	                   /* r5 */
+				qp->ipz_qp_handle.handle,  /* r6 */
+				0, 0, 0, 0, 0, 0);
+	if (ret == H_HARDWARE)
+		ehca_gen_err("HCA not operational. ret=%lx", ret);
+
+	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				      adapter_handle.handle,     /* r4 */
+				      qp->ipz_qp_handle.handle,  /* r5 */
+				      0, 0, 0, 0, 0);
+
+	if (ret == H_RESOURCE)
+		ehca_gen_err("Resource still in use. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port)
+{
+	return ehca_plpar_hcall_norets(H_DEFINE_AQP0,
+				       adapter_handle.handle, /* r4 */
+				       qp_handle.handle,      /* r5 */
+				       port,                  /* r6 */
+				       0, 0, 0, 0);
+}
+
+u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port, u32 * pma_qp_nr,
+		       u32 * bma_qp_nr)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs,
+				adapter_handle.handle, /* r4 */
+				qp_handle.handle,      /* r5 */
+				port,	               /* r6 */
+				0, 0, 0, 0, 0, 0);
+	*pma_qp_nr = (u32)outs[0];
+	*bma_qp_nr = (u32)outs[1];
+
+	if (ret == H_ALIAS_EXIST)
+		ehca_gen_err("AQP1 already exists. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id)
+{
+	u64 ret;
+
+	ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP,
+				      adapter_handle.handle,  /* r4 */
+				      qp_handle.handle,       /* r5 */
+				      mcg_dlid,               /* r6 */
+				      interface_id,           /* r7 */
+				      subnet_prefix,          /* r8 */
+				      0, 0);
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resources. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id)
+{
+	return ehca_plpar_hcall_norets(H_DETACH_MCQP,
+				       adapter_handle.handle, /* r4 */
+				       qp_handle.handle,      /* r5 */
+				       mcg_dlid,              /* r6 */
+				       interface_id,          /* r7 */
+				       subnet_prefix,         /* r8 */
+				       0, 0);
+}
+
+u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_cq *cq,
+		      u8 force_flag)
+{
+	u64 ret;
+
+	ret = hcp_galpas_dtor(&cq->galpas);
+	if (ret) {
+		ehca_gen_err("Could not destruct cp->galpas");
+		return H_RESOURCE;
+	}
+
+	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				      adapter_handle.handle,     /* r4 */
+				      cq->ipz_cq_handle.handle,  /* r5 */
+				      force_flag != 0 ? 1L : 0L, /* r6 */
+				      0, 0, 0, 0);
+
+	if (ret == H_RESOURCE)
+		ehca_gen_err("H_FREE_RESOURCE failed ret=%lx ", ret);
+
+	return ret;
+}
+
+u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_eq *eq)
+{
+	u64 ret;
+
+	ret = hcp_galpas_dtor(&eq->galpas);
+	if (ret) {
+		ehca_gen_err("Could not destruct eq->galpas");
+		return H_RESOURCE;
+	}
+
+	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				      adapter_handle.handle,     /* r4 */
+				      eq->ipz_eq_handle.handle,  /* r5 */
+				      0, 0, 0, 0, 0);
+
+	if (ret == H_RESOURCE)
+		ehca_gen_err("Resource in use. ret=%lx ", ret);
+
+	return ret;
+}
+
+u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u64 vaddr,
+			     const u64 length,
+			     const u32 access_ctrl,
+			     const struct ipz_pd pd,
+			     struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,            /* r4 */
+				5,                                /* r5 */
+				vaddr,                            /* r6 */
+				length,                           /* r7 */
+				(((u64)access_ctrl) << 32ULL),    /* r8 */
+				pd.value,                         /* r9 */
+				0, 0, 0);
+	outparms->handle.handle = outs[0];
+	outparms->lkey = (u32)outs[2];
+	outparms->rkey = (u32)outs[3];
+
+	return ret;
+}
+
+u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count)
+{
+	u64 ret;
+
+	if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) {
+		ehca_gen_err("logical_address_of_page not on a 4k boundary "
+			     "adapter_handle=%lx mr=%p mr_handle=%lx "
+			     "pagesize=%x queue_type=%x "
+			     "logical_address_of_page=%lx count=%lx",
+			     adapter_handle.handle, mr,
+			     mr->ipz_mr_handle.handle, pagesize, queue_type,
+			     logical_address_of_page, count);
+		ret = H_PARAMETER;
+	} else
+		ret = hipz_h_register_rpage(adapter_handle, pagesize,
+					    queue_type,
+					    mr->ipz_mr_handle.handle,
+					    logical_address_of_page, count);
+	return ret;
+}
+
+u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mr *mr,
+		    struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_QUERY_MR, outs,
+				adapter_handle.handle,     /* r4 */
+				mr->ipz_mr_handle.handle,  /* r5 */
+				0, 0, 0, 0, 0, 0, 0);
+	outparms->len = outs[0];
+	outparms->vaddr = outs[1];
+	outparms->acl  = outs[4] >> 32;
+	outparms->lkey = (u32)(outs[5] >> 32);
+	outparms->rkey = (u32)(outs[5] & (0xffffffff));
+
+	return ret;
+}
+
+u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mr *mr)
+{
+	return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				       adapter_handle.handle,    /* r4 */
+				       mr->ipz_mr_handle.handle, /* r5 */
+				       0, 0, 0, 0, 0);
+}
+
+u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
+			  const struct ehca_mr *mr,
+			  const u64 vaddr_in,
+			  const u64 length,
+			  const u32 access_ctrl,
+			  const struct ipz_pd pd,
+			  const u64 mr_addr_cb,
+			  struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs,
+				adapter_handle.handle,    /* r4 */
+				mr->ipz_mr_handle.handle, /* r5 */
+				vaddr_in,	          /* r6 */
+				length,                   /* r7 */
+				/* r8 */
+				((((u64)access_ctrl) << 32ULL) | pd.value),
+				mr_addr_cb,               /* r9 */
+				0, 0, 0);
+	outparms->vaddr = outs[1];
+	outparms->lkey = (u32)outs[2];
+	outparms->rkey = (u32)outs[3];
+
+	return ret;
+}
+
+u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
+			const struct ehca_mr *mr,
+			const struct ehca_mr *orig_mr,
+			const u64 vaddr_in,
+			const u32 access_ctrl,
+			const struct ipz_pd pd,
+			struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs,
+				adapter_handle.handle,            /* r4 */
+				orig_mr->ipz_mr_handle.handle,    /* r5 */
+				vaddr_in,                         /* r6 */
+				(((u64)access_ctrl) << 32ULL),    /* r7 */
+				pd.value,                         /* r8 */
+				0, 0, 0, 0);
+	outparms->handle.handle = outs[0];
+	outparms->lkey = (u32)outs[2];
+	outparms->rkey = (u32)outs[3];
+
+	return ret;
+}
+
+u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mw *mw,
+			     const struct ipz_pd pd,
+			     struct ehca_mw_hipzout_parms *outparms)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,      /* r4 */
+				6,                          /* r5 */
+				pd.value,                   /* r6 */
+				0, 0, 0, 0, 0, 0);
+	outparms->handle.handle = outs[0];
+	outparms->rkey = (u32)outs[3];
+
+	return ret;
+}
+
+u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mw *mw,
+		    struct ehca_mw_hipzout_parms *outparms)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_QUERY_MW, outs,
+				adapter_handle.handle,    /* r4 */
+				mw->ipz_mw_handle.handle, /* r5 */
+				0, 0, 0, 0, 0, 0, 0);
+	outparms->rkey = (u32)outs[3];
+
+	return ret;
+}
+
+u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mw *mw)
+{
+	return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				       adapter_handle.handle,    /* r4 */
+				       mw->ipz_mw_handle.handle, /* r5 */
+				       0, 0, 0, 0, 0);
+}
+
+u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
+		      const u64 ressource_handle,
+		      void *rblock,
+		      unsigned long *byte_count)
+{
+	u64 r_cb = virt_to_abs(rblock);
+
+	if (r_cb & (EHCA_PAGESIZE-1)) {
+		ehca_gen_err("rblock not page aligned.");
+		return H_PARAMETER;
+	}
+
+	return ehca_plpar_hcall_norets(H_ERROR_DATA,
+				       adapter_handle.handle,
+				       ressource_handle,
+				       r_cb,
+				       0, 0, 0, 0);
+}
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
new file mode 100644
index 00000000000..587ebd47095
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@@ -0,0 +1,261 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Firmware Infiniband Interface code for POWER
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Gerd Bayer <gerd.bayer@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HCP_IF_H__
+#define __HCP_IF_H__
+
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "hipz_hw.h"
+
+/*
+ * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initalize
+ * resources, create the empty EQPT (ring).
+ */
+u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u32 neq_control,
+			     const u32 number_of_entries,
+			     struct ipz_eq_handle *eq_handle,
+			     u32 * act_nr_of_entries,
+			     u32 * act_pages,
+			     u32 * eq_ist);
+
+u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
+		       struct ipz_eq_handle eq_handle,
+		       const u64 event_mask);
+/*
+ * hipz_h_allocate_resource_cq allocates CQ resources in HW and FW, initialize
+ * resources, create the empty CQPT (ring).
+ */
+u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_cq *cq,
+			     struct ehca_alloc_cq_parms *param);
+
+
+/*
+ * hipz_h_alloc_resource_qp allocates QP resources in HW and FW,
+ * initialize resources, create empty QPPTs (2 rings).
+ */
+u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_qp *qp,
+			     struct ehca_alloc_qp_parms *parms);
+
+u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
+		      const u8 port_id,
+		      struct hipz_query_port *query_port_response_block);
+
+u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
+		     struct hipz_query_hca *query_hca_rblock);
+
+/*
+ * hipz_h_register_rpage internal function in hcp_if.h for all
+ * hcp_H_REGISTER_RPAGE calls.
+ */
+u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
+			  const u8 pagesize,
+			  const u8 queue_type,
+			  const u64 resource_handle,
+			  const u64 logical_address_of_page,
+			  u64 count);
+
+u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_eq_handle eq_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count);
+
+u64 hipz_h_query_int_state(const struct ipz_adapter_handle
+			   hcp_adapter_handle,
+			   u32 ist);
+
+u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_cq_handle cq_handle,
+			     struct ehca_pfcq *pfcq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa gal);
+
+u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_qp_handle qp_handle,
+			     struct ehca_pfqp *pfqp,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa galpa);
+
+u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
+			       const struct ipz_qp_handle qp_handle,
+			       struct ehca_pfqp *pfqp,
+			       void **log_addr_next_sq_wqe_tb_processed,
+			       void **log_addr_next_rq_wqe_tb_processed,
+			       int dis_and_get_function_code);
+enum hcall_sigt {
+	HCALL_SIGT_NO_CQE = 0,
+	HCALL_SIGT_BY_WQE = 1,
+	HCALL_SIGT_EVERY = 2
+};
+
+u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
+		     const struct ipz_qp_handle qp_handle,
+		     struct ehca_pfqp *pfqp,
+		     const u64 update_mask,
+		     struct hcp_modify_qp_control_block *mqpcb,
+		     struct h_galpa gal);
+
+u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
+		    const struct ipz_qp_handle qp_handle,
+		    struct ehca_pfqp *pfqp,
+		    struct hcp_modify_qp_control_block *qqpcb,
+		    struct h_galpa gal);
+
+u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_qp *qp);
+
+u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port);
+
+u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port, u32 * pma_qp_nr,
+		       u32 * bma_qp_nr);
+
+u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id);
+
+u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id);
+
+u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_cq *cq,
+		      u8 force_flag);
+
+u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_eq *eq);
+
+/*
+ * hipz_h_alloc_resource_mr allocates MR resources in HW and FW, initialize
+ * resources.
+ */
+u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u64 vaddr,
+			     const u64 length,
+			     const u32 access_ctrl,
+			     const struct ipz_pd pd,
+			     struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */
+u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count);
+
+/* hipz_h_query_mr queries MR in HW and FW */
+u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mr *mr,
+		    struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_free_resource_mr frees MR resources in HW and FW */
+u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mr *mr);
+
+/* hipz_h_reregister_pmr reregisters MR in HW and FW */
+u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
+			  const struct ehca_mr *mr,
+			  const u64 vaddr_in,
+			  const u64 length,
+			  const u32 access_ctrl,
+			  const struct ipz_pd pd,
+			  const u64 mr_addr_cb,
+			  struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_register_smr register shared MR in HW and FW */
+u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
+			const struct ehca_mr *mr,
+			const struct ehca_mr *orig_mr,
+			const u64 vaddr_in,
+			const u32 access_ctrl,
+			const struct ipz_pd pd,
+			struct ehca_mr_hipzout_parms *outparms);
+
+/*
+ * hipz_h_alloc_resource_mw allocates MW resources in HW and FW, initialize
+ * resources.
+ */
+u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mw *mw,
+			     const struct ipz_pd pd,
+			     struct ehca_mw_hipzout_parms *outparms);
+
+/* hipz_h_query_mw queries MW in HW and FW */
+u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mw *mw,
+		    struct ehca_mw_hipzout_parms *outparms);
+
+/* hipz_h_free_resource_mw frees MW resources in HW and FW */
+u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mw *mw);
+
+u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
+		      const u64 ressource_handle,
+		      void *rblock,
+		      unsigned long *byte_count);
+
+#endif /* __HCP_IF_H__ */
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c
new file mode 100644
index 00000000000..0b1a4772c78
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.c
@@ -0,0 +1,80 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *   load store abstraction for ehca register access with tracing
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "hipz_hw.h"
+
+int hcall_map_page(u64 physaddr, u64 *mapaddr)
+{
+	*mapaddr = (u64)(ioremap(physaddr, EHCA_PAGESIZE));
+	return 0;
+}
+
+int hcall_unmap_page(u64 mapaddr)
+{
+	iounmap((volatile void __iomem*)mapaddr);
+	return 0;
+}
+
+int hcp_galpas_ctor(struct h_galpas *galpas,
+		    u64 paddr_kernel, u64 paddr_user)
+{
+	int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
+	if (ret)
+		return ret;
+
+	galpas->user.fw_handle = paddr_user;
+
+	return 0;
+}
+
+int hcp_galpas_dtor(struct h_galpas *galpas)
+{
+	if (galpas->kernel.fw_handle) {
+		int ret = hcall_unmap_page(galpas->kernel.fw_handle);
+		if (ret)
+			return ret;
+	}
+
+	galpas->user.fw_handle = galpas->kernel.fw_handle = 0;
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h
new file mode 100644
index 00000000000..5305c2a3ed9
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.h
@@ -0,0 +1,90 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Firmware calls
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *           Gerd Bayer <gerd.bayer@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HCP_PHYP_H__
+#define __HCP_PHYP_H__
+
+
+/*
+ * eHCA page (mapped into memory)
+ * resource to access eHCA register pages in CPU address space
+*/
+struct h_galpa {
+	u64 fw_handle;
+	/* for pSeries this is a 64bit memory address where
+	   I/O memory is mapped into CPU address space (kv) */
+};
+
+/*
+ * resource to access eHCA address space registers, all types
+ */
+struct h_galpas {
+	u32 pid;		/*PID of userspace galpa checking */
+	struct h_galpa user;	/* user space accessible resource,
+				   set to 0 if unused */
+	struct h_galpa kernel;	/* kernel space accessible resource,
+				   set to 0 if unused */
+};
+
+static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset)
+{
+	u64 addr = galpa.fw_handle + offset;
+	return *(volatile u64 __force *)addr;
+}
+
+static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value)
+{
+	u64 addr = galpa.fw_handle + offset;
+	*(volatile u64 __force *)addr = value;
+}
+
+int hcp_galpas_ctor(struct h_galpas *galpas,
+		    u64 paddr_kernel, u64 paddr_user);
+
+int hcp_galpas_dtor(struct h_galpas *galpas);
+
+int hcall_map_page(u64 physaddr, u64 * mapaddr);
+
+int hcall_unmap_page(u64 mapaddr);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/hipz_fns.h b/drivers/infiniband/hw/ehca/hipz_fns.h
new file mode 100644
index 00000000000..9dac93d0214
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hipz_fns.h
@@ -0,0 +1,68 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  HW abstraction register functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_FNS_H__
+#define __HIPZ_FNS_H__
+
+#include "ehca_classes.h"
+#include "hipz_hw.h"
+
+#include "hipz_fns_core.h"
+
+#define hipz_galpa_store_eq(gal, offset, value) \
+	hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_eq(gal, offset) \
+	hipz_galpa_load(gal, EQTEMM_OFFSET(offset))
+
+#define hipz_galpa_store_qped(gal, offset, value) \
+	hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_qped(gal, offset) \
+	hipz_galpa_load(gal, QPEDMM_OFFSET(offset))
+
+#define hipz_galpa_store_mrmw(gal, offset, value) \
+	hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_mrmw(gal, offset) \
+	hipz_galpa_load(gal, MRMWMM_OFFSET(offset))
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h
new file mode 100644
index 00000000000..20898a15344
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hipz_fns_core.h
@@ -0,0 +1,100 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  HW abstraction register functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_FNS_CORE_H__
+#define __HIPZ_FNS_CORE_H__
+
+#include "hcp_phyp.h"
+#include "hipz_hw.h"
+
+#define hipz_galpa_store_cq(gal, offset, value) \
+	hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_cq(gal, offset) \
+	hipz_galpa_load(gal, CQTEMM_OFFSET(offset))
+
+#define hipz_galpa_store_qp(gal,offset, value) \
+	hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value)
+#define hipz_galpa_load_qp(gal, offset) \
+	hipz_galpa_load(gal,QPTEMM_OFFSET(offset))
+
+static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes)
+{
+	/*  ringing doorbell :-) */
+	hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa,
+			    EHCA_BMASK_SET(QPX_SQADDER, nr_wqes));
+}
+
+static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes)
+{
+	/*  ringing doorbell :-) */
+	hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa,
+			    EHCA_BMASK_SET(QPX_RQADDER, nr_wqes));
+}
+
+static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes)
+{
+	hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca,
+			    EHCA_BMASK_SET(CQX_FECADDER, nr_cqes));
+}
+
+static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value)
+{
+	u64 cqx_n0_reg;
+
+	hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0,
+			    EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT,
+					   value));
+	cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0);
+}
+
+static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value)
+{
+	u64 cqx_n1_reg;
+
+	hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1,
+			    EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value));
+	cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1);
+}
+
+#endif /* __HIPZ_FNC_CORE_H__ */
diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h
new file mode 100644
index 00000000000..3fc92b031c5
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hipz_hw.h
@@ -0,0 +1,388 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  eHCA register definitions
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_HW_H__
+#define __HIPZ_HW_H__
+
+#include "ehca_tools.h"
+
+/* QP Table Entry Memory Map */
+struct hipz_qptemm {
+	u64 qpx_hcr;
+	u64 qpx_c;
+	u64 qpx_herr;
+	u64 qpx_aer;
+/* 0x20*/
+	u64 qpx_sqa;
+	u64 qpx_sqc;
+	u64 qpx_rqa;
+	u64 qpx_rqc;
+/* 0x40*/
+	u64 qpx_st;
+	u64 qpx_pmstate;
+	u64 qpx_pmfa;
+	u64 qpx_pkey;
+/* 0x60*/
+	u64 qpx_pkeya;
+	u64 qpx_pkeyb;
+	u64 qpx_pkeyc;
+	u64 qpx_pkeyd;
+/* 0x80*/
+	u64 qpx_qkey;
+	u64 qpx_dqp;
+	u64 qpx_dlidp;
+	u64 qpx_portp;
+/* 0xa0*/
+	u64 qpx_slidp;
+	u64 qpx_slidpp;
+	u64 qpx_dlida;
+	u64 qpx_porta;
+/* 0xc0*/
+	u64 qpx_slida;
+	u64 qpx_slidpa;
+	u64 qpx_slvl;
+	u64 qpx_ipd;
+/* 0xe0*/
+	u64 qpx_mtu;
+	u64 qpx_lato;
+	u64 qpx_rlimit;
+	u64 qpx_rnrlimit;
+/* 0x100*/
+	u64 qpx_t;
+	u64 qpx_sqhp;
+	u64 qpx_sqptp;
+	u64 qpx_nspsn;
+/* 0x120*/
+	u64 qpx_nspsnhwm;
+	u64 reserved1;
+	u64 qpx_sdsi;
+	u64 qpx_sdsbc;
+/* 0x140*/
+	u64 qpx_sqwsize;
+	u64 qpx_sqwts;
+	u64 qpx_lsn;
+	u64 qpx_nssn;
+/* 0x160 */
+	u64 qpx_mor;
+	u64 qpx_cor;
+	u64 qpx_sqsize;
+	u64 qpx_erc;
+/* 0x180*/
+	u64 qpx_rnrrc;
+	u64 qpx_ernrwt;
+	u64 qpx_rnrresp;
+	u64 qpx_lmsna;
+/* 0x1a0 */
+	u64 qpx_sqhpc;
+	u64 qpx_sqcptp;
+	u64 qpx_sigt;
+	u64 qpx_wqecnt;
+/* 0x1c0*/
+	u64 qpx_rqhp;
+	u64 qpx_rqptp;
+	u64 qpx_rqsize;
+	u64 qpx_nrr;
+/* 0x1e0*/
+	u64 qpx_rdmac;
+	u64 qpx_nrpsn;
+	u64 qpx_lapsn;
+	u64 qpx_lcr;
+/* 0x200*/
+	u64 qpx_rwc;
+	u64 qpx_rwva;
+	u64 qpx_rdsi;
+	u64 qpx_rdsbc;
+/* 0x220*/
+	u64 qpx_rqwsize;
+	u64 qpx_crmsn;
+	u64 qpx_rdd;
+	u64 qpx_larpsn;
+/* 0x240*/
+	u64 qpx_pd;
+	u64 qpx_scqn;
+	u64 qpx_rcqn;
+	u64 qpx_aeqn;
+/* 0x260*/
+	u64 qpx_aaelog;
+	u64 qpx_ram;
+	u64 qpx_rdmaqe0;
+	u64 qpx_rdmaqe1;
+/* 0x280*/
+	u64 qpx_rdmaqe2;
+	u64 qpx_rdmaqe3;
+	u64 qpx_nrpsnhwm;
+/* 0x298*/
+	u64 reserved[(0x400 - 0x298) / 8];
+/* 0x400 extended data */
+	u64 reserved_ext[(0x500 - 0x400) / 8];
+/* 0x500 */
+	u64 reserved2[(0x1000 - 0x500) / 8];
+/* 0x1000      */
+};
+
+#define QPX_SQADDER EHCA_BMASK_IBM(48,63)
+#define QPX_RQADDER EHCA_BMASK_IBM(48,63)
+
+#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x)
+
+/* MRMWPT Entry Memory Map */
+struct hipz_mrmwmm {
+	/* 0x00 */
+	u64 mrx_hcr;
+
+	u64 mrx_c;
+	u64 mrx_herr;
+	u64 mrx_aer;
+	/* 0x20 */
+	u64 mrx_pp;
+	u64 reserved1;
+	u64 reserved2;
+	u64 reserved3;
+	/* 0x40 */
+	u64 reserved4[(0x200 - 0x40) / 8];
+	/* 0x200 */
+	u64 mrx_ctl[64];
+
+};
+
+#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm,x)
+
+struct hipz_qpedmm {
+	/* 0x00 */
+	u64 reserved0[(0x400) / 8];
+	/* 0x400 */
+	u64 qpedx_phh;
+	u64 qpedx_ppsgp;
+	/* 0x410 */
+	u64 qpedx_ppsgu;
+	u64 qpedx_ppdgp;
+	/* 0x420 */
+	u64 qpedx_ppdgu;
+	u64 qpedx_aph;
+	/* 0x430 */
+	u64 qpedx_apsgp;
+	u64 qpedx_apsgu;
+	/* 0x440 */
+	u64 qpedx_apdgp;
+	u64 qpedx_apdgu;
+	/* 0x450 */
+	u64 qpedx_apav;
+	u64 qpedx_apsav;
+	/* 0x460  */
+	u64 qpedx_hcr;
+	u64 reserved1[4];
+	/* 0x488 */
+	u64 qpedx_rrl0;
+	/* 0x490 */
+	u64 qpedx_rrrkey0;
+	u64 qpedx_rrva0;
+	/* 0x4a0 */
+	u64 reserved2;
+	u64 qpedx_rrl1;
+	/* 0x4b0 */
+	u64 qpedx_rrrkey1;
+	u64 qpedx_rrva1;
+	/* 0x4c0 */
+	u64 reserved3;
+	u64 qpedx_rrl2;
+	/* 0x4d0 */
+	u64 qpedx_rrrkey2;
+	u64 qpedx_rrva2;
+	/* 0x4e0 */
+	u64 reserved4;
+	u64 qpedx_rrl3;
+	/* 0x4f0 */
+	u64 qpedx_rrrkey3;
+	u64 qpedx_rrva3;
+};
+
+#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm,x)
+
+/* CQ Table Entry Memory Map */
+struct hipz_cqtemm {
+	u64 cqx_hcr;
+	u64 cqx_c;
+	u64 cqx_herr;
+	u64 cqx_aer;
+/* 0x20  */
+	u64 cqx_ptp;
+	u64 cqx_tp;
+	u64 cqx_fec;
+	u64 cqx_feca;
+/* 0x40  */
+	u64 cqx_ep;
+	u64 cqx_eq;
+/* 0x50  */
+	u64 reserved1;
+	u64 cqx_n0;
+/* 0x60  */
+	u64 cqx_n1;
+	u64 reserved2[(0x1000 - 0x60) / 8];
+/* 0x1000 */
+};
+
+#define CQX_FEC_CQE_CNT           EHCA_BMASK_IBM(32,63)
+#define CQX_FECADDER              EHCA_BMASK_IBM(32,63)
+#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0,0)
+#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0,0)
+
+#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm,x)
+
+/* EQ Table Entry Memory Map */
+struct hipz_eqtemm {
+	u64 eqx_hcr;
+	u64 eqx_c;
+
+	u64 eqx_herr;
+	u64 eqx_aer;
+/* 0x20 */
+	u64 eqx_ptp;
+	u64 eqx_tp;
+	u64 eqx_ssba;
+	u64 eqx_psba;
+
+/* 0x40 */
+	u64 eqx_cec;
+	u64 eqx_meql;
+	u64 eqx_xisbi;
+	u64 eqx_xisc;
+/* 0x60 */
+	u64 eqx_it;
+
+};
+
+#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm,x)
+
+/* access control defines for MR/MW */
+#define HIPZ_ACCESSCTRL_L_WRITE  0x00800000
+#define HIPZ_ACCESSCTRL_R_WRITE  0x00400000
+#define HIPZ_ACCESSCTRL_R_READ   0x00200000
+#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000
+#define HIPZ_ACCESSCTRL_MW_BIND  0x00080000
+
+/* query hca response block */
+struct hipz_query_hca {
+	u32 cur_reliable_dg;
+	u32 cur_qp;
+	u32 cur_cq;
+	u32 cur_eq;
+	u32 cur_mr;
+	u32 cur_mw;
+	u32 cur_ee_context;
+	u32 cur_mcast_grp;
+	u32 cur_qp_attached_mcast_grp;
+	u32 reserved1;
+	u32 cur_ipv6_qp;
+	u32 cur_eth_qp;
+	u32 cur_hp_mr;
+	u32 reserved2[3];
+	u32 max_rd_domain;
+	u32 max_qp;
+	u32 max_cq;
+	u32 max_eq;
+	u32 max_mr;
+	u32 max_hp_mr;
+	u32 max_mw;
+	u32 max_mrwpte;
+	u32 max_special_mrwpte;
+	u32 max_rd_ee_context;
+	u32 max_mcast_grp;
+	u32 max_total_mcast_qp_attach;
+	u32 max_mcast_qp_attach;
+	u32 max_raw_ipv6_qp;
+	u32 max_raw_ethy_qp;
+	u32 internal_clock_frequency;
+	u32 max_pd;
+	u32 max_ah;
+	u32 max_cqe;
+	u32 max_wqes_wq;
+	u32 max_partitions;
+	u32 max_rr_ee_context;
+	u32 max_rr_qp;
+	u32 max_rr_hca;
+	u32 max_act_wqs_ee_context;
+	u32 max_act_wqs_qp;
+	u32 max_sge;
+	u32 max_sge_rd;
+	u32 memory_page_size_supported;
+	u64 max_mr_size;
+	u32 local_ca_ack_delay;
+	u32 num_ports;
+	u32 vendor_id;
+	u32 vendor_part_id;
+	u32 hw_ver;
+	u64 node_guid;
+	u64 hca_cap_indicators;
+	u32 data_counter_register_size;
+	u32 max_shared_rq;
+	u32 max_isns_eq;
+	u32 max_neq;
+} __attribute__ ((packed));
+
+/* query port response block */
+struct hipz_query_port {
+	u32 state;
+	u32 bad_pkey_cntr;
+	u32 lmc;
+	u32 lid;
+	u32 subnet_timeout;
+	u32 qkey_viol_cntr;
+	u32 sm_sl;
+	u32 sm_lid;
+	u32 capability_mask;
+	u32 init_type_reply;
+	u32 pkey_tbl_len;
+	u32 gid_tbl_len;
+	u64 gid_prefix;
+	u32 port_nr;
+	u16 pkey_entries[16];
+	u8  reserved1[32];
+	u32 trent_size;
+	u32 trbuf_size;
+	u64 max_msg_sz;
+	u32 max_mtu;
+	u32 vl_cap;
+	u8  reserved2[1900];
+	u64 guid_entries[255];
+} __attribute__ ((packed));
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
new file mode 100644
index 00000000000..e028ff1588c
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -0,0 +1,149 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  internal queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_tools.h"
+#include "ipz_pt_fn.h"
+
+void *ipz_qpageit_get_inc(struct ipz_queue *queue)
+{
+	void *ret = ipz_qeit_get(queue);
+	queue->current_q_offset += queue->pagesize;
+	if (queue->current_q_offset > queue->queue_length) {
+		queue->current_q_offset -= queue->pagesize;
+		ret = NULL;
+	}
+	if (((u64)ret) % EHCA_PAGESIZE) {
+		ehca_gen_err("ERROR!! not at PAGE-Boundary");
+		return NULL;
+	}
+	return ret;
+}
+
+void *ipz_qeit_eq_get_inc(struct ipz_queue *queue)
+{
+	void *ret = ipz_qeit_get(queue);
+	u64 last_entry_in_q = queue->queue_length - queue->qe_size;
+
+	queue->current_q_offset += queue->qe_size;
+	if (queue->current_q_offset > last_entry_in_q) {
+		queue->current_q_offset = 0;
+		queue->toggle_state = (~queue->toggle_state) & 1;
+	}
+
+	return ret;
+}
+
+int ipz_queue_ctor(struct ipz_queue *queue,
+		   const u32 nr_of_pages,
+		   const u32 pagesize, const u32 qe_size, const u32 nr_of_sg)
+{
+	int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT;
+	int f;
+
+	if (pagesize > PAGE_SIZE) {
+		ehca_gen_err("FATAL ERROR: pagesize=%x is greater "
+			     "than kernel page size", pagesize);
+		return 0;
+	}
+	if (!pages_per_kpage) {
+		ehca_gen_err("FATAL ERROR: invalid kernel page size. "
+			     "pages_per_kpage=%x", pages_per_kpage);
+		return 0;
+	}
+	queue->queue_length = nr_of_pages * pagesize;
+	queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
+	if (!queue->queue_pages) {
+		ehca_gen_err("ERROR!! didn't get the memory");
+		return 0;
+	}
+	memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
+	/*
+	 * allocate pages for queue:
+	 * outer loop allocates whole kernel pages (page aligned) and
+	 * inner loop divides a kernel page into smaller hca queue pages
+	 */
+	f = 0;
+	while (f < nr_of_pages) {
+		u8 *kpage = (u8*)get_zeroed_page(GFP_KERNEL);
+		int k;
+		if (!kpage)
+			goto ipz_queue_ctor_exit0; /*NOMEM*/
+		for (k = 0; k < pages_per_kpage && f < nr_of_pages; k++) {
+			(queue->queue_pages)[f] = (struct ipz_page *)kpage;
+			kpage += EHCA_PAGESIZE;
+			f++;
+		}
+	}
+
+	queue->current_q_offset = 0;
+	queue->qe_size = qe_size;
+	queue->act_nr_of_sg = nr_of_sg;
+	queue->pagesize = pagesize;
+	queue->toggle_state = 1;
+	return 1;
+
+ ipz_queue_ctor_exit0:
+	ehca_gen_err("Couldn't get alloc pages queue=%p f=%x nr_of_pages=%x",
+		     queue, f, nr_of_pages);
+	for (f = 0; f < nr_of_pages; f += pages_per_kpage) {
+		if (!(queue->queue_pages)[f])
+			break;
+		free_page((unsigned long)(queue->queue_pages)[f]);
+	}
+	return 0;
+}
+
+int ipz_queue_dtor(struct ipz_queue *queue)
+{
+	int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT;
+	int g;
+	int nr_pages;
+
+	if (!queue || !queue->queue_pages) {
+		ehca_gen_dbg("queue or queue_pages is NULL");
+		return 0;
+	}
+	nr_pages = queue->queue_length / queue->pagesize;
+	for (g = 0; g < nr_pages; g += pages_per_kpage)
+		free_page((unsigned long)(queue->queue_pages)[g]);
+	vfree(queue->queue_pages);
+
+	return 1;
+}
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
new file mode 100644
index 00000000000..2f13509d525
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@@ -0,0 +1,247 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  internal queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __IPZ_PT_FN_H__
+#define __IPZ_PT_FN_H__
+
+#define EHCA_PAGESHIFT   12
+#define EHCA_PAGESIZE   4096UL
+#define EHCA_PAGEMASK   (~(EHCA_PAGESIZE-1))
+#define EHCA_PT_ENTRIES 512UL
+
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+
+/* struct generic ehca page */
+struct ipz_page {
+	u8 entries[EHCA_PAGESIZE];
+};
+
+/* struct generic queue in linux kernel virtual memory (kv) */
+struct ipz_queue {
+	u64 current_q_offset;	/* current queue entry */
+
+	struct ipz_page **queue_pages;	/* array of pages belonging to queue */
+	u32 qe_size;		/* queue entry size */
+	u32 act_nr_of_sg;
+	u32 queue_length;	/* queue length allocated in bytes */
+	u32 pagesize;
+	u32 toggle_state;	/* toggle flag - per page */
+	u32 dummy3;		/* 64 bit alignment */
+};
+
+/*
+ * return current Queue Entry for a certain q_offset
+ * returns address (kv) of Queue Entry
+ */
+static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset)
+{
+	struct ipz_page *current_page;
+	if (q_offset >= queue->queue_length)
+		return NULL;
+	current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT];
+	return  &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
+}
+
+/*
+ * return current Queue Entry
+ * returns address (kv) of Queue Entry
+ */
+static inline void *ipz_qeit_get(struct ipz_queue *queue)
+{
+	return ipz_qeit_calc(queue, queue->current_q_offset);
+}
+
+/*
+ * return current Queue Page , increment Queue Page iterator from
+ * page to page in struct ipz_queue, last increment will return 0! and
+ * NOT wrap
+ * returns address (kv) of Queue Page
+ * warning don't use in parallel with ipz_QE_get_inc()
+ */
+void *ipz_qpageit_get_inc(struct ipz_queue *queue);
+
+/*
+ * return current Queue Entry, increment Queue Entry iterator by one
+ * step in struct ipz_queue, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ */
+static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
+{
+	void *ret = ipz_qeit_get(queue);
+	queue->current_q_offset += queue->qe_size;
+	if (queue->current_q_offset >= queue->queue_length) {
+		queue->current_q_offset = 0;
+		/* toggle the valid flag */
+		queue->toggle_state = (~queue->toggle_state) & 1;
+	}
+
+	return ret;
+}
+
+/*
+ * return current Queue Entry, increment Queue Entry iterator by one
+ * step in struct ipz_queue, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * returns 0 and does not increment, if wrong valid state
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ */
+static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
+{
+	struct ehca_cqe *cqe = ipz_qeit_get(queue);
+	u32 cqe_flags = cqe->cqe_flags;
+
+	if ((cqe_flags >> 7) != (queue->toggle_state & 1))
+		return NULL;
+
+	ipz_qeit_get_inc(queue);
+	return cqe;
+}
+
+/*
+ * returns and resets Queue Entry iterator
+ * returns address (kv) of first Queue Entry
+ */
+static inline void *ipz_qeit_reset(struct ipz_queue *queue)
+{
+	queue->current_q_offset = 0;
+	return ipz_qeit_get(queue);
+}
+
+/* struct generic page table */
+struct ipz_pt {
+	u64 entries[EHCA_PT_ENTRIES];
+};
+
+/* struct page table for a queue, only to be used in pf */
+struct ipz_qpt {
+	/* queue page tables (kv), use u64 because we know the element length */
+	u64 *qpts;
+	u32 n_qpts;
+	u32 n_ptes;       /*  number of page table entries */
+	u64 *current_pte_addr;
+};
+
+/*
+ * constructor for a ipz_queue_t, placement new for ipz_queue_t,
+ * new for all dependent datastructors
+ * all QP Tables are the same
+ * flow:
+ *    allocate+pin queue
+ * see ipz_qpt_ctor()
+ * returns true if ok, false if out of memory
+ */
+int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages,
+		   const u32 pagesize, const u32 qe_size,
+		   const u32 nr_of_sg);
+
+/*
+ * destructor for a ipz_queue_t
+ *  -# free queue
+ *  see ipz_queue_ctor()
+ *  returns true if ok, false if queue was NULL-ptr of free failed
+ */
+int ipz_queue_dtor(struct ipz_queue *queue);
+
+/*
+ * constructor for a ipz_qpt_t,
+ * placement new for struct ipz_queue, new for all dependent datastructors
+ * all QP Tables are the same,
+ * flow:
+ * -# allocate+pin queue
+ * -# initialise ptcb
+ * -# allocate+pin PTs
+ * -# link PTs to a ring, according to HCA Arch, set bit62 id needed
+ * -# the ring must have room for exactly nr_of_PTEs
+ * see ipz_qpt_ctor()
+ */
+void ipz_qpt_ctor(struct ipz_qpt *qpt,
+		  const u32 nr_of_qes,
+		  const u32 pagesize,
+		  const u32 qe_size,
+		  const u8 lowbyte, const u8 toggle,
+		  u32 * act_nr_of_QEs, u32 * act_nr_of_pages);
+
+/*
+ * return current Queue Entry, increment Queue Entry iterator by one
+ * step in struct ipz_queue, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ * fix EQ page problems
+ */
+void *ipz_qeit_eq_get_inc(struct ipz_queue *queue);
+
+/*
+ * return current Event Queue Entry, increment Queue Entry iterator
+ * by one step in struct ipz_queue if valid, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * returns 0 and does not increment, if wrong valid state
+ * warning don't use in parallel with ipz_queue_QPageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ */
+static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
+{
+	void *ret = ipz_qeit_get(queue);
+	u32 qe = *(u8 *) ret;
+	if ((qe >> 7) != (queue->toggle_state & 1))
+		return NULL;
+	ipz_qeit_eq_get_inc(queue); /* this is a good one */
+	return ret;
+}
+
+/* returns address (GX) of first queue entry */
+static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
+{
+	return be64_to_cpu(qpt->qpts[0]);
+}
+
+/* returns address (kv) of first page of queue page table */
+static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt)
+{
+	return qpt->qpts;
+}
+
+#endif				/* __IPZ_PT_FN_H__ */
diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig
index 1db9489f1e8..574a678e7fd 100644
--- a/drivers/infiniband/hw/ipath/Kconfig
+++ b/drivers/infiniband/hw/ipath/Kconfig
@@ -1,16 +1,9 @@
-config IPATH_CORE
-	tristate "QLogic InfiniPath Driver"
-	depends on 64BIT && PCI_MSI && NET
-	---help---
-	This is a low-level driver for QLogic InfiniPath host channel
-	adapters (HCAs) based on the HT-400 and PE-800 chips.
-
 config INFINIBAND_IPATH
-	tristate "QLogic InfiniPath Verbs Driver"
-	depends on IPATH_CORE && INFINIBAND
+	tristate "QLogic InfiniPath Driver"
+	depends on PCI_MSI && 64BIT && INFINIBAND
 	---help---
-	This is a driver that provides InfiniBand verbs support for
-	QLogic InfiniPath host channel adapters (HCAs).  This
-	allows these devices to be used with both kernel upper level
-	protocols such as IP-over-InfiniBand as well as with userspace
-	applications (in conjunction with InfiniBand userspace access).
+	This is a driver for QLogic InfiniPath host channel adapters,
+	including InfiniBand verbs support.  This driver allows these
+	devices to be used with both kernel upper level protocols such
+	as IP-over-InfiniBand as well as with userspace applications
+	(in conjunction with InfiniBand userspace access).
diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile
index b0bf7286413..5e29cb0095e 100644
--- a/drivers/infiniband/hw/ipath/Makefile
+++ b/drivers/infiniband/hw/ipath/Makefile
@@ -1,36 +1,35 @@
 EXTRA_CFLAGS += -DIPATH_IDSTR='"QLogic kernel.org driver"' \
 	-DIPATH_KERN_TYPE=0
 
-obj-$(CONFIG_IPATH_CORE) += ipath_core.o
 obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
 
-ipath_core-y := \
+ib_ipath-y := \
+	ipath_cq.o \
 	ipath_diag.o \
 	ipath_driver.o \
 	ipath_eeprom.o \
 	ipath_file_ops.o \
 	ipath_fs.o \
-	ipath_ht400.o \
+	ipath_iba6110.o \
+	ipath_iba6120.o \
 	ipath_init_chip.o \
 	ipath_intr.o \
-	ipath_layer.o \
-	ipath_pe800.o \
-	ipath_stats.o \
-	ipath_sysfs.o \
-	ipath_user_pages.o
-
-ipath_core-$(CONFIG_X86_64) += ipath_wc_x86_64.o
-
-ib_ipath-y := \
-	ipath_cq.o \
 	ipath_keys.o \
+	ipath_layer.o \
 	ipath_mad.o \
+	ipath_mmap.o \
 	ipath_mr.o \
 	ipath_qp.o \
 	ipath_rc.o \
 	ipath_ruc.o \
 	ipath_srq.o \
+	ipath_stats.o \
+	ipath_sysfs.o \
 	ipath_uc.o \
 	ipath_ud.o \
-	ipath_verbs.o \
-	ipath_verbs_mcast.o
+	ipath_user_pages.o \
+	ipath_verbs_mcast.o \
+	ipath_verbs.o
+
+ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
+ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 062bd392e7e..f577905e3ac 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -106,9 +106,9 @@ struct infinipath_stats {
 	__u64 sps_ether_spkts;
 	/* number of "ethernet" packets received by driver */
 	__u64 sps_ether_rpkts;
-	/* number of SMA packets sent by driver */
+	/* number of SMA packets sent by driver. Obsolete. */
 	__u64 sps_sma_spkts;
-	/* number of SMA packets received by driver */
+	/* number of SMA packets received by driver. Obsolete. */
 	__u64 sps_sma_rpkts;
 	/* number of times all ports rcvhdrq was full and packet dropped */
 	__u64 sps_hdrqfull;
@@ -138,7 +138,7 @@ struct infinipath_stats {
 	__u64 sps_pageunlocks;
 	/*
 	 * Number of packets dropped in kernel other than errors (ether
-	 * packets if ipath not configured, sma/mad, etc.)
+	 * packets if ipath not configured, etc.)
 	 */
 	__u64 sps_krdrops;
 	/* pad for future growth */
@@ -153,8 +153,6 @@ struct infinipath_stats {
 #define IPATH_STATUS_DISABLED      0x2	/* hardware disabled */
 /* Device has been disabled via admin request */
 #define IPATH_STATUS_ADMIN_DISABLED    0x4
-#define IPATH_STATUS_OIB_SMA       0x8	/* ipath_mad kernel SMA running */
-#define IPATH_STATUS_SMA          0x10	/* user SMA running */
 /* Chip has been found and initted */
 #define IPATH_STATUS_CHIP_PRESENT 0x20
 /* IB link is at ACTIVE, usable for data traffic */
@@ -465,12 +463,11 @@ struct __ipath_sendpkt {
 	struct ipath_iovec sps_iov[4];
 };
 
-/* Passed into SMA special file's ->read and ->write methods. */
-struct ipath_sma_pkt
-{
-	__u32 unit;	/* unit on which to send packet */
-	__u64 data;	/* address of payload in userspace */
-	__u32 len;	/* length of payload */
+/* Passed into diag data special file's ->write method. */
+struct ipath_diag_pkt {
+	__u32 unit;
+	__u64 data;
+	__u32 len;
 };
 
 /*
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 3efee341c9b..049221bc590 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -42,20 +42,28 @@
  * @entry: work completion entry to add
  * @sig: true if @entry is a solicitated entry
  *
- * This may be called with one of the qp->s_lock or qp->r_rq.lock held.
+ * This may be called with qp->s_lock held.
  */
 void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 {
+	struct ipath_cq_wc *wc = cq->queue;
 	unsigned long flags;
+	u32 head;
 	u32 next;
 
 	spin_lock_irqsave(&cq->lock, flags);
 
-	if (cq->head == cq->ibcq.cqe)
+	/*
+	 * Note that the head pointer might be writable by user processes.
+	 * Take care to verify it is a sane value.
+	 */
+	head = wc->head;
+	if (head >= (unsigned) cq->ibcq.cqe) {
+		head = cq->ibcq.cqe;
 		next = 0;
-	else
-		next = cq->head + 1;
-	if (unlikely(next == cq->tail)) {
+	} else
+		next = head + 1;
+	if (unlikely(next == wc->tail)) {
 		spin_unlock_irqrestore(&cq->lock, flags);
 		if (cq->ibcq.event_handler) {
 			struct ib_event ev;
@@ -67,8 +75,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 		}
 		return;
 	}
-	cq->queue[cq->head] = *entry;
-	cq->head = next;
+	wc->queue[head] = *entry;
+	wc->head = next;
 
 	if (cq->notify == IB_CQ_NEXT_COMP ||
 	    (cq->notify == IB_CQ_SOLICITED && solicited)) {
@@ -101,19 +109,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
 {
 	struct ipath_cq *cq = to_icq(ibcq);
+	struct ipath_cq_wc *wc = cq->queue;
 	unsigned long flags;
 	int npolled;
 
 	spin_lock_irqsave(&cq->lock, flags);
 
 	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-		if (cq->tail == cq->head)
+		if (wc->tail == wc->head)
 			break;
-		*entry = cq->queue[cq->tail];
-		if (cq->tail == cq->ibcq.cqe)
-			cq->tail = 0;
+		*entry = wc->queue[wc->tail];
+		if (wc->tail >= cq->ibcq.cqe)
+			wc->tail = 0;
 		else
-			cq->tail++;
+			wc->tail++;
 	}
 
 	spin_unlock_irqrestore(&cq->lock, flags);
@@ -160,38 +169,74 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
 {
 	struct ipath_ibdev *dev = to_idev(ibdev);
 	struct ipath_cq *cq;
-	struct ib_wc *wc;
+	struct ipath_cq_wc *wc;
 	struct ib_cq *ret;
 
-	if (entries > ib_ipath_max_cqes) {
+	if (entries < 1 || entries > ib_ipath_max_cqes) {
 		ret = ERR_PTR(-EINVAL);
-		goto bail;
+		goto done;
 	}
 
 	if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto done;
 	}
 
-	/*
-	 * Need to use vmalloc() if we want to support large #s of
-	 * entries.
-	 */
+	/* Allocate the completion queue structure. */
 	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
 	if (!cq) {
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto done;
 	}
 
 	/*
-	 * Need to use vmalloc() if we want to support large #s of entries.
+	 * Allocate the completion queue entries and head/tail pointers.
+	 * This is allocated separately so that it can be resized and
+	 * also mapped into user space.
+	 * We need to use vmalloc() in order to support mmap and large
+	 * numbers of entries.
 	 */
-	wc = vmalloc(sizeof(*wc) * (entries + 1));
+	wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries);
 	if (!wc) {
-		kfree(cq);
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto bail_cq;
 	}
+
+	/*
+	 * Return the address of the WC as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		struct ipath_mmap_info *ip;
+		__u64 offset = (__u64) wc;
+		int err;
+
+		err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (err) {
+			ret = ERR_PTR(err);
+			goto bail_wc;
+		}
+
+		/* Allocate info for ipath_mmap(). */
+		ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+		if (!ip) {
+			ret = ERR_PTR(-ENOMEM);
+			goto bail_wc;
+		}
+		cq->ip = ip;
+		ip->context = context;
+		ip->obj = wc;
+		kref_init(&ip->ref);
+		ip->mmap_cnt = 0;
+		ip->size = PAGE_ALIGN(sizeof(*wc) +
+				      sizeof(struct ib_wc) * entries);
+		spin_lock_irq(&dev->pending_lock);
+		ip->next = dev->pending_mmaps;
+		dev->pending_mmaps = ip;
+		spin_unlock_irq(&dev->pending_lock);
+	} else
+		cq->ip = NULL;
+
 	/*
 	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
 	 * The number of entries should be >= the number requested or return
@@ -202,15 +247,22 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
 	cq->triggered = 0;
 	spin_lock_init(&cq->lock);
 	tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
-	cq->head = 0;
-	cq->tail = 0;
+	wc->head = 0;
+	wc->tail = 0;
 	cq->queue = wc;
 
 	ret = &cq->ibcq;
 
 	dev->n_cqs_allocated++;
+	goto done;
 
-bail:
+bail_wc:
+	vfree(wc);
+
+bail_cq:
+	kfree(cq);
+
+done:
 	return ret;
 }
 
@@ -229,7 +281,10 @@ int ipath_destroy_cq(struct ib_cq *ibcq)
 
 	tasklet_kill(&cq->comptask);
 	dev->n_cqs_allocated--;
-	vfree(cq->queue);
+	if (cq->ip)
+		kref_put(&cq->ip->ref, ipath_release_mmap_info);
+	else
+		vfree(cq->queue);
 	kfree(cq);
 
 	return 0;
@@ -253,7 +308,7 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
 	spin_lock_irqsave(&cq->lock, flags);
 	/*
 	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
-	 * any other transitions.
+	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
 	 */
 	if (cq->notify != IB_CQ_NEXT_COMP)
 		cq->notify = notify;
@@ -264,46 +319,86 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
 int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 {
 	struct ipath_cq *cq = to_icq(ibcq);
-	struct ib_wc *wc, *old_wc;
-	u32 n;
+	struct ipath_cq_wc *old_wc = cq->queue;
+	struct ipath_cq_wc *wc;
+	u32 head, tail, n;
 	int ret;
 
+	if (cqe < 1 || cqe > ib_ipath_max_cqes) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
 	/*
 	 * Need to use vmalloc() if we want to support large #s of entries.
 	 */
-	wc = vmalloc(sizeof(*wc) * (cqe + 1));
+	wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe);
 	if (!wc) {
 		ret = -ENOMEM;
 		goto bail;
 	}
 
+	/*
+	 * Return the address of the WC as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		__u64 offset = (__u64) wc;
+
+		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (ret)
+			goto bail;
+	}
+
 	spin_lock_irq(&cq->lock);
-	if (cq->head < cq->tail)
-		n = cq->ibcq.cqe + 1 + cq->head - cq->tail;
+	/*
+	 * Make sure head and tail are sane since they
+	 * might be user writable.
+	 */
+	head = old_wc->head;
+	if (head > (u32) cq->ibcq.cqe)
+		head = (u32) cq->ibcq.cqe;
+	tail = old_wc->tail;
+	if (tail > (u32) cq->ibcq.cqe)
+		tail = (u32) cq->ibcq.cqe;
+	if (head < tail)
+		n = cq->ibcq.cqe + 1 + head - tail;
 	else
-		n = cq->head - cq->tail;
+		n = head - tail;
 	if (unlikely((u32)cqe < n)) {
 		spin_unlock_irq(&cq->lock);
 		vfree(wc);
 		ret = -EOVERFLOW;
 		goto bail;
 	}
-	for (n = 0; cq->tail != cq->head; n++) {
-		wc[n] = cq->queue[cq->tail];
-		if (cq->tail == cq->ibcq.cqe)
-			cq->tail = 0;
+	for (n = 0; tail != head; n++) {
+		wc->queue[n] = old_wc->queue[tail];
+		if (tail == (u32) cq->ibcq.cqe)
+			tail = 0;
 		else
-			cq->tail++;
+			tail++;
 	}
 	cq->ibcq.cqe = cqe;
-	cq->head = n;
-	cq->tail = 0;
-	old_wc = cq->queue;
+	wc->head = n;
+	wc->tail = 0;
 	cq->queue = wc;
 	spin_unlock_irq(&cq->lock);
 
 	vfree(old_wc);
 
+	if (cq->ip) {
+		struct ipath_ibdev *dev = to_idev(ibcq->device);
+		struct ipath_mmap_info *ip = cq->ip;
+
+		ip->obj = wc;
+		ip->size = PAGE_ALIGN(sizeof(*wc) +
+				      sizeof(struct ib_wc) * cqe);
+		spin_lock_irq(&dev->pending_lock);
+		ip->next = dev->pending_mmaps;
+		dev->pending_mmaps = ip;
+		spin_unlock_irq(&dev->pending_lock);
+	}
+
 	ret = 0;
 
 bail:
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index f415beda0d3..df69f0d80b8 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -60,7 +60,6 @@
 #define __IPATH_USER_SEND   0x1000	/* use user mode send */
 #define __IPATH_KERNEL_SEND 0x2000	/* use kernel mode send */
 #define __IPATH_EPKTDBG     0x4000	/* print ethernet packet data */
-#define __IPATH_SMADBG      0x8000	/* sma packet debug */
 #define __IPATH_IPATHDBG    0x10000	/* Ethernet (IPATH) gen debug */
 #define __IPATH_IPATHWARN   0x20000	/* Ethernet (IPATH) warnings */
 #define __IPATH_IPATHERR    0x40000	/* Ethernet (IPATH) errors */
@@ -84,7 +83,6 @@
 /* print mmap/nopage stuff, not using VDBG any more */
 #define __IPATH_MMDBG     0x0
 #define __IPATH_EPKTDBG   0x0	/* print ethernet packet data */
-#define __IPATH_SMADBG    0x0   /* process startup (init)/exit messages */
 #define __IPATH_IPATHDBG  0x0	/* Ethernet (IPATH) table dump on */
 #define __IPATH_IPATHWARN 0x0	/* Ethernet (IPATH) warnings on   */
 #define __IPATH_IPATHERR  0x0	/* Ethernet (IPATH) errors on   */
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 147dd89e21c..28b6b46c106 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -41,11 +41,11 @@
  * through the /sys/bus/pci resource mmap interface.
  */
 
+#include <linux/io.h>
 #include <linux/pci.h>
 #include <asm/uaccess.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
 #include "ipath_common.h"
 
 int ipath_diag_inuse;
@@ -274,6 +274,158 @@ bail:
 	return ret;
 }
 
+static ssize_t ipath_diagpkt_write(struct file *fp,
+				   const char __user *data,
+				   size_t count, loff_t *off);
+
+static struct file_operations diagpkt_file_ops = {
+	.owner = THIS_MODULE,
+	.write = ipath_diagpkt_write,
+};
+
+static struct cdev *diagpkt_cdev;
+static struct class_device *diagpkt_class_dev;
+
+int __init ipath_diagpkt_add(void)
+{
+	return ipath_cdev_init(IPATH_DIAGPKT_MINOR,
+			       "ipath_diagpkt", &diagpkt_file_ops,
+			       &diagpkt_cdev, &diagpkt_class_dev);
+}
+
+void __exit ipath_diagpkt_remove(void)
+{
+	ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_class_dev);
+}
+
+/**
+ * ipath_diagpkt_write - write an IB packet
+ * @fp: the diag data device file pointer
+ * @data: ipath_diag_pkt structure saying where to get the packet
+ * @count: size of data to write
+ * @off: unused by this code
+ */
+static ssize_t ipath_diagpkt_write(struct file *fp,
+				   const char __user *data,
+				   size_t count, loff_t *off)
+{
+	u32 __iomem *piobuf;
+	u32 plen, clen, pbufn;
+	struct ipath_diag_pkt dp;
+	u32 *tmpbuf = NULL;
+	struct ipath_devdata *dd;
+	ssize_t ret = 0;
+	u64 val;
+
+	if (count < sizeof(dp)) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	if (copy_from_user(&dp, data, sizeof(dp))) {
+		ret = -EFAULT;
+		goto bail;
+	}
+
+	/* send count must be an exact number of dwords */
+	if (dp.len & 3) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	clen = dp.len >> 2;
+
+	dd = ipath_lookup(dp.unit);
+	if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
+	    !dd->ipath_kregbase) {
+		ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n",
+			   dp.unit);
+		ret = -ENODEV;
+		goto bail;
+	}
+
+	if (ipath_diag_inuse && !diag_set_link &&
+	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
+		diag_set_link = 1;
+		ipath_cdbg(VERBOSE, "Trying to set to set link active for "
+			   "diag pkt\n");
+		ipath_set_linkstate(dd, IPATH_IB_LINKARM);
+		ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
+	}
+
+	if (!(dd->ipath_flags & IPATH_INITTED)) {
+		/* no hardware, freeze, etc. */
+		ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit);
+		ret = -ENODEV;
+		goto bail;
+	}
+	val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
+	if (val != IPATH_IBSTATE_INIT && val != IPATH_IBSTATE_ARM &&
+	    val != IPATH_IBSTATE_ACTIVE) {
+		ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
+			   dd->ipath_unit, (unsigned long long) val);
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	/* need total length before first word written */
+	/* +1 word is for the qword padding */
+	plen = sizeof(u32) + dp.len;
+
+	if ((plen + 4) > dd->ipath_ibmaxlen) {
+		ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
+			  plen - 4, dd->ipath_ibmaxlen);
+		ret = -EINVAL;
+		goto bail;	/* before writing pbc */
+	}
+	tmpbuf = vmalloc(plen);
+	if (!tmpbuf) {
+		dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
+			 "failing\n");
+		ret = -ENOMEM;
+		goto bail;
+	}
+
+	if (copy_from_user(tmpbuf,
+			   (const void __user *) (unsigned long) dp.data,
+			   dp.len)) {
+		ret = -EFAULT;
+		goto bail;
+	}
+
+	piobuf = ipath_getpiobuf(dd, &pbufn);
+	if (!piobuf) {
+		ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n",
+			   dd->ipath_unit);
+		ret = -EBUSY;
+		goto bail;
+	}
+
+	plen >>= 2;		/* in dwords */
+
+	if (ipath_debug & __IPATH_PKTDBG)
+		ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
+			   dd->ipath_unit, plen - 1, pbufn);
+
+	/* we have to flush after the PBC for correctness on some cpus
+	 * or WC buffer can be written out of order */
+	writeq(plen, piobuf);
+	ipath_flush_wc();
+	/* copy all by the trigger word, then flush, so it's written
+	 * to chip before trigger word, then write trigger word, then
+	 * flush again, so packet is sent. */
+	__iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
+	ipath_flush_wc();
+	__raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+	ipath_flush_wc();
+
+	ret = sizeof(dp);
+
+bail:
+	vfree(tmpbuf);
+	return ret;
+}
+
 static int ipath_diag_release(struct inode *in, struct file *fp)
 {
 	mutex_lock(&ipath_mutex);
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index f98518d912b..2108466c7e3 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -39,7 +39,7 @@
 #include <linux/vmalloc.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
+#include "ipath_verbs.h"
 #include "ipath_common.h"
 
 static void ipath_update_pio_bufs(struct ipath_devdata *);
@@ -51,8 +51,6 @@ const char *ipath_get_unit_name(int unit)
 	return iname;
 }
 
-EXPORT_SYMBOL_GPL(ipath_get_unit_name);
-
 #define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
 #define PFX IPATH_DRV_NAME ": "
 
@@ -60,13 +58,13 @@ EXPORT_SYMBOL_GPL(ipath_get_unit_name);
  * The size has to be longer than this string, so we can append
  * board/chip information to it in the init code.
  */
-const char ipath_core_version[] = IPATH_IDSTR "\n";
+const char ib_ipath_version[] = IPATH_IDSTR "\n";
 
 static struct idr unit_table;
 DEFINE_SPINLOCK(ipath_devs_lock);
 LIST_HEAD(ipath_dev_list);
 
-wait_queue_head_t ipath_sma_state_wait;
+wait_queue_head_t ipath_state_wait;
 
 unsigned ipath_debug = __IPATH_INFO;
 
@@ -403,10 +401,10 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 	/* setup the chip-specific functions, as early as possible. */
 	switch (ent->device) {
 	case PCI_DEVICE_ID_INFINIPATH_HT:
-		ipath_init_ht400_funcs(dd);
+		ipath_init_iba6110_funcs(dd);
 		break;
 	case PCI_DEVICE_ID_INFINIPATH_PE800:
-		ipath_init_pe800_funcs(dd);
+		ipath_init_iba6120_funcs(dd);
 		break;
 	default:
 		ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
@@ -440,7 +438,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 	}
 	dd->ipath_pcirev = rev;
 
+#if defined(__powerpc__)
+	/* There isn't a generic way to specify writethrough mappings */
+	dd->ipath_kregbase = __ioremap(addr, len,
+		(_PAGE_NO_CACHE|_PAGE_WRITETHRU));
+#else
 	dd->ipath_kregbase = ioremap_nocache(addr, len);
+#endif
 
 	if (!dd->ipath_kregbase) {
 		ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
@@ -503,7 +507,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 	ipathfs_add_device(dd);
 	ipath_user_add(dd);
 	ipath_diag_add(dd);
-	ipath_layer_add(dd);
+	ipath_register_ib_device(dd);
 
 	goto bail;
 
@@ -532,7 +536,7 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
 		return;
 
 	dd = pci_get_drvdata(pdev);
-	ipath_layer_remove(dd);
+	ipath_unregister_ib_device(dd->verbs_dev);
 	ipath_diag_remove(dd);
 	ipath_user_remove(dd);
 	ipathfs_remove_device(dd);
@@ -607,21 +611,23 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
  *
  * wait up to msecs milliseconds for IB link state change to occur for
  * now, take the easy polling route.  Currently used only by
- * ipath_layer_set_linkstate.  Returns 0 if state reached, otherwise
+ * ipath_set_linkstate.  Returns 0 if state reached, otherwise
  * -ETIMEDOUT state can have multiple states set, for any of several
  * transitions.
  */
-int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
+static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
+				int msecs)
 {
-	dd->ipath_sma_state_wanted = state;
-	wait_event_interruptible_timeout(ipath_sma_state_wait,
+	dd->ipath_state_wanted = state;
+	wait_event_interruptible_timeout(ipath_state_wait,
 					 (dd->ipath_flags & state),
 					 msecs_to_jiffies(msecs));
-	dd->ipath_sma_state_wanted = 0;
+	dd->ipath_state_wanted = 0;
 
 	if (!(dd->ipath_flags & state)) {
 		u64 val;
-		ipath_cdbg(SMA, "Didn't reach linkstate %s within %u ms\n",
+		ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
+			   " ms\n",
 			   /* test INIT ahead of DOWN, both can be set */
 			   (state & IPATH_LINKINIT) ? "INIT" :
 			   ((state & IPATH_LINKDOWN) ? "DOWN" :
@@ -807,58 +813,6 @@ bail:
 	return skb;
 }
 
-/**
- * ipath_rcv_layer - receive a packet for the layered (ethernet) driver
- * @dd: the infinipath device
- * @etail: the sk_buff number
- * @tlen: the total packet length
- * @hdr: the ethernet header
- *
- * Separate routine for better overall optimization
- */
-static void ipath_rcv_layer(struct ipath_devdata *dd, u32 etail,
-			    u32 tlen, struct ether_header *hdr)
-{
-	u32 elen;
-	u8 pad, *bthbytes;
-	struct sk_buff *skb, *nskb;
-
-	if (dd->ipath_port0_skbs &&
-			hdr->sub_opcode == IPATH_ITH4X_OPCODE_ENCAP) {
-		/*
-		 * Allocate a new sk_buff to replace the one we give
-		 * to the network stack.
-		 */
-		nskb = ipath_alloc_skb(dd, GFP_ATOMIC);
-		if (!nskb) {
-			/* count OK packets that we drop */
-			ipath_stats.sps_krdrops++;
-			return;
-		}
-
-		bthbytes = (u8 *) hdr->bth;
-		pad = (bthbytes[1] >> 4) & 3;
-		/* +CRC32 */
-		elen = tlen - (sizeof(*hdr) + pad + sizeof(u32));
-
-		skb = dd->ipath_port0_skbs[etail];
-		dd->ipath_port0_skbs[etail] = nskb;
-		skb_put(skb, elen);
-
-		dd->ipath_f_put_tid(dd, etail + (u64 __iomem *)
-				    ((char __iomem *) dd->ipath_kregbase
-				     + dd->ipath_rcvegrbase), 0,
-				    virt_to_phys(nskb->data));
-
-		__ipath_layer_rcv(dd, hdr, skb);
-
-		/* another ether packet received */
-		ipath_stats.sps_ether_rpkts++;
-	}
-	else if (hdr->sub_opcode == IPATH_ITH4X_OPCODE_LID_ARP)
-		__ipath_layer_rcv_lid(dd, hdr);
-}
-
 static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
 			     u32 eflags,
 			     u32 l,
@@ -972,26 +926,17 @@ reloop:
 		if (unlikely(eflags))
 			ipath_rcv_hdrerr(dd, eflags, l, etail, rc);
 		else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
-				int ret = __ipath_verbs_rcv(dd, rc + 1,
-							    ebuf, tlen);
-				if (ret == -ENODEV)
-					ipath_cdbg(VERBOSE,
-						   "received IB packet, "
-						   "not SMA (QP=%x)\n", qp);
-				if (dd->ipath_lli_counter)
-					dd->ipath_lli_counter--;
-
-		} else if (etype == RCVHQ_RCV_TYPE_EAGER) {
-			if (qp == IPATH_KD_QP &&
-			    bthbytes[0] == ipath_layer_rcv_opcode &&
-			    ebuf)
-				ipath_rcv_layer(dd, etail, tlen,
-						(struct ether_header *)hdr);
-			else
-				ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
-					   "qp=%x), len %x; ignored\n",
-					   etype, bthbytes[0], qp, tlen);
+			ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, tlen);
+			if (dd->ipath_lli_counter)
+				dd->ipath_lli_counter--;
+			ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
+				   "qp=%x), len %x; ignored\n",
+				   etype, bthbytes[0], qp, tlen);
 		}
+		else if (etype == RCVHQ_RCV_TYPE_EAGER)
+			ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
+				   "qp=%x), len %x; ignored\n",
+				   etype, bthbytes[0], qp, tlen);
 		else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
 			ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
 				  be32_to_cpu(hdr->bth[0]) & 0xff);
@@ -1024,7 +969,8 @@ reloop:
 		 */
 		if (l == hdrqtail || (i && !(i&0xf))) {
 			u64 lval;
-			if (l == hdrqtail) /* PE-800 interrupt only on last */
+			if (l == hdrqtail)
+				/* request IBA6120 interrupt only on last */
 				lval = dd->ipath_rhdrhead_intr_off | l;
 			else
 				lval = l;
@@ -1038,7 +984,7 @@ reloop:
 	}
 
 	if (!dd->ipath_rhdrhead_intr_off && !reloop) {
-		/* HT-400 workaround; we can have a race clearing chip
+		/* IBA6110 workaround; we can have a race clearing chip
 		 * interrupt with another interrupt about to be delivered,
 		 * and can clear it before it is delivered on the GPIO
 		 * workaround.  By doing the extra check here for the
@@ -1211,7 +1157,7 @@ int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
  *
  * do appropriate marking as busy, etc.
  * returns buffer number if one found (>=0), negative number is error.
- * Used by ipath_sma_send_pkt and ipath_layer_send
+ * Used by ipath_layer_send
  */
 u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
 {
@@ -1317,13 +1263,6 @@ rescan:
 		goto bail;
 	}
 
-	if (updated)
-		/*
-		 * ran out of bufs, now some (at least this one we just
-		 * got) are now available, so tell the layered driver.
-		 */
-		__ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
-
 	/*
 	 * set next starting place.  Since it's just an optimization,
 	 * it doesn't matter who wins on this, so no locking
@@ -1500,7 +1439,7 @@ int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd)
 	return ret;
 }
 
-void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
+static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
 {
 	static const char *what[4] = {
 		[0] = "DOWN",
@@ -1511,7 +1450,7 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
 	int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) &
 			INFINIPATH_IBCC_LINKCMD_MASK;
 
-	ipath_cdbg(SMA, "Trying to move unit %u to %s, current ltstate "
+	ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate "
 		   "is %s\n", dd->ipath_unit,
 		   what[linkcmd],
 		   ipath_ibcstatus_str[
@@ -1520,7 +1459,7 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
 			    INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
 			   INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
 	/* flush all queued sends when going to DOWN or INIT, to be sure that
-	 * they don't block SMA and other MAD packets */
+	 * they don't block MAD packets */
 	if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) {
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
 				 INFINIPATH_S_ABORT);
@@ -1534,6 +1473,180 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
 			 dd->ipath_ibcctrl | which);
 }
 
+int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
+{
+	u32 lstate;
+	int ret;
+
+	switch (newstate) {
+	case IPATH_IB_LINKDOWN:
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
+				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+		/* don't wait */
+		ret = 0;
+		goto bail;
+
+	case IPATH_IB_LINKDOWN_SLEEP:
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
+				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+		/* don't wait */
+		ret = 0;
+		goto bail;
+
+	case IPATH_IB_LINKDOWN_DISABLE:
+		ipath_set_ib_lstate(dd,
+				    INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
+				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+		/* don't wait */
+		ret = 0;
+		goto bail;
+
+	case IPATH_IB_LINKINIT:
+		if (dd->ipath_flags & IPATH_LINKINIT) {
+			ret = 0;
+			goto bail;
+		}
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
+				    INFINIPATH_IBCC_LINKCMD_SHIFT);
+		lstate = IPATH_LINKINIT;
+		break;
+
+	case IPATH_IB_LINKARM:
+		if (dd->ipath_flags & IPATH_LINKARMED) {
+			ret = 0;
+			goto bail;
+		}
+		if (!(dd->ipath_flags &
+		      (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
+			ret = -EINVAL;
+			goto bail;
+		}
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
+				    INFINIPATH_IBCC_LINKCMD_SHIFT);
+		/*
+		 * Since the port can transition to ACTIVE by receiving
+		 * a non VL 15 packet, wait for either state.
+		 */
+		lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
+		break;
+
+	case IPATH_IB_LINKACTIVE:
+		if (dd->ipath_flags & IPATH_LINKACTIVE) {
+			ret = 0;
+			goto bail;
+		}
+		if (!(dd->ipath_flags & IPATH_LINKARMED)) {
+			ret = -EINVAL;
+			goto bail;
+		}
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
+				    INFINIPATH_IBCC_LINKCMD_SHIFT);
+		lstate = IPATH_LINKACTIVE;
+		break;
+
+	default:
+		ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
+		ret = -EINVAL;
+		goto bail;
+	}
+	ret = ipath_wait_linkstate(dd, lstate, 2000);
+
+bail:
+	return ret;
+}
+
+/**
+ * ipath_set_mtu - set the MTU
+ * @dd: the infinipath device
+ * @arg: the new MTU
+ *
+ * we can handle "any" incoming size, the issue here is whether we
+ * need to restrict our outgoing size.   For now, we don't do any
+ * sanity checking on this, and we don't deal with what happens to
+ * programs that are already running when the size changes.
+ * NOTE: changing the MTU will usually cause the IBC to go back to
+ * link initialize (IPATH_IBSTATE_INIT) state...
+ */
+int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
+{
+	u32 piosize;
+	int changed = 0;
+	int ret;
+
+	/*
+	 * mtu is IB data payload max.  It's the largest power of 2 less
+	 * than piosize (or even larger, since it only really controls the
+	 * largest we can receive; we can send the max of the mtu and
+	 * piosize).  We check that it's one of the valid IB sizes.
+	 */
+	if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
+	    arg != 4096) {
+		ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
+		ret = -EINVAL;
+		goto bail;
+	}
+	if (dd->ipath_ibmtu == arg) {
+		ret = 0;        /* same as current */
+		goto bail;
+	}
+
+	piosize = dd->ipath_ibmaxlen;
+	dd->ipath_ibmtu = arg;
+
+	if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
+		/* Only if it's not the initial value (or reset to it) */
+		if (piosize != dd->ipath_init_ibmaxlen) {
+			dd->ipath_ibmaxlen = piosize;
+			changed = 1;
+		}
+	} else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
+		piosize = arg + IPATH_PIO_MAXIBHDR;
+		ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
+			   "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
+			   arg);
+		dd->ipath_ibmaxlen = piosize;
+		changed = 1;
+	}
+
+	if (changed) {
+		/*
+		 * set the IBC maxpktlength to the size of our pio
+		 * buffers in words
+		 */
+		u64 ibc = dd->ipath_ibcctrl;
+		ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
+			 INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
+
+		piosize = piosize - 2 * sizeof(u32);    /* ignore pbc */
+		dd->ipath_ibmaxlen = piosize;
+		piosize /= sizeof(u32); /* in words */
+		/*
+		 * for ICRC, which we only send in diag test pkt mode, and
+		 * we don't need to worry about that for mtu
+		 */
+		piosize += 1;
+
+		ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
+		dd->ipath_ibcctrl = ibc;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+				 dd->ipath_ibcctrl);
+		dd->ipath_f_tidtemplate(dd);
+	}
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
+{
+	dd->ipath_lid = arg;
+	dd->ipath_lmc = lmc;
+
+	return 0;
+}
+
 /**
  * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
  * @dd: the infinipath device
@@ -1637,13 +1750,6 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
 	ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
 			    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
 
-	/*
-	 * we are shutting down, so tell the layered driver.  We don't do
-	 * this on just a link state change, much like ethernet, a cable
-	 * unplug, etc. doesn't change driver state
-	 */
-	ipath_layer_intr(dd, IPATH_LAYER_INT_IF_DOWN);
-
 	/* disable IBC */
 	dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
@@ -1743,7 +1849,7 @@ static int __init infinipath_init(void)
 {
 	int ret;
 
-	ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ipath_core_version);
+	ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
 
 	/*
 	 * These must be called before the driver is registered with
@@ -1776,8 +1882,18 @@ static int __init infinipath_init(void)
 		goto bail_group;
 	}
 
+	ret = ipath_diagpkt_add();
+	if (ret < 0) {
+		printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
+		       "diag data device: error %d\n", -ret);
+		goto bail_ipathfs;
+	}
+
 	goto bail;
 
+bail_ipathfs:
+	ipath_exit_ipathfs();
+
 bail_group:
 	ipath_driver_remove_group(&ipath_driver.driver);
 
@@ -1888,6 +2004,8 @@ static void __exit infinipath_cleanup(void)
 	struct ipath_devdata *dd, *tmp;
 	unsigned long flags;
 
+	ipath_diagpkt_remove();
+
 	ipath_exit_ipathfs();
 
 	ipath_driver_remove_group(&ipath_driver.driver);
@@ -1998,5 +2116,22 @@ bail:
 	return ret;
 }
 
+int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
+{
+	u64 val;
+	if ( new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK ) {
+		return -1;
+	}
+	if ( dd->ipath_rx_pol_inv != new_pol_inv ) {
+		dd->ipath_rx_pol_inv = new_pol_inv;
+		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+			 INFINIPATH_XGXS_RX_POL_SHIFT);
+		val |= ((u64)dd->ipath_rx_pol_inv) <<
+			INFINIPATH_XGXS_RX_POL_SHIFT;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+	}
+	return 0;
+}
 module_init(infinipath_init);
 module_exit(infinipath_cleanup);
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index bbaa70e57db..29930e22318 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -39,7 +39,6 @@
 #include <asm/pgtable.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
 #include "ipath_common.h"
 
 static int ipath_open(struct inode *, struct file *);
@@ -985,15 +984,17 @@ static int mmap_piobufs(struct vm_area_struct *vma,
 	 * write combining behavior we want on the PIO buffers!
 	 */
 
-	if (vma->vm_flags & VM_READ) {
-		dev_info(&dd->pcidev->dev,
-			 "Can't map piobufs as readable (flags=%lx)\n",
-			 vma->vm_flags);
-		ret = -EPERM;
-		goto bail;
-	}
+#if defined(__powerpc__)
+	/* There isn't a generic way to specify writethrough mappings */
+	pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+	pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
+	pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
+#endif
 
-	/* don't allow them to later change to readable with mprotect */
+	/*
+	 * don't allow them to later change to readable with mprotect (for when
+	 * not initially mapped readable, as is normally the case)
+	 */
 	vma->vm_flags &= ~VM_MAYREAD;
 	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
 
@@ -1109,7 +1110,7 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
 		ret = mmap_rcvegrbufs(vma, pd);
 	else if (pgaddr == (u64) pd->port_rcvhdrq_phys) {
 		/*
-		 * The rcvhdrq itself; readonly except on HT-400 (so have
+		 * The rcvhdrq itself; readonly except on HT (so have
 		 * to allow writable mapping), multiple pages, contiguous
 		 * from an i/o perspective.
 		 */
@@ -1149,6 +1150,7 @@ static unsigned int ipath_poll(struct file *fp,
 	struct ipath_portdata *pd;
 	u32 head, tail;
 	int bit;
+	unsigned pollflag = 0;
 	struct ipath_devdata *dd;
 
 	pd = port_fp(fp);
@@ -1185,9 +1187,12 @@ static unsigned int ipath_poll(struct file *fp,
 			clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
 			pd->port_rcvwait_to++;
 		}
+		else
+			pollflag = POLLIN | POLLRDNORM;
 	}
 	else {
 		/* it's already happened; don't do wait_event overhead */
+		pollflag = POLLIN | POLLRDNORM;
 		pd->port_rcvnowait++;
 	}
 
@@ -1195,7 +1200,7 @@ static unsigned int ipath_poll(struct file *fp,
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 			 dd->ipath_rcvctrl);
 
-	return 0;
+	return pollflag;
 }
 
 static int try_alloc_port(struct ipath_devdata *dd, int port,
@@ -1297,14 +1302,14 @@ static int find_best_unit(struct file *fp)
 	 * This code is present to allow a knowledgeable person to
 	 * specify the layout of processes to processors before opening
 	 * this driver, and then we'll assign the process to the "closest"
-	 * HT-400 to that processor (we assume reasonable connectivity,
+	 * InfiniPath chip to that processor (we assume reasonable connectivity,
 	 * for now).  This code assumes that if affinity has been set
 	 * before this point, that at most one cpu is set; for now this
 	 * is reasonable.  I check for both cpus_empty() and cpus_full(),
 	 * in case some kernel variant sets none of the bits when no
 	 * affinity is set.  2.6.11 and 12 kernels have all present
 	 * cpus set.  Some day we'll have to fix it up further to handle
-	 * a cpu subset.  This algorithm fails for two HT-400's connected
+	 * a cpu subset.  This algorithm fails for two HT chips connected
 	 * in tunnel fashion.  Eventually this needs real topology
 	 * information.  There may be some issues with dual core numbering
 	 * as well.  This needs more work prior to release.
@@ -1815,7 +1820,7 @@ int ipath_user_add(struct ipath_devdata *dd)
 		if (ret < 0) {
 			ipath_dev_err(dd, "Could not create wildcard "
 				      "minor: error %d\n", -ret);
-			goto bail_sma;
+			goto bail_user;
 		}
 
 		atomic_set(&user_setup, 1);
@@ -1831,7 +1836,7 @@ int ipath_user_add(struct ipath_devdata *dd)
 
 	goto bail;
 
-bail_sma:
+bail_user:
 	user_cleanup();
 bail:
 	return ret;
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 0936d8e8d70..a5eb30a06a5 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -191,8 +191,8 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
 	portinfo[4] = (dd->ipath_lid << 16);
 
 	/*
-	 * Notimpl yet SMLID (should we store this in the driver, in case
-	 * SMA dies?)  CapabilityMask is 0, we don't support any of these
+	 * Notimpl yet SMLID.
+	 * CapabilityMask is 0, we don't support any of these
 	 * DiagCode is 0; we don't store any diag info for now Notimpl yet
 	 * M_KeyLeasePeriod (we don't support M_Key)
 	 */
diff --git a/drivers/infiniband/hw/ipath/ipath_ht400.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 3db015da6e7..bf2455a6d56 100644
--- a/drivers/infiniband/hw/ipath/ipath_ht400.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -33,7 +33,7 @@
 
 /*
  * This file contains all of the code that is specific to the InfiniPath
- * HT-400 chip.
+ * HT chip.
  */
 
 #include <linux/pci.h>
@@ -43,7 +43,7 @@
 #include "ipath_registers.h"
 
 /*
- * This lists the InfiniPath HT400 registers, in the actual chip layout.
+ * This lists the InfiniPath registers, in the actual chip layout.
  * This structure should never be directly accessed.
  *
  * The names are in InterCap form because they're taken straight from
@@ -461,8 +461,9 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 			 * times.
 			 */
 			if (dd->ipath_flags & IPATH_INITTED) {
-				ipath_dev_err(dd, "Fatal Error (freeze "
-					      "mode), no longer usable\n");
+				ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+					      "mode), no longer usable, SN %.16s\n",
+						  dd->ipath_serial);
 				isfatal = 1;
 			}
 			*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
@@ -537,7 +538,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 	if (hwerrs & INFINIPATH_HWE_HTCMISCERR7)
 		strlcat(msg, "[HT core Misc7]", msgl);
 	if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
-		strlcat(msg, "[Memory BIST test failed, HT-400 unusable]",
+		strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
 			msgl);
 		/* ignore from now on, so disable until driver reloaded */
 		dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
@@ -553,7 +554,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 
 	if (hwerrs & _IPATH_PLL_FAIL) {
 		snprintf(bitsmsg, sizeof bitsmsg,
-			 "[PLL failed (%llx), HT-400 unusable]",
+			 "[PLL failed (%llx), InfiniPath hardware unusable]",
 			 (unsigned long long) (hwerrs & _IPATH_PLL_FAIL));
 		strlcat(msg, bitsmsg, msgl);
 		/* ignore from now on, so disable until driver reloaded */
@@ -610,18 +611,18 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
 		break;
 	case 5:
 		/*
-		 * HT-460 original production board; two production levels, with
+		 * original production board; two production levels, with
 		 * different serial number ranges.   See ipath_ht_early_init() for
 		 * case where we enable IPATH_GPIO_INTR for later serial # range.
 		 */
-		n = "InfiniPath_HT-460";
+		n = "InfiniPath_QHT7040";
 		break;
 	case 6:
 		n = "OEM_Board_3";
 		break;
 	case 7:
-		/* HT-460 small form factor production board */
-		n = "InfiniPath_HT-465";
+		/* small form factor production board */
+		n = "InfiniPath_QHT7140";
 		break;
 	case 8:
 		n = "LS/X-1";
@@ -633,7 +634,7 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
 		n = "OEM_Board_2";
 		break;
 	case 11:
-		n = "InfiniPath_HT-470";
+		n = "InfiniPath_HT-470"; /* obsoleted */
 		break;
 	case 12:
 		n = "OEM_Board_4";
@@ -641,7 +642,7 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
 	default:		/* don't know, just print the number */
 		ipath_dev_err(dd, "Don't yet know about board "
 			      "with ID %u\n", boardrev);
-		snprintf(name, namelen, "Unknown_InfiniPath_HT-4xx_%u",
+		snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
 			 boardrev);
 		break;
 	}
@@ -650,11 +651,10 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
 
 	if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) {
 		/*
-		 * This version of the driver only supports the HT-400
-		 * Rev 3.2
+		 * This version of the driver only supports Rev 3.2 and 3.3
 		 */
 		ipath_dev_err(dd,
-			      "Unsupported HT-400 revision %u.%u!\n",
+			      "Unsupported InfiniPath hardware revision %u.%u!\n",
 			      dd->ipath_majrev, dd->ipath_minrev);
 		ret = 1;
 		goto bail;
@@ -738,7 +738,7 @@ static void ipath_check_htlink(struct ipath_devdata *dd)
 
 static int ipath_setup_ht_reset(struct ipath_devdata *dd)
 {
-	ipath_dbg("No reset possible for HT-400\n");
+	ipath_dbg("No reset possible for this InfiniPath hardware\n");
 	return 0;
 }
 
@@ -925,7 +925,7 @@ static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev,
 
 	/*
 	 * kernels with CONFIG_PCI_MSI set the vector in the irq field of
-	 * struct pci_device, so we use that to program the HT-400 internal
+	 * struct pci_device, so we use that to program the internal
 	 * interrupt register (not config space) with that value. The BIOS
 	 * must still have done the basic MSI setup.
 	 */
@@ -1013,7 +1013,7 @@ bail:
  * @dd: the infinipath device
  *
  * Called during driver unload.
- * This is currently a nop for the HT-400, not for all chips
+ * This is currently a nop for the HT chip, not for all chips
  */
 static void ipath_setup_ht_cleanup(struct ipath_devdata *dd)
 {
@@ -1290,6 +1290,15 @@ static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
 		val &= ~INFINIPATH_XGXS_RESET;
 		change = 1;
 	}
+	if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
+	     INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
+		/* need to compensate for Tx inversion in partner */
+		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+		         INFINIPATH_XGXS_RX_POL_SHIFT);
+		val |= dd->ipath_rx_pol_inv <<
+			INFINIPATH_XGXS_RX_POL_SHIFT;
+		change = 1;
+	}
 	if (change)
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
 
@@ -1470,7 +1479,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
 	dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
 
 	/*
-	 * For HT-400, we allocate a somewhat overly large eager buffer,
+	 * For HT, we allocate a somewhat overly large eager buffer,
 	 * such that we can guarantee that we can receive the largest
 	 * packet that we can send out.  To truly support a 4KB MTU,
 	 * we need to bump this to a large value.  To date, other than
@@ -1531,7 +1540,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
 	if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
 		dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
 		/*
-		 * Later production HT-460 has same changes as HT-465, so
+		 * Later production QHT7040 has same changes as QHT7140, so
 		 * can use GPIO interrupts.  They have serial #'s starting
 		 * with 128, rather than 112.
 		 */
@@ -1560,13 +1569,13 @@ static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
 }
 
 /**
- * ipath_init_ht400_funcs - set up the chip-specific function pointers
+ * ipath_init_iba6110_funcs - set up the chip-specific function pointers
  * @dd: the infinipath device
  *
  * This is global, and is called directly at init to set up the
  * chip-specific function pointers for later use.
  */
-void ipath_init_ht400_funcs(struct ipath_devdata *dd)
+void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
 {
 	dd->ipath_f_intrsetup = ipath_ht_intconfig;
 	dd->ipath_f_bus = ipath_setup_ht_config;
diff --git a/drivers/infiniband/hw/ipath/ipath_pe800.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index b83f66d8262..d86516d23df 100644
--- a/drivers/infiniband/hw/ipath/ipath_pe800.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -32,7 +32,7 @@
  */
 /*
  * This file contains all of the code that is specific to the
- * InfiniPath PE-800 chip.
+ * InfiniPath PCIe chip.
  */
 
 #include <linux/interrupt.h>
@@ -45,9 +45,9 @@
 
 /*
  * This file contains all the chip-specific register information and
- * access functions for the QLogic InfiniPath PE800, the PCI-Express chip.
+ * access functions for the QLogic InfiniPath PCI-Express chip.
  *
- * This lists the InfiniPath PE800 registers, in the actual chip layout.
+ * This lists the InfiniPath registers, in the actual chip layout.
  * This structure should never be directly accessed.
  */
 struct _infinipath_do_not_use_kernel_regs {
@@ -213,7 +213,6 @@ static const struct ipath_kregs ipath_pe_kregs = {
 	.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
 	.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
 
-	/* This group is pe-800-specific; and used only in this file */
 	/* The rcvpktled register controls one of the debug port signals, so
 	 * a packet activity LED can be connected to it. */
 	.kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt),
@@ -364,8 +363,9 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 			 * and we get here multiple times
 			 */
 			if (dd->ipath_flags & IPATH_INITTED) {
-				ipath_dev_err(dd, "Fatal Error (freeze "
-					      "mode), no longer usable\n");
+				ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+					      "mode), no longer usable, SN %.16s\n",
+						  dd->ipath_serial);
 				isfatal = 1;
 			}
 			/*
@@ -388,7 +388,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 	*msg = '\0';
 
 	if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
-		strlcat(msg, "[Memory BIST test failed, PE-800 unusable]",
+		strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
 			msgl);
 		/* ignore from now on, so disable until driver reloaded */
 		*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
@@ -433,7 +433,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 
 	if (hwerrs & _IPATH_PLL_FAIL) {
 		snprintf(bitsmsg, sizeof bitsmsg,
-			 "[PLL failed (%llx), PE-800 unusable]",
+			 "[PLL failed (%llx), InfiniPath hardware unusable]",
 			 (unsigned long long) hwerrs & _IPATH_PLL_FAIL);
 		strlcat(msg, bitsmsg, msgl);
 		/* ignore from now on, so disable until driver reloaded */
@@ -511,22 +511,25 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
 		n = "InfiniPath_Emulation";
 		break;
 	case 1:
-		n = "InfiniPath_PE-800-Bringup";
+		n = "InfiniPath_QLE7140-Bringup";
 		break;
 	case 2:
-		n = "InfiniPath_PE-880";
+		n = "InfiniPath_QLE7140";
 		break;
 	case 3:
-		n = "InfiniPath_PE-850";
+		n = "InfiniPath_QMI7140";
 		break;
 	case 4:
-		n = "InfiniPath_PE-860";
+		n = "InfiniPath_QEM7140";
+		break;
+	case 5:
+		n = "InfiniPath_QMH7140";
 		break;
 	default:
 		ipath_dev_err(dd,
 			      "Don't yet know about board with ID %u\n",
 			      boardrev);
-		snprintf(name, namelen, "Unknown_InfiniPath_PE-8xx_%u",
+		snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u",
 			 boardrev);
 		break;
 	}
@@ -534,7 +537,7 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
 		snprintf(name, namelen, "%s", n);
 
 	if (dd->ipath_majrev != 4 || !dd->ipath_minrev || dd->ipath_minrev>2) {
-		ipath_dev_err(dd, "Unsupported PE-800 revision %u.%u!\n",
+		ipath_dev_err(dd, "Unsupported InfiniPath hardware revision %u.%u!\n",
 			      dd->ipath_majrev, dd->ipath_minrev);
 		ret = 1;
 	} else
@@ -651,6 +654,15 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
 		val &= ~INFINIPATH_XGXS_RESET;
 		change = 1;
 	}
+	if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
+	     INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
+		/* need to compensate for Tx inversion in partner */
+		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+		         INFINIPATH_XGXS_RX_POL_SHIFT);
+		val |= dd->ipath_rx_pol_inv <<
+			INFINIPATH_XGXS_RX_POL_SHIFT;
+		change = 1;
+	}
 	if (change)
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
 
@@ -705,7 +717,7 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
 }
 
-/* this is not yet needed on the PE800, so just return 0. */
+/* this is not yet needed on this chip, so just return 0. */
 static int ipath_pe_intconfig(struct ipath_devdata *dd)
 {
 	return 0;
@@ -759,8 +771,8 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
  *
  * This is called during driver unload.
  * We do the pci_disable_msi here, not in generic code, because it
- * isn't used for the HT-400. If we do end up needing pci_enable_msi
- * at some point in the future for HT-400, we'll move the call back
+ * isn't used for the HT chips. If we do end up needing pci_enable_msi
+ * at some point in the future for HT, we'll move the call back
  * into the main init_one code.
  */
 static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
@@ -780,10 +792,10 @@ static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
  * late in 2.6.16).
  * All that can be done is to edit the kernel source to remove the quirk
  * check until that is fixed.
- * We do not need to call enable_msi() for our HyperTransport chip (HT-400),
- * even those it uses MSI, and we want to avoid the quirk warning, so
- * So we call enable_msi only for the PE-800.  If we do end up needing
- * pci_enable_msi at some point in the future for HT-400, we'll move the
+ * We do not need to call enable_msi() for our HyperTransport chip,
+ * even though it uses MSI, and we want to avoid the quirk warning, so
+ * So we call enable_msi only for PCIe.  If we do end up needing
+ * pci_enable_msi at some point in the future for HT, we'll move the
  * call back into the main init_one code.
  * We save the msi lo and hi values, so we can restore them after
  * chip reset (the kernel PCI infrastructure doesn't yet handle that
@@ -971,8 +983,7 @@ static int ipath_setup_pe_reset(struct ipath_devdata *dd)
 	int ret;
 
 	/* Use ERROR so it shows up in logs, etc. */
-	ipath_dev_err(dd, "Resetting PE-800 unit %u\n",
-		      dd->ipath_unit);
+	ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit);
 	/* keep chip from being accessed in a few places */
 	dd->ipath_flags &= ~(IPATH_INITTED|IPATH_PRESENT);
 	val = dd->ipath_control | INFINIPATH_C_RESET;
@@ -1078,7 +1089,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
  * @port: the port
  *
  * clear all TID entries for a port, expected and eager.
- * Used from ipath_close().  On PE800, TIDs are only 32 bits,
+ * Used from ipath_close().  On this chip, TIDs are only 32 bits,
  * not 64, but they are still on 64 bit boundaries, so tidbase
  * is declared as u64 * for the pointer math, even though we write 32 bits
  */
@@ -1148,9 +1159,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
 	dd->ipath_flags |= IPATH_4BYTE_TID;
 
 	/*
-	 * For openib, we need to be able to handle an IB header of 96 bytes
-	 * or 24 dwords.  HT-400 has arbitrary sized receive buffers, so we
-	 * made them the same size as the PIO buffers.  The PE-800 does not
+	 * For openfabrics, we need to be able to handle an IB header of
+	 * 24 dwords.  HT chip has arbitrary sized receive buffers, so we
+	 * made them the same size as the PIO buffers.  This chip does not
 	 * handle arbitrary size buffers, so we need the header large enough
 	 * to handle largest IB header, but still have room for a 2KB MTU
 	 * standard IB packet.
@@ -1158,11 +1169,10 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
 	dd->ipath_rcvhdrentsize = 24;
 	dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
 
-	/* For HT-400, we allocate a somewhat overly large eager buffer,
-	 * such that we can guarantee that we can receive the largest packet
-	 * that we can send out.  To truly support a 4KB MTU, we need to
-	 * bump this to a larger value.  We'll do this when I get around to
-	 * testing 4KB sends on the PE-800, which I have not yet done.
+	/*
+	 * To truly support a 4KB MTU (for usermode), we need to
+	 * bump this to a larger value.  For now, we use them for
+	 * the kernel only.
 	 */
 	dd->ipath_rcvegrbufsize = 2048;
 	/*
@@ -1175,9 +1185,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
 	dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
 
 	/*
-	 * For PE-800, we can request a receive interrupt for 1 or
+	 * We can request a receive interrupt for 1 or
 	 * more packets from current offset.  For now, we set this
-	 * up for a single packet, to match the HT-400 behavior.
+	 * up for a single packet.
 	 */
 	dd->ipath_rhdrhead_intr_off = 1ULL<<32;
 
@@ -1216,13 +1226,13 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
 }
 
 /**
- * ipath_init_pe800_funcs - set up the chip-specific function pointers
+ * ipath_init_iba6120_funcs - set up the chip-specific function pointers
  * @dd: the infinipath device
  *
  * This is global, and is called directly at init to set up the
  * chip-specific function pointers for later use.
  */
-void ipath_init_pe800_funcs(struct ipath_devdata *dd)
+void ipath_init_iba6120_funcs(struct ipath_devdata *dd)
 {
 	dd->ipath_f_intrsetup = ipath_pe_intconfig;
 	dd->ipath_f_bus = ipath_setup_pe_config;
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 414cdd1d80a..44669dc2e22 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -53,8 +53,8 @@ module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO);
 MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
 
 /*
- * Number of buffers reserved for driver (layered drivers and SMA
- * send).  Reserved at end of buffer list.   Initialized based on
+ * Number of buffers reserved for driver (verbs and layered drivers.)
+ * Reserved at end of buffer list.   Initialized based on
  * number of PIO buffers if not set via module interface.
  * The problem with this is that it's global, but we'll use different
  * numbers for different chip types.  So the default value is not
@@ -80,7 +80,7 @@ MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
  *
  * Allocate the eager TID buffers and program them into infinipath.
  * We use the network layer alloc_skb() allocator to allocate the
- * memory, and either use the buffers as is for things like SMA
+ * memory, and either use the buffers as is for things like verbs
  * packets, or pass the buffers up to the ipath layered driver and
  * thence the network layer, replacing them as we do so (see
  * ipath_rcv_layer()).
@@ -240,7 +240,11 @@ static int init_chip_first(struct ipath_devdata *dd,
 			  "only supports %u\n", ipath_cfgports,
 			  dd->ipath_portcnt);
 	}
-	dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_cfgports,
+	/*
+	 * Allocate full portcnt array, rather than just cfgports, because
+	 * cleanup iterates across all possible ports.
+	 */
+	dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_portcnt,
 			       GFP_KERNEL);
 
 	if (!dd->ipath_pd) {
@@ -446,9 +450,9 @@ static void enable_chip(struct ipath_devdata *dd,
 	u32 val;
 	int i;
 
-	if (!reinit) {
-		init_waitqueue_head(&ipath_sma_state_wait);
-	}
+	if (!reinit)
+		init_waitqueue_head(&ipath_state_wait);
+
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 			 dd->ipath_rcvctrl);
 
@@ -687,7 +691,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 	dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2)
 		/ (sizeof(u64) * BITS_PER_BYTE / 2);
 	if (ipath_kpiobufs == 0) {
-		/* not set by user, or set explictly to default  */
+		/* not set by user (this is default) */
 		if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128)
 			kpiobufs = 32;
 		else
@@ -946,6 +950,7 @@ static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp)
 			dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val;
 	}
 
+	ipath_kpiobufs = val;
 	ret = 0;
 bail:
 	spin_unlock_irqrestore(&ipath_devs_lock, flags);
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 280e732660a..49bf7bb15b0 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -34,7 +34,7 @@
 #include <linux/pci.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
+#include "ipath_verbs.h"
 #include "ipath_common.h"
 
 /* These are all rcv-related errors which we want to count for stats */
@@ -201,7 +201,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
 				  ib_linkstate(lstate));
 		}
 		else
-			ipath_cdbg(SMA, "Unit %u link state %s, last "
+			ipath_cdbg(VERBOSE, "Unit %u link state %s, last "
 				   "was %s\n", dd->ipath_unit,
 				   ib_linkstate(lstate),
 				   ib_linkstate((unsigned)
@@ -213,7 +213,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
 		if (lstate == IPATH_IBSTATE_INIT ||
 		    lstate == IPATH_IBSTATE_ARM ||
 		    lstate == IPATH_IBSTATE_ACTIVE)
-			ipath_cdbg(SMA, "Unit %u link state down"
+			ipath_cdbg(VERBOSE, "Unit %u link state down"
 				   " (state 0x%x), from %s\n",
 				   dd->ipath_unit,
 				   (u32)val & IPATH_IBSTATE_MASK,
@@ -269,7 +269,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
 			     INFINIPATH_IBCS_LINKSTATE_MASK)
 			    == INFINIPATH_IBCS_L_STATE_ACTIVE)
 				/* if from up to down be more vocal */
-				ipath_cdbg(SMA,
+				ipath_cdbg(VERBOSE,
 					   "Unit %u link now down (%s)\n",
 					   dd->ipath_unit,
 					   ipath_ibcstatus_str[ltstate]);
@@ -289,8 +289,6 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
 		*dd->ipath_statusp |=
 			IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
 		dd->ipath_f_setextled(dd, lstate, ltstate);
-
-		__ipath_layer_intr(dd, IPATH_LAYER_INT_IF_UP);
 	} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
 		/*
 		 * set INIT and DOWN.  Down is checked by most of the other
@@ -598,11 +596,11 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 
 	if (!noprint && *msg)
 		ipath_dev_err(dd, "%s error\n", msg);
-	if (dd->ipath_sma_state_wanted & dd->ipath_flags) {
-		ipath_cdbg(VERBOSE, "sma wanted state %x, iflags now %x, "
-			   "waking\n", dd->ipath_sma_state_wanted,
+	if (dd->ipath_state_wanted & dd->ipath_flags) {
+		ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
+			   "waking\n", dd->ipath_state_wanted,
 			   dd->ipath_flags);
-		wake_up_interruptible(&ipath_sma_state_wait);
+		wake_up_interruptible(&ipath_state_wait);
 	}
 
 	return chkerrpkts;
@@ -708,11 +706,7 @@ static void handle_layer_pioavail(struct ipath_devdata *dd)
 {
 	int ret;
 
-	ret = __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
-	if (ret > 0)
-		goto set;
-
-	ret = __ipath_verbs_piobufavail(dd);
+	ret = ipath_ib_piobufavail(dd->verbs_dev);
 	if (ret > 0)
 		goto set;
 
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index e9f374fb641..a8a56276ff1 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -132,12 +132,6 @@ struct _ipath_layer {
 	void *l_arg;
 };
 
-/* Verbs layer interface */
-struct _verbs_layer {
-	void *l_arg;
-	struct timer_list l_timer;
-};
-
 struct ipath_devdata {
 	struct list_head ipath_list;
 
@@ -198,7 +192,8 @@ struct ipath_devdata {
 	void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64);
 	/* fill out chip-specific fields */
 	int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
-	struct _verbs_layer verbs_layer;
+	struct ipath_ibdev *verbs_dev;
+	struct timer_list verbs_timer;
 	/* total dwords sent (summed from counter) */
 	u64 ipath_sword;
 	/* total dwords rcvd (summed from counter) */
@@ -241,7 +236,7 @@ struct ipath_devdata {
 	u64 ipath_tidtemplate;
 	/* value to write to free TIDs */
 	u64 ipath_tidinvalid;
-	/* PE-800 rcv interrupt setup */
+	/* IBA6120 rcv interrupt setup */
 	u64 ipath_rhdrhead_intr_off;
 
 	/* size of memory at ipath_kregbase */
@@ -250,8 +245,8 @@ struct ipath_devdata {
 	u32 ipath_pioavregs;
 	/* IPATH_POLL, etc. */
 	u32 ipath_flags;
-	/* ipath_flags sma is waiting for */
-	u32 ipath_sma_state_wanted;
+	/* ipath_flags driver is waiting for */
+	u32 ipath_state_wanted;
 	/* last buffer for user use, first buf for kernel use is this
 	 * index. */
 	u32 ipath_lastport_piobuf;
@@ -311,10 +306,6 @@ struct ipath_devdata {
 	u32 ipath_pcibar0;
 	/* so we can rewrite it after a chip reset */
 	u32 ipath_pcibar1;
-	/* sequential tries for SMA send and no bufs */
-	u32 ipath_nosma_bufs;
-	/* duration (seconds) ipath_nosma_bufs set */
-	u32 ipath_nosma_secs;
 
 	/* HT/PCI Vendor ID (here for NodeInfo) */
 	u16 ipath_vendorid;
@@ -512,6 +503,8 @@ struct ipath_devdata {
 	u8 ipath_pci_cacheline;
 	/* LID mask control */
 	u8 ipath_lmc;
+	/* Rx Polarity inversion (compensate for ~tx on partner) */
+	u8 ipath_rx_pol_inv;
 
 	/* local link integrity counter */
 	u32 ipath_lli_counter;
@@ -523,18 +516,6 @@ extern struct list_head ipath_dev_list;
 extern spinlock_t ipath_devs_lock;
 extern struct ipath_devdata *ipath_lookup(int unit);
 
-extern u16 ipath_layer_rcv_opcode;
-extern int __ipath_layer_intr(struct ipath_devdata *, u32);
-extern int ipath_layer_intr(struct ipath_devdata *, u32);
-extern int __ipath_layer_rcv(struct ipath_devdata *, void *,
-			     struct sk_buff *);
-extern int __ipath_layer_rcv_lid(struct ipath_devdata *, void *);
-extern int __ipath_verbs_piobufavail(struct ipath_devdata *);
-extern int __ipath_verbs_rcv(struct ipath_devdata *, void *, void *, u32);
-
-void ipath_layer_add(struct ipath_devdata *);
-void ipath_layer_remove(struct ipath_devdata *);
-
 int ipath_init_chip(struct ipath_devdata *, int);
 int ipath_enable_wc(struct ipath_devdata *dd);
 void ipath_disable_wc(struct ipath_devdata *dd);
@@ -549,9 +530,8 @@ void ipath_cdev_cleanup(struct cdev **cdevp,
 
 int ipath_diag_add(struct ipath_devdata *);
 void ipath_diag_remove(struct ipath_devdata *);
-void ipath_diag_bringup_link(struct ipath_devdata *);
 
-extern wait_queue_head_t ipath_sma_state_wait;
+extern wait_queue_head_t ipath_state_wait;
 
 int ipath_user_add(struct ipath_devdata *dd);
 void ipath_user_remove(struct ipath_devdata *dd);
@@ -582,12 +562,14 @@ void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
 
 int ipath_parse_ushort(const char *str, unsigned short *valp);
 
-int ipath_wait_linkstate(struct ipath_devdata *, u32, int);
-void ipath_set_ib_lstate(struct ipath_devdata *, int);
 void ipath_kreceive(struct ipath_devdata *);
 int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
 int ipath_reset_device(int);
 void ipath_get_faststats(unsigned long);
+int ipath_set_linkstate(struct ipath_devdata *, u8);
+int ipath_set_mtu(struct ipath_devdata *, u16);
+int ipath_set_lid(struct ipath_devdata *, u32, u8);
+int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
 
 /* for use in system calls, where we want to know device type, etc. */
 #define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data)
@@ -642,10 +624,8 @@ void ipath_free_data(struct ipath_portdata *dd);
 int ipath_waitfor_mdio_cmdready(struct ipath_devdata *);
 int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *);
 u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
-/* init PE-800-specific func */
-void ipath_init_pe800_funcs(struct ipath_devdata *);
-/* init HT-400-specific func */
-void ipath_init_ht400_funcs(struct ipath_devdata *);
+void ipath_init_iba6120_funcs(struct ipath_devdata *);
+void ipath_init_iba6110_funcs(struct ipath_devdata *);
 void ipath_get_eeprom_info(struct ipath_devdata *);
 u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
 
@@ -801,7 +781,7 @@ static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,
 
 struct device_driver;
 
-extern const char ipath_core_version[];
+extern const char ib_ipath_version[];
 
 int ipath_driver_create_group(struct device_driver *);
 void ipath_driver_remove_group(struct device_driver *);
@@ -810,6 +790,9 @@ int ipath_device_create_group(struct device *, struct ipath_devdata *);
 void ipath_device_remove_group(struct device *, struct ipath_devdata *);
 int ipath_expose_reset(struct device *);
 
+int ipath_diagpkt_add(void);
+void ipath_diagpkt_remove(void);
+
 int ipath_init_ipathfs(void);
 void ipath_exit_ipathfs(void);
 int ipathfs_add_device(struct ipath_devdata *);
@@ -831,10 +814,10 @@ const char *ipath_get_unit_name(int unit);
 
 extern struct mutex ipath_mutex;
 
-#define IPATH_DRV_NAME		"ipath_core"
+#define IPATH_DRV_NAME		"ib_ipath"
 #define IPATH_MAJOR		233
 #define IPATH_USER_MINOR_BASE	0
-#define IPATH_SMA_MINOR		128
+#define IPATH_DIAGPKT_MINOR	127
 #define IPATH_DIAG_MINOR_BASE	129
 #define IPATH_NMINORS		255
 
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index a5ca279370a..ba1b93226ca 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -34,6 +34,7 @@
 #include <asm/io.h>
 
 #include "ipath_verbs.h"
+#include "ipath_kernel.h"
 
 /**
  * ipath_alloc_lkey - allocate an lkey
@@ -60,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
 		r = (r + 1) & (rkt->max - 1);
 		if (r == n) {
 			spin_unlock_irqrestore(&rkt->lock, flags);
-			_VERBS_INFO("LKEY table full\n");
+			ipath_dbg(KERN_INFO "LKEY table full\n");
 			ret = 0;
 			goto bail;
 		}
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c
index b28c6f81c73..e46aa4ed2a7 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.c
+++ b/drivers/infiniband/hw/ipath/ipath_layer.c
@@ -42,26 +42,20 @@
 
 #include "ipath_kernel.h"
 #include "ipath_layer.h"
+#include "ipath_verbs.h"
 #include "ipath_common.h"
 
 /* Acquire before ipath_devs_lock. */
 static DEFINE_MUTEX(ipath_layer_mutex);
 
-static int ipath_verbs_registered;
-
 u16 ipath_layer_rcv_opcode;
 
 static int (*layer_intr)(void *, u32);
 static int (*layer_rcv)(void *, void *, struct sk_buff *);
 static int (*layer_rcv_lid)(void *, void *);
-static int (*verbs_piobufavail)(void *);
-static void (*verbs_rcv)(void *, void *, void *, u32);
 
 static void *(*layer_add_one)(int, struct ipath_devdata *);
 static void (*layer_remove_one)(void *);
-static void *(*verbs_add_one)(int, struct ipath_devdata *);
-static void (*verbs_remove_one)(void *);
-static void (*verbs_timer_cb)(void *);
 
 int __ipath_layer_intr(struct ipath_devdata *dd, u32 arg)
 {
@@ -107,302 +101,16 @@ int __ipath_layer_rcv_lid(struct ipath_devdata *dd, void *hdr)
 	return ret;
 }
 
-int __ipath_verbs_piobufavail(struct ipath_devdata *dd)
-{
-	int ret = -ENODEV;
-
-	if (dd->verbs_layer.l_arg && verbs_piobufavail)
-		ret = verbs_piobufavail(dd->verbs_layer.l_arg);
-
-	return ret;
-}
-
-int __ipath_verbs_rcv(struct ipath_devdata *dd, void *rc, void *ebuf,
-		      u32 tlen)
-{
-	int ret = -ENODEV;
-
-	if (dd->verbs_layer.l_arg && verbs_rcv) {
-		verbs_rcv(dd->verbs_layer.l_arg, rc, ebuf, tlen);
-		ret = 0;
-	}
-
-	return ret;
-}
-
-int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 newstate)
+void ipath_layer_lid_changed(struct ipath_devdata *dd)
 {
-	u32 lstate;
-	int ret;
-
-	switch (newstate) {
-	case IPATH_IB_LINKDOWN:
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
-				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINKDOWN_SLEEP:
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
-				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINKDOWN_DISABLE:
-		ipath_set_ib_lstate(dd,
-				    INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
-				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINKINIT:
-		if (dd->ipath_flags & IPATH_LINKINIT) {
-			ret = 0;
-			goto bail;
-		}
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
-				    INFINIPATH_IBCC_LINKCMD_SHIFT);
-		lstate = IPATH_LINKINIT;
-		break;
-
-	case IPATH_IB_LINKARM:
-		if (dd->ipath_flags & IPATH_LINKARMED) {
-			ret = 0;
-			goto bail;
-		}
-		if (!(dd->ipath_flags &
-		      (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
-			ret = -EINVAL;
-			goto bail;
-		}
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
-				    INFINIPATH_IBCC_LINKCMD_SHIFT);
-		/*
-		 * Since the port can transition to ACTIVE by receiving
-		 * a non VL 15 packet, wait for either state.
-		 */
-		lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
-		break;
-
-	case IPATH_IB_LINKACTIVE:
-		if (dd->ipath_flags & IPATH_LINKACTIVE) {
-			ret = 0;
-			goto bail;
-		}
-		if (!(dd->ipath_flags & IPATH_LINKARMED)) {
-			ret = -EINVAL;
-			goto bail;
-		}
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
-				    INFINIPATH_IBCC_LINKCMD_SHIFT);
-		lstate = IPATH_LINKACTIVE;
-		break;
-
-	default:
-		ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
-		ret = -EINVAL;
-		goto bail;
-	}
-	ret = ipath_wait_linkstate(dd, lstate, 2000);
-
-bail:
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_linkstate);
-
-/**
- * ipath_layer_set_mtu - set the MTU
- * @dd: the infinipath device
- * @arg: the new MTU
- *
- * we can handle "any" incoming size, the issue here is whether we
- * need to restrict our outgoing size.   For now, we don't do any
- * sanity checking on this, and we don't deal with what happens to
- * programs that are already running when the size changes.
- * NOTE: changing the MTU will usually cause the IBC to go back to
- * link initialize (IPATH_IBSTATE_INIT) state...
- */
-int ipath_layer_set_mtu(struct ipath_devdata *dd, u16 arg)
-{
-	u32 piosize;
-	int changed = 0;
-	int ret;
-
-	/*
-	 * mtu is IB data payload max.  It's the largest power of 2 less
-	 * than piosize (or even larger, since it only really controls the
-	 * largest we can receive; we can send the max of the mtu and
-	 * piosize).  We check that it's one of the valid IB sizes.
-	 */
-	if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
-	    arg != 4096) {
-		ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
-		ret = -EINVAL;
-		goto bail;
-	}
-	if (dd->ipath_ibmtu == arg) {
-		ret = 0;	/* same as current */
-		goto bail;
-	}
-
-	piosize = dd->ipath_ibmaxlen;
-	dd->ipath_ibmtu = arg;
-
-	if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
-		/* Only if it's not the initial value (or reset to it) */
-		if (piosize != dd->ipath_init_ibmaxlen) {
-			dd->ipath_ibmaxlen = piosize;
-			changed = 1;
-		}
-	} else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
-		piosize = arg + IPATH_PIO_MAXIBHDR;
-		ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
-			   "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
-			   arg);
-		dd->ipath_ibmaxlen = piosize;
-		changed = 1;
-	}
-
-	if (changed) {
-		/*
-		 * set the IBC maxpktlength to the size of our pio
-		 * buffers in words
-		 */
-		u64 ibc = dd->ipath_ibcctrl;
-		ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
-			 INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
-
-		piosize = piosize - 2 * sizeof(u32);	/* ignore pbc */
-		dd->ipath_ibmaxlen = piosize;
-		piosize /= sizeof(u32);	/* in words */
-		/*
-		 * for ICRC, which we only send in diag test pkt mode, and
-		 * we don't need to worry about that for mtu
-		 */
-		piosize += 1;
-
-		ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
-		dd->ipath_ibcctrl = ibc;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-		dd->ipath_f_tidtemplate(dd);
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_mtu);
-
-int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
-{
-	dd->ipath_lid = arg;
-	dd->ipath_lmc = lmc;
-
 	mutex_lock(&ipath_layer_mutex);
 
 	if (dd->ipath_layer.l_arg && layer_intr)
 		layer_intr(dd->ipath_layer.l_arg, IPATH_LAYER_INT_LID);
 
 	mutex_unlock(&ipath_layer_mutex);
-
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_set_lid);
-
-int ipath_layer_set_guid(struct ipath_devdata *dd, __be64 guid)
-{
-	/* XXX - need to inform anyone who cares this just happened. */
-	dd->ipath_guid = guid;
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_guid);
-
-__be64 ipath_layer_get_guid(struct ipath_devdata *dd)
-{
-	return dd->ipath_guid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_guid);
-
-u32 ipath_layer_get_nguid(struct ipath_devdata *dd)
-{
-	return dd->ipath_nguid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_nguid);
-
-u32 ipath_layer_get_majrev(struct ipath_devdata *dd)
-{
-	return dd->ipath_majrev;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_majrev);
-
-u32 ipath_layer_get_minrev(struct ipath_devdata *dd)
-{
-	return dd->ipath_minrev;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_minrev);
-
-u32 ipath_layer_get_pcirev(struct ipath_devdata *dd)
-{
-	return dd->ipath_pcirev;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_pcirev);
-
-u32 ipath_layer_get_flags(struct ipath_devdata *dd)
-{
-	return dd->ipath_flags;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_flags);
-
-struct device *ipath_layer_get_device(struct ipath_devdata *dd)
-{
-	return &dd->pcidev->dev;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_device);
-
-u16 ipath_layer_get_deviceid(struct ipath_devdata *dd)
-{
-	return dd->ipath_deviceid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_deviceid);
-
-u32 ipath_layer_get_vendorid(struct ipath_devdata *dd)
-{
-	return dd->ipath_vendorid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_vendorid);
-
-u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd)
-{
-	return dd->ipath_lastibcstat;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_lastibcstat);
-
-u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd)
-{
-	return dd->ipath_ibmtu;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_ibmtu);
-
 void ipath_layer_add(struct ipath_devdata *dd)
 {
 	mutex_lock(&ipath_layer_mutex);
@@ -411,10 +119,6 @@ void ipath_layer_add(struct ipath_devdata *dd)
 		dd->ipath_layer.l_arg =
 			layer_add_one(dd->ipath_unit, dd);
 
-	if (verbs_add_one)
-		dd->verbs_layer.l_arg =
-			verbs_add_one(dd->ipath_unit, dd);
-
 	mutex_unlock(&ipath_layer_mutex);
 }
 
@@ -427,11 +131,6 @@ void ipath_layer_remove(struct ipath_devdata *dd)
 		dd->ipath_layer.l_arg = NULL;
 	}
 
-	if (dd->verbs_layer.l_arg && verbs_remove_one) {
-		verbs_remove_one(dd->verbs_layer.l_arg);
-		dd->verbs_layer.l_arg = NULL;
-	}
-
 	mutex_unlock(&ipath_layer_mutex);
 }
 
@@ -463,9 +162,6 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
 		if (dd->ipath_layer.l_arg)
 			continue;
 
-		if (!(*dd->ipath_statusp & IPATH_STATUS_SMA))
-			*dd->ipath_statusp |= IPATH_STATUS_OIB_SMA;
-
 		spin_unlock_irqrestore(&ipath_devs_lock, flags);
 		dd->ipath_layer.l_arg = l_add(dd->ipath_unit, dd);
 		spin_lock_irqsave(&ipath_devs_lock, flags);
@@ -509,107 +205,6 @@ void ipath_layer_unregister(void)
 
 EXPORT_SYMBOL_GPL(ipath_layer_unregister);
 
-static void __ipath_verbs_timer(unsigned long arg)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *) arg;
-
-	/*
-	 * If port 0 receive packet interrupts are not available, or
-	 * can be missed, poll the receive queue
-	 */
-	if (dd->ipath_flags & IPATH_POLL_RX_INTR)
-		ipath_kreceive(dd);
-
-	/* Handle verbs layer timeouts. */
-	if (dd->verbs_layer.l_arg && verbs_timer_cb)
-		verbs_timer_cb(dd->verbs_layer.l_arg);
-
-	mod_timer(&dd->verbs_layer.l_timer, jiffies + 1);
-}
-
-/**
- * ipath_verbs_register - verbs layer registration
- * @l_piobufavail: callback for when PIO buffers become available
- * @l_rcv: callback for receiving a packet
- * @l_timer_cb: timer callback
- * @ipath_devdata: device data structure is put here
- */
-int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *),
-			 void (*l_remove)(void *arg),
-			 int (*l_piobufavail) (void *arg),
-			 void (*l_rcv) (void *arg, void *rhdr,
-					void *data, u32 tlen),
-			 void (*l_timer_cb) (void *arg))
-{
-	struct ipath_devdata *dd, *tmp;
-	unsigned long flags;
-
-	mutex_lock(&ipath_layer_mutex);
-
-	verbs_add_one = l_add;
-	verbs_remove_one = l_remove;
-	verbs_piobufavail = l_piobufavail;
-	verbs_rcv = l_rcv;
-	verbs_timer_cb = l_timer_cb;
-
-	spin_lock_irqsave(&ipath_devs_lock, flags);
-
-	list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
-		if (!(dd->ipath_flags & IPATH_INITTED))
-			continue;
-
-		if (dd->verbs_layer.l_arg)
-			continue;
-
-		spin_unlock_irqrestore(&ipath_devs_lock, flags);
-		dd->verbs_layer.l_arg = l_add(dd->ipath_unit, dd);
-		spin_lock_irqsave(&ipath_devs_lock, flags);
-	}
-
-	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-	mutex_unlock(&ipath_layer_mutex);
-
-	ipath_verbs_registered = 1;
-
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_verbs_register);
-
-void ipath_verbs_unregister(void)
-{
-	struct ipath_devdata *dd, *tmp;
-	unsigned long flags;
-
-	mutex_lock(&ipath_layer_mutex);
-	spin_lock_irqsave(&ipath_devs_lock, flags);
-
-	list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
-		*dd->ipath_statusp &= ~IPATH_STATUS_OIB_SMA;
-
-		if (dd->verbs_layer.l_arg && verbs_remove_one) {
-			spin_unlock_irqrestore(&ipath_devs_lock, flags);
-			verbs_remove_one(dd->verbs_layer.l_arg);
-			spin_lock_irqsave(&ipath_devs_lock, flags);
-			dd->verbs_layer.l_arg = NULL;
-		}
-	}
-
-	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-	verbs_add_one = NULL;
-	verbs_remove_one = NULL;
-	verbs_piobufavail = NULL;
-	verbs_rcv = NULL;
-	verbs_timer_cb = NULL;
-
-	ipath_verbs_registered = 0;
-
-	mutex_unlock(&ipath_layer_mutex);
-}
-
-EXPORT_SYMBOL_GPL(ipath_verbs_unregister);
-
 int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax)
 {
 	int ret;
@@ -698,390 +293,6 @@ u16 ipath_layer_get_bcast(struct ipath_devdata *dd)
 
 EXPORT_SYMBOL_GPL(ipath_layer_get_bcast);
 
-u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd)
-{
-	return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_cr_errpkey);
-
-static void update_sge(struct ipath_sge_state *ss, u32 length)
-{
-	struct ipath_sge *sge = &ss->sge;
-
-	sge->vaddr += length;
-	sge->length -= length;
-	sge->sge_length -= length;
-	if (sge->sge_length == 0) {
-		if (--ss->num_sge)
-			*sge = *ss->sg_list++;
-	} else if (sge->length == 0 && sge->mr != NULL) {
-		if (++sge->n >= IPATH_SEGSZ) {
-			if (++sge->m >= sge->mr->mapsz)
-				return;
-			sge->n = 0;
-		}
-		sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
-		sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
-	}
-}
-
-#ifdef __LITTLE_ENDIAN
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
-	return data >> shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
-	return data << shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
-	data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
-	data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
-	return data;
-}
-#else
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
-	return data << shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
-	return data >> shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
-	data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
-	data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
-	return data;
-}
-#endif
-
-static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
-		    u32 length)
-{
-	u32 extra = 0;
-	u32 data = 0;
-	u32 last;
-
-	while (1) {
-		u32 len = ss->sge.length;
-		u32 off;
-
-		BUG_ON(len == 0);
-		if (len > length)
-			len = length;
-		if (len > ss->sge.sge_length)
-			len = ss->sge.sge_length;
-		/* If the source address is not aligned, try to align it. */
-		off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
-		if (off) {
-			u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
-					    ~(sizeof(u32) - 1));
-			u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
-			u32 y;
-
-			y = sizeof(u32) - off;
-			if (len > y)
-				len = y;
-			if (len + extra >= sizeof(u32)) {
-				data |= set_upper_bits(v, extra *
-						       BITS_PER_BYTE);
-				len = sizeof(u32) - extra;
-				if (len == length) {
-					last = data;
-					break;
-				}
-				__raw_writel(data, piobuf);
-				piobuf++;
-				extra = 0;
-				data = 0;
-			} else {
-				/* Clear unused upper bytes */
-				data |= clear_upper_bytes(v, len, extra);
-				if (len == length) {
-					last = data;
-					break;
-				}
-				extra += len;
-			}
-		} else if (extra) {
-			/* Source address is aligned. */
-			u32 *addr = (u32 *) ss->sge.vaddr;
-			int shift = extra * BITS_PER_BYTE;
-			int ushift = 32 - shift;
-			u32 l = len;
-
-			while (l >= sizeof(u32)) {
-				u32 v = *addr;
-
-				data |= set_upper_bits(v, shift);
-				__raw_writel(data, piobuf);
-				data = get_upper_bits(v, ushift);
-				piobuf++;
-				addr++;
-				l -= sizeof(u32);
-			}
-			/*
-			 * We still have 'extra' number of bytes leftover.
-			 */
-			if (l) {
-				u32 v = *addr;
-
-				if (l + extra >= sizeof(u32)) {
-					data |= set_upper_bits(v, shift);
-					len -= l + extra - sizeof(u32);
-					if (len == length) {
-						last = data;
-						break;
-					}
-					__raw_writel(data, piobuf);
-					piobuf++;
-					extra = 0;
-					data = 0;
-				} else {
-					/* Clear unused upper bytes */
-					data |= clear_upper_bytes(v, l,
-								  extra);
-					if (len == length) {
-						last = data;
-						break;
-					}
-					extra += l;
-				}
-			} else if (len == length) {
-				last = data;
-				break;
-			}
-		} else if (len == length) {
-			u32 w;
-
-			/*
-			 * Need to round up for the last dword in the
-			 * packet.
-			 */
-			w = (len + 3) >> 2;
-			__iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
-			piobuf += w - 1;
-			last = ((u32 *) ss->sge.vaddr)[w - 1];
-			break;
-		} else {
-			u32 w = len >> 2;
-
-			__iowrite32_copy(piobuf, ss->sge.vaddr, w);
-			piobuf += w;
-
-			extra = len & (sizeof(u32) - 1);
-			if (extra) {
-				u32 v = ((u32 *) ss->sge.vaddr)[w];
-
-				/* Clear unused upper bytes */
-				data = clear_upper_bytes(v, extra, 0);
-			}
-		}
-		update_sge(ss, len);
-		length -= len;
-	}
-	/* Update address before sending packet. */
-	update_sge(ss, length);
-	/* must flush early everything before trigger word */
-	ipath_flush_wc();
-	__raw_writel(last, piobuf);
-	/* be sure trigger word is written */
-	ipath_flush_wc();
-}
-
-/**
- * ipath_verbs_send - send a packet from the verbs layer
- * @dd: the infinipath device
- * @hdrwords: the number of words in the header
- * @hdr: the packet header
- * @len: the length of the packet in bytes
- * @ss: the SGE to send
- *
- * This is like ipath_sma_send_pkt() in that we need to be able to send
- * packets after the chip is initialized (MADs) but also like
- * ipath_layer_send_hdr() since its used by the verbs layer.
- */
-int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
-		     u32 *hdr, u32 len, struct ipath_sge_state *ss)
-{
-	u32 __iomem *piobuf;
-	u32 plen;
-	int ret;
-
-	/* +1 is for the qword padding of pbc */
-	plen = hdrwords + ((len + 3) >> 2) + 1;
-	if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
-		ipath_dbg("packet len 0x%x too long, failing\n", plen);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/* Get a PIO buffer to use. */
-	piobuf = ipath_getpiobuf(dd, NULL);
-	if (unlikely(piobuf == NULL)) {
-		ret = -EBUSY;
-		goto bail;
-	}
-
-	/*
-	 * Write len to control qword, no flags.
-	 * We have to flush after the PBC for correctness on some cpus
-	 * or WC buffer can be written out of order.
-	 */
-	writeq(plen, piobuf);
-	ipath_flush_wc();
-	piobuf += 2;
-	if (len == 0) {
-		/*
-		 * If there is just the header portion, must flush before
-		 * writing last word of header for correctness, and after
-		 * the last header word (trigger word).
-		 */
-		__iowrite32_copy(piobuf, hdr, hdrwords - 1);
-		ipath_flush_wc();
-		__raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
-		ipath_flush_wc();
-		ret = 0;
-		goto bail;
-	}
-
-	__iowrite32_copy(piobuf, hdr, hdrwords);
-	piobuf += hdrwords;
-
-	/* The common case is aligned and contained in one segment. */
-	if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
-		   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
-		u32 w;
-		u32 *addr = (u32 *) ss->sge.vaddr;
-
-		/* Update address before sending packet. */
-		update_sge(ss, len);
-		/* Need to round up for the last dword in the packet. */
-		w = (len + 3) >> 2;
-		__iowrite32_copy(piobuf, addr, w - 1);
-		/* must flush early everything before trigger word */
-		ipath_flush_wc();
-		__raw_writel(addr[w - 1], piobuf + w - 1);
-		/* be sure trigger word is written */
-		ipath_flush_wc();
-		ret = 0;
-		goto bail;
-	}
-	copy_io(piobuf, ss, len);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_verbs_send);
-
-int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-				  u64 *rwords, u64 *spkts, u64 *rpkts,
-				  u64 *xmit_wait)
-{
-	int ret;
-
-	if (!(dd->ipath_flags & IPATH_INITTED)) {
-		/* no hardware, freeze, etc. */
-		ipath_dbg("unit %u not usable\n", dd->ipath_unit);
-		ret = -EINVAL;
-		goto bail;
-	}
-	*swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
-	*rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-	*spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-	*rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-	*xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_snapshot_counters);
-
-/**
- * ipath_layer_get_counters - get various chip counters
- * @dd: the infinipath device
- * @cntrs: counters are placed here
- *
- * Return the counters needed by recv_pma_get_portcounters().
- */
-int ipath_layer_get_counters(struct ipath_devdata *dd,
-			      struct ipath_layer_counters *cntrs)
-{
-	int ret;
-
-	if (!(dd->ipath_flags & IPATH_INITTED)) {
-		/* no hardware, freeze, etc. */
-		ipath_dbg("unit %u not usable\n", dd->ipath_unit);
-		ret = -EINVAL;
-		goto bail;
-	}
-	cntrs->symbol_error_counter =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
-	cntrs->link_error_recovery_counter =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
-	/*
-	 * The link downed counter counts when the other side downs the
-	 * connection.  We add in the number of times we downed the link
-	 * due to local link integrity errors to compensate.
-	 */
-	cntrs->link_downed_counter =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
-	cntrs->port_rcv_errors =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
-	cntrs->port_rcv_remphys_errors =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
-	cntrs->port_xmit_discards =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt);
-	cntrs->port_xmit_data =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
-	cntrs->port_rcv_data =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-	cntrs->port_xmit_packets =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-	cntrs->port_rcv_packets =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-	cntrs->local_link_integrity_errors = dd->ipath_lli_errors;
-	cntrs->excessive_buffer_overrun_errors = 0; /* XXX */
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_counters);
-
-int ipath_layer_want_buffer(struct ipath_devdata *dd)
-{
-	set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			 dd->ipath_sendctrl);
-
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_want_buffer);
-
 int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr)
 {
 	int ret = 0;
@@ -1153,389 +364,3 @@ int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd)
 }
 
 EXPORT_SYMBOL_GPL(ipath_layer_set_piointbufavail_int);
-
-int ipath_layer_enable_timer(struct ipath_devdata *dd)
-{
-	/*
-	 * HT-400 has a design flaw where the chip and kernel idea
-	 * of the tail register don't always agree, and therefore we won't
-	 * get an interrupt on the next packet received.
-	 * If the board supports per packet receive interrupts, use it.
-	 * Otherwise, the timer function periodically checks for packets
-	 * to cover this case.
-	 * Either way, the timer is needed for verbs layer related
-	 * processing.
-	 */
-	if (dd->ipath_flags & IPATH_GPIO_INTR) {
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
-				 0x2074076542310ULL);
-		/* Enable GPIO bit 2 interrupt */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-				 (u64) (1 << 2));
-	}
-
-	init_timer(&dd->verbs_layer.l_timer);
-	dd->verbs_layer.l_timer.function = __ipath_verbs_timer;
-	dd->verbs_layer.l_timer.data = (unsigned long)dd;
-	dd->verbs_layer.l_timer.expires = jiffies + 1;
-	add_timer(&dd->verbs_layer.l_timer);
-
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_enable_timer);
-
-int ipath_layer_disable_timer(struct ipath_devdata *dd)
-{
-	/* Disable GPIO bit 2 interrupt */
-	if (dd->ipath_flags & IPATH_GPIO_INTR)
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
-
-	del_timer_sync(&dd->verbs_layer.l_timer);
-
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_disable_timer);
-
-/**
- * ipath_layer_set_verbs_flags - set the verbs layer flags
- * @dd: the infinipath device
- * @flags: the flags to set
- */
-int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags)
-{
-	struct ipath_devdata *ss;
-	unsigned long lflags;
-
-	spin_lock_irqsave(&ipath_devs_lock, lflags);
-
-	list_for_each_entry(ss, &ipath_dev_list, ipath_list) {
-		if (!(ss->ipath_flags & IPATH_INITTED))
-			continue;
-		if ((flags & IPATH_VERBS_KERNEL_SMA) &&
-		    !(*ss->ipath_statusp & IPATH_STATUS_SMA))
-			*ss->ipath_statusp |= IPATH_STATUS_OIB_SMA;
-		else
-			*ss->ipath_statusp &= ~IPATH_STATUS_OIB_SMA;
-	}
-
-	spin_unlock_irqrestore(&ipath_devs_lock, lflags);
-
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_verbs_flags);
-
-/**
- * ipath_layer_get_npkeys - return the size of the PKEY table for port 0
- * @dd: the infinipath device
- */
-unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd)
-{
-	return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_npkeys);
-
-/**
- * ipath_layer_get_pkey - return the indexed PKEY from the port 0 PKEY table
- * @dd: the infinipath device
- * @index: the PKEY index
- */
-unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index)
-{
-	unsigned ret;
-
-	if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
-		ret = 0;
-	else
-		ret = dd->ipath_pd[0]->port_pkeys[index];
-
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_pkey);
-
-/**
- * ipath_layer_get_pkeys - return the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the pkey table is placed here
- */
-int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
-{
-	struct ipath_portdata *pd = dd->ipath_pd[0];
-
-	memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
-
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_pkeys);
-
-/**
- * rm_pkey - decrecment the reference count for the given PKEY
- * @dd: the infinipath device
- * @key: the PKEY index
- *
- * Return true if this was the last reference and the hardware table entry
- * needs to be changed.
- */
-static int rm_pkey(struct ipath_devdata *dd, u16 key)
-{
-	int i;
-	int ret;
-
-	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (dd->ipath_pkeys[i] != key)
-			continue;
-		if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
-			dd->ipath_pkeys[i] = 0;
-			ret = 1;
-			goto bail;
-		}
-		break;
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * add_pkey - add the given PKEY to the hardware table
- * @dd: the infinipath device
- * @key: the PKEY
- *
- * Return an error code if unable to add the entry, zero if no change,
- * or 1 if the hardware PKEY register needs to be updated.
- */
-static int add_pkey(struct ipath_devdata *dd, u16 key)
-{
-	int i;
-	u16 lkey = key & 0x7FFF;
-	int any = 0;
-	int ret;
-
-	if (lkey == 0x7FFF) {
-		ret = 0;
-		goto bail;
-	}
-
-	/* Look for an empty slot or a matching PKEY. */
-	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (!dd->ipath_pkeys[i]) {
-			any++;
-			continue;
-		}
-		/* If it matches exactly, try to increment the ref count */
-		if (dd->ipath_pkeys[i] == key) {
-			if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
-				ret = 0;
-				goto bail;
-			}
-			/* Lost the race. Look for an empty slot below. */
-			atomic_dec(&dd->ipath_pkeyrefs[i]);
-			any++;
-		}
-		/*
-		 * It makes no sense to have both the limited and unlimited
-		 * PKEY set at the same time since the unlimited one will
-		 * disable the limited one.
-		 */
-		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
-			ret = -EEXIST;
-			goto bail;
-		}
-	}
-	if (!any) {
-		ret = -EBUSY;
-		goto bail;
-	}
-	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (!dd->ipath_pkeys[i] &&
-		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
-			/* for ipathstats, etc. */
-			ipath_stats.sps_pkeys[i] = lkey;
-			dd->ipath_pkeys[i] = key;
-			ret = 1;
-			goto bail;
-		}
-	}
-	ret = -EBUSY;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_layer_set_pkeys - set the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the PKEY table
- */
-int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 * pkeys)
-{
-	struct ipath_portdata *pd;
-	int i;
-	int changed = 0;
-
-	pd = dd->ipath_pd[0];
-
-	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-		u16 key = pkeys[i];
-		u16 okey = pd->port_pkeys[i];
-
-		if (key == okey)
-			continue;
-		/*
-		 * The value of this PKEY table entry is changing.
-		 * Remove the old entry in the hardware's array of PKEYs.
-		 */
-		if (okey & 0x7FFF)
-			changed |= rm_pkey(dd, okey);
-		if (key & 0x7FFF) {
-			int ret = add_pkey(dd, key);
-
-			if (ret < 0)
-				key = 0;
-			else
-				changed |= ret;
-		}
-		pd->port_pkeys[i] = key;
-	}
-	if (changed) {
-		u64 pkey;
-
-		pkey = (u64) dd->ipath_pkeys[0] |
-			((u64) dd->ipath_pkeys[1] << 16) |
-			((u64) dd->ipath_pkeys[2] << 32) |
-			((u64) dd->ipath_pkeys[3] << 48);
-		ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
-			   (unsigned long long) pkey);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
-				 pkey);
-	}
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_pkeys);
-
-/**
- * ipath_layer_get_linkdowndefaultstate - get the default linkdown state
- * @dd: the infinipath device
- *
- * Returns zero if the default is POLL, 1 if the default is SLEEP.
- */
-int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd)
-{
-	return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_linkdowndefaultstate);
-
-/**
- * ipath_layer_set_linkdowndefaultstate - set the default linkdown state
- * @dd: the infinipath device
- * @sleep: the new state
- *
- * Note that this will only take effect when the link state changes.
- */
-int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
-					 int sleep)
-{
-	if (sleep)
-		dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
-	else
-		dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-			 dd->ipath_ibcctrl);
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_linkdowndefaultstate);
-
-int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd)
-{
-	return (dd->ipath_ibcctrl >>
-		INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_phyerrthreshold);
-
-/**
- * ipath_layer_set_phyerrthreshold - set the physical error threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
-{
-	unsigned v;
-
-	v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-	if (v != n) {
-		dd->ipath_ibcctrl &=
-			~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
-			  INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
-		dd->ipath_ibcctrl |=
-			(u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-	}
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_phyerrthreshold);
-
-int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd)
-{
-	return (dd->ipath_ibcctrl >>
-		INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_overrunthreshold);
-
-/**
- * ipath_layer_set_overrunthreshold - set the overrun threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
-{
-	unsigned v;
-
-	v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-	if (v != n) {
-		dd->ipath_ibcctrl &=
-			~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
-			  INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
-		dd->ipath_ibcctrl |=
-			(u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-	}
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_overrunthreshold);
-
-int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
-			      size_t namelen)
-{
-	return dd->ipath_f_get_boardname(dd, name, namelen);
-}
-EXPORT_SYMBOL_GPL(ipath_layer_get_boardname);
-
-u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd)
-{
-	return dd->ipath_rcvhdrentsize;
-}
-EXPORT_SYMBOL_GPL(ipath_layer_get_rcvhdrentsize);
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h
index 71485096fca..3854a4eae68 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.h
+++ b/drivers/infiniband/hw/ipath/ipath_layer.h
@@ -40,73 +40,9 @@
  */
 
 struct sk_buff;
-struct ipath_sge_state;
 struct ipath_devdata;
 struct ether_header;
 
-struct ipath_layer_counters {
-	u64 symbol_error_counter;
-	u64 link_error_recovery_counter;
-	u64 link_downed_counter;
-	u64 port_rcv_errors;
-	u64 port_rcv_remphys_errors;
-	u64 port_xmit_discards;
-	u64 port_xmit_data;
-	u64 port_rcv_data;
-	u64 port_xmit_packets;
-	u64 port_rcv_packets;
-	u32 local_link_integrity_errors;
-	u32 excessive_buffer_overrun_errors;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * XXX Maybe we should use phys addr here and kmap()/kunmap().
- * Used by the verbs layer.
- */
-struct ipath_seg {
-	void *vaddr;
-	size_t length;
-};
-
-/* The number of ipath_segs that fit in a page. */
-#define IPATH_SEGSZ     (PAGE_SIZE / sizeof (struct ipath_seg))
-
-struct ipath_segarray {
-	struct ipath_seg segs[IPATH_SEGSZ];
-};
-
-struct ipath_mregion {
-	u64 user_base;		/* User's address for this region */
-	u64 iova;		/* IB start address of this region */
-	size_t length;
-	u32 lkey;
-	u32 offset;		/* offset (bytes) to start of region */
-	int access_flags;
-	u32 max_segs;		/* number of ipath_segs in all the arrays */
-	u32 mapsz;		/* size of the map array */
-	struct ipath_segarray *map[0];	/* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct ipath_sge {
-	struct ipath_mregion *mr;
-	void *vaddr;		/* current pointer into the segment */
-	u32 sge_length;		/* length of the SGE */
-	u32 length;		/* remaining length of the segment */
-	u16 m;			/* current index: mr->map[m] */
-	u16 n;			/* current index: mr->map[m]->segs[n] */
-};
-
-struct ipath_sge_state {
-	struct ipath_sge *sg_list;	/* next SGE to be used if any */
-	struct ipath_sge sge;	/* progress state for the current SGE */
-	u8 num_sge;
-};
-
 int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
 			 void (*l_remove)(void *),
 			 int (*l_intr)(void *, u32),
@@ -114,62 +50,14 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
 				      struct sk_buff *),
 			 u16 rcv_opcode,
 			 int (*l_rcv_lid)(void *, void *));
-int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *),
-			 void (*l_remove)(void *arg),
-			 int (*l_piobufavail)(void *arg),
-			 void (*l_rcv)(void *arg, void *rhdr,
-				       void *data, u32 tlen),
-			 void (*l_timer_cb)(void *arg));
 void ipath_layer_unregister(void);
-void ipath_verbs_unregister(void);
 int ipath_layer_open(struct ipath_devdata *, u32 * pktmax);
 u16 ipath_layer_get_lid(struct ipath_devdata *dd);
 int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *);
 u16 ipath_layer_get_bcast(struct ipath_devdata *dd);
-u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd);
-int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 state);
-int ipath_layer_set_mtu(struct ipath_devdata *, u16);
-int ipath_set_lid(struct ipath_devdata *, u32, u8);
 int ipath_layer_send_hdr(struct ipath_devdata *dd,
 			 struct ether_header *hdr);
-int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
-		     u32 * hdr, u32 len, struct ipath_sge_state *ss);
 int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd);
-int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
-			      size_t namelen);
-int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-				  u64 *rwords, u64 *spkts, u64 *rpkts,
-				  u64 *xmit_wait);
-int ipath_layer_get_counters(struct ipath_devdata *dd,
-			     struct ipath_layer_counters *cntrs);
-int ipath_layer_want_buffer(struct ipath_devdata *dd);
-int ipath_layer_set_guid(struct ipath_devdata *, __be64 guid);
-__be64 ipath_layer_get_guid(struct ipath_devdata *);
-u32 ipath_layer_get_nguid(struct ipath_devdata *);
-u32 ipath_layer_get_majrev(struct ipath_devdata *);
-u32 ipath_layer_get_minrev(struct ipath_devdata *);
-u32 ipath_layer_get_pcirev(struct ipath_devdata *);
-u32 ipath_layer_get_flags(struct ipath_devdata *dd);
-struct device *ipath_layer_get_device(struct ipath_devdata *dd);
-u16 ipath_layer_get_deviceid(struct ipath_devdata *dd);
-u32 ipath_layer_get_vendorid(struct ipath_devdata *);
-u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd);
-u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd);
-int ipath_layer_enable_timer(struct ipath_devdata *dd);
-int ipath_layer_disable_timer(struct ipath_devdata *dd);
-int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags);
-unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd);
-unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index);
-int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 *pkeys);
-int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 *pkeys);
-int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd);
-int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
-					 int sleep);
-int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd);
-int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n);
-int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd);
-int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n);
-u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd);
 
 /* ipath_ether interrupt values */
 #define IPATH_LAYER_INT_IF_UP 0x2
@@ -178,9 +66,6 @@ u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd);
 #define IPATH_LAYER_INT_SEND_CONTINUE 0x10
 #define IPATH_LAYER_INT_BCAST 0x40
 
-/* _verbs_layer.l_flags */
-#define IPATH_VERBS_KERNEL_SMA 0x1
-
 extern unsigned ipath_debug; /* debugging bit mask */
 
 #endif				/* _IPATH_LAYER_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index d3402341b7d..72d1db89db8 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -101,15 +101,15 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
 	nip->num_ports = ibdev->phys_port_cnt;
 	/* This is already in network order */
 	nip->sys_guid = to_idev(ibdev)->sys_image_guid;
-	nip->node_guid = ipath_layer_get_guid(dd);
+	nip->node_guid = dd->ipath_guid;
 	nip->port_guid = nip->sys_guid;
-	nip->partition_cap = cpu_to_be16(ipath_layer_get_npkeys(dd));
-	nip->device_id = cpu_to_be16(ipath_layer_get_deviceid(dd));
-	majrev = ipath_layer_get_majrev(dd);
-	minrev = ipath_layer_get_minrev(dd);
+	nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
+	nip->device_id = cpu_to_be16(dd->ipath_deviceid);
+	majrev = dd->ipath_majrev;
+	minrev = dd->ipath_minrev;
 	nip->revision = cpu_to_be32((majrev << 16) | minrev);
 	nip->local_port_num = port;
-	vendor = ipath_layer_get_vendorid(dd);
+	vendor = dd->ipath_vendorid;
 	nip->vendor_id[0] = 0;
 	nip->vendor_id[1] = vendor >> 8;
 	nip->vendor_id[2] = vendor;
@@ -133,13 +133,89 @@ static int recv_subn_get_guidinfo(struct ib_smp *smp,
 	 */
 	if (startgx == 0)
 		/* The first is a copy of the read-only HW GUID. */
-		*p = ipath_layer_get_guid(to_idev(ibdev)->dd);
+		*p = to_idev(ibdev)->dd->ipath_guid;
 	else
 		smp->status |= IB_SMP_INVALID_FIELD;
 
 	return reply(smp);
 }
 
+
+static int get_overrunthreshold(struct ipath_devdata *dd)
+{
+	return (dd->ipath_ibcctrl >>
+		INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
+		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
+}
+
+/**
+ * set_overrunthreshold - set the overrun threshold
+ * @dd: the infinipath device
+ * @n: the new threshold
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
+{
+	unsigned v;
+
+	v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
+		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
+	if (v != n) {
+		dd->ipath_ibcctrl &=
+			~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
+			  INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
+		dd->ipath_ibcctrl |=
+			(u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+				 dd->ipath_ibcctrl);
+	}
+	return 0;
+}
+
+static int get_phyerrthreshold(struct ipath_devdata *dd)
+{
+	return (dd->ipath_ibcctrl >>
+		INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+}
+
+/**
+ * set_phyerrthreshold - set the physical error threshold
+ * @dd: the infinipath device
+ * @n: the new threshold
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
+{
+	unsigned v;
+
+	v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+	if (v != n) {
+		dd->ipath_ibcctrl &=
+			~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
+			  INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
+		dd->ipath_ibcctrl |=
+			(u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+				 dd->ipath_ibcctrl);
+	}
+	return 0;
+}
+
+/**
+ * get_linkdowndefaultstate - get the default linkdown state
+ * @dd: the infinipath device
+ *
+ * Returns zero if the default is POLL, 1 if the default is SLEEP.
+ */
+static int get_linkdowndefaultstate(struct ipath_devdata *dd)
+{
+	return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
+}
+
 static int recv_subn_get_portinfo(struct ib_smp *smp,
 				  struct ib_device *ibdev, u8 port)
 {
@@ -166,7 +242,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
 	    (dev->mkeyprot_resv_lmc >> 6) == 0)
 		pip->mkey = dev->mkey;
 	pip->gid_prefix = dev->gid_prefix;
-	lid = ipath_layer_get_lid(dev->dd);
+	lid = dev->dd->ipath_lid;
 	pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
 	pip->sm_lid = cpu_to_be16(dev->sm_lid);
 	pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
@@ -177,14 +253,14 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
 	pip->link_width_supported = 3;	/* 1x or 4x */
 	pip->link_width_active = 2;	/* 4x */
 	pip->linkspeed_portstate = 0x10;	/* 2.5Gbps */
-	ibcstat = ipath_layer_get_lastibcstat(dev->dd);
+	ibcstat = dev->dd->ipath_lastibcstat;
 	pip->linkspeed_portstate |= ((ibcstat >> 4) & 0x3) + 1;
 	pip->portphysstate_linkdown =
 		(ipath_cvt_physportstate[ibcstat & 0xf] << 4) |
-		(ipath_layer_get_linkdowndefaultstate(dev->dd) ? 1 : 2);
+		(get_linkdowndefaultstate(dev->dd) ? 1 : 2);
 	pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc;
 	pip->linkspeedactive_enabled = 0x11;	/* 2.5Gbps, 2.5Gbps */
-	switch (ipath_layer_get_ibmtu(dev->dd)) {
+	switch (dev->dd->ipath_ibmtu) {
 	case 4096:
 		mtu = IB_MTU_4096;
 		break;
@@ -217,7 +293,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
 	pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
 	/* P_KeyViolations are counted by hardware. */
 	pip->pkey_violations =
-		cpu_to_be16((ipath_layer_get_cr_errpkey(dev->dd) -
+		cpu_to_be16((ipath_get_cr_errpkey(dev->dd) -
 			     dev->z_pkey_violations) & 0xFFFF);
 	pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
 	/* Only the hardware GUID is supported for now */
@@ -226,8 +302,8 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
 	/* 32.768 usec. response time (guessing) */
 	pip->resv_resptimevalue = 3;
 	pip->localphyerrors_overrunerrors =
-		(ipath_layer_get_phyerrthreshold(dev->dd) << 4) |
-		ipath_layer_get_overrunthreshold(dev->dd);
+		(get_phyerrthreshold(dev->dd) << 4) |
+		get_overrunthreshold(dev->dd);
 	/* pip->max_credit_hint; */
 	/* pip->link_roundtrip_latency[3]; */
 
@@ -237,6 +313,20 @@ bail:
 	return ret;
 }
 
+/**
+ * get_pkeys - return the PKEY table for port 0
+ * @dd: the infinipath device
+ * @pkeys: the pkey table is placed here
+ */
+static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
+{
+	struct ipath_portdata *pd = dd->ipath_pd[0];
+
+	memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
+
+	return 0;
+}
+
 static int recv_subn_get_pkeytable(struct ib_smp *smp,
 				   struct ib_device *ibdev)
 {
@@ -249,9 +339,9 @@ static int recv_subn_get_pkeytable(struct ib_smp *smp,
 	memset(smp->data, 0, sizeof(smp->data));
 	if (startpx == 0) {
 		struct ipath_ibdev *dev = to_idev(ibdev);
-		unsigned i, n = ipath_layer_get_npkeys(dev->dd);
+		unsigned i, n = ipath_get_npkeys(dev->dd);
 
-		ipath_layer_get_pkeys(dev->dd, p);
+		get_pkeys(dev->dd, p);
 
 		for (i = 0; i < n; i++)
 			q[i] = cpu_to_be16(p[i]);
@@ -269,6 +359,24 @@ static int recv_subn_set_guidinfo(struct ib_smp *smp,
 }
 
 /**
+ * set_linkdowndefaultstate - set the default linkdown state
+ * @dd: the infinipath device
+ * @sleep: the new state
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep)
+{
+	if (sleep)
+		dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
+	else
+		dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+			 dd->ipath_ibcctrl);
+	return 0;
+}
+
+/**
  * recv_subn_set_portinfo - set port information
  * @smp: the incoming SM packet
  * @ibdev: the infiniband device
@@ -290,7 +398,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	u8 state;
 	u16 lstate;
 	u32 mtu;
-	int ret;
+	int ret, ore;
 
 	if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt)
 		goto err;
@@ -304,7 +412,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
 
 	lid = be16_to_cpu(pip->lid);
-	if (lid != ipath_layer_get_lid(dev->dd)) {
+	if (lid != dev->dd->ipath_lid) {
 		/* Must be a valid unicast LID address. */
 		if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
 			goto err;
@@ -342,11 +450,11 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	case 0: /* NOP */
 		break;
 	case 1: /* SLEEP */
-		if (ipath_layer_set_linkdowndefaultstate(dev->dd, 1))
+		if (set_linkdowndefaultstate(dev->dd, 1))
 			goto err;
 		break;
 	case 2: /* POLL */
-		if (ipath_layer_set_linkdowndefaultstate(dev->dd, 0))
+		if (set_linkdowndefaultstate(dev->dd, 0))
 			goto err;
 		break;
 	default:
@@ -376,7 +484,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 		/* XXX We have already partially updated our state! */
 		goto err;
 	}
-	ipath_layer_set_mtu(dev->dd, mtu);
+	ipath_set_mtu(dev->dd, mtu);
 
 	dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
 
@@ -392,20 +500,16 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	 * later.
 	 */
 	if (pip->pkey_violations == 0)
-		dev->z_pkey_violations =
-			ipath_layer_get_cr_errpkey(dev->dd);
+		dev->z_pkey_violations = ipath_get_cr_errpkey(dev->dd);
 
 	if (pip->qkey_violations == 0)
 		dev->qkey_violations = 0;
 
-	if (ipath_layer_set_phyerrthreshold(
-		    dev->dd,
-		    (pip->localphyerrors_overrunerrors >> 4) & 0xF))
+	ore = pip->localphyerrors_overrunerrors;
+	if (set_phyerrthreshold(dev->dd, (ore >> 4) & 0xF))
 		goto err;
 
-	if (ipath_layer_set_overrunthreshold(
-		    dev->dd,
-		    (pip->localphyerrors_overrunerrors & 0xF)))
+	if (set_overrunthreshold(dev->dd, (ore & 0xF)))
 		goto err;
 
 	dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
@@ -423,7 +527,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	 * is down or is being set to down.
 	 */
 	state = pip->linkspeed_portstate & 0xF;
-	flags = ipath_layer_get_flags(dev->dd);
+	flags = dev->dd->ipath_flags;
 	lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
 	if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
 		goto err;
@@ -439,7 +543,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 		/* FALLTHROUGH */
 	case IB_PORT_DOWN:
 		if (lstate == 0)
-			if (ipath_layer_get_linkdowndefaultstate(dev->dd))
+			if (get_linkdowndefaultstate(dev->dd))
 				lstate = IPATH_IB_LINKDOWN_SLEEP;
 			else
 				lstate = IPATH_IB_LINKDOWN;
@@ -451,7 +555,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 			lstate = IPATH_IB_LINKDOWN_DISABLE;
 		else
 			goto err;
-		ipath_layer_set_linkstate(dev->dd, lstate);
+		ipath_set_linkstate(dev->dd, lstate);
 		if (flags & IPATH_LINKACTIVE) {
 			event.event = IB_EVENT_PORT_ERR;
 			ib_dispatch_event(&event);
@@ -460,7 +564,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	case IB_PORT_ARMED:
 		if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE)))
 			break;
-		ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKARM);
+		ipath_set_linkstate(dev->dd, IPATH_IB_LINKARM);
 		if (flags & IPATH_LINKACTIVE) {
 			event.event = IB_EVENT_PORT_ERR;
 			ib_dispatch_event(&event);
@@ -469,7 +573,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	case IB_PORT_ACTIVE:
 		if (!(flags & IPATH_LINKARMED))
 			break;
-		ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
+		ipath_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
 		event.event = IB_EVENT_PORT_ACTIVE;
 		ib_dispatch_event(&event);
 		break;
@@ -493,6 +597,152 @@ done:
 	return ret;
 }
 
+/**
+ * rm_pkey - decrecment the reference count for the given PKEY
+ * @dd: the infinipath device
+ * @key: the PKEY index
+ *
+ * Return true if this was the last reference and the hardware table entry
+ * needs to be changed.
+ */
+static int rm_pkey(struct ipath_devdata *dd, u16 key)
+{
+	int i;
+	int ret;
+
+	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+		if (dd->ipath_pkeys[i] != key)
+			continue;
+		if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
+			dd->ipath_pkeys[i] = 0;
+			ret = 1;
+			goto bail;
+		}
+		break;
+	}
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+/**
+ * add_pkey - add the given PKEY to the hardware table
+ * @dd: the infinipath device
+ * @key: the PKEY
+ *
+ * Return an error code if unable to add the entry, zero if no change,
+ * or 1 if the hardware PKEY register needs to be updated.
+ */
+static int add_pkey(struct ipath_devdata *dd, u16 key)
+{
+	int i;
+	u16 lkey = key & 0x7FFF;
+	int any = 0;
+	int ret;
+
+	if (lkey == 0x7FFF) {
+		ret = 0;
+		goto bail;
+	}
+
+	/* Look for an empty slot or a matching PKEY. */
+	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+		if (!dd->ipath_pkeys[i]) {
+			any++;
+			continue;
+		}
+		/* If it matches exactly, try to increment the ref count */
+		if (dd->ipath_pkeys[i] == key) {
+			if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
+				ret = 0;
+				goto bail;
+			}
+			/* Lost the race. Look for an empty slot below. */
+			atomic_dec(&dd->ipath_pkeyrefs[i]);
+			any++;
+		}
+		/*
+		 * It makes no sense to have both the limited and unlimited
+		 * PKEY set at the same time since the unlimited one will
+		 * disable the limited one.
+		 */
+		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
+			ret = -EEXIST;
+			goto bail;
+		}
+	}
+	if (!any) {
+		ret = -EBUSY;
+		goto bail;
+	}
+	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+		if (!dd->ipath_pkeys[i] &&
+		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
+			/* for ipathstats, etc. */
+			ipath_stats.sps_pkeys[i] = lkey;
+			dd->ipath_pkeys[i] = key;
+			ret = 1;
+			goto bail;
+		}
+	}
+	ret = -EBUSY;
+
+bail:
+	return ret;
+}
+
+/**
+ * set_pkeys - set the PKEY table for port 0
+ * @dd: the infinipath device
+ * @pkeys: the PKEY table
+ */
+static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
+{
+	struct ipath_portdata *pd;
+	int i;
+	int changed = 0;
+
+	pd = dd->ipath_pd[0];
+
+	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
+		u16 key = pkeys[i];
+		u16 okey = pd->port_pkeys[i];
+
+		if (key == okey)
+			continue;
+		/*
+		 * The value of this PKEY table entry is changing.
+		 * Remove the old entry in the hardware's array of PKEYs.
+		 */
+		if (okey & 0x7FFF)
+			changed |= rm_pkey(dd, okey);
+		if (key & 0x7FFF) {
+			int ret = add_pkey(dd, key);
+
+			if (ret < 0)
+				key = 0;
+			else
+				changed |= ret;
+		}
+		pd->port_pkeys[i] = key;
+	}
+	if (changed) {
+		u64 pkey;
+
+		pkey = (u64) dd->ipath_pkeys[0] |
+			((u64) dd->ipath_pkeys[1] << 16) |
+			((u64) dd->ipath_pkeys[2] << 32) |
+			((u64) dd->ipath_pkeys[3] << 48);
+		ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
+			   (unsigned long long) pkey);
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
+				 pkey);
+	}
+	return 0;
+}
+
 static int recv_subn_set_pkeytable(struct ib_smp *smp,
 				   struct ib_device *ibdev)
 {
@@ -500,13 +750,12 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp,
 	__be16 *p = (__be16 *) smp->data;
 	u16 *q = (u16 *) smp->data;
 	struct ipath_ibdev *dev = to_idev(ibdev);
-	unsigned i, n = ipath_layer_get_npkeys(dev->dd);
+	unsigned i, n = ipath_get_npkeys(dev->dd);
 
 	for (i = 0; i < n; i++)
 		q[i] = be16_to_cpu(p[i]);
 
-	if (startpx != 0 ||
-	    ipath_layer_set_pkeys(dev->dd, q) != 0)
+	if (startpx != 0 || set_pkeys(dev->dd, q) != 0)
 		smp->status |= IB_SMP_INVALID_FIELD;
 
 	return recv_subn_get_pkeytable(smp, ibdev);
@@ -844,10 +1093,10 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
 	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
 		pmp->data;
 	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_layer_counters cntrs;
+	struct ipath_verbs_counters cntrs;
 	u8 port_select = p->port_select;
 
-	ipath_layer_get_counters(dev->dd, &cntrs);
+	ipath_get_counters(dev->dd, &cntrs);
 
 	/* Adjust counters for any resets done. */
 	cntrs.symbol_error_counter -= dev->z_symbol_error_counter;
@@ -944,8 +1193,8 @@ static int recv_pma_get_portcounters_ext(struct ib_perf *pmp,
 	u64 swords, rwords, spkts, rpkts, xwait;
 	u8 port_select = p->port_select;
 
-	ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-				      &rpkts, &xwait);
+	ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
+				&rpkts, &xwait);
 
 	/* Adjust counters for any resets done. */
 	swords -= dev->z_port_xmit_data;
@@ -978,13 +1227,13 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp,
 	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
 		pmp->data;
 	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_layer_counters cntrs;
+	struct ipath_verbs_counters cntrs;
 
 	/*
 	 * Since the HW doesn't support clearing counters, we save the
 	 * current count and subtract it from future responses.
 	 */
-	ipath_layer_get_counters(dev->dd, &cntrs);
+	ipath_get_counters(dev->dd, &cntrs);
 
 	if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR)
 		dev->z_symbol_error_counter = cntrs.symbol_error_counter;
@@ -1041,8 +1290,8 @@ static int recv_pma_set_portcounters_ext(struct ib_perf *pmp,
 	struct ipath_ibdev *dev = to_idev(ibdev);
 	u64 swords, rwords, spkts, rpkts, xwait;
 
-	ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-				      &rpkts, &xwait);
+	ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
+				&rpkts, &xwait);
 
 	if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA)
 		dev->z_port_xmit_data = swords;
diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c
new file mode 100644
index 00000000000..11b7378ff21
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_mmap.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <asm/pgtable.h>
+
+#include "ipath_verbs.h"
+
+/**
+ * ipath_release_mmap_info - free mmap info structure
+ * @ref: a pointer to the kref within struct ipath_mmap_info
+ */
+void ipath_release_mmap_info(struct kref *ref)
+{
+	struct ipath_mmap_info *ip =
+		container_of(ref, struct ipath_mmap_info, ref);
+
+	vfree(ip->obj);
+	kfree(ip);
+}
+
+/*
+ * open and close keep track of how many times the CQ is mapped,
+ * to avoid releasing it.
+ */
+static void ipath_vma_open(struct vm_area_struct *vma)
+{
+	struct ipath_mmap_info *ip = vma->vm_private_data;
+
+	kref_get(&ip->ref);
+	ip->mmap_cnt++;
+}
+
+static void ipath_vma_close(struct vm_area_struct *vma)
+{
+	struct ipath_mmap_info *ip = vma->vm_private_data;
+
+	ip->mmap_cnt--;
+	kref_put(&ip->ref, ipath_release_mmap_info);
+}
+
+static struct vm_operations_struct ipath_vm_ops = {
+	.open =     ipath_vma_open,
+	.close =    ipath_vma_close,
+};
+
+/**
+ * ipath_mmap - create a new mmap region
+ * @context: the IB user context of the process making the mmap() call
+ * @vma: the VMA to be initialized
+ * Return zero if the mmap is OK. Otherwise, return an errno.
+ */
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	struct ipath_ibdev *dev = to_idev(context->device);
+	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+	unsigned long size = vma->vm_end - vma->vm_start;
+	struct ipath_mmap_info *ip, **pp;
+	int ret = -EINVAL;
+
+	/*
+	 * Search the device's list of objects waiting for a mmap call.
+	 * Normally, this list is very short since a call to create a
+	 * CQ, QP, or SRQ is soon followed by a call to mmap().
+	 */
+	spin_lock_irq(&dev->pending_lock);
+	for (pp = &dev->pending_mmaps; (ip = *pp); pp = &ip->next) {
+		/* Only the creator is allowed to mmap the object */
+		if (context != ip->context || (void *) offset != ip->obj)
+			continue;
+		/* Don't allow a mmap larger than the object. */
+		if (size > ip->size)
+			break;
+
+		*pp = ip->next;
+		spin_unlock_irq(&dev->pending_lock);
+
+		ret = remap_vmalloc_range(vma, ip->obj, 0);
+		if (ret)
+			goto done;
+		vma->vm_ops = &ipath_vm_ops;
+		vma->vm_private_data = ip;
+		ipath_vma_open(vma);
+		goto done;
+	}
+	spin_unlock_irq(&dev->pending_lock);
+done:
+	return ret;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 4ac31a5da33..b36f6fb3e37 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -36,6 +36,18 @@
 
 #include "ipath_verbs.h"
 
+/* Fast memory region */
+struct ipath_fmr {
+	struct ib_fmr ibfmr;
+	u8 page_shift;
+	struct ipath_mregion mr;        /* must be last */
+};
+
+static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
+{
+	return container_of(ibfmr, struct ipath_fmr, ibfmr);
+}
+
 /**
  * ipath_get_dma_mr - get a DMA memory region
  * @pd: protection domain for this memory region
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 83e557be591..224b0f40767 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -35,7 +35,7 @@
 #include <linux/vmalloc.h>
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
 
 #define BITS_PER_PAGE		(PAGE_SIZE*BITS_PER_BYTE)
 #define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
@@ -44,19 +44,6 @@
 #define find_next_offset(map, off) find_next_zero_bit((map)->page, \
 						      BITS_PER_PAGE, off)
 
-#define TRANS_INVALID	0
-#define TRANS_ANY2RST	1
-#define TRANS_RST2INIT	2
-#define TRANS_INIT2INIT	3
-#define TRANS_INIT2RTR	4
-#define TRANS_RTR2RTS	5
-#define TRANS_RTS2RTS	6
-#define TRANS_SQERR2RTS	7
-#define TRANS_ANY2ERR	8
-#define TRANS_RTS2SQD	9  /* XXX Wait for expected ACKs & signal event */
-#define TRANS_SQD2SQD	10 /* error if not drained & parameter change */
-#define TRANS_SQD2RTS	11 /* error if not drained */
-
 /*
  * Convert the AETH credit code into the number of credits.
  */
@@ -287,7 +274,7 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt)
 				free_qpn(qpt, qp->ibqp.qp_num);
 			if (!atomic_dec_and_test(&qp->refcount) ||
 			    !ipath_destroy_qp(&qp->ibqp))
-				_VERBS_INFO("QP memory leak!\n");
+				ipath_dbg(KERN_INFO "QP memory leak!\n");
 			qp = nqp;
 		}
 	}
@@ -355,8 +342,10 @@ static void ipath_reset_qp(struct ipath_qp *qp)
 	qp->s_last = 0;
 	qp->s_ssn = 1;
 	qp->s_lsn = 0;
-	qp->r_rq.head = 0;
-	qp->r_rq.tail = 0;
+	if (qp->r_rq.wq) {
+		qp->r_rq.wq->head = 0;
+		qp->r_rq.wq->tail = 0;
+	}
 	qp->r_reuse_sge = 0;
 }
 
@@ -373,8 +362,8 @@ void ipath_error_qp(struct ipath_qp *qp)
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ib_wc wc;
 
-	_VERBS_INFO("QP%d/%d in error state\n",
-		    qp->ibqp.qp_num, qp->remote_qpn);
+	ipath_dbg(KERN_INFO "QP%d/%d in error state\n",
+		  qp->ibqp.qp_num, qp->remote_qpn);
 
 	spin_lock(&dev->pending_lock);
 	/* XXX What if its already removed by the timeout code? */
@@ -410,15 +399,32 @@ void ipath_error_qp(struct ipath_qp *qp)
 	qp->s_hdrwords = 0;
 	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
 
-	wc.opcode = IB_WC_RECV;
-	spin_lock(&qp->r_rq.lock);
-	while (qp->r_rq.tail != qp->r_rq.head) {
-		wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id;
-		if (++qp->r_rq.tail >= qp->r_rq.size)
-			qp->r_rq.tail = 0;
-		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	if (qp->r_rq.wq) {
+		struct ipath_rwq *wq;
+		u32 head;
+		u32 tail;
+
+		spin_lock(&qp->r_rq.lock);
+
+		/* sanity check pointers before trusting them */
+		wq = qp->r_rq.wq;
+		head = wq->head;
+		if (head >= qp->r_rq.size)
+			head = 0;
+		tail = wq->tail;
+		if (tail >= qp->r_rq.size)
+			tail = 0;
+		wc.opcode = IB_WC_RECV;
+		while (tail != head) {
+			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
+			if (++tail >= qp->r_rq.size)
+				tail = 0;
+			ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+		}
+		wq->tail = tail;
+
+		spin_unlock(&qp->r_rq.lock);
 	}
-	spin_unlock(&qp->r_rq.lock);
 }
 
 /**
@@ -426,11 +432,12 @@ void ipath_error_qp(struct ipath_qp *qp)
  * @ibqp: the queue pair who's attributes we're modifying
  * @attr: the new attributes
  * @attr_mask: the mask of attributes to modify
+ * @udata: user data for ipathverbs.so
  *
  * Returns 0 on success, otherwise returns an errno.
  */
 int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		    int attr_mask)
+		    int attr_mask, struct ib_udata *udata)
 {
 	struct ipath_ibdev *dev = to_idev(ibqp->device);
 	struct ipath_qp *qp = to_iqp(ibqp);
@@ -448,19 +455,46 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 				attr_mask))
 		goto inval;
 
-	if (attr_mask & IB_QP_AV)
+	if (attr_mask & IB_QP_AV) {
 		if (attr->ah_attr.dlid == 0 ||
 		    attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE)
 			goto inval;
 
+		if ((attr->ah_attr.ah_flags & IB_AH_GRH) &&
+		    (attr->ah_attr.grh.sgid_index > 1))
+			goto inval;
+	}
+
 	if (attr_mask & IB_QP_PKEY_INDEX)
-		if (attr->pkey_index >= ipath_layer_get_npkeys(dev->dd))
+		if (attr->pkey_index >= ipath_get_npkeys(dev->dd))
 			goto inval;
 
 	if (attr_mask & IB_QP_MIN_RNR_TIMER)
 		if (attr->min_rnr_timer > 31)
 			goto inval;
 
+	if (attr_mask & IB_QP_PORT)
+		if (attr->port_num == 0 ||
+		    attr->port_num > ibqp->device->phys_port_cnt)
+			goto inval;
+
+	if (attr_mask & IB_QP_PATH_MTU)
+		if (attr->path_mtu > IB_MTU_4096)
+			goto inval;
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+		if (attr->max_dest_rd_atomic > 1)
+			goto inval;
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+		if (attr->max_rd_atomic > 1)
+			goto inval;
+
+	if (attr_mask & IB_QP_PATH_MIG_STATE)
+		if (attr->path_mig_state != IB_MIG_MIGRATED &&
+		    attr->path_mig_state != IB_MIG_REARM)
+			goto inval;
+
 	switch (new_state) {
 	case IB_QPS_RESET:
 		ipath_reset_qp(qp);
@@ -511,6 +545,9 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	if (attr_mask & IB_QP_MIN_RNR_TIMER)
 		qp->r_min_rnr_timer = attr->min_rnr_timer;
 
+	if (attr_mask & IB_QP_TIMEOUT)
+		qp->timeout = attr->timeout;
+
 	if (attr_mask & IB_QP_QKEY)
 		qp->qkey = attr->qkey;
 
@@ -543,7 +580,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	attr->dest_qp_num = qp->remote_qpn;
 	attr->qp_access_flags = qp->qp_access_flags;
 	attr->cap.max_send_wr = qp->s_size - 1;
-	attr->cap.max_recv_wr = qp->r_rq.size - 1;
+	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
 	attr->cap.max_send_sge = qp->s_max_sge;
 	attr->cap.max_recv_sge = qp->r_rq.max_sge;
 	attr->cap.max_inline_data = 0;
@@ -557,7 +594,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	attr->max_dest_rd_atomic = 1;
 	attr->min_rnr_timer = qp->r_min_rnr_timer;
 	attr->port_num = 1;
-	attr->timeout = 0;
+	attr->timeout = qp->timeout;
 	attr->retry_cnt = qp->s_retry_cnt;
 	attr->rnr_retry = qp->s_rnr_retry;
 	attr->alt_port_num = 0;
@@ -569,9 +606,10 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	init_attr->recv_cq = qp->ibqp.recv_cq;
 	init_attr->srq = qp->ibqp.srq;
 	init_attr->cap = attr->cap;
-	init_attr->sq_sig_type =
-		(qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
-		? IB_SIGNAL_REQ_WR : 0;
+	if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
+		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
+	else
+		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
 	init_attr->qp_type = qp->ibqp.qp_type;
 	init_attr->port_num = 1;
 	return 0;
@@ -596,13 +634,23 @@ __be32 ipath_compute_aeth(struct ipath_qp *qp)
 	} else {
 		u32 min, max, x;
 		u32 credits;
-
+		struct ipath_rwq *wq = qp->r_rq.wq;
+		u32 head;
+		u32 tail;
+
+		/* sanity check pointers before trusting them */
+		head = wq->head;
+		if (head >= qp->r_rq.size)
+			head = 0;
+		tail = wq->tail;
+		if (tail >= qp->r_rq.size)
+			tail = 0;
 		/*
 		 * Compute the number of credits available (RWQEs).
 		 * XXX Not holding the r_rq.lock here so there is a small
 		 * chance that the pair of reads are not atomic.
 		 */
-		credits = qp->r_rq.head - qp->r_rq.tail;
+		credits = head - tail;
 		if ((int)credits < 0)
 			credits += qp->r_rq.size;
 		/*
@@ -679,27 +727,37 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 	case IB_QPT_UD:
 	case IB_QPT_SMI:
 	case IB_QPT_GSI:
-		qp = kmalloc(sizeof(*qp), GFP_KERNEL);
+		sz = sizeof(*qp);
+		if (init_attr->srq) {
+			struct ipath_srq *srq = to_isrq(init_attr->srq);
+
+			sz += sizeof(*qp->r_sg_list) *
+				srq->rq.max_sge;
+		} else
+			sz += sizeof(*qp->r_sg_list) *
+				init_attr->cap.max_recv_sge;
+		qp = kmalloc(sz, GFP_KERNEL);
 		if (!qp) {
-			vfree(swq);
 			ret = ERR_PTR(-ENOMEM);
-			goto bail;
+			goto bail_swq;
 		}
 		if (init_attr->srq) {
+			sz = 0;
 			qp->r_rq.size = 0;
 			qp->r_rq.max_sge = 0;
 			qp->r_rq.wq = NULL;
+			init_attr->cap.max_recv_wr = 0;
+			init_attr->cap.max_recv_sge = 0;
 		} else {
 			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
 			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
-			sz = (sizeof(struct ipath_sge) * qp->r_rq.max_sge) +
+			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
 				sizeof(struct ipath_rwqe);
-			qp->r_rq.wq = vmalloc(qp->r_rq.size * sz);
+			qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) +
+					      qp->r_rq.size * sz);
 			if (!qp->r_rq.wq) {
-				kfree(qp);
-				vfree(swq);
 				ret = ERR_PTR(-ENOMEM);
-				goto bail;
+				goto bail_qp;
 			}
 		}
 
@@ -719,24 +777,19 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 		qp->s_wq = swq;
 		qp->s_size = init_attr->cap.max_send_wr + 1;
 		qp->s_max_sge = init_attr->cap.max_send_sge;
-		qp->s_flags = init_attr->sq_sig_type == IB_SIGNAL_REQ_WR ?
-			1 << IPATH_S_SIGNAL_REQ_WR : 0;
+		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
+			qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR;
+		else
+			qp->s_flags = 0;
 		dev = to_idev(ibpd->device);
 		err = ipath_alloc_qpn(&dev->qp_table, qp,
 				      init_attr->qp_type);
 		if (err) {
-			vfree(swq);
-			vfree(qp->r_rq.wq);
-			kfree(qp);
 			ret = ERR_PTR(err);
-			goto bail;
+			goto bail_rwq;
 		}
+		qp->ip = NULL;
 		ipath_reset_qp(qp);
-
-		/* Tell the core driver that the kernel SMA is present. */
-		if (init_attr->qp_type == IB_QPT_SMI)
-			ipath_layer_set_verbs_flags(dev->dd,
-						    IPATH_VERBS_KERNEL_SMA);
 		break;
 
 	default:
@@ -747,8 +800,63 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 
 	init_attr->cap.max_inline_data = 0;
 
+	/*
+	 * Return the address of the RWQ as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		struct ipath_mmap_info *ip;
+		__u64 offset = (__u64) qp->r_rq.wq;
+		int err;
+
+		err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (err) {
+			ret = ERR_PTR(err);
+			goto bail_rwq;
+		}
+
+		if (qp->r_rq.wq) {
+			/* Allocate info for ipath_mmap(). */
+			ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+			if (!ip) {
+				ret = ERR_PTR(-ENOMEM);
+				goto bail_rwq;
+			}
+			qp->ip = ip;
+			ip->context = ibpd->uobject->context;
+			ip->obj = qp->r_rq.wq;
+			kref_init(&ip->ref);
+			ip->mmap_cnt = 0;
+			ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+					      qp->r_rq.size * sz);
+			spin_lock_irq(&dev->pending_lock);
+			ip->next = dev->pending_mmaps;
+			dev->pending_mmaps = ip;
+			spin_unlock_irq(&dev->pending_lock);
+		}
+	}
+
+	spin_lock(&dev->n_qps_lock);
+	if (dev->n_qps_allocated == ib_ipath_max_qps) {
+		spin_unlock(&dev->n_qps_lock);
+		ret = ERR_PTR(-ENOMEM);
+		goto bail_ip;
+	}
+
+	dev->n_qps_allocated++;
+	spin_unlock(&dev->n_qps_lock);
+
 	ret = &qp->ibqp;
+	goto bail;
 
+bail_ip:
+	kfree(qp->ip);
+bail_rwq:
+	vfree(qp->r_rq.wq);
+bail_qp:
+	kfree(qp);
+bail_swq:
+	vfree(swq);
 bail:
 	return ret;
 }
@@ -768,15 +876,12 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
 	struct ipath_ibdev *dev = to_idev(ibqp->device);
 	unsigned long flags;
 
-	/* Tell the core driver that the kernel SMA is gone. */
-	if (qp->ibqp.qp_type == IB_QPT_SMI)
-		ipath_layer_set_verbs_flags(dev->dd, 0);
-
-	spin_lock_irqsave(&qp->r_rq.lock, flags);
-	spin_lock(&qp->s_lock);
+	spin_lock_irqsave(&qp->s_lock, flags);
 	qp->state = IB_QPS_ERR;
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+	spin_lock(&dev->n_qps_lock);
+	dev->n_qps_allocated--;
+	spin_unlock(&dev->n_qps_lock);
 
 	/* Stop the sending tasklet. */
 	tasklet_kill(&qp->s_task);
@@ -797,8 +902,11 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
 	if (atomic_read(&qp->refcount) != 0)
 		ipath_free_qp(&dev->qp_table, qp);
 
+	if (qp->ip)
+		kref_put(&qp->ip->ref, ipath_release_mmap_info);
+	else
+		vfree(qp->r_rq.wq);
 	vfree(qp->s_wq);
-	vfree(qp->r_rq.wq);
 	kfree(qp);
 	return 0;
 }
@@ -850,8 +958,8 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
 
-	_VERBS_INFO("Send queue error on QP%d/%d: err: %d\n",
-		    qp->ibqp.qp_num, qp->remote_qpn, wc->status);
+	ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n",
+		  qp->ibqp.qp_num, qp->remote_qpn, wc->status);
 
 	spin_lock(&dev->pending_lock);
 	/* XXX What if its already removed by the timeout code? */
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 774d1615ce2..a08654042c0 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -32,7 +32,7 @@
  */
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
 
 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_RC_##x
@@ -540,7 +540,7 @@ static void send_rc_ack(struct ipath_qp *qp)
 		lrh0 = IPATH_LRH_GRH;
 	}
 	/* read pkey_index w/o lock (its atomic) */
-	bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+	bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index);
 	if (qp->r_nak_state)
 		ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
 					    (qp->r_nak_state <<
@@ -557,7 +557,7 @@ static void send_rc_ack(struct ipath_qp *qp)
 	hdr.lrh[0] = cpu_to_be16(lrh0);
 	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
 	hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
-	hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
+	hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
 	ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
@@ -1323,8 +1323,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		 * the eager header buffer size to 56 bytes so the last 4
 		 * bytes of the BTH header (PSN) is in the data buffer.
 		 */
-		header_in_data =
-			ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
 		if (header_in_data) {
 			psn = be32_to_cpu(((__be32 *) data)[0]);
 			data += sizeof(__be32);
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index 89df8f5ea99..6e23b3d632b 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -36,8 +36,7 @@
 
 /*
  * This file should only be included by kernel source, and by the diags.  It
- * defines the registers, and their contents, for the InfiniPath HT-400
- * chip.
+ * defines the registers, and their contents, for InfiniPath chips.
  */
 
 /*
@@ -283,10 +282,12 @@
 #define INFINIPATH_XGXS_RESET          0x7ULL
 #define INFINIPATH_XGXS_MDIOADDR_MASK  0xfULL
 #define INFINIPATH_XGXS_MDIOADDR_SHIFT 4
+#define INFINIPATH_XGXS_RX_POL_SHIFT 19
+#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
 
 #define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL	/* 40 bits valid */
 
-/* TID entries (memory), HT400-only */
+/* TID entries (memory), HT-only */
 #define INFINIPATH_RT_VALID 0x8000000000000000ULL
 #define INFINIPATH_RT_ADDR_SHIFT 0
 #define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 772bc59fb85..5c1da2d25e0 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -32,7 +32,7 @@
  */
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
 
 /*
  * Convert the AETH RNR timeout code into the number of milliseconds.
@@ -106,6 +106,54 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
 	spin_unlock_irqrestore(&dev->pending_lock, flags);
 }
 
+static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
+{
+	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+	int user = to_ipd(qp->ibqp.pd)->user;
+	int i, j, ret;
+	struct ib_wc wc;
+
+	qp->r_len = 0;
+	for (i = j = 0; i < wqe->num_sge; i++) {
+		if (wqe->sg_list[i].length == 0)
+			continue;
+		/* Check LKEY */
+		if ((user && wqe->sg_list[i].lkey == 0) ||
+		    !ipath_lkey_ok(&dev->lk_table,
+				   &qp->r_sg_list[j], &wqe->sg_list[i],
+				   IB_ACCESS_LOCAL_WRITE))
+			goto bad_lkey;
+		qp->r_len += wqe->sg_list[i].length;
+		j++;
+	}
+	qp->r_sge.sge = qp->r_sg_list[0];
+	qp->r_sge.sg_list = qp->r_sg_list + 1;
+	qp->r_sge.num_sge = j;
+	ret = 1;
+	goto bail;
+
+bad_lkey:
+	wc.wr_id = wqe->wr_id;
+	wc.status = IB_WC_LOC_PROT_ERR;
+	wc.opcode = IB_WC_RECV;
+	wc.vendor_err = 0;
+	wc.byte_len = 0;
+	wc.imm_data = 0;
+	wc.qp_num = qp->ibqp.qp_num;
+	wc.src_qp = 0;
+	wc.wc_flags = 0;
+	wc.pkey_index = 0;
+	wc.slid = 0;
+	wc.sl = 0;
+	wc.dlid_path_bits = 0;
+	wc.port_num = 0;
+	/* Signal solicited completion event. */
+	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	ret = 0;
+bail:
+	return ret;
+}
+
 /**
  * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
  * @qp: the QP
@@ -119,71 +167,71 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
 {
 	unsigned long flags;
 	struct ipath_rq *rq;
+	struct ipath_rwq *wq;
 	struct ipath_srq *srq;
 	struct ipath_rwqe *wqe;
-	int ret = 1;
+	void (*handler)(struct ib_event *, void *);
+	u32 tail;
+	int ret;
 
-	if (!qp->ibqp.srq) {
+	if (qp->ibqp.srq) {
+		srq = to_isrq(qp->ibqp.srq);
+		handler = srq->ibsrq.event_handler;
+		rq = &srq->rq;
+	} else {
+		srq = NULL;
+		handler = NULL;
 		rq = &qp->r_rq;
-		spin_lock_irqsave(&rq->lock, flags);
-
-		if (unlikely(rq->tail == rq->head)) {
-			ret = 0;
-			goto done;
-		}
-		wqe = get_rwqe_ptr(rq, rq->tail);
-		qp->r_wr_id = wqe->wr_id;
-		if (!wr_id_only) {
-			qp->r_sge.sge = wqe->sg_list[0];
-			qp->r_sge.sg_list = wqe->sg_list + 1;
-			qp->r_sge.num_sge = wqe->num_sge;
-			qp->r_len = wqe->length;
-		}
-		if (++rq->tail >= rq->size)
-			rq->tail = 0;
-		goto done;
 	}
 
-	srq = to_isrq(qp->ibqp.srq);
-	rq = &srq->rq;
 	spin_lock_irqsave(&rq->lock, flags);
-
-	if (unlikely(rq->tail == rq->head)) {
-		ret = 0;
-		goto done;
-	}
-	wqe = get_rwqe_ptr(rq, rq->tail);
+	wq = rq->wq;
+	tail = wq->tail;
+	/* Validate tail before using it since it is user writable. */
+	if (tail >= rq->size)
+		tail = 0;
+	do {
+		if (unlikely(tail == wq->head)) {
+			spin_unlock_irqrestore(&rq->lock, flags);
+			ret = 0;
+			goto bail;
+		}
+		wqe = get_rwqe_ptr(rq, tail);
+		if (++tail >= rq->size)
+			tail = 0;
+	} while (!wr_id_only && !init_sge(qp, wqe));
 	qp->r_wr_id = wqe->wr_id;
-	if (!wr_id_only) {
-		qp->r_sge.sge = wqe->sg_list[0];
-		qp->r_sge.sg_list = wqe->sg_list + 1;
-		qp->r_sge.num_sge = wqe->num_sge;
-		qp->r_len = wqe->length;
-	}
-	if (++rq->tail >= rq->size)
-		rq->tail = 0;
-	if (srq->ibsrq.event_handler) {
-		struct ib_event ev;
+	wq->tail = tail;
+
+	ret = 1;
+	if (handler) {
 		u32 n;
 
-		if (rq->head < rq->tail)
-			n = rq->size + rq->head - rq->tail;
+		/*
+		 * validate head pointer value and compute
+		 * the number of remaining WQEs.
+		 */
+		n = wq->head;
+		if (n >= rq->size)
+			n = 0;
+		if (n < tail)
+			n += rq->size - tail;
 		else
-			n = rq->head - rq->tail;
+			n -= tail;
 		if (n < srq->limit) {
+			struct ib_event ev;
+
 			srq->limit = 0;
 			spin_unlock_irqrestore(&rq->lock, flags);
 			ev.device = qp->ibqp.device;
 			ev.element.srq = qp->ibqp.srq;
 			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-			srq->ibsrq.event_handler(&ev,
-						 srq->ibsrq.srq_context);
+			handler(&ev, srq->ibsrq.srq_context);
 			goto bail;
 		}
 	}
-
-done:
 	spin_unlock_irqrestore(&rq->lock, flags);
+
 bail:
 	return ret;
 }
@@ -422,6 +470,15 @@ done:
 		wake_up(&qp->wait);
 }
 
+static int want_buffer(struct ipath_devdata *dd)
+{
+	set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+			 dd->ipath_sendctrl);
+
+	return 0;
+}
+
 /**
  * ipath_no_bufs_available - tell the layer driver we need buffers
  * @qp: the QP that caused the problem
@@ -438,7 +495,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
 		list_add_tail(&qp->piowait, &dev->piowait);
 	spin_unlock_irqrestore(&dev->pending_lock, flags);
 	/*
-	 * Note that as soon as ipath_layer_want_buffer() is called and
+	 * Note that as soon as want_buffer() is called and
 	 * possibly before it returns, ipath_ib_piobufavail()
 	 * could be called.  If we are still in the tasklet function,
 	 * tasklet_hi_schedule() will not call us until the next time
@@ -448,7 +505,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
 	 */
 	clear_bit(IPATH_S_BUSY, &qp->s_flags);
 	tasklet_unlock(&qp->s_task);
-	ipath_layer_want_buffer(dev->dd);
+	want_buffer(dev->dd);
 	dev->n_piowait++;
 }
 
@@ -563,7 +620,7 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
 	hdr->hop_limit = grh->hop_limit;
 	/* The SGID is 32-bit aligned. */
 	hdr->sgid.global.subnet_prefix = dev->gid_prefix;
-	hdr->sgid.global.interface_id = ipath_layer_get_guid(dev->dd);
+	hdr->sgid.global.interface_id = dev->dd->ipath_guid;
 	hdr->dgid = grh->dgid;
 
 	/* GRH header size in 32-bit words. */
@@ -595,8 +652,7 @@ void ipath_do_ruc_send(unsigned long data)
 	if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
 		goto bail;
 
-	if (unlikely(qp->remote_ah_attr.dlid ==
-		     ipath_layer_get_lid(dev->dd))) {
+	if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
 		ipath_ruc_loopback(qp);
 		goto clear;
 	}
@@ -663,8 +719,8 @@ again:
 	qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
 	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
 				       SIZE_OF_CRC);
-	qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
-	bth0 |= ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+	qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
+	bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
 	bth0 |= extra_bytes << 20;
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
index f760434660b..941e866d951 100644
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -48,66 +48,39 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 			   struct ib_recv_wr **bad_wr)
 {
 	struct ipath_srq *srq = to_isrq(ibsrq);
-	struct ipath_ibdev *dev = to_idev(ibsrq->device);
+	struct ipath_rwq *wq;
 	unsigned long flags;
 	int ret;
 
 	for (; wr; wr = wr->next) {
 		struct ipath_rwqe *wqe;
 		u32 next;
-		int i, j;
+		int i;
 
-		if (wr->num_sge > srq->rq.max_sge) {
+		if ((unsigned) wr->num_sge > srq->rq.max_sge) {
 			*bad_wr = wr;
 			ret = -ENOMEM;
 			goto bail;
 		}
 
 		spin_lock_irqsave(&srq->rq.lock, flags);
-		next = srq->rq.head + 1;
+		wq = srq->rq.wq;
+		next = wq->head + 1;
 		if (next >= srq->rq.size)
 			next = 0;
-		if (next == srq->rq.tail) {
+		if (next == wq->tail) {
 			spin_unlock_irqrestore(&srq->rq.lock, flags);
 			*bad_wr = wr;
 			ret = -ENOMEM;
 			goto bail;
 		}
 
-		wqe = get_rwqe_ptr(&srq->rq, srq->rq.head);
+		wqe = get_rwqe_ptr(&srq->rq, wq->head);
 		wqe->wr_id = wr->wr_id;
-		wqe->sg_list[0].mr = NULL;
-		wqe->sg_list[0].vaddr = NULL;
-		wqe->sg_list[0].length = 0;
-		wqe->sg_list[0].sge_length = 0;
-		wqe->length = 0;
-		for (i = 0, j = 0; i < wr->num_sge; i++) {
-			/* Check LKEY */
-			if (to_ipd(srq->ibsrq.pd)->user &&
-			    wr->sg_list[i].lkey == 0) {
-				spin_unlock_irqrestore(&srq->rq.lock,
-						       flags);
-				*bad_wr = wr;
-				ret = -EINVAL;
-				goto bail;
-			}
-			if (wr->sg_list[i].length == 0)
-				continue;
-			if (!ipath_lkey_ok(&dev->lk_table,
-					   &wqe->sg_list[j],
-					   &wr->sg_list[i],
-					   IB_ACCESS_LOCAL_WRITE)) {
-				spin_unlock_irqrestore(&srq->rq.lock,
-						       flags);
-				*bad_wr = wr;
-				ret = -EINVAL;
-				goto bail;
-			}
-			wqe->length += wr->sg_list[i].length;
-			j++;
-		}
-		wqe->num_sge = j;
-		srq->rq.head = next;
+		wqe->num_sge = wr->num_sge;
+		for (i = 0; i < wr->num_sge; i++)
+			wqe->sg_list[i] = wr->sg_list[i];
+		wq->head = next;
 		spin_unlock_irqrestore(&srq->rq.lock, flags);
 	}
 	ret = 0;
@@ -133,53 +106,95 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
 
 	if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto done;
 	}
 
 	if (srq_init_attr->attr.max_wr == 0) {
 		ret = ERR_PTR(-EINVAL);
-		goto bail;
+		goto done;
 	}
 
 	if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) ||
 	    (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) {
 		ret = ERR_PTR(-EINVAL);
-		goto bail;
+		goto done;
 	}
 
 	srq = kmalloc(sizeof(*srq), GFP_KERNEL);
 	if (!srq) {
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto done;
 	}
 
 	/*
 	 * Need to use vmalloc() if we want to support large #s of entries.
 	 */
 	srq->rq.size = srq_init_attr->attr.max_wr + 1;
-	sz = sizeof(struct ipath_sge) * srq_init_attr->attr.max_sge +
+	srq->rq.max_sge = srq_init_attr->attr.max_sge;
+	sz = sizeof(struct ib_sge) * srq->rq.max_sge +
 		sizeof(struct ipath_rwqe);
-	srq->rq.wq = vmalloc(srq->rq.size * sz);
+	srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz);
 	if (!srq->rq.wq) {
-		kfree(srq);
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto bail_srq;
 	}
 
 	/*
+	 * Return the address of the RWQ as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		struct ipath_mmap_info *ip;
+		__u64 offset = (__u64) srq->rq.wq;
+		int err;
+
+		err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (err) {
+			ret = ERR_PTR(err);
+			goto bail_wq;
+		}
+
+		/* Allocate info for ipath_mmap(). */
+		ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+		if (!ip) {
+			ret = ERR_PTR(-ENOMEM);
+			goto bail_wq;
+		}
+		srq->ip = ip;
+		ip->context = ibpd->uobject->context;
+		ip->obj = srq->rq.wq;
+		kref_init(&ip->ref);
+		ip->mmap_cnt = 0;
+		ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+				      srq->rq.size * sz);
+		spin_lock_irq(&dev->pending_lock);
+		ip->next = dev->pending_mmaps;
+		dev->pending_mmaps = ip;
+		spin_unlock_irq(&dev->pending_lock);
+	} else
+		srq->ip = NULL;
+
+	/*
 	 * ib_create_srq() will initialize srq->ibsrq.
 	 */
 	spin_lock_init(&srq->rq.lock);
-	srq->rq.head = 0;
-	srq->rq.tail = 0;
+	srq->rq.wq->head = 0;
+	srq->rq.wq->tail = 0;
 	srq->rq.max_sge = srq_init_attr->attr.max_sge;
 	srq->limit = srq_init_attr->attr.srq_limit;
 
+	dev->n_srqs_allocated++;
+
 	ret = &srq->ibsrq;
+	goto done;
 
-	dev->n_srqs_allocated++;
+bail_wq:
+	vfree(srq->rq.wq);
 
-bail:
+bail_srq:
+	kfree(srq);
+
+done:
 	return ret;
 }
 
@@ -188,83 +203,130 @@ bail:
  * @ibsrq: the SRQ to modify
  * @attr: the new attributes of the SRQ
  * @attr_mask: indicates which attributes to modify
+ * @udata: user data for ipathverbs.so
  */
 int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask)
+		     enum ib_srq_attr_mask attr_mask,
+		     struct ib_udata *udata)
 {
 	struct ipath_srq *srq = to_isrq(ibsrq);
-	unsigned long flags;
-	int ret;
+	int ret = 0;
 
-	if (attr_mask & IB_SRQ_MAX_WR)
-		if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
-		    (attr->max_sge > srq->rq.max_sge)) {
-			ret = -EINVAL;
-			goto bail;
-		}
+	if (attr_mask & IB_SRQ_MAX_WR) {
+		struct ipath_rwq *owq;
+		struct ipath_rwq *wq;
+		struct ipath_rwqe *p;
+		u32 sz, size, n, head, tail;
 
-	if (attr_mask & IB_SRQ_LIMIT)
-		if (attr->srq_limit >= srq->rq.size) {
+		/* Check that the requested sizes are below the limits. */
+		if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
+		    ((attr_mask & IB_SRQ_LIMIT) ?
+		     attr->srq_limit : srq->limit) > attr->max_wr) {
 			ret = -EINVAL;
 			goto bail;
 		}
 
-	if (attr_mask & IB_SRQ_MAX_WR) {
-		struct ipath_rwqe *wq, *p;
-		u32 sz, size, n;
-
 		sz = sizeof(struct ipath_rwqe) +
-			attr->max_sge * sizeof(struct ipath_sge);
+			srq->rq.max_sge * sizeof(struct ib_sge);
 		size = attr->max_wr + 1;
-		wq = vmalloc(size * sz);
+		wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz);
 		if (!wq) {
 			ret = -ENOMEM;
 			goto bail;
 		}
 
-		spin_lock_irqsave(&srq->rq.lock, flags);
-		if (srq->rq.head < srq->rq.tail)
-			n = srq->rq.size + srq->rq.head - srq->rq.tail;
+		/*
+		 * Return the address of the RWQ as the offset to mmap.
+		 * See ipath_mmap() for details.
+		 */
+		if (udata && udata->inlen >= sizeof(__u64)) {
+			__u64 offset_addr;
+			__u64 offset = (__u64) wq;
+
+			ret = ib_copy_from_udata(&offset_addr, udata,
+						 sizeof(offset_addr));
+			if (ret) {
+				vfree(wq);
+				goto bail;
+			}
+			udata->outbuf = (void __user *) offset_addr;
+			ret = ib_copy_to_udata(udata, &offset,
+					       sizeof(offset));
+			if (ret) {
+				vfree(wq);
+				goto bail;
+			}
+		}
+
+		spin_lock_irq(&srq->rq.lock);
+		/*
+		 * validate head pointer value and compute
+		 * the number of remaining WQEs.
+		 */
+		owq = srq->rq.wq;
+		head = owq->head;
+		if (head >= srq->rq.size)
+			head = 0;
+		tail = owq->tail;
+		if (tail >= srq->rq.size)
+			tail = 0;
+		n = head;
+		if (n < tail)
+			n += srq->rq.size - tail;
 		else
-			n = srq->rq.head - srq->rq.tail;
-		if (size <= n || size <= srq->limit) {
-			spin_unlock_irqrestore(&srq->rq.lock, flags);
+			n -= tail;
+		if (size <= n) {
+			spin_unlock_irq(&srq->rq.lock);
 			vfree(wq);
 			ret = -EINVAL;
 			goto bail;
 		}
 		n = 0;
-		p = wq;
-		while (srq->rq.tail != srq->rq.head) {
+		p = wq->wq;
+		while (tail != head) {
 			struct ipath_rwqe *wqe;
 			int i;
 
-			wqe = get_rwqe_ptr(&srq->rq, srq->rq.tail);
+			wqe = get_rwqe_ptr(&srq->rq, tail);
 			p->wr_id = wqe->wr_id;
-			p->length = wqe->length;
 			p->num_sge = wqe->num_sge;
 			for (i = 0; i < wqe->num_sge; i++)
 				p->sg_list[i] = wqe->sg_list[i];
 			n++;
 			p = (struct ipath_rwqe *)((char *) p + sz);
-			if (++srq->rq.tail >= srq->rq.size)
-				srq->rq.tail = 0;
+			if (++tail >= srq->rq.size)
+				tail = 0;
 		}
-		vfree(srq->rq.wq);
 		srq->rq.wq = wq;
 		srq->rq.size = size;
-		srq->rq.head = n;
-		srq->rq.tail = 0;
-		srq->rq.max_sge = attr->max_sge;
-		spin_unlock_irqrestore(&srq->rq.lock, flags);
-	}
-
-	if (attr_mask & IB_SRQ_LIMIT) {
-		spin_lock_irqsave(&srq->rq.lock, flags);
-		srq->limit = attr->srq_limit;
-		spin_unlock_irqrestore(&srq->rq.lock, flags);
+		wq->head = n;
+		wq->tail = 0;
+		if (attr_mask & IB_SRQ_LIMIT)
+			srq->limit = attr->srq_limit;
+		spin_unlock_irq(&srq->rq.lock);
+
+		vfree(owq);
+
+		if (srq->ip) {
+			struct ipath_mmap_info *ip = srq->ip;
+			struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
+
+			ip->obj = wq;
+			ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+					      size * sz);
+			spin_lock_irq(&dev->pending_lock);
+			ip->next = dev->pending_mmaps;
+			dev->pending_mmaps = ip;
+			spin_unlock_irq(&dev->pending_lock);
+		}
+	} else if (attr_mask & IB_SRQ_LIMIT) {
+		spin_lock_irq(&srq->rq.lock);
+		if (attr->srq_limit >= srq->rq.size)
+			ret = -EINVAL;
+		else
+			srq->limit = attr->srq_limit;
+		spin_unlock_irq(&srq->rq.lock);
 	}
-	ret = 0;
 
 bail:
 	return ret;
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index 70351b7e35c..30a825928fc 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -271,33 +271,6 @@ void ipath_get_faststats(unsigned long opaque)
 		}
 	}
 
-	if (dd->ipath_nosma_bufs) {
-		dd->ipath_nosma_secs += 5;
-		if (dd->ipath_nosma_secs >= 30) {
-			ipath_cdbg(SMA, "No SMA bufs avail %u seconds; "
-				   "cancelling pending sends\n",
-				   dd->ipath_nosma_secs);
-			/*
-			 * issue an abort as well, in case we have a packet
-			 * stuck in launch fifo.  This could corrupt an
-			 * outgoing user packet in the worst case,
-			 * but this is a pretty catastrophic, anyway.
-			 */
-			ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-					 INFINIPATH_S_ABORT);
-			ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
-					     dd->ipath_piobcnt2k +
-					     dd->ipath_piobcnt4k -
-					     dd->ipath_lastport_piobuf);
-			/* start again, if necessary */
-			dd->ipath_nosma_secs = 0;
-		} else
-			ipath_cdbg(SMA, "No SMA bufs avail %u tries, "
-				   "after %u seconds\n",
-				   dd->ipath_nosma_bufs,
-				   dd->ipath_nosma_secs);
-	}
-
 done:
 	mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
 }
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index b98821d7801..e299148c4b6 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -35,7 +35,6 @@
 #include <linux/pci.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
 #include "ipath_common.h"
 
 /**
@@ -76,7 +75,7 @@ bail:
 static ssize_t show_version(struct device_driver *dev, char *buf)
 {
 	/* The string printed here is already newline-terminated. */
-	return scnprintf(buf, PAGE_SIZE, "%s", ipath_core_version);
+	return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version);
 }
 
 static ssize_t show_num_units(struct device_driver *dev, char *buf)
@@ -108,8 +107,8 @@ static const char *ipath_status_str[] = {
 	"Initted",
 	"Disabled",
 	"Admin_Disabled",
-	"OIB_SMA",
-	"SMA",
+	"", /* This used to be the old "OIB_SMA" status. */
+	"", /* This used to be the old "SMA" status. */
 	"Present",
 	"IB_link_up",
 	"IB_configured",
@@ -227,7 +226,6 @@ static ssize_t store_mlid(struct device *dev,
 	unit = dd->ipath_unit;
 
 	dd->ipath_mlid = mlid;
-	ipath_layer_intr(dd, IPATH_LAYER_INT_BCAST);
 
 	goto bail;
 invalid:
@@ -467,7 +465,7 @@ static ssize_t store_link_state(struct device *dev,
 	if (ret < 0)
 		goto invalid;
 
-	r = ipath_layer_set_linkstate(dd, state);
+	r = ipath_set_linkstate(dd, state);
 	if (r < 0) {
 		ret = r;
 		goto bail;
@@ -502,7 +500,7 @@ static ssize_t store_mtu(struct device *dev,
 	if (ret < 0)
 		goto invalid;
 
-	r = ipath_layer_set_mtu(dd, mtu);
+	r = ipath_set_mtu(dd, mtu);
 	if (r < 0)
 		ret = r;
 
@@ -563,6 +561,33 @@ bail:
 	return ret;
 }
 
+static ssize_t store_rx_pol_inv(struct device *dev,
+			  struct device_attribute *attr,
+			  const char *buf,
+			  size_t count)
+{
+	struct ipath_devdata *dd = dev_get_drvdata(dev);
+	int ret, r;
+	u16 val;
+
+	ret = ipath_parse_ushort(buf, &val);
+	if (ret < 0)
+		goto invalid;
+
+	r = ipath_set_rx_pol_inv(dd, val);
+	if (r < 0) {
+		ret = r;
+		goto bail;
+	}
+
+	goto bail;
+invalid:
+	ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n");
+bail:
+	return ret;
+}
+
+
 static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
 static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
 
@@ -589,6 +614,7 @@ static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
 static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
 static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
 static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
+static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
 
 static struct attribute *dev_attributes[] = {
 	&dev_attr_guid.attr,
@@ -603,6 +629,7 @@ static struct attribute *dev_attributes[] = {
 	&dev_attr_boardversion.attr,
 	&dev_attr_unit.attr,
 	&dev_attr_enabled.attr,
+	&dev_attr_rx_pol_inv.attr,
 	NULL
 };
 
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index c33abea2d5a..0fd3cded16b 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -32,7 +32,7 @@
  */
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
 
 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_UC_##x
@@ -261,8 +261,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		 * size to 56 bytes so the last 4 bytes of
 		 * the BTH header (PSN) is in the data buffer.
 		 */
-		header_in_data =
-			ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
 		if (header_in_data) {
 			psn = be32_to_cpu(((__be32 *) data)[0]);
 			data += sizeof(__be32);
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 3466129af80..6991d1d74e3 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -34,7 +34,54 @@
 #include <rdma/ib_smi.h>
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
+
+static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
+		    u32 *lengthp, struct ipath_sge_state *ss)
+{
+	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+	int user = to_ipd(qp->ibqp.pd)->user;
+	int i, j, ret;
+	struct ib_wc wc;
+
+	*lengthp = 0;
+	for (i = j = 0; i < wqe->num_sge; i++) {
+		if (wqe->sg_list[i].length == 0)
+			continue;
+		/* Check LKEY */
+		if ((user && wqe->sg_list[i].lkey == 0) ||
+		    !ipath_lkey_ok(&dev->lk_table,
+				   j ? &ss->sg_list[j - 1] : &ss->sge,
+				   &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+			goto bad_lkey;
+		*lengthp += wqe->sg_list[i].length;
+		j++;
+	}
+	ss->num_sge = j;
+	ret = 1;
+	goto bail;
+
+bad_lkey:
+	wc.wr_id = wqe->wr_id;
+	wc.status = IB_WC_LOC_PROT_ERR;
+	wc.opcode = IB_WC_RECV;
+	wc.vendor_err = 0;
+	wc.byte_len = 0;
+	wc.imm_data = 0;
+	wc.qp_num = qp->ibqp.qp_num;
+	wc.src_qp = 0;
+	wc.wc_flags = 0;
+	wc.pkey_index = 0;
+	wc.slid = 0;
+	wc.sl = 0;
+	wc.dlid_path_bits = 0;
+	wc.port_num = 0;
+	/* Signal solicited completion event. */
+	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	ret = 0;
+bail:
+	return ret;
+}
 
 /**
  * ipath_ud_loopback - handle send on loopback QPs
@@ -46,6 +93,8 @@
  *
  * This is called from ipath_post_ud_send() to forward a WQE addressed
  * to the same HCA.
+ * Note that the receive interrupt handler may be calling ipath_ud_rcv()
+ * while this is being called.
  */
 static void ipath_ud_loopback(struct ipath_qp *sqp,
 			      struct ipath_sge_state *ss,
@@ -60,7 +109,11 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
 	struct ipath_srq *srq;
 	struct ipath_sge_state rsge;
 	struct ipath_sge *sge;
+	struct ipath_rwq *wq;
 	struct ipath_rwqe *wqe;
+	void (*handler)(struct ib_event *, void *);
+	u32 tail;
+	u32 rlen;
 
 	qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn);
 	if (!qp)
@@ -94,6 +147,13 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
 		wc->imm_data = 0;
 	}
 
+	if (wr->num_sge > 1) {
+		rsge.sg_list = kmalloc((wr->num_sge - 1) *
+					sizeof(struct ipath_sge),
+				       GFP_ATOMIC);
+	} else
+		rsge.sg_list = NULL;
+
 	/*
 	 * Get the next work request entry to find where to put the data.
 	 * Note that it is safe to drop the lock after changing rq->tail
@@ -101,37 +161,52 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
 	 */
 	if (qp->ibqp.srq) {
 		srq = to_isrq(qp->ibqp.srq);
+		handler = srq->ibsrq.event_handler;
 		rq = &srq->rq;
 	} else {
 		srq = NULL;
+		handler = NULL;
 		rq = &qp->r_rq;
 	}
+
 	spin_lock_irqsave(&rq->lock, flags);
-	if (rq->tail == rq->head) {
-		spin_unlock_irqrestore(&rq->lock, flags);
-		dev->n_pkt_drops++;
-		goto done;
+	wq = rq->wq;
+	tail = wq->tail;
+	while (1) {
+		if (unlikely(tail == wq->head)) {
+			spin_unlock_irqrestore(&rq->lock, flags);
+			dev->n_pkt_drops++;
+			goto bail_sge;
+		}
+		wqe = get_rwqe_ptr(rq, tail);
+		if (++tail >= rq->size)
+			tail = 0;
+		if (init_sge(qp, wqe, &rlen, &rsge))
+			break;
+		wq->tail = tail;
 	}
 	/* Silently drop packets which are too big. */
-	wqe = get_rwqe_ptr(rq, rq->tail);
-	if (wc->byte_len > wqe->length) {
+	if (wc->byte_len > rlen) {
 		spin_unlock_irqrestore(&rq->lock, flags);
 		dev->n_pkt_drops++;
-		goto done;
+		goto bail_sge;
 	}
+	wq->tail = tail;
 	wc->wr_id = wqe->wr_id;
-	rsge.sge = wqe->sg_list[0];
-	rsge.sg_list = wqe->sg_list + 1;
-	rsge.num_sge = wqe->num_sge;
-	if (++rq->tail >= rq->size)
-		rq->tail = 0;
-	if (srq && srq->ibsrq.event_handler) {
+	if (handler) {
 		u32 n;
 
-		if (rq->head < rq->tail)
-			n = rq->size + rq->head - rq->tail;
+		/*
+		 * validate head pointer value and compute
+		 * the number of remaining WQEs.
+		 */
+		n = wq->head;
+		if (n >= rq->size)
+			n = 0;
+		if (n < tail)
+			n += rq->size - tail;
 		else
-			n = rq->head - rq->tail;
+			n -= tail;
 		if (n < srq->limit) {
 			struct ib_event ev;
 
@@ -140,12 +215,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
 			ev.device = qp->ibqp.device;
 			ev.element.srq = qp->ibqp.srq;
 			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-			srq->ibsrq.event_handler(&ev,
-						 srq->ibsrq.srq_context);
+			handler(&ev, srq->ibsrq.srq_context);
 		} else
 			spin_unlock_irqrestore(&rq->lock, flags);
 	} else
 		spin_unlock_irqrestore(&rq->lock, flags);
+
 	ah_attr = &to_iah(wr->wr.ud.ah)->attr;
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
@@ -186,7 +261,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
 	wc->src_qp = sqp->ibqp.qp_num;
 	/* XXX do we know which pkey matched? Only needed for GSI. */
 	wc->pkey_index = 0;
-	wc->slid = ipath_layer_get_lid(dev->dd) |
+	wc->slid = dev->dd->ipath_lid |
 		(ah_attr->src_path_bits &
 		 ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1));
 	wc->sl = ah_attr->sl;
@@ -196,6 +271,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
 	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
 		       wr->send_flags & IB_SEND_SOLICITED);
 
+bail_sge:
+	kfree(rsge.sg_list);
 done:
 	if (atomic_dec_and_test(&qp->refcount))
 		wake_up(&qp->wait);
@@ -276,7 +353,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
 		ss.num_sge++;
 	}
 	/* Check for invalid packet size. */
-	if (len > ipath_layer_get_ibmtu(dev->dd)) {
+	if (len > dev->dd->ipath_ibmtu) {
 		ret = -EINVAL;
 		goto bail;
 	}
@@ -298,7 +375,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
 		dev->n_unicast_xmit++;
 		lid = ah_attr->dlid &
 			~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
-		if (unlikely(lid == ipath_layer_get_lid(dev->dd))) {
+		if (unlikely(lid == dev->dd->ipath_lid)) {
 			/*
 			 * Pass in an uninitialized ib_wc to save stack
 			 * space.
@@ -327,7 +404,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
 		qp->s_hdr.u.l.grh.sgid.global.subnet_prefix =
 			dev->gid_prefix;
 		qp->s_hdr.u.l.grh.sgid.global.interface_id =
-			ipath_layer_get_guid(dev->dd);
+			dev->dd->ipath_guid;
 		qp->s_hdr.u.l.grh.dgid = ah_attr->grh.dgid;
 		/*
 		 * Don't worry about sending to locally attached multicast
@@ -357,7 +434,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
 	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
 	qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);	/* DEST LID */
 	qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC);
-	lid = ipath_layer_get_lid(dev->dd);
+	lid = dev->dd->ipath_lid;
 	if (lid) {
 		lid |= ah_attr->src_path_bits &
 			((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
@@ -368,7 +445,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
 		bth0 |= 1 << 23;
 	bth0 |= extra_bytes << 20;
 	bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
-		ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+		ipath_get_pkey(dev->dd, qp->s_pkey_index);
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	/*
 	 * Use the multicast QP if the destination LID is a multicast LID.
@@ -433,13 +510,9 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 	int opcode;
 	u32 hdrsize;
 	u32 pad;
-	unsigned long flags;
 	struct ib_wc wc;
 	u32 qkey;
 	u32 src_qp;
-	struct ipath_rq *rq;
-	struct ipath_srq *srq;
-	struct ipath_rwqe *wqe;
 	u16 dlid;
 	int header_in_data;
 
@@ -458,8 +531,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		 * the eager header buffer size to 56 bytes so the last 12
 		 * bytes of the IB header is in the data buffer.
 		 */
-		header_in_data =
-			ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
 		if (header_in_data) {
 			qkey = be32_to_cpu(((__be32 *) data)[1]);
 			src_qp = be32_to_cpu(((__be32 *) data)[2]);
@@ -547,19 +619,10 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 
 	/*
 	 * Get the next work request entry to find where to put the data.
-	 * Note that it is safe to drop the lock after changing rq->tail
-	 * since ipath_post_receive() won't fill the empty slot.
 	 */
-	if (qp->ibqp.srq) {
-		srq = to_isrq(qp->ibqp.srq);
-		rq = &srq->rq;
-	} else {
-		srq = NULL;
-		rq = &qp->r_rq;
-	}
-	spin_lock_irqsave(&rq->lock, flags);
-	if (rq->tail == rq->head) {
-		spin_unlock_irqrestore(&rq->lock, flags);
+	if (qp->r_reuse_sge)
+		qp->r_reuse_sge = 0;
+	else if (!ipath_get_rwqe(qp, 0)) {
 		/*
 		 * Count VL15 packets dropped due to no receive buffer.
 		 * Otherwise, count them as buffer overruns since usually,
@@ -573,39 +636,11 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		goto bail;
 	}
 	/* Silently drop packets which are too big. */
-	wqe = get_rwqe_ptr(rq, rq->tail);
-	if (wc.byte_len > wqe->length) {
-		spin_unlock_irqrestore(&rq->lock, flags);
+	if (wc.byte_len > qp->r_len) {
+		qp->r_reuse_sge = 1;
 		dev->n_pkt_drops++;
 		goto bail;
 	}
-	wc.wr_id = wqe->wr_id;
-	qp->r_sge.sge = wqe->sg_list[0];
-	qp->r_sge.sg_list = wqe->sg_list + 1;
-	qp->r_sge.num_sge = wqe->num_sge;
-	if (++rq->tail >= rq->size)
-		rq->tail = 0;
-	if (srq && srq->ibsrq.event_handler) {
-		u32 n;
-
-		if (rq->head < rq->tail)
-			n = rq->size + rq->head - rq->tail;
-		else
-			n = rq->head - rq->tail;
-		if (n < srq->limit) {
-			struct ib_event ev;
-
-			srq->limit = 0;
-			spin_unlock_irqrestore(&rq->lock, flags);
-			ev.device = qp->ibqp.device;
-			ev.element.srq = qp->ibqp.srq;
-			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-			srq->ibsrq.event_handler(&ev,
-						 srq->ibsrq.srq_context);
-		} else
-			spin_unlock_irqrestore(&rq->lock, flags);
-	} else
-		spin_unlock_irqrestore(&rq->lock, flags);
 	if (has_grh) {
 		ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh,
 			       sizeof(struct ib_grh));
@@ -614,6 +649,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
 	ipath_copy_sge(&qp->r_sge, data,
 		       wc.byte_len - sizeof(struct ib_grh));
+	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
 	wc.opcode = IB_WC_RECV;
 	wc.vendor_err = 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index d70a9b6b523..b8381c5e72b 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -33,15 +33,13 @@
 
 #include <rdma/ib_mad.h>
 #include <rdma/ib_user_verbs.h>
+#include <linux/io.h>
 #include <linux/utsname.h>
 
 #include "ipath_kernel.h"
 #include "ipath_verbs.h"
 #include "ipath_common.h"
 
-/* Not static, because we don't want the compiler removing it */
-const char ipath_verbs_version[] = "ipath_verbs " IPATH_IDSTR;
-
 static unsigned int ib_ipath_qp_table_size = 251;
 module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
 MODULE_PARM_DESC(qp_table_size, "QP table size");
@@ -52,10 +50,6 @@ module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
 MODULE_PARM_DESC(lkey_table_size,
 		 "LKEY table size in bits (2^n, 1 <= n <= 23)");
 
-unsigned int ib_ipath_debug;	/* debug mask */
-module_param_named(debug, ib_ipath_debug, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(debug, "Verbs debug mask");
-
 static unsigned int ib_ipath_max_pds = 0xFFFF;
 module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(max_pds,
@@ -79,6 +73,10 @@ module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
 		   S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
 
+unsigned int ib_ipath_max_qps = 16384;
+module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
+
 unsigned int ib_ipath_max_sges = 0x60;
 module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
@@ -109,9 +107,9 @@ module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
 		   uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
 
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("QLogic <support@pathscale.com>");
-MODULE_DESCRIPTION("QLogic InfiniPath driver");
+static unsigned int ib_ipath_disable_sma;
+module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(ib_ipath_disable_sma, "Disable the SMA");
 
 const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
 	[IB_QPS_RESET] = 0,
@@ -125,6 +123,16 @@ const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
 	[IB_QPS_ERR] = 0,
 };
 
+struct ipath_ucontext {
+	struct ib_ucontext ibucontext;
+};
+
+static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
+						  *ibucontext)
+{
+	return container_of(ibucontext, struct ipath_ucontext, ibucontext);
+}
+
 /*
  * Translate ib_wr_opcode into ib_wc_opcode.
  */
@@ -277,11 +285,12 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 			      struct ib_recv_wr **bad_wr)
 {
 	struct ipath_qp *qp = to_iqp(ibqp);
+	struct ipath_rwq *wq = qp->r_rq.wq;
 	unsigned long flags;
 	int ret;
 
 	/* Check that state is OK to post receive. */
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK)) {
+	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
 		*bad_wr = wr;
 		ret = -EINVAL;
 		goto bail;
@@ -290,59 +299,31 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 	for (; wr; wr = wr->next) {
 		struct ipath_rwqe *wqe;
 		u32 next;
-		int i, j;
+		int i;
 
-		if (wr->num_sge > qp->r_rq.max_sge) {
+		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
 			*bad_wr = wr;
 			ret = -ENOMEM;
 			goto bail;
 		}
 
 		spin_lock_irqsave(&qp->r_rq.lock, flags);
-		next = qp->r_rq.head + 1;
+		next = wq->head + 1;
 		if (next >= qp->r_rq.size)
 			next = 0;
-		if (next == qp->r_rq.tail) {
+		if (next == wq->tail) {
 			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 			*bad_wr = wr;
 			ret = -ENOMEM;
 			goto bail;
 		}
 
-		wqe = get_rwqe_ptr(&qp->r_rq, qp->r_rq.head);
+		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
 		wqe->wr_id = wr->wr_id;
-		wqe->sg_list[0].mr = NULL;
-		wqe->sg_list[0].vaddr = NULL;
-		wqe->sg_list[0].length = 0;
-		wqe->sg_list[0].sge_length = 0;
-		wqe->length = 0;
-		for (i = 0, j = 0; i < wr->num_sge; i++) {
-			/* Check LKEY */
-			if (to_ipd(qp->ibqp.pd)->user &&
-			    wr->sg_list[i].lkey == 0) {
-				spin_unlock_irqrestore(&qp->r_rq.lock,
-						       flags);
-				*bad_wr = wr;
-				ret = -EINVAL;
-				goto bail;
-			}
-			if (wr->sg_list[i].length == 0)
-				continue;
-			if (!ipath_lkey_ok(
-				    &to_idev(qp->ibqp.device)->lk_table,
-				    &wqe->sg_list[j], &wr->sg_list[i],
-				    IB_ACCESS_LOCAL_WRITE)) {
-				spin_unlock_irqrestore(&qp->r_rq.lock,
-						       flags);
-				*bad_wr = wr;
-				ret = -EINVAL;
-				goto bail;
-			}
-			wqe->length += wr->sg_list[i].length;
-			j++;
-		}
-		wqe->num_sge = j;
-		qp->r_rq.head = next;
+		wqe->num_sge = wr->num_sge;
+		for (i = 0; i < wr->num_sge; i++)
+			wqe->sg_list[i] = wr->sg_list[i];
+		wq->head = next;
 		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 	}
 	ret = 0;
@@ -377,6 +358,9 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev,
 	switch (qp->ibqp.qp_type) {
 	case IB_QPT_SMI:
 	case IB_QPT_GSI:
+		if (ib_ipath_disable_sma)
+			break;
+		/* FALLTHROUGH */
 	case IB_QPT_UD:
 		ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
 		break;
@@ -395,7 +379,7 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev,
 }
 
 /**
- * ipath_ib_rcv - process and incoming packet
+ * ipath_ib_rcv - process an incoming packet
  * @arg: the device pointer
  * @rhdr: the header of the packet
  * @data: the packet data
@@ -404,9 +388,9 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev,
  * This is called from ipath_kreceive() to process an incoming packet at
  * interrupt level. Tlen is the length of the header + data + CRC in bytes.
  */
-static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
+void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
+		  u32 tlen)
 {
-	struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
 	struct ipath_ib_header *hdr = rhdr;
 	struct ipath_other_headers *ohdr;
 	struct ipath_qp *qp;
@@ -427,7 +411,7 @@ static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
 	lid = be16_to_cpu(hdr->lrh[1]);
 	if (lid < IPATH_MULTICAST_LID_BASE) {
 		lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
-		if (unlikely(lid != ipath_layer_get_lid(dev->dd))) {
+		if (unlikely(lid != dev->dd->ipath_lid)) {
 			dev->rcv_errors++;
 			goto bail;
 		}
@@ -495,9 +479,8 @@ bail:;
  * This is called from ipath_do_rcv_timer() at interrupt level to check for
  * QPs which need retransmits and to collect performance numbers.
  */
-static void ipath_ib_timer(void *arg)
+void ipath_ib_timer(struct ipath_ibdev *dev)
 {
-	struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
 	struct ipath_qp *resend = NULL;
 	struct list_head *last;
 	struct ipath_qp *qp;
@@ -539,19 +522,19 @@ static void ipath_ib_timer(void *arg)
 	if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
 	    --dev->pma_sample_start == 0) {
 		dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
-		ipath_layer_snapshot_counters(dev->dd, &dev->ipath_sword,
-					      &dev->ipath_rword,
-					      &dev->ipath_spkts,
-					      &dev->ipath_rpkts,
-					      &dev->ipath_xmit_wait);
+		ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
+					&dev->ipath_rword,
+					&dev->ipath_spkts,
+					&dev->ipath_rpkts,
+					&dev->ipath_xmit_wait);
 	}
 	if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
 		if (dev->pma_sample_interval == 0) {
 			u64 ta, tb, tc, td, te;
 
 			dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
-			ipath_layer_snapshot_counters(dev->dd, &ta, &tb,
-						      &tc, &td, &te);
+			ipath_snapshot_counters(dev->dd, &ta, &tb,
+						&tc, &td, &te);
 
 			dev->ipath_sword = ta - dev->ipath_sword;
 			dev->ipath_rword = tb - dev->ipath_rword;
@@ -581,6 +564,362 @@ static void ipath_ib_timer(void *arg)
 	}
 }
 
+static void update_sge(struct ipath_sge_state *ss, u32 length)
+{
+	struct ipath_sge *sge = &ss->sge;
+
+	sge->vaddr += length;
+	sge->length -= length;
+	sge->sge_length -= length;
+	if (sge->sge_length == 0) {
+		if (--ss->num_sge)
+			*sge = *ss->sg_list++;
+	} else if (sge->length == 0 && sge->mr != NULL) {
+		if (++sge->n >= IPATH_SEGSZ) {
+			if (++sge->m >= sge->mr->mapsz)
+				return;
+			sge->n = 0;
+		}
+		sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
+		sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
+	}
+}
+
+#ifdef __LITTLE_ENDIAN
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+	return data >> shift;
+}
+
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+	return data << shift;
+}
+
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+	data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
+	data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
+	return data;
+}
+#else
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+	return data << shift;
+}
+
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+	return data >> shift;
+}
+
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+	data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
+	data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
+	return data;
+}
+#endif
+
+static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
+		    u32 length)
+{
+	u32 extra = 0;
+	u32 data = 0;
+	u32 last;
+
+	while (1) {
+		u32 len = ss->sge.length;
+		u32 off;
+
+		BUG_ON(len == 0);
+		if (len > length)
+			len = length;
+		if (len > ss->sge.sge_length)
+			len = ss->sge.sge_length;
+		/* If the source address is not aligned, try to align it. */
+		off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
+		if (off) {
+			u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
+					    ~(sizeof(u32) - 1));
+			u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
+			u32 y;
+
+			y = sizeof(u32) - off;
+			if (len > y)
+				len = y;
+			if (len + extra >= sizeof(u32)) {
+				data |= set_upper_bits(v, extra *
+						       BITS_PER_BYTE);
+				len = sizeof(u32) - extra;
+				if (len == length) {
+					last = data;
+					break;
+				}
+				__raw_writel(data, piobuf);
+				piobuf++;
+				extra = 0;
+				data = 0;
+			} else {
+				/* Clear unused upper bytes */
+				data |= clear_upper_bytes(v, len, extra);
+				if (len == length) {
+					last = data;
+					break;
+				}
+				extra += len;
+			}
+		} else if (extra) {
+			/* Source address is aligned. */
+			u32 *addr = (u32 *) ss->sge.vaddr;
+			int shift = extra * BITS_PER_BYTE;
+			int ushift = 32 - shift;
+			u32 l = len;
+
+			while (l >= sizeof(u32)) {
+				u32 v = *addr;
+
+				data |= set_upper_bits(v, shift);
+				__raw_writel(data, piobuf);
+				data = get_upper_bits(v, ushift);
+				piobuf++;
+				addr++;
+				l -= sizeof(u32);
+			}
+			/*
+			 * We still have 'extra' number of bytes leftover.
+			 */
+			if (l) {
+				u32 v = *addr;
+
+				if (l + extra >= sizeof(u32)) {
+					data |= set_upper_bits(v, shift);
+					len -= l + extra - sizeof(u32);
+					if (len == length) {
+						last = data;
+						break;
+					}
+					__raw_writel(data, piobuf);
+					piobuf++;
+					extra = 0;
+					data = 0;
+				} else {
+					/* Clear unused upper bytes */
+					data |= clear_upper_bytes(v, l,
+								  extra);
+					if (len == length) {
+						last = data;
+						break;
+					}
+					extra += l;
+				}
+			} else if (len == length) {
+				last = data;
+				break;
+			}
+		} else if (len == length) {
+			u32 w;
+
+			/*
+			 * Need to round up for the last dword in the
+			 * packet.
+			 */
+			w = (len + 3) >> 2;
+			__iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
+			piobuf += w - 1;
+			last = ((u32 *) ss->sge.vaddr)[w - 1];
+			break;
+		} else {
+			u32 w = len >> 2;
+
+			__iowrite32_copy(piobuf, ss->sge.vaddr, w);
+			piobuf += w;
+
+			extra = len & (sizeof(u32) - 1);
+			if (extra) {
+				u32 v = ((u32 *) ss->sge.vaddr)[w];
+
+				/* Clear unused upper bytes */
+				data = clear_upper_bytes(v, extra, 0);
+			}
+		}
+		update_sge(ss, len);
+		length -= len;
+	}
+	/* Update address before sending packet. */
+	update_sge(ss, length);
+	/* must flush early everything before trigger word */
+	ipath_flush_wc();
+	__raw_writel(last, piobuf);
+	/* be sure trigger word is written */
+	ipath_flush_wc();
+}
+
+/**
+ * ipath_verbs_send - send a packet
+ * @dd: the infinipath device
+ * @hdrwords: the number of words in the header
+ * @hdr: the packet header
+ * @len: the length of the packet in bytes
+ * @ss: the SGE to send
+ */
+int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
+		     u32 *hdr, u32 len, struct ipath_sge_state *ss)
+{
+	u32 __iomem *piobuf;
+	u32 plen;
+	int ret;
+
+	/* +1 is for the qword padding of pbc */
+	plen = hdrwords + ((len + 3) >> 2) + 1;
+	if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
+		ipath_dbg("packet len 0x%x too long, failing\n", plen);
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	/* Get a PIO buffer to use. */
+	piobuf = ipath_getpiobuf(dd, NULL);
+	if (unlikely(piobuf == NULL)) {
+		ret = -EBUSY;
+		goto bail;
+	}
+
+	/*
+	 * Write len to control qword, no flags.
+	 * We have to flush after the PBC for correctness on some cpus
+	 * or WC buffer can be written out of order.
+	 */
+	writeq(plen, piobuf);
+	ipath_flush_wc();
+	piobuf += 2;
+	if (len == 0) {
+		/*
+		 * If there is just the header portion, must flush before
+		 * writing last word of header for correctness, and after
+		 * the last header word (trigger word).
+		 */
+		__iowrite32_copy(piobuf, hdr, hdrwords - 1);
+		ipath_flush_wc();
+		__raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
+		ipath_flush_wc();
+		ret = 0;
+		goto bail;
+	}
+
+	__iowrite32_copy(piobuf, hdr, hdrwords);
+	piobuf += hdrwords;
+
+	/* The common case is aligned and contained in one segment. */
+	if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
+		   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
+		u32 w;
+		u32 *addr = (u32 *) ss->sge.vaddr;
+
+		/* Update address before sending packet. */
+		update_sge(ss, len);
+		/* Need to round up for the last dword in the packet. */
+		w = (len + 3) >> 2;
+		__iowrite32_copy(piobuf, addr, w - 1);
+		/* must flush early everything before trigger word */
+		ipath_flush_wc();
+		__raw_writel(addr[w - 1], piobuf + w - 1);
+		/* be sure trigger word is written */
+		ipath_flush_wc();
+		ret = 0;
+		goto bail;
+	}
+	copy_io(piobuf, ss, len);
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
+			    u64 *rwords, u64 *spkts, u64 *rpkts,
+			    u64 *xmit_wait)
+{
+	int ret;
+
+	if (!(dd->ipath_flags & IPATH_INITTED)) {
+		/* no hardware, freeze, etc. */
+		ipath_dbg("unit %u not usable\n", dd->ipath_unit);
+		ret = -EINVAL;
+		goto bail;
+	}
+	*swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
+	*rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+	*spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
+	*rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
+	*xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+/**
+ * ipath_get_counters - get various chip counters
+ * @dd: the infinipath device
+ * @cntrs: counters are placed here
+ *
+ * Return the counters needed by recv_pma_get_portcounters().
+ */
+int ipath_get_counters(struct ipath_devdata *dd,
+		       struct ipath_verbs_counters *cntrs)
+{
+	int ret;
+
+	if (!(dd->ipath_flags & IPATH_INITTED)) {
+		/* no hardware, freeze, etc. */
+		ipath_dbg("unit %u not usable\n", dd->ipath_unit);
+		ret = -EINVAL;
+		goto bail;
+	}
+	cntrs->symbol_error_counter =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
+	cntrs->link_error_recovery_counter =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
+	/*
+	 * The link downed counter counts when the other side downs the
+	 * connection.  We add in the number of times we downed the link
+	 * due to local link integrity errors to compensate.
+	 */
+	cntrs->link_downed_counter =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
+	cntrs->port_rcv_errors =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
+	cntrs->port_rcv_remphys_errors =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
+	cntrs->port_xmit_discards =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt);
+	cntrs->port_xmit_data =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
+	cntrs->port_rcv_data =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+	cntrs->port_xmit_packets =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
+	cntrs->port_rcv_packets =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
+	cntrs->local_link_integrity_errors = dd->ipath_lli_errors;
+	cntrs->excessive_buffer_overrun_errors = 0; /* XXX */
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
 /**
  * ipath_ib_piobufavail - callback when a PIO buffer is available
  * @arg: the device pointer
@@ -591,9 +930,8 @@ static void ipath_ib_timer(void *arg)
  * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
  * return zero).
  */
-static int ipath_ib_piobufavail(void *arg)
+int ipath_ib_piobufavail(struct ipath_ibdev *dev)
 {
-	struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
 	struct ipath_qp *qp;
 	unsigned long flags;
 
@@ -624,14 +962,14 @@ static int ipath_query_device(struct ib_device *ibdev,
 		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
 		IB_DEVICE_SYS_IMAGE_GUID;
 	props->page_size_cap = PAGE_SIZE;
-	props->vendor_id = ipath_layer_get_vendorid(dev->dd);
-	props->vendor_part_id = ipath_layer_get_deviceid(dev->dd);
-	props->hw_ver = ipath_layer_get_pcirev(dev->dd);
+	props->vendor_id = dev->dd->ipath_vendorid;
+	props->vendor_part_id = dev->dd->ipath_deviceid;
+	props->hw_ver = dev->dd->ipath_pcirev;
 
 	props->sys_image_guid = dev->sys_image_guid;
 
 	props->max_mr_size = ~0ull;
-	props->max_qp = dev->qp_table.max;
+	props->max_qp = ib_ipath_max_qps;
 	props->max_qp_wr = ib_ipath_max_qp_wrs;
 	props->max_sge = ib_ipath_max_sges;
 	props->max_cq = ib_ipath_max_cqs;
@@ -647,7 +985,7 @@ static int ipath_query_device(struct ib_device *ibdev,
 	props->max_srq_sge = ib_ipath_max_srq_sges;
 	/* props->local_ca_ack_delay */
 	props->atomic_cap = IB_ATOMIC_HCA;
-	props->max_pkeys = ipath_layer_get_npkeys(dev->dd);
+	props->max_pkeys = ipath_get_npkeys(dev->dd);
 	props->max_mcast_grp = ib_ipath_max_mcast_grps;
 	props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
 	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
@@ -672,12 +1010,17 @@ const u8 ipath_cvt_physportstate[16] = {
 	[INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6,
 };
 
+u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
+{
+	return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
+}
+
 static int ipath_query_port(struct ib_device *ibdev,
 			    u8 port, struct ib_port_attr *props)
 {
 	struct ipath_ibdev *dev = to_idev(ibdev);
 	enum ib_mtu mtu;
-	u16 lid = ipath_layer_get_lid(dev->dd);
+	u16 lid = dev->dd->ipath_lid;
 	u64 ibcstat;
 
 	memset(props, 0, sizeof(*props));
@@ -685,16 +1028,16 @@ static int ipath_query_port(struct ib_device *ibdev,
 	props->lmc = dev->mkeyprot_resv_lmc & 7;
 	props->sm_lid = dev->sm_lid;
 	props->sm_sl = dev->sm_sl;
-	ibcstat = ipath_layer_get_lastibcstat(dev->dd);
+	ibcstat = dev->dd->ipath_lastibcstat;
 	props->state = ((ibcstat >> 4) & 0x3) + 1;
 	/* See phys_state_show() */
 	props->phys_state = ipath_cvt_physportstate[
-		ipath_layer_get_lastibcstat(dev->dd) & 0xf];
+		dev->dd->ipath_lastibcstat & 0xf];
 	props->port_cap_flags = dev->port_cap_flags;
 	props->gid_tbl_len = 1;
 	props->max_msg_sz = 0x80000000;
-	props->pkey_tbl_len = ipath_layer_get_npkeys(dev->dd);
-	props->bad_pkey_cntr = ipath_layer_get_cr_errpkey(dev->dd) -
+	props->pkey_tbl_len = ipath_get_npkeys(dev->dd);
+	props->bad_pkey_cntr = ipath_get_cr_errpkey(dev->dd) -
 		dev->z_pkey_violations;
 	props->qkey_viol_cntr = dev->qkey_violations;
 	props->active_width = IB_WIDTH_4X;
@@ -704,7 +1047,7 @@ static int ipath_query_port(struct ib_device *ibdev,
 	props->init_type_reply = 0;
 
 	props->max_mtu = IB_MTU_4096;
-	switch (ipath_layer_get_ibmtu(dev->dd)) {
+	switch (dev->dd->ipath_ibmtu) {
 	case 4096:
 		mtu = IB_MTU_4096;
 		break;
@@ -763,7 +1106,7 @@ static int ipath_modify_port(struct ib_device *ibdev,
 	dev->port_cap_flags |= props->set_port_cap_mask;
 	dev->port_cap_flags &= ~props->clr_port_cap_mask;
 	if (port_modify_mask & IB_PORT_SHUTDOWN)
-		ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
+		ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
 	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
 		dev->qkey_violations = 0;
 	return 0;
@@ -780,7 +1123,7 @@ static int ipath_query_gid(struct ib_device *ibdev, u8 port,
 		goto bail;
 	}
 	gid->global.subnet_prefix = dev->gid_prefix;
-	gid->global.interface_id = ipath_layer_get_guid(dev->dd);
+	gid->global.interface_id = dev->dd->ipath_guid;
 
 	ret = 0;
 
@@ -803,18 +1146,22 @@ static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
 	 * we allow allocations of more than we report for this value.
 	 */
 
-	if (dev->n_pds_allocated == ib_ipath_max_pds) {
+	pd = kmalloc(sizeof *pd, GFP_KERNEL);
+	if (!pd) {
 		ret = ERR_PTR(-ENOMEM);
 		goto bail;
 	}
 
-	pd = kmalloc(sizeof *pd, GFP_KERNEL);
-	if (!pd) {
+	spin_lock(&dev->n_pds_lock);
+	if (dev->n_pds_allocated == ib_ipath_max_pds) {
+		spin_unlock(&dev->n_pds_lock);
+		kfree(pd);
 		ret = ERR_PTR(-ENOMEM);
 		goto bail;
 	}
 
 	dev->n_pds_allocated++;
+	spin_unlock(&dev->n_pds_lock);
 
 	/* ib_alloc_pd() will initialize pd->ibpd. */
 	pd->user = udata != NULL;
@@ -830,7 +1177,9 @@ static int ipath_dealloc_pd(struct ib_pd *ibpd)
 	struct ipath_pd *pd = to_ipd(ibpd);
 	struct ipath_ibdev *dev = to_idev(ibpd->device);
 
+	spin_lock(&dev->n_pds_lock);
 	dev->n_pds_allocated--;
+	spin_unlock(&dev->n_pds_lock);
 
 	kfree(pd);
 
@@ -851,11 +1200,6 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
 	struct ib_ah *ret;
 	struct ipath_ibdev *dev = to_idev(pd->device);
 
-	if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
 	/* A multicast address requires a GRH (see ch. 8.4.1). */
 	if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
 	    ah_attr->dlid != IPATH_PERMISSIVE_LID &&
@@ -881,7 +1225,16 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
 		goto bail;
 	}
 
+	spin_lock(&dev->n_ahs_lock);
+	if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
+		spin_unlock(&dev->n_ahs_lock);
+		kfree(ah);
+		ret = ERR_PTR(-ENOMEM);
+		goto bail;
+	}
+
 	dev->n_ahs_allocated++;
+	spin_unlock(&dev->n_ahs_lock);
 
 	/* ib_create_ah() will initialize ah->ibah. */
 	ah->attr = *ah_attr;
@@ -903,7 +1256,9 @@ static int ipath_destroy_ah(struct ib_ah *ibah)
 	struct ipath_ibdev *dev = to_idev(ibah->device);
 	struct ipath_ah *ah = to_iah(ibah);
 
+	spin_lock(&dev->n_ahs_lock);
 	dev->n_ahs_allocated--;
+	spin_unlock(&dev->n_ahs_lock);
 
 	kfree(ah);
 
@@ -919,25 +1274,50 @@ static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
 	return 0;
 }
 
+/**
+ * ipath_get_npkeys - return the size of the PKEY table for port 0
+ * @dd: the infinipath device
+ */
+unsigned ipath_get_npkeys(struct ipath_devdata *dd)
+{
+	return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
+}
+
+/**
+ * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table
+ * @dd: the infinipath device
+ * @index: the PKEY index
+ */
+unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
+{
+	unsigned ret;
+
+	if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
+		ret = 0;
+	else
+		ret = dd->ipath_pd[0]->port_pkeys[index];
+
+	return ret;
+}
+
 static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
 			    u16 *pkey)
 {
 	struct ipath_ibdev *dev = to_idev(ibdev);
 	int ret;
 
-	if (index >= ipath_layer_get_npkeys(dev->dd)) {
+	if (index >= ipath_get_npkeys(dev->dd)) {
 		ret = -EINVAL;
 		goto bail;
 	}
 
-	*pkey = ipath_layer_get_pkey(dev->dd, index);
+	*pkey = ipath_get_pkey(dev->dd, index);
 	ret = 0;
 
 bail:
 	return ret;
 }
 
-
 /**
  * ipath_alloc_ucontext - allocate a ucontest
  * @ibdev: the infiniband device
@@ -970,26 +1350,91 @@ static int ipath_dealloc_ucontext(struct ib_ucontext *context)
 
 static int ipath_verbs_register_sysfs(struct ib_device *dev);
 
+static void __verbs_timer(unsigned long arg)
+{
+	struct ipath_devdata *dd = (struct ipath_devdata *) arg;
+
+	/*
+	 * If port 0 receive packet interrupts are not available, or
+	 * can be missed, poll the receive queue
+	 */
+	if (dd->ipath_flags & IPATH_POLL_RX_INTR)
+		ipath_kreceive(dd);
+
+	/* Handle verbs layer timeouts. */
+	ipath_ib_timer(dd->verbs_dev);
+
+	mod_timer(&dd->verbs_timer, jiffies + 1);
+}
+
+static int enable_timer(struct ipath_devdata *dd)
+{
+	/*
+	 * Early chips had a design flaw where the chip and kernel idea
+	 * of the tail register don't always agree, and therefore we won't
+	 * get an interrupt on the next packet received.
+	 * If the board supports per packet receive interrupts, use it.
+	 * Otherwise, the timer function periodically checks for packets
+	 * to cover this case.
+	 * Either way, the timer is needed for verbs layer related
+	 * processing.
+	 */
+	if (dd->ipath_flags & IPATH_GPIO_INTR) {
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
+				 0x2074076542310ULL);
+		/* Enable GPIO bit 2 interrupt */
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
+				 (u64) (1 << 2));
+	}
+
+	init_timer(&dd->verbs_timer);
+	dd->verbs_timer.function = __verbs_timer;
+	dd->verbs_timer.data = (unsigned long)dd;
+	dd->verbs_timer.expires = jiffies + 1;
+	add_timer(&dd->verbs_timer);
+
+	return 0;
+}
+
+static int disable_timer(struct ipath_devdata *dd)
+{
+	/* Disable GPIO bit 2 interrupt */
+	if (dd->ipath_flags & IPATH_GPIO_INTR)
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
+
+	del_timer_sync(&dd->verbs_timer);
+
+	return 0;
+}
+
 /**
  * ipath_register_ib_device - register our device with the infiniband core
- * @unit: the device number to register
  * @dd: the device data structure
  * Return the allocated ipath_ibdev pointer or NULL on error.
  */
-static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
+int ipath_register_ib_device(struct ipath_devdata *dd)
 {
-	struct ipath_layer_counters cntrs;
+	struct ipath_verbs_counters cntrs;
 	struct ipath_ibdev *idev;
 	struct ib_device *dev;
 	int ret;
 
 	idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
-	if (idev == NULL)
+	if (idev == NULL) {
+		ret = -ENOMEM;
 		goto bail;
+	}
 
 	dev = &idev->ibdev;
 
 	/* Only need to initialize non-zero fields. */
+	spin_lock_init(&idev->n_pds_lock);
+	spin_lock_init(&idev->n_ahs_lock);
+	spin_lock_init(&idev->n_cqs_lock);
+	spin_lock_init(&idev->n_qps_lock);
+	spin_lock_init(&idev->n_srqs_lock);
+	spin_lock_init(&idev->n_mcast_grps_lock);
+
 	spin_lock_init(&idev->qp_table.lock);
 	spin_lock_init(&idev->lk_table.lock);
 	idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
@@ -1030,7 +1475,7 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
 	idev->link_width_enabled = 3;	/* 1x or 4x */
 
 	/* Snapshot current HW counters to "clear" them. */
-	ipath_layer_get_counters(dd, &cntrs);
+	ipath_get_counters(dd, &cntrs);
 	idev->z_symbol_error_counter = cntrs.symbol_error_counter;
 	idev->z_link_error_recovery_counter =
 		cntrs.link_error_recovery_counter;
@@ -1054,14 +1499,14 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
 	 * device types in the system, we can't be sure this is unique.
 	 */
 	if (!sys_image_guid)
-		sys_image_guid = ipath_layer_get_guid(dd);
+		sys_image_guid = dd->ipath_guid;
 	idev->sys_image_guid = sys_image_guid;
-	idev->ib_unit = unit;
+	idev->ib_unit = dd->ipath_unit;
 	idev->dd = dd;
 
 	strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
 	dev->owner = THIS_MODULE;
-	dev->node_guid = ipath_layer_get_guid(dd);
+	dev->node_guid = dd->ipath_guid;
 	dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
 	dev->uverbs_cmd_mask =
 		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
@@ -1093,9 +1538,9 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
 		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
 		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
 		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
-	dev->node_type = IB_NODE_CA;
+	dev->node_type = RDMA_NODE_IB_CA;
 	dev->phys_port_cnt = 1;
-	dev->dma_device = ipath_layer_get_device(dd);
+	dev->dma_device = &dd->pcidev->dev;
 	dev->class_dev.dev = dev->dma_device;
 	dev->query_device = ipath_query_device;
 	dev->modify_device = ipath_modify_device;
@@ -1137,9 +1582,10 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
 	dev->attach_mcast = ipath_multicast_attach;
 	dev->detach_mcast = ipath_multicast_detach;
 	dev->process_mad = ipath_process_mad;
+	dev->mmap = ipath_mmap;
 
 	snprintf(dev->node_desc, sizeof(dev->node_desc),
-		 IPATH_IDSTR " %s kernel_SMA", system_utsname.nodename);
+		 IPATH_IDSTR " %s", system_utsname.nodename);
 
 	ret = ib_register_device(dev);
 	if (ret)
@@ -1148,7 +1594,7 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
 	if (ipath_verbs_register_sysfs(dev))
 		goto err_class;
 
-	ipath_layer_enable_timer(dd);
+	enable_timer(dd);
 
 	goto bail;
 
@@ -1160,37 +1606,32 @@ err_lk:
 	kfree(idev->qp_table.table);
 err_qp:
 	ib_dealloc_device(dev);
-	_VERBS_ERROR("ib_ipath%d cannot register verbs (%d)!\n",
-		     unit, -ret);
+	ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
 	idev = NULL;
 
 bail:
-	return idev;
+	dd->verbs_dev = idev;
+	return ret;
 }
 
-static void ipath_unregister_ib_device(void *arg)
+void ipath_unregister_ib_device(struct ipath_ibdev *dev)
 {
-	struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
 	struct ib_device *ibdev = &dev->ibdev;
 
-	ipath_layer_disable_timer(dev->dd);
+	disable_timer(dev->dd);
 
 	ib_unregister_device(ibdev);
 
 	if (!list_empty(&dev->pending[0]) ||
 	    !list_empty(&dev->pending[1]) ||
 	    !list_empty(&dev->pending[2]))
-		_VERBS_ERROR("ipath%d pending list not empty!\n",
-			     dev->ib_unit);
+		ipath_dev_err(dev->dd, "pending list not empty!\n");
 	if (!list_empty(&dev->piowait))
-		_VERBS_ERROR("ipath%d piowait list not empty!\n",
-			     dev->ib_unit);
+		ipath_dev_err(dev->dd, "piowait list not empty!\n");
 	if (!list_empty(&dev->rnrwait))
-		_VERBS_ERROR("ipath%d rnrwait list not empty!\n",
-			     dev->ib_unit);
+		ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
 	if (!ipath_mcast_tree_empty())
-		_VERBS_ERROR("ipath%d multicast table memory leak!\n",
-			     dev->ib_unit);
+		ipath_dev_err(dev->dd, "multicast table memory leak!\n");
 	/*
 	 * Note that ipath_unregister_ib_device() can be called before all
 	 * the QPs are destroyed!
@@ -1201,25 +1642,12 @@ static void ipath_unregister_ib_device(void *arg)
 	ib_dealloc_device(ibdev);
 }
 
-static int __init ipath_verbs_init(void)
-{
-	return ipath_verbs_register(ipath_register_ib_device,
-				    ipath_unregister_ib_device,
-				    ipath_ib_piobufavail, ipath_ib_rcv,
-				    ipath_ib_timer);
-}
-
-static void __exit ipath_verbs_cleanup(void)
-{
-	ipath_verbs_unregister();
-}
-
 static ssize_t show_rev(struct class_device *cdev, char *buf)
 {
 	struct ipath_ibdev *dev =
 		container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
 
-	return sprintf(buf, "%x\n", ipath_layer_get_pcirev(dev->dd));
+	return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
 }
 
 static ssize_t show_hca(struct class_device *cdev, char *buf)
@@ -1228,7 +1656,7 @@ static ssize_t show_hca(struct class_device *cdev, char *buf)
 		container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
 	int ret;
 
-	ret = ipath_layer_get_boardname(dev->dd, buf, 128);
+	ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
 	if (ret < 0)
 		goto bail;
 	strcat(buf, "\n");
@@ -1305,6 +1733,3 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev)
 bail:
 	return ret;
 }
-
-module_init(ipath_verbs_init);
-module_exit(ipath_verbs_cleanup);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 2df684727dc..09bbb3f9a21 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -38,10 +38,10 @@
 #include <linux/spinlock.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kref.h>
 #include <rdma/ib_pack.h>
 
 #include "ipath_layer.h"
-#include "verbs_debug.h"
 
 #define QPN_MAX                 (1 << 24)
 #define QPNMAP_ENTRIES          (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
@@ -50,7 +50,7 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define IPATH_UVERBS_ABI_VERSION       1
+#define IPATH_UVERBS_ABI_VERSION       2
 
 /*
  * Define an ib_cq_notify value that is not valid so we know when CQ
@@ -152,19 +152,6 @@ struct ipath_mcast {
 	int n_attached;
 };
 
-/* Memory region */
-struct ipath_mr {
-	struct ib_mr ibmr;
-	struct ipath_mregion mr;	/* must be last */
-};
-
-/* Fast memory region */
-struct ipath_fmr {
-	struct ib_fmr ibfmr;
-	u8 page_shift;
-	struct ipath_mregion mr;	/* must be last */
-};
-
 /* Protection domain */
 struct ipath_pd {
 	struct ib_pd ibpd;
@@ -178,58 +165,89 @@ struct ipath_ah {
 };
 
 /*
- * Quick description of our CQ/QP locking scheme:
- *
- * We have one global lock that protects dev->cq/qp_table.  Each
- * struct ipath_cq/qp also has its own lock.  An individual qp lock
- * may be taken inside of an individual cq lock.  Both cqs attached to
- * a qp may be locked, with the send cq locked first.  No other
- * nesting should be done.
- *
- * Each struct ipath_cq/qp also has an atomic_t ref count.  The
- * pointer from the cq/qp_table to the struct counts as one reference.
- * This reference also is good for access through the consumer API, so
- * modifying the CQ/QP etc doesn't need to take another reference.
- * Access because of a completion being polled does need a reference.
- *
- * Finally, each struct ipath_cq/qp has a wait_queue_head_t for the
- * destroy function to sleep on.
- *
- * This means that access from the consumer API requires nothing but
- * taking the struct's lock.
- *
- * Access because of a completion event should go as follows:
- * - lock cq/qp_table and look up struct
- * - increment ref count in struct
- * - drop cq/qp_table lock
- * - lock struct, do your thing, and unlock struct
- * - decrement ref count; if zero, wake up waiters
- *
- * To destroy a CQ/QP, we can do the following:
- * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock
- * - decrement ref count
- * - wait_event until ref count is zero
- *
- * It is the consumer's responsibilty to make sure that no QP
- * operations (WQE posting or state modification) are pending when the
- * QP is destroyed.  Also, the consumer must make sure that calls to
- * qp_modify are serialized.
- *
- * Possible optimizations (wait for profile data to see if/where we
- * have locks bouncing between CPUs):
- * - split cq/qp table lock into n separate (cache-aligned) locks,
- *   indexed (say) by the page in the table
+ * This structure is used by ipath_mmap() to validate an offset
+ * when an mmap() request is made.  The vm_area_struct then uses
+ * this as its vm_private_data.
+ */
+struct ipath_mmap_info {
+	struct ipath_mmap_info *next;
+	struct ib_ucontext *context;
+	void *obj;
+	struct kref ref;
+	unsigned size;
+	unsigned mmap_cnt;
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and completion queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
  */
+struct ipath_cq_wc {
+	u32 head;		/* index of next entry to fill */
+	u32 tail;		/* index of next ib_poll_cq() entry */
+	struct ib_wc queue[1];	/* this is actually size ibcq.cqe + 1 */
+};
 
+/*
+ * The completion queue structure.
+ */
 struct ipath_cq {
 	struct ib_cq ibcq;
 	struct tasklet_struct comptask;
 	spinlock_t lock;
 	u8 notify;
 	u8 triggered;
-	u32 head;		/* new records added to the head */
-	u32 tail;		/* poll_cq() reads from here. */
-	struct ib_wc *queue;	/* this is actually ibcq.cqe + 1 */
+	struct ipath_cq_wc *queue;
+	struct ipath_mmap_info *ip;
+};
+
+/*
+ * A segment is a linear region of low physical memory.
+ * XXX Maybe we should use phys addr here and kmap()/kunmap().
+ * Used by the verbs layer.
+ */
+struct ipath_seg {
+	void *vaddr;
+	size_t length;
+};
+
+/* The number of ipath_segs that fit in a page. */
+#define IPATH_SEGSZ     (PAGE_SIZE / sizeof (struct ipath_seg))
+
+struct ipath_segarray {
+	struct ipath_seg segs[IPATH_SEGSZ];
+};
+
+struct ipath_mregion {
+	u64 user_base;		/* User's address for this region */
+	u64 iova;		/* IB start address of this region */
+	size_t length;
+	u32 lkey;
+	u32 offset;		/* offset (bytes) to start of region */
+	int access_flags;
+	u32 max_segs;		/* number of ipath_segs in all the arrays */
+	u32 mapsz;		/* size of the map array */
+	struct ipath_segarray *map[0];	/* the segments */
+};
+
+/*
+ * These keep track of the copy progress within a memory region.
+ * Used by the verbs layer.
+ */
+struct ipath_sge {
+	struct ipath_mregion *mr;
+	void *vaddr;		/* current pointer into the segment */
+	u32 sge_length;		/* length of the SGE */
+	u32 length;		/* remaining length of the segment */
+	u16 m;			/* current index: mr->map[m] */
+	u16 n;			/* current index: mr->map[m]->segs[n] */
+};
+
+/* Memory region */
+struct ipath_mr {
+	struct ib_mr ibmr;
+	struct ipath_mregion mr;	/* must be last */
 };
 
 /*
@@ -248,32 +266,50 @@ struct ipath_swqe {
 
 /*
  * Receive work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->r_max_sge.
+ * The size of the sg_list is determined when the QP (or SRQ) is created
+ * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
  */
 struct ipath_rwqe {
 	u64 wr_id;
-	u32 length;		/* total length of data in sg_list */
 	u8 num_sge;
-	struct ipath_sge sg_list[0];
+	struct ib_sge sg_list[0];
 };
 
-struct ipath_rq {
-	spinlock_t lock;
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and receive work queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ * Note that the wq array elements are variable size so you can't
+ * just index into the array to get the N'th element;
+ * use get_rwqe_ptr() instead.
+ */
+struct ipath_rwq {
 	u32 head;		/* new work requests posted to the head */
 	u32 tail;		/* receives pull requests from here. */
+	struct ipath_rwqe wq[0];
+};
+
+struct ipath_rq {
+	struct ipath_rwq *wq;
+	spinlock_t lock;
 	u32 size;		/* size of RWQE array */
 	u8 max_sge;
-	struct ipath_rwqe *wq;	/* RWQE array */
 };
 
 struct ipath_srq {
 	struct ib_srq ibsrq;
 	struct ipath_rq rq;
+	struct ipath_mmap_info *ip;
 	/* send signal when number of RWQEs < limit */
 	u32 limit;
 };
 
+struct ipath_sge_state {
+	struct ipath_sge *sg_list;      /* next SGE to be used if any */
+	struct ipath_sge sge;   /* progress state for the current SGE */
+	u8 num_sge;
+};
+
 /*
  * Variables prefixed with s_ are for the requester (sender).
  * Variables prefixed with r_ are for the responder (receiver).
@@ -293,6 +329,7 @@ struct ipath_qp {
 	atomic_t refcount;
 	wait_queue_head_t wait;
 	struct tasklet_struct s_task;
+	struct ipath_mmap_info *ip;
 	struct ipath_sge_state *s_cur_sge;
 	struct ipath_sge_state s_sge;	/* current send request data */
 	/* current RDMA read send data */
@@ -334,6 +371,7 @@ struct ipath_qp {
 	u8 s_retry;		/* requester retry counter */
 	u8 s_rnr_retry;		/* requester RNR retry counter */
 	u8 s_pkey_index;	/* PKEY index to use */
+	u8 timeout;		/* Timeout for this QP */
 	enum ib_mtu path_mtu;
 	u32 remote_qpn;
 	u32 qkey;		/* QKEY for this QP (for UD or RD) */
@@ -345,7 +383,8 @@ struct ipath_qp {
 	u32 s_ssn;		/* SSN of tail entry */
 	u32 s_lsn;		/* limit sequence number (credit) */
 	struct ipath_swqe *s_wq;	/* send work queue */
-	struct ipath_rq r_rq;	/* receive work queue */
+	struct ipath_rq r_rq;		/* receive work queue */
+	struct ipath_sge r_sg_list[0];	/* verified SGEs */
 };
 
 /*
@@ -369,15 +408,15 @@ static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp,
 
 /*
  * Since struct ipath_rwqe is not a fixed size, we can't simply index into
- * struct ipath_rq.wq.  This function does the array index computation.
+ * struct ipath_rwq.wq.  This function does the array index computation.
  */
 static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
 					      unsigned n)
 {
 	return (struct ipath_rwqe *)
-		((char *) rq->wq +
+		((char *) rq->wq->wq +
 		 (sizeof(struct ipath_rwqe) +
-		  rq->max_sge * sizeof(struct ipath_sge)) * n);
+		  rq->max_sge * sizeof(struct ib_sge)) * n);
 }
 
 /*
@@ -417,6 +456,7 @@ struct ipath_ibdev {
 	struct ib_device ibdev;
 	struct list_head dev_list;
 	struct ipath_devdata *dd;
+	struct ipath_mmap_info *pending_mmaps;
 	int ib_unit;		/* This is the device number */
 	u16 sm_lid;		/* in host order */
 	u8 sm_sl;
@@ -435,11 +475,20 @@ struct ipath_ibdev {
 	__be64 sys_image_guid;	/* in network order */
 	__be64 gid_prefix;	/* in network order */
 	__be64 mkey;
+
 	u32 n_pds_allocated;	/* number of PDs allocated for device */
+	spinlock_t n_pds_lock;
 	u32 n_ahs_allocated;	/* number of AHs allocated for device */
+	spinlock_t n_ahs_lock;
 	u32 n_cqs_allocated;	/* number of CQs allocated for device */
+	spinlock_t n_cqs_lock;
+	u32 n_qps_allocated;	/* number of QPs allocated for device */
+	spinlock_t n_qps_lock;
 	u32 n_srqs_allocated;	/* number of SRQs allocated for device */
+	spinlock_t n_srqs_lock;
 	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
+	spinlock_t n_mcast_grps_lock;
+
 	u64 ipath_sword;	/* total dwords sent (sample result) */
 	u64 ipath_rword;	/* total dwords received (sample result) */
 	u64 ipath_spkts;	/* total packets sent (sample result) */
@@ -494,8 +543,19 @@ struct ipath_ibdev {
 	struct ipath_opcode_stats opstats[128];
 };
 
-struct ipath_ucontext {
-	struct ib_ucontext ibucontext;
+struct ipath_verbs_counters {
+	u64 symbol_error_counter;
+	u64 link_error_recovery_counter;
+	u64 link_downed_counter;
+	u64 port_rcv_errors;
+	u64 port_rcv_remphys_errors;
+	u64 port_xmit_discards;
+	u64 port_xmit_data;
+	u64 port_rcv_data;
+	u64 port_xmit_packets;
+	u64 port_rcv_packets;
+	u32 local_link_integrity_errors;
+	u32 excessive_buffer_overrun_errors;
 };
 
 static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
@@ -503,11 +563,6 @@ static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
 	return container_of(ibmr, struct ipath_mr, ibmr);
 }
 
-static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
-	return container_of(ibfmr, struct ipath_fmr, ibfmr);
-}
-
 static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd)
 {
 	return container_of(ibpd, struct ipath_pd, ibpd);
@@ -545,12 +600,6 @@ int ipath_process_mad(struct ib_device *ibdev,
 		      struct ib_grh *in_grh,
 		      struct ib_mad *in_mad, struct ib_mad *out_mad);
 
-static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
-						  *ibucontext)
-{
-	return container_of(ibucontext, struct ipath_ucontext, ibucontext);
-}
-
 /*
  * Compare the lower 24 bits of the two values.
  * Returns an integer <, ==, or > than zero.
@@ -562,6 +611,13 @@ static inline int ipath_cmp24(u32 a, u32 b)
 
 struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid);
 
+int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
+			    u64 *rwords, u64 *spkts, u64 *rpkts,
+			    u64 *xmit_wait);
+
+int ipath_get_counters(struct ipath_devdata *dd,
+		       struct ipath_verbs_counters *cntrs);
+
 int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
 
 int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
@@ -579,7 +635,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 int ipath_destroy_qp(struct ib_qp *ibqp);
 
 int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		    int attr_mask);
+		    int attr_mask, struct ib_udata *udata);
 
 int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		   int attr_mask, struct ib_qp_init_attr *init_attr);
@@ -592,6 +648,9 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc);
 
 void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
 
+int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
+		     u32 *hdr, u32 len, struct ipath_sge_state *ss);
+
 void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
 
 int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
@@ -638,7 +697,8 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
 				struct ib_udata *udata);
 
 int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask);
+		     enum ib_srq_attr_mask attr_mask,
+		     struct ib_udata *udata);
 
 int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
 
@@ -680,6 +740,10 @@ int ipath_unmap_fmr(struct list_head *fmr_list);
 
 int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
 
+void ipath_release_mmap_info(struct kref *ref);
+
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
 void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev);
 
 void ipath_insert_rnr_queue(struct ipath_qp *qp);
@@ -700,6 +764,22 @@ int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
 int ipath_make_uc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
 		      u32 pmtu, u32 *bth0p, u32 *bth2p);
 
+int ipath_register_ib_device(struct ipath_devdata *);
+
+void ipath_unregister_ib_device(struct ipath_ibdev *);
+
+void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32);
+
+int ipath_ib_piobufavail(struct ipath_ibdev *);
+
+void ipath_ib_timer(struct ipath_ibdev *);
+
+unsigned ipath_get_npkeys(struct ipath_devdata *);
+
+u32 ipath_get_cr_errpkey(struct ipath_devdata *);
+
+unsigned ipath_get_pkey(struct ipath_devdata *, unsigned);
+
 extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
 
 extern const u8 ipath_cvt_physportstate[];
@@ -714,6 +794,8 @@ extern unsigned int ib_ipath_max_cqs;
 
 extern unsigned int ib_ipath_max_qp_wrs;
 
+extern unsigned int ib_ipath_max_qps;
+
 extern unsigned int ib_ipath_max_sges;
 
 extern unsigned int ib_ipath_max_mcast_grps;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
index ee0e1d96d72..085e28b939e 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -207,12 +207,17 @@ static int ipath_mcast_add(struct ipath_ibdev *dev,
 		goto bail;
 	}
 
+	spin_lock(&dev->n_mcast_grps_lock);
 	if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) {
+		spin_unlock(&dev->n_mcast_grps_lock);
 		ret = ENOMEM;
 		goto bail;
 	}
 
 	dev->n_mcast_grps_allocated++;
+	spin_unlock(&dev->n_mcast_grps_lock);
+
+	mcast->n_attached++;
 
 	list_add_tail_rcu(&mqp->list, &mcast->qp_list);
 
@@ -343,7 +348,9 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 		atomic_dec(&mcast->refcount);
 		wait_event(mcast->wait, !atomic_read(&mcast->refcount));
 		ipath_mcast_free(mcast);
+		spin_lock(&dev->n_mcast_grps_lock);
 		dev->n_mcast_grps_allocated--;
+		spin_unlock(&dev->n_mcast_grps_lock);
 	}
 
 	ret = 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
new file mode 100644
index 00000000000..036fde662aa
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file is conditionally built on PowerPC only.  Otherwise weak symbol
+ * versions of the functions exported from here are used.
+ */
+
+#include "ipath_kernel.h"
+
+/**
+ * ipath_unordered_wc - indicate whether write combining is ordered
+ *
+ * PowerPC systems (at least those in the 970 processor family)
+ * write partially filled store buffers in address order, but will write
+ * completely filled store buffers in "random" order, and therefore must
+ * have serialization for correctness with current InfiniPath chips.
+ *
+ */
+int ipath_unordered_wc(void)
+{
+	return 1;
+}
diff --git a/drivers/infiniband/hw/ipath/verbs_debug.h b/drivers/infiniband/hw/ipath/verbs_debug.h
deleted file mode 100644
index 6186676f2a1..00000000000
--- a/drivers/infiniband/hw/ipath/verbs_debug.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _VERBS_DEBUG_H
-#define _VERBS_DEBUG_H
-
-/*
- * This file contains tracing code for the ib_ipath kernel module.
- */
-#ifndef _VERBS_DEBUGGING	/* tracing enabled or not */
-#define _VERBS_DEBUGGING 1
-#endif
-
-extern unsigned ib_ipath_debug;
-
-#define _VERBS_ERROR(fmt,...) \
-	do { \
-		printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
-	} while(0)
-
-#define _VERBS_UNIT_ERROR(unit,fmt,...) \
-	do { \
-		printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
-	} while(0)
-
-#if _VERBS_DEBUGGING
-
-/*
- * Mask values for debugging.  The scheme allows us to compile out any
- * of the debug tracing stuff, and if compiled in, to enable or
- * disable dynamically.
- * This can be set at modprobe time also:
- *      modprobe ib_path ib_ipath_debug=3
- */
-
-#define __VERBS_INFO        0x1	/* generic low verbosity stuff */
-#define __VERBS_DBG         0x2	/* generic debug */
-#define __VERBS_VDBG        0x4	/* verbose debug */
-#define __VERBS_SMADBG      0x8000	/* sma packet debug */
-
-#define _VERBS_INFO(fmt,...) \
-	do { \
-		if (unlikely(ib_ipath_debug&__VERBS_INFO)) \
-			printk(KERN_INFO "%s: " fmt,"ib_ipath", \
-			       ##__VA_ARGS__); \
-	} while(0)
-
-#define _VERBS_DBG(fmt,...) \
-	do { \
-		if (unlikely(ib_ipath_debug&__VERBS_DBG)) \
-			printk(KERN_DEBUG "%s: " fmt, __func__, \
-			       ##__VA_ARGS__); \
-	} while(0)
-
-#define _VERBS_VDBG(fmt,...) \
-	do { \
-		if (unlikely(ib_ipath_debug&__VERBS_VDBG)) \
-			printk(KERN_DEBUG "%s: " fmt, __func__, \
-			       ##__VA_ARGS__); \
-	} while(0)
-
-#define _VERBS_SMADBG(fmt,...) \
-	do { \
-		if (unlikely(ib_ipath_debug&__VERBS_SMADBG)) \
-			printk(KERN_DEBUG "%s: " fmt, __func__, \
-			       ##__VA_ARGS__); \
-	} while(0)
-
-#else /* ! _VERBS_DEBUGGING */
-
-#define _VERBS_INFO(fmt,...)
-#define _VERBS_DBG(fmt,...)
-#define _VERBS_VDBG(fmt,...)
-#define _VERBS_SMADBG(fmt,...)
-
-#endif /* _VERBS_DEBUGGING */
-
-#endif /* _VERBS_DEBUG_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index e215041b2db..69599455aca 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -90,7 +90,7 @@ static enum ib_rate tavor_rate_to_ib(u8 mthca_rate, u8 port_rate)
 	case MTHCA_RATE_TAVOR_1X:     return IB_RATE_2_5_GBPS;
 	case MTHCA_RATE_TAVOR_1X_DDR: return IB_RATE_5_GBPS;
 	case MTHCA_RATE_TAVOR_4X:     return IB_RATE_10_GBPS;
-	default:		      return port_rate;
+	default:		      return mult_to_ib_rate(port_rate);
 	}
 }
 
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index c3bec7490f5..cd044ea2dfa 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -34,6 +34,7 @@
 
 #include <linux/jiffies.h>
 #include <linux/timer.h>
+#include <linux/workqueue.h>
 
 #include "mthca_dev.h"
 
@@ -48,9 +49,41 @@ enum {
 
 static DEFINE_SPINLOCK(catas_lock);
 
+static LIST_HEAD(catas_list);
+static struct workqueue_struct *catas_wq;
+static struct work_struct catas_work;
+
+static int catas_reset_disable;
+module_param_named(catas_reset_disable, catas_reset_disable, int, 0644);
+MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero");
+
+static void catas_reset(void *work_ptr)
+{
+	struct mthca_dev *dev, *tmpdev;
+	LIST_HEAD(tlist);
+	int ret;
+
+	mutex_lock(&mthca_device_mutex);
+
+	spin_lock_irq(&catas_lock);
+	list_splice_init(&catas_list, &tlist);
+	spin_unlock_irq(&catas_lock);
+
+	list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) {
+		ret = __mthca_restart_one(dev->pdev);
+		if (ret)
+			mthca_err(dev, "Reset failed (%d)\n", ret);
+		else
+			mthca_dbg(dev, "Reset succeeded\n");
+	}
+
+	mutex_unlock(&mthca_device_mutex);
+}
+
 static void handle_catas(struct mthca_dev *dev)
 {
 	struct ib_event event;
+	unsigned long flags;
 	const char *type;
 	int i;
 
@@ -82,6 +115,14 @@ static void handle_catas(struct mthca_dev *dev)
 	for (i = 0; i < dev->catas_err.size; ++i)
 		mthca_err(dev, "  buf[%02x]: %08x\n",
 			  i, swab32(readl(dev->catas_err.map + i)));
+
+	if (catas_reset_disable)
+		return;
+
+	spin_lock_irqsave(&catas_lock, flags);
+	list_add(&dev->catas_err.list, &catas_list);
+	queue_work(catas_wq, &catas_work);
+	spin_unlock_irqrestore(&catas_lock, flags);
 }
 
 static void poll_catas(unsigned long dev_ptr)
@@ -135,6 +176,7 @@ void mthca_start_catas_poll(struct mthca_dev *dev)
 	dev->catas_err.timer.data     = (unsigned long) dev;
 	dev->catas_err.timer.function = poll_catas;
 	dev->catas_err.timer.expires  = jiffies + MTHCA_CATAS_POLL_INTERVAL;
+	INIT_LIST_HEAD(&dev->catas_err.list);
 	add_timer(&dev->catas_err.timer);
 }
 
@@ -153,4 +195,24 @@ void mthca_stop_catas_poll(struct mthca_dev *dev)
 				    dev->catas_err.addr),
 				   dev->catas_err.size * 4);
 	}
+
+	spin_lock_irq(&catas_lock);
+	list_del(&dev->catas_err.list);
+	spin_unlock_irq(&catas_lock);
+}
+
+int __init mthca_catas_init(void)
+{
+	INIT_WORK(&catas_work, catas_reset, NULL);
+
+	catas_wq = create_singlethread_workqueue("mthca_catas");
+	if (!catas_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void mthca_catas_cleanup(void)
+{
+	destroy_workqueue(catas_wq);
 }
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index deabc14b4ea..99a94d71093 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -34,7 +34,7 @@
  * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $
  */
 
-#include <linux/sched.h>
+#include <linux/completion.h>
 #include <linux/pci.h>
 #include <linux/errno.h>
 #include <asm/io.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 3e27a084257..e393681ba7d 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -544,11 +544,11 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
 		wq = &(*cur_qp)->rq;
 		wqe = be32_to_cpu(cqe->wqe);
 		wqe_index = wqe >> wq->wqe_shift;
-               /*
-		* WQE addr == base - 1 might be reported in receive completion
-		* with error instead of (rq size - 1) by Sinai FW 1.0.800 and
-		* Arbel FW 5.1.400.  This bug should be fixed in later FW revs.
-		*/
+		/*
+		 * WQE addr == base - 1 might be reported in receive completion
+		 * with error instead of (rq size - 1) by Sinai FW 1.0.800 and
+		 * Arbel FW 5.1.400.  This bug should be fixed in later FW revs.
+		 */
 		if (unlikely(wqe_index < 0))
 			wqe_index = wq->max - 1;
 		entry->wr_id = (*cur_qp)->wrid[wqe_index];
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index f8160b8de09..fe5cecf70fe 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -45,6 +45,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/timer.h>
 #include <linux/mutex.h>
+#include <linux/list.h>
 
 #include <asm/semaphore.h>
 
@@ -283,8 +284,11 @@ struct mthca_catas_err {
 	unsigned long		stop;
 	u32			size;
 	struct timer_list	timer;
+	struct list_head	list;
 };
 
+extern struct mutex mthca_device_mutex;
+
 struct mthca_dev {
 	struct ib_device  ib_dev;
 	struct pci_dev   *pdev;
@@ -450,6 +454,9 @@ void mthca_unregister_device(struct mthca_dev *dev);
 
 void mthca_start_catas_poll(struct mthca_dev *dev);
 void mthca_stop_catas_poll(struct mthca_dev *dev);
+int __mthca_restart_one(struct pci_dev *pdev);
+int mthca_catas_init(void);
+void mthca_catas_cleanup(void);
 
 int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
 void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
@@ -506,7 +513,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
 		    struct ib_srq_attr *attr, struct mthca_srq *srq);
 void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
 int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask);
+		     enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
 int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
 int mthca_max_srq_sge(struct mthca_dev *dev);
 void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
@@ -521,7 +528,8 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
 		    enum ib_event_type event_type);
 int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
 		   struct ib_qp_init_attr *qp_init_attr);
-int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask);
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		    struct ib_udata *udata);
 int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			  struct ib_send_wr **bad_wr);
 int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index d9bc030bccc..45e106f1480 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -119,7 +119,7 @@ static void smp_snoop(struct ib_device *ibdev,
 
 			mthca_update_rate(to_mdev(ibdev), port_num);
 			update_sm_ah(to_mdev(ibdev), port_num,
-				     be16_to_cpu(pinfo->lid),
+				     be16_to_cpu(pinfo->sm_lid),
 				     pinfo->neighbormtu_mastersmsl & 0xf);
 
 			event.device           = ibdev;
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 7b82c1907f0..47ea0214836 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -80,6 +80,8 @@ static int tune_pci = 0;
 module_param(tune_pci, int, 0444);
 MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero");
 
+struct mutex mthca_device_mutex;
+
 static const char mthca_version[] __devinitdata =
 	DRV_NAME ": Mellanox InfiniBand HCA driver v"
 	DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -978,28 +980,15 @@ static struct {
 					MTHCA_FLAG_SINAI_OPT }
 };
 
-static int __devinit mthca_init_one(struct pci_dev *pdev,
-				    const struct pci_device_id *id)
+static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
 {
-	static int mthca_version_printed = 0;
 	int ddr_hidden = 0;
 	int err;
 	struct mthca_dev *mdev;
 
-	if (!mthca_version_printed) {
-		printk(KERN_INFO "%s", mthca_version);
-		++mthca_version_printed;
-	}
-
 	printk(KERN_INFO PFX "Initializing %s\n",
 	       pci_name(pdev));
 
-	if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
-		printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
-		       pci_name(pdev), id->driver_data);
-		return -ENODEV;
-	}
-
 	err = pci_enable_device(pdev);
 	if (err) {
 		dev_err(&pdev->dev, "Cannot enable PCI device, "
@@ -1065,7 +1054,7 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
 
 	mdev->pdev = pdev;
 
-	mdev->mthca_flags = mthca_hca_table[id->driver_data].flags;
+	mdev->mthca_flags = mthca_hca_table[hca_type].flags;
 	if (ddr_hidden)
 		mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
 
@@ -1099,13 +1088,13 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
 	if (err)
 		goto err_cmd;
 
-	if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) {
+	if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) {
 		mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n",
 			   (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff,
 			   (int) (mdev->fw_ver & 0xffff),
-			   (int) (mthca_hca_table[id->driver_data].latest_fw >> 32),
-			   (int) (mthca_hca_table[id->driver_data].latest_fw >> 16) & 0xffff,
-			   (int) (mthca_hca_table[id->driver_data].latest_fw & 0xffff));
+			   (int) (mthca_hca_table[hca_type].latest_fw >> 32),
+			   (int) (mthca_hca_table[hca_type].latest_fw >> 16) & 0xffff,
+			   (int) (mthca_hca_table[hca_type].latest_fw & 0xffff));
 		mthca_warn(mdev, "If you have problems, try updating your HCA FW.\n");
 	}
 
@@ -1122,6 +1111,7 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
 		goto err_unregister;
 
 	pci_set_drvdata(pdev, mdev);
+	mdev->hca_type = hca_type;
 
 	return 0;
 
@@ -1166,7 +1156,7 @@ err_disable_pdev:
 	return err;
 }
 
-static void __devexit mthca_remove_one(struct pci_dev *pdev)
+static void __mthca_remove_one(struct pci_dev *pdev)
 {
 	struct mthca_dev *mdev = pci_get_drvdata(pdev);
 	u8 status;
@@ -1211,6 +1201,51 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev)
 	}
 }
 
+int __mthca_restart_one(struct pci_dev *pdev)
+{
+	struct mthca_dev *mdev;
+
+	mdev = pci_get_drvdata(pdev);
+	if (!mdev)
+		return -ENODEV;
+	__mthca_remove_one(pdev);
+	return __mthca_init_one(pdev, mdev->hca_type);
+}
+
+static int __devinit mthca_init_one(struct pci_dev *pdev,
+			     const struct pci_device_id *id)
+{
+	static int mthca_version_printed = 0;
+	int ret;
+
+	mutex_lock(&mthca_device_mutex);
+
+	if (!mthca_version_printed) {
+		printk(KERN_INFO "%s", mthca_version);
+		++mthca_version_printed;
+	}
+
+	if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
+		printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
+		       pci_name(pdev), id->driver_data);
+		mutex_unlock(&mthca_device_mutex);
+		return -ENODEV;
+	}
+
+	ret = __mthca_init_one(pdev, id->driver_data);
+
+	mutex_unlock(&mthca_device_mutex);
+
+	return ret;
+}
+
+static void __devexit mthca_remove_one(struct pci_dev *pdev)
+{
+	mutex_lock(&mthca_device_mutex);
+	__mthca_remove_one(pdev);
+	mutex_unlock(&mthca_device_mutex);
+}
+
 static struct pci_device_id mthca_pci_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR),
 	  .driver_data = TAVOR },
@@ -1248,13 +1283,24 @@ static int __init mthca_init(void)
 {
 	int ret;
 
+	mutex_init(&mthca_device_mutex);
+	ret = mthca_catas_init();
+	if (ret)
+		return ret;
+
 	ret = pci_register_driver(&mthca_driver);
-	return ret < 0 ? ret : 0;
+	if (ret < 0) {
+		mthca_catas_cleanup();
+		return ret;
+	}
+
+	return 0;
 }
 
 static void __exit mthca_cleanup(void)
 {
 	pci_unregister_driver(&mthca_driver);
+	mthca_catas_cleanup();
 }
 
 module_init(mthca_init);
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 265b1d1c4a6..981fe2eebdf 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1288,7 +1288,7 @@ int mthca_register_device(struct mthca_dev *dev)
 		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
 		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
 		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
-	dev->ib_dev.node_type            = IB_NODE_CA;
+	dev->ib_dev.node_type            = RDMA_NODE_IB_CA;
 	dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
 	dev->ib_dev.dma_device           = &dev->pdev->dev;
 	dev->ib_dev.class_dev.dev        = &dev->pdev->dev;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 2e8f6f36e0a..5e5c58b9920 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -408,7 +408,7 @@ static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr,
 	ib_ah_attr->sl       	  = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
 	ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
 	ib_ah_attr->static_rate   = mthca_rate_to_ib(dev,
-						     path->static_rate & 0x7,
+						     path->static_rate & 0xf,
 						     ib_ah_attr->port_num);
 	ib_ah_attr->ah_flags      = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
 	if (ib_ah_attr->ah_flags) {
@@ -472,10 +472,14 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
 	if (qp->transport == RC || qp->transport == UC) {
 		to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
 		to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+		qp_attr->alt_pkey_index =
+			be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
+		qp_attr->alt_port_num 	= qp_attr->alt_ah_attr.port_num;
 	}
 
-	qp_attr->pkey_index     = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
-	qp_attr->alt_pkey_index = be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
+	qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
+	qp_attr->port_num   =
+		(be32_to_cpu(context->pri_path.port_pkey) >> 24) & 0x3;
 
 	/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
 	qp_attr->sq_draining = mthca_state == MTHCA_QP_STATE_DRAINING;
@@ -486,11 +490,9 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
 		1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
 	qp_attr->min_rnr_timer 	    =
 		(be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
-	qp_attr->port_num 	    = qp_attr->ah_attr.port_num;
 	qp_attr->timeout 	    = context->pri_path.ackto >> 3;
 	qp_attr->retry_cnt 	    = (be32_to_cpu(context->params1) >> 16) & 0x7;
 	qp_attr->rnr_retry 	    = context->pri_path.rnr_retry >> 5;
-	qp_attr->alt_port_num 	    = qp_attr->alt_ah_attr.port_num;
 	qp_attr->alt_timeout 	    = context->alt_path.ackto >> 3;
 	qp_init_attr->cap 	    = qp_attr->cap;
 
@@ -527,7 +529,8 @@ static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah,
 	return 0;
 }
 
-int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		    struct ib_udata *udata)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
@@ -842,11 +845,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	 * entries and reinitialize the QP.
 	 */
 	if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) {
-		mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn,
+		mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
 			       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
 		if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
-			mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
-				       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+			mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, NULL);
 
 		mthca_wq_reset(&qp->sq);
 		qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index b60a9d79ae5..0f316c87bf6 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -358,7 +358,7 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
 }
 
 int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask)
+		     enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
 {
 	struct mthca_dev *dev = to_mdev(ibsrq->device);
 	struct mthca_srq *srq = to_msrq(ibsrq);
diff --git a/drivers/infiniband/hw/mthca/mthca_uar.c b/drivers/infiniband/hw/mthca/mthca_uar.c
index 8e9219842be..8b728486410 100644
--- a/drivers/infiniband/hw/mthca/mthca_uar.c
+++ b/drivers/infiniband/hw/mthca/mthca_uar.c
@@ -60,7 +60,7 @@ int mthca_init_uar_table(struct mthca_dev *dev)
 	ret = mthca_alloc_init(&dev->uar_table.alloc,
 			       dev->limits.num_uars,
 			       dev->limits.num_uars - 1,
-			       dev->limits.reserved_uars);
+			       dev->limits.reserved_uars + 1);
 	if (ret)
 		return ret;
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 474aa214ab5..0b8a79d53a0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -336,6 +336,8 @@ static inline void ipoib_unregister_debugfs(void) { }
 extern int ipoib_sendq_size;
 extern int ipoib_recvq_size;
 
+extern struct ib_sa_client ipoib_sa_client;
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 extern int ipoib_debug_level;
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 5033666b148..f426a69d9a4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -169,117 +169,129 @@ static int ipoib_ib_post_receives(struct net_device *dev)
 	return 0;
 }
 
-static void ipoib_ib_handle_wc(struct net_device *dev,
-			       struct ib_wc *wc)
+static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	unsigned int wr_id = wc->wr_id;
+	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
+	struct sk_buff *skb;
+	dma_addr_t addr;
 
-	ipoib_dbg_data(priv, "called: id %d, op %d, status: %d\n",
+	ipoib_dbg_data(priv, "recv completion: id %d, op %d, status: %d\n",
 		       wr_id, wc->opcode, wc->status);
 
-	if (wr_id & IPOIB_OP_RECV) {
-		wr_id &= ~IPOIB_OP_RECV;
-
-		if (wr_id < ipoib_recvq_size) {
-			struct sk_buff *skb  = priv->rx_ring[wr_id].skb;
-			dma_addr_t      addr = priv->rx_ring[wr_id].mapping;
-
-			if (unlikely(wc->status != IB_WC_SUCCESS)) {
-				if (wc->status != IB_WC_WR_FLUSH_ERR)
-					ipoib_warn(priv, "failed recv event "
-						   "(status=%d, wrid=%d vend_err %x)\n",
-						   wc->status, wr_id, wc->vendor_err);
-				dma_unmap_single(priv->ca->dma_device, addr,
-						 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
-				dev_kfree_skb_any(skb);
-				priv->rx_ring[wr_id].skb = NULL;
-				return;
-			}
+	if (unlikely(wr_id >= ipoib_recvq_size)) {
+		ipoib_warn(priv, "recv completion event with wrid %d (> %d)\n",
+			   wr_id, ipoib_recvq_size);
+		return;
+	}
 
-			/*
-			 * If we can't allocate a new RX buffer, dump
-			 * this packet and reuse the old buffer.
-			 */
-			if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
-				++priv->stats.rx_dropped;
-				goto repost;
-			}
+	skb  = priv->rx_ring[wr_id].skb;
+	addr = priv->rx_ring[wr_id].mapping;
 
-			ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
-				       wc->byte_len, wc->slid);
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			ipoib_warn(priv, "failed recv event "
+				   "(status=%d, wrid=%d vend_err %x)\n",
+				   wc->status, wr_id, wc->vendor_err);
+		dma_unmap_single(priv->ca->dma_device, addr,
+				 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+		dev_kfree_skb_any(skb);
+		priv->rx_ring[wr_id].skb = NULL;
+		return;
+	}
 
-			dma_unmap_single(priv->ca->dma_device, addr,
-					 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+	/*
+	 * If we can't allocate a new RX buffer, dump
+	 * this packet and reuse the old buffer.
+	 */
+	if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
+		++priv->stats.rx_dropped;
+		goto repost;
+	}
 
-			skb_put(skb, wc->byte_len);
-			skb_pull(skb, IB_GRH_BYTES);
+	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
+		       wc->byte_len, wc->slid);
 
-			if (wc->slid != priv->local_lid ||
-			    wc->src_qp != priv->qp->qp_num) {
-				skb->protocol = ((struct ipoib_header *) skb->data)->proto;
-				skb->mac.raw = skb->data;
-				skb_pull(skb, IPOIB_ENCAP_LEN);
+	dma_unmap_single(priv->ca->dma_device, addr,
+			 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
 
-				dev->last_rx = jiffies;
-				++priv->stats.rx_packets;
-				priv->stats.rx_bytes += skb->len;
+	skb_put(skb, wc->byte_len);
+	skb_pull(skb, IB_GRH_BYTES);
 
-				skb->dev = dev;
-				/* XXX get correct PACKET_ type here */
-				skb->pkt_type = PACKET_HOST;
-				netif_rx_ni(skb);
-			} else {
-				ipoib_dbg_data(priv, "dropping loopback packet\n");
-				dev_kfree_skb_any(skb);
-			}
+	if (wc->slid != priv->local_lid ||
+	    wc->src_qp != priv->qp->qp_num) {
+		skb->protocol = ((struct ipoib_header *) skb->data)->proto;
+		skb->mac.raw = skb->data;
+		skb_pull(skb, IPOIB_ENCAP_LEN);
 
-		repost:
-			if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
-				ipoib_warn(priv, "ipoib_ib_post_receive failed "
-					   "for buf %d\n", wr_id);
-		} else
-			ipoib_warn(priv, "completion event with wrid %d\n",
-				   wr_id);
+		dev->last_rx = jiffies;
+		++priv->stats.rx_packets;
+		priv->stats.rx_bytes += skb->len;
 
+		skb->dev = dev;
+		/* XXX get correct PACKET_ type here */
+		skb->pkt_type = PACKET_HOST;
+		netif_rx_ni(skb);
 	} else {
-		struct ipoib_tx_buf *tx_req;
-		unsigned long flags;
+		ipoib_dbg_data(priv, "dropping loopback packet\n");
+		dev_kfree_skb_any(skb);
+	}
 
-		if (wr_id >= ipoib_sendq_size) {
-			ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
-				   wr_id, ipoib_sendq_size);
-			return;
-		}
+repost:
+	if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
+		ipoib_warn(priv, "ipoib_ib_post_receive failed "
+			   "for buf %d\n", wr_id);
+}
 
-		ipoib_dbg_data(priv, "send complete, wrid %d\n", wr_id);
+static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	unsigned int wr_id = wc->wr_id;
+	struct ipoib_tx_buf *tx_req;
+	unsigned long flags;
 
-		tx_req = &priv->tx_ring[wr_id];
+	ipoib_dbg_data(priv, "send completion: id %d, op %d, status: %d\n",
+		       wr_id, wc->opcode, wc->status);
 
-		dma_unmap_single(priv->ca->dma_device,
-				 pci_unmap_addr(tx_req, mapping),
-				 tx_req->skb->len,
-				 DMA_TO_DEVICE);
+	if (unlikely(wr_id >= ipoib_sendq_size)) {
+		ipoib_warn(priv, "send completion event with wrid %d (> %d)\n",
+			   wr_id, ipoib_sendq_size);
+		return;
+	}
 
-		++priv->stats.tx_packets;
-		priv->stats.tx_bytes += tx_req->skb->len;
+	tx_req = &priv->tx_ring[wr_id];
 
-		dev_kfree_skb_any(tx_req->skb);
+	dma_unmap_single(priv->ca->dma_device,
+			 pci_unmap_addr(tx_req, mapping),
+			 tx_req->skb->len,
+			 DMA_TO_DEVICE);
 
-		spin_lock_irqsave(&priv->tx_lock, flags);
-		++priv->tx_tail;
-		if (netif_queue_stopped(dev) &&
-		    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) &&
-		    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
-			netif_wake_queue(dev);
-		spin_unlock_irqrestore(&priv->tx_lock, flags);
+	++priv->stats.tx_packets;
+	priv->stats.tx_bytes += tx_req->skb->len;
 
-		if (wc->status != IB_WC_SUCCESS &&
-		    wc->status != IB_WC_WR_FLUSH_ERR)
-			ipoib_warn(priv, "failed send event "
-				   "(status=%d, wrid=%d vend_err %x)\n",
-				   wc->status, wr_id, wc->vendor_err);
-	}
+	dev_kfree_skb_any(tx_req->skb);
+
+	spin_lock_irqsave(&priv->tx_lock, flags);
+	++priv->tx_tail;
+	if (netif_queue_stopped(dev) &&
+	    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) &&
+	    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
+		netif_wake_queue(dev);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+	if (wc->status != IB_WC_SUCCESS &&
+	    wc->status != IB_WC_WR_FLUSH_ERR)
+		ipoib_warn(priv, "failed send event "
+			   "(status=%d, wrid=%d vend_err %x)\n",
+			   wc->status, wr_id, wc->vendor_err);
+}
+
+static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc)
+{
+	if (wc->wr_id & IPOIB_OP_RECV)
+		ipoib_ib_handle_rx_wc(dev, wc);
+	else
+		ipoib_ib_handle_tx_wc(dev, wc);
 }
 
 void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
@@ -320,7 +332,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 	struct ipoib_tx_buf *tx_req;
 	dma_addr_t addr;
 
-	if (skb->len > dev->mtu + INFINIBAND_ALEN) {
+	if (unlikely(skb->len > dev->mtu + INFINIBAND_ALEN)) {
 		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
 			   skb->len, dev->mtu + INFINIBAND_ALEN);
 		++priv->stats.tx_dropped;
@@ -619,8 +631,10 @@ void ipoib_ib_dev_flush(void *_dev)
 	 * The device could have been brought down between the start and when
 	 * we get here, don't bring it back up if it's not configured up
 	 */
-	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
 		ipoib_ib_dev_up(dev);
+		ipoib_mcast_restart_task(dev);
+	}
 
 	mutex_lock(&priv->vlan_mutex);
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index cf71d2a5515..1eaf00e9862 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -40,7 +40,6 @@
 
 #include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/vmalloc.h>
 #include <linux/kernel.h>
 
 #include <linux/if_arp.h>	/* For ARPHRD_xxx */
@@ -82,6 +81,8 @@ static const u8 ipv4_bcast_addr[] = {
 
 struct workqueue_struct *ipoib_workqueue;
 
+struct ib_sa_client ipoib_sa_client;
+
 static void ipoib_add_one(struct ib_device *device);
 static void ipoib_remove_one(struct ib_device *device);
 
@@ -336,7 +337,8 @@ void ipoib_flush_paths(struct net_device *dev)
 	struct ipoib_path *path, *tp;
 	LIST_HEAD(remove_list);
 
-	spin_lock_irq(&priv->lock);
+	spin_lock_irq(&priv->tx_lock);
+	spin_lock(&priv->lock);
 
 	list_splice(&priv->path_list, &remove_list);
 	INIT_LIST_HEAD(&priv->path_list);
@@ -347,12 +349,15 @@ void ipoib_flush_paths(struct net_device *dev)
 	list_for_each_entry_safe(path, tp, &remove_list, list) {
 		if (path->query)
 			ib_sa_cancel_query(path->query_id, path->query);
-		spin_unlock_irq(&priv->lock);
+		spin_unlock(&priv->lock);
+		spin_unlock_irq(&priv->tx_lock);
 		wait_for_completion(&path->done);
 		path_free(dev, path);
-		spin_lock_irq(&priv->lock);
+		spin_lock_irq(&priv->tx_lock);
+		spin_lock(&priv->lock);
 	}
-	spin_unlock_irq(&priv->lock);
+	spin_unlock(&priv->lock);
+	spin_unlock_irq(&priv->tx_lock);
 }
 
 static void path_rec_completion(int status,
@@ -459,7 +464,7 @@ static int path_rec_start(struct net_device *dev,
 	init_completion(&path->done);
 
 	path->query_id =
-		ib_sa_path_rec_get(priv->ca, priv->port,
+		ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port,
 				   &path->pathrec,
 				   IB_SA_PATH_REC_DGID		|
 				   IB_SA_PATH_REC_SGID		|
@@ -615,7 +620,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct ipoib_neigh *neigh;
 	unsigned long flags;
 
-	if (!spin_trylock_irqsave(&priv->tx_lock, flags))
+	if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
 		return NETDEV_TX_LOCKED;
 
 	/*
@@ -628,7 +633,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		return NETDEV_TX_BUSY;
 	}
 
-	if (skb->dst && skb->dst->neighbour) {
+	if (likely(skb->dst && skb->dst->neighbour)) {
 		if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
 			ipoib_path_lookup(skb, dev);
 			goto out;
@@ -1107,13 +1112,16 @@ static void ipoib_add_one(struct ib_device *device)
 	struct ipoib_dev_priv *priv;
 	int s, e, p;
 
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
 	dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
 	if (!dev_list)
 		return;
 
 	INIT_LIST_HEAD(dev_list);
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		s = 0;
 		e = 0;
 	} else {
@@ -1137,6 +1145,9 @@ static void ipoib_remove_one(struct ib_device *device)
 	struct ipoib_dev_priv *priv, *tmp;
 	struct list_head *dev_list;
 
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
 	dev_list = ib_get_client_data(device, &ipoib_client);
 
 	list_for_each_entry_safe(priv, tmp, dev_list, list) {
@@ -1181,13 +1192,16 @@ static int __init ipoib_init_module(void)
 		goto err_fs;
 	}
 
+	ib_sa_register_client(&ipoib_sa_client);
+
 	ret = ib_register_client(&ipoib_client);
 	if (ret)
-		goto err_wq;
+		goto err_sa;
 
 	return 0;
 
-err_wq:
+err_sa:
+	ib_sa_unregister_client(&ipoib_sa_client);
 	destroy_workqueue(ipoib_workqueue);
 
 err_fs:
@@ -1199,6 +1213,7 @@ err_fs:
 static void __exit ipoib_cleanup_module(void)
 {
 	ib_unregister_client(&ipoib_client);
+	ib_sa_unregister_client(&ipoib_sa_client);
 	ipoib_unregister_debugfs();
 	destroy_workqueue(ipoib_workqueue);
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index b5e6a7be603..3faa1820f0e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -326,6 +326,7 @@ ipoib_mcast_sendonly_join_complete(int status,
 
 		/* Clear the busy flag so we try again */
 		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+		mcast->query = NULL;
 	}
 
 	complete(&mcast->done);
@@ -360,7 +361,7 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
 
 	init_completion(&mcast->done);
 
-	ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec,
+	ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, &rec,
 				     IB_SA_MCMEMBER_REC_MGID		|
 				     IB_SA_MCMEMBER_REC_PORT_GID	|
 				     IB_SA_MCMEMBER_REC_PKEY		|
@@ -471,22 +472,32 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 
 	if (create) {
 		comp_mask |=
-			IB_SA_MCMEMBER_REC_QKEY		|
-			IB_SA_MCMEMBER_REC_SL		|
-			IB_SA_MCMEMBER_REC_FLOW_LABEL	|
-			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
+			IB_SA_MCMEMBER_REC_QKEY			|
+			IB_SA_MCMEMBER_REC_MTU_SELECTOR		|
+			IB_SA_MCMEMBER_REC_MTU			|
+			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS	|
+			IB_SA_MCMEMBER_REC_RATE_SELECTOR	|
+			IB_SA_MCMEMBER_REC_RATE			|
+			IB_SA_MCMEMBER_REC_SL			|
+			IB_SA_MCMEMBER_REC_FLOW_LABEL		|
+			IB_SA_MCMEMBER_REC_HOP_LIMIT;
 
 		rec.qkey	  = priv->broadcast->mcmember.qkey;
+		rec.mtu_selector  = IB_SA_EQ;
+		rec.mtu		  = priv->broadcast->mcmember.mtu;
+		rec.traffic_class = priv->broadcast->mcmember.traffic_class;
+		rec.rate_selector = IB_SA_EQ;
+		rec.rate	  = priv->broadcast->mcmember.rate;
 		rec.sl		  = priv->broadcast->mcmember.sl;
 		rec.flow_label	  = priv->broadcast->mcmember.flow_label;
-		rec.traffic_class = priv->broadcast->mcmember.traffic_class;
+		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
 	}
 
 	init_completion(&mcast->done);
 
-	ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask,
-				     mcast->backoff * 1000, GFP_ATOMIC,
-				     ipoib_mcast_join_complete,
+	ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port,
+				     &rec, comp_mask, mcast->backoff * 1000,
+				     GFP_ATOMIC, ipoib_mcast_join_complete,
 				     mcast, &mcast->query);
 
 	if (ret < 0) {
@@ -527,7 +538,7 @@ void ipoib_mcast_join_task(void *dev_ptr)
 			priv->local_rate = attr.active_speed *
 				ib_width_enum_to_int(attr.active_width);
 		} else
-			ipoib_warn(priv, "ib_query_port failed\n");
+		ipoib_warn(priv, "ib_query_port failed\n");
 	}
 
 	if (!priv->broadcast) {
@@ -680,7 +691,7 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
 	 * Just make one shot at leaving and don't wait for a reply;
 	 * if we fail, too bad.
 	 */
-	ret = ib_sa_mcmember_rec_delete(priv->ca, priv->port, &rec,
+	ret = ib_sa_mcmember_rec_delete(&ipoib_sa_client, priv->ca, priv->port, &rec,
 					IB_SA_MCMEMBER_REC_MGID		|
 					IB_SA_MCMEMBER_REC_PORT_GID	|
 					IB_SA_MCMEMBER_REC_PKEY		|
@@ -794,7 +805,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
 	}
 
 	if (priv->broadcast) {
- 		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
+		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
 		list_add_tail(&priv->broadcast->list, &remove_list);
 		priv->broadcast = NULL;
 	}
diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig
index fead87d1eff..365a1b5f19e 100644
--- a/drivers/infiniband/ulp/iser/Kconfig
+++ b/drivers/infiniband/ulp/iser/Kconfig
@@ -1,6 +1,6 @@
 config INFINIBAND_ISER
 	tristate "ISCSI RDMA Protocol"
-	depends on INFINIBAND && SCSI
+	depends on INFINIBAND && SCSI && INET
 	select SCSI_ISCSI_ATTRS
 	---help---
 	  Support for the ISCSI RDMA Protocol over InfiniBand.  This
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 1437d7ee3b1..e9cf1a9f1e1 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -555,6 +555,7 @@ static struct scsi_host_template iscsi_iser_sht = {
 	.queuecommand           = iscsi_queuecommand,
 	.can_queue		= ISCSI_XMIT_CMDS_MAX - 1,
 	.sg_tablesize           = ISCSI_ISER_SG_TABLESIZE,
+	.max_sectors		= 1024,
 	.cmd_per_lun            = ISCSI_MAX_CMD_PER_LUN,
 	.eh_abort_handler       = iscsi_eh_abort,
 	.eh_host_reset_handler	= iscsi_eh_host_reset,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 3350ba690cf..7e1a411db2a 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -82,8 +82,12 @@
 		       __func__ , ## arg);		\
 	} while (0)
 
+#define SHIFT_4K	12
+#define SIZE_4K	(1UL << SHIFT_4K)
+#define MASK_4K	(~(SIZE_4K-1))
+
 					/* support upto 512KB in one RDMA */
-#define ISCSI_ISER_SG_TABLESIZE         (0x80000 >> PAGE_SHIFT)
+#define ISCSI_ISER_SG_TABLESIZE         (0x80000 >> SHIFT_4K)
 #define ISCSI_ISER_MAX_LUN		256
 #define ISCSI_ISER_MAX_CMD_LEN		16
 
@@ -171,6 +175,7 @@ struct iser_mem_reg {
 	u64  va;
 	u64  len;
 	void *mem_h;
+	int  is_fmr;
 };
 
 struct iser_regd_buf {
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 31950a522a1..d0b03f42658 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -42,6 +42,7 @@
 #include "iscsi_iser.h"
 
 #define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */
+
 /**
  * Decrements the reference count for the
  * registered buffer & releases it
@@ -55,7 +56,7 @@ int iser_regd_buff_release(struct iser_regd_buf *regd_buf)
 	if ((atomic_read(&regd_buf->ref_count) == 0) ||
 	    atomic_dec_and_test(&regd_buf->ref_count)) {
 		/* if we used the dma mr, unreg is just NOP */
-		if (regd_buf->reg.rkey != 0)
+		if (regd_buf->reg.is_fmr)
 			iser_unreg_mem(&regd_buf->reg);
 
 		if (regd_buf->dma_addr) {
@@ -90,9 +91,9 @@ void iser_reg_single(struct iser_device *device,
 	BUG_ON(dma_mapping_error(dma_addr));
 
 	regd_buf->reg.lkey = device->mr->lkey;
-	regd_buf->reg.rkey = 0; /* indicate there's no need to unreg */
 	regd_buf->reg.len  = regd_buf->data_size;
 	regd_buf->reg.va   = dma_addr;
+	regd_buf->reg.is_fmr = 0;
 
 	regd_buf->dma_addr  = dma_addr;
 	regd_buf->direction = direction;
@@ -239,7 +240,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
 	int i;
 
 	/* compute the offset of first element */
-	page_vec->offset = (u64) sg[0].offset;
+	page_vec->offset = (u64) sg[0].offset & ~MASK_4K;
 
 	for (i = 0; i < data->dma_nents; i++) {
 		total_sz += sg_dma_len(&sg[i]);
@@ -247,21 +248,30 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
 		first_addr = sg_dma_address(&sg[i]);
 		last_addr  = first_addr + sg_dma_len(&sg[i]);
 
-		start_aligned = !(first_addr & ~PAGE_MASK);
-		end_aligned   = !(last_addr  & ~PAGE_MASK);
+		start_aligned = !(first_addr & ~MASK_4K);
+		end_aligned   = !(last_addr  & ~MASK_4K);
 
 		/* continue to collect page fragments till aligned or SG ends */
 		while (!end_aligned && (i + 1 < data->dma_nents)) {
 			i++;
 			total_sz += sg_dma_len(&sg[i]);
 			last_addr = sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]);
-			end_aligned = !(last_addr  & ~PAGE_MASK);
+			end_aligned = !(last_addr  & ~MASK_4K);
 		}
 
-		first_addr = first_addr & PAGE_MASK;
-
-		for (page = first_addr; page < last_addr; page += PAGE_SIZE)
-			page_vec->pages[cur_page++] = page;
+		/* handle the 1st page in the 1st DMA element */
+		if (cur_page == 0) {
+			page = first_addr & MASK_4K;
+			page_vec->pages[cur_page] = page;
+			cur_page++;
+			page += SIZE_4K;
+		} else
+			page = first_addr;
+
+		for (; page < last_addr; page += SIZE_4K) {
+			page_vec->pages[cur_page] = page;
+			cur_page++;
+		}
 
 	}
 	page_vec->data_size = total_sz;
@@ -269,8 +279,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
 	return cur_page;
 }
 
-#define MASK_4K			((1UL << 12) - 1) /* 0xFFF */
-#define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & MASK_4K) == 0)
+#define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & ~MASK_4K) == 0)
 
 /**
  * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
@@ -320,9 +329,9 @@ static void iser_data_buf_dump(struct iser_data_buf *data)
 	struct scatterlist *sg = (struct scatterlist *)data->buf;
 	int i;
 
-	for (i = 0; i < data->size; i++)
+	for (i = 0; i < data->dma_nents; i++)
 		iser_err("sg[%d] dma_addr:0x%lX page:0x%p "
-			 "off:%d sz:%d dma_len:%d\n",
+			 "off:0x%x sz:0x%x dma_len:0x%x\n",
 			 i, (unsigned long)sg_dma_address(&sg[i]),
 			 sg[i].page, sg[i].offset,
 			 sg[i].length,sg_dma_len(&sg[i]));
@@ -352,7 +361,7 @@ static void iser_page_vec_build(struct iser_data_buf *data,
 
 	page_vec->length = page_vec_len;
 
-	if (page_vec_len * PAGE_SIZE < page_vec->data_size) {
+	if (page_vec_len * SIZE_4K < page_vec->data_size) {
 		iser_err("page_vec too short to hold this SG\n");
 		iser_data_buf_dump(data);
 		iser_dump_page_vec(page_vec);
@@ -370,15 +379,18 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
 		      enum   iser_data_dir        cmd_dir)
 {
 	struct iser_conn     *ib_conn = iser_ctask->iser_conn->ib_conn;
+	struct iser_device   *device = ib_conn->device;
 	struct iser_data_buf *mem = &iser_ctask->data[cmd_dir];
 	struct iser_regd_buf *regd_buf;
 	int aligned_len;
 	int err;
+	int i;
+	struct scatterlist *sg;
 
 	regd_buf = &iser_ctask->rdma_regd[cmd_dir];
 
 	aligned_len = iser_data_buf_aligned_len(mem);
-	if (aligned_len != mem->size) {
+	if (aligned_len != mem->dma_nents) {
 		iser_err("rdma alignment violation %d/%d aligned\n",
 			 aligned_len, mem->size);
 		iser_data_buf_dump(mem);
@@ -389,10 +401,38 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
 		mem = &iser_ctask->data_copy[cmd_dir];
 	}
 
-	iser_page_vec_build(mem, ib_conn->page_vec);
-	err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg);
-	if (err)
-		return err;
+	/* if there a single dma entry, FMR is not needed */
+	if (mem->dma_nents == 1) {
+		sg = (struct scatterlist *)mem->buf;
+
+		regd_buf->reg.lkey = device->mr->lkey;
+		regd_buf->reg.rkey = device->mr->rkey;
+		regd_buf->reg.len  = sg_dma_len(&sg[0]);
+		regd_buf->reg.va   = sg_dma_address(&sg[0]);
+		regd_buf->reg.is_fmr = 0;
+
+		iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X  "
+			 "va: 0x%08lX sz: %ld]\n",
+			 (unsigned int)regd_buf->reg.lkey,
+			 (unsigned int)regd_buf->reg.rkey,
+			 (unsigned long)regd_buf->reg.va,
+			 (unsigned long)regd_buf->reg.len);
+	} else { /* use FMR for multiple dma entries */
+		iser_page_vec_build(mem, ib_conn->page_vec);
+		err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg);
+		if (err) {
+			iser_data_buf_dump(mem);
+			iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents,
+				 ntoh24(iser_ctask->desc.iscsi_header.dlength));
+			iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
+				 ib_conn->page_vec->data_size, ib_conn->page_vec->length,
+				 ib_conn->page_vec->offset);
+			for (i=0 ; i<ib_conn->page_vec->length ; i++)
+				iser_err("page_vec[%d] = 0x%llx\n", i,
+					 (unsigned long long) ib_conn->page_vec->pages[i]);
+			return err;
+		}
+	}
 
 	/* take a reference on this regd buf such that it will not be released *
 	 * (eg in send dto completion) before we get the scsi response         */
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 72febf1f8ff..ecdca7fc1e4 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -88,8 +88,9 @@ static int iser_create_device_ib_res(struct iser_device *device)
 		     iser_cq_tasklet_fn,
 		     (unsigned long)device);
 
-	device->mr = ib_get_dma_mr(device->pd,
-				   IB_ACCESS_LOCAL_WRITE);
+	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
+				   IB_ACCESS_REMOTE_WRITE |
+				   IB_ACCESS_REMOTE_READ);
 	if (IS_ERR(device->mr))
 		goto dma_mr_err;
 
@@ -150,7 +151,7 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
 	}
 	ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);
 
-	params.page_shift        = PAGE_SHIFT;
+	params.page_shift        = SHIFT_4K;
 	/* when the first/last SG element are not start/end *
 	 * page aligned, the map whould be of N+1 pages     */
 	params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
@@ -604,8 +605,9 @@ int iser_reg_page_vec(struct iser_conn     *ib_conn,
 
 	mem_reg->lkey  = mem->fmr->lkey;
 	mem_reg->rkey  = mem->fmr->rkey;
-	mem_reg->len   = page_vec->length * PAGE_SIZE;
+	mem_reg->len   = page_vec->length * SIZE_4K;
 	mem_reg->va    = io_addr;
+	mem_reg->is_fmr = 1;
 	mem_reg->mem_h = (void *)mem;
 
 	mem_reg->va   += page_vec->offset;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 8257d5a2c8f..44b9e5be668 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -96,6 +96,8 @@ static struct ib_client srp_client = {
 	.remove = srp_remove_one
 };
 
+static struct ib_sa_client srp_sa_client;
+
 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
 {
 	return (struct srp_target_port *) host->hostdata;
@@ -267,7 +269,8 @@ static int srp_lookup_path(struct srp_target_port *target)
 
 	init_completion(&target->done);
 
-	target->path_query_id = ib_sa_path_rec_get(target->srp_host->dev->dev,
+	target->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
+						   target->srp_host->dev->dev,
 						   target->srp_host->port,
 						   &target->path,
 						   IB_SA_PATH_REC_DGID		|
@@ -330,7 +333,7 @@ static int srp_send_req(struct srp_target_port *target)
 	req->priv.req_buf_fmt 	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
 					      SRP_BUF_FORMAT_INDIRECT);
 	/*
-	 * In the published SRP specification (draft rev. 16a), the 
+	 * In the published SRP specification (draft rev. 16a), the
 	 * port identifier format is 8 bytes of ID extension followed
 	 * by 8 bytes of GUID.  Older drafts put the two halves in the
 	 * opposite order, so that the GUID comes first.
@@ -799,13 +802,6 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
 	spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
 }
 
-static void srp_reconnect_work(void *target_ptr)
-{
-	struct srp_target_port *target = target_ptr;
-
-	srp_reconnect_target(target);
-}
-
 static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
 {
 	struct srp_iu *iu;
@@ -858,7 +854,6 @@ static void srp_completion(struct ib_cq *cq, void *target_ptr)
 {
 	struct srp_target_port *target = target_ptr;
 	struct ib_wc wc;
-	unsigned long flags;
 
 	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 	while (ib_poll_cq(cq, 1, &wc) > 0) {
@@ -866,10 +861,6 @@ static void srp_completion(struct ib_cq *cq, void *target_ptr)
 			printk(KERN_ERR PFX "failed %s status %d\n",
 			       wc.wr_id & SRP_OP_RECV ? "receive" : "send",
 			       wc.status);
-			spin_lock_irqsave(target->scsi_host->host_lock, flags);
-			if (target->state == SRP_TARGET_LIVE)
-				schedule_work(&target->work);
-			spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
 			break;
 		}
 
@@ -1461,12 +1452,28 @@ static ssize_t show_zero_req_lim(struct class_device *cdev, char *buf)
 	return sprintf(buf, "%d\n", target->zero_req_lim);
 }
 
-static CLASS_DEVICE_ATTR(id_ext,	S_IRUGO, show_id_ext,		NULL);
-static CLASS_DEVICE_ATTR(ioc_guid,	S_IRUGO, show_ioc_guid,		NULL);
-static CLASS_DEVICE_ATTR(service_id,	S_IRUGO, show_service_id,	NULL);
-static CLASS_DEVICE_ATTR(pkey,		S_IRUGO, show_pkey,		NULL);
-static CLASS_DEVICE_ATTR(dgid,		S_IRUGO, show_dgid,		NULL);
-static CLASS_DEVICE_ATTR(zero_req_lim,	S_IRUGO, show_zero_req_lim,	NULL);
+static ssize_t show_local_ib_port(struct class_device *cdev, char *buf)
+{
+	struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+	return sprintf(buf, "%d\n", target->srp_host->port);
+}
+
+static ssize_t show_local_ib_device(struct class_device *cdev, char *buf)
+{
+	struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+	return sprintf(buf, "%s\n", target->srp_host->dev->dev->name);
+}
+
+static CLASS_DEVICE_ATTR(id_ext,	  S_IRUGO, show_id_ext,		 NULL);
+static CLASS_DEVICE_ATTR(ioc_guid,	  S_IRUGO, show_ioc_guid,	 NULL);
+static CLASS_DEVICE_ATTR(service_id,	  S_IRUGO, show_service_id,	 NULL);
+static CLASS_DEVICE_ATTR(pkey,		  S_IRUGO, show_pkey,		 NULL);
+static CLASS_DEVICE_ATTR(dgid,		  S_IRUGO, show_dgid,		 NULL);
+static CLASS_DEVICE_ATTR(zero_req_lim,	  S_IRUGO, show_zero_req_lim,	 NULL);
+static CLASS_DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,	 NULL);
+static CLASS_DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
 
 static struct class_device_attribute *srp_host_attrs[] = {
 	&class_device_attr_id_ext,
@@ -1475,6 +1482,8 @@ static struct class_device_attribute *srp_host_attrs[] = {
 	&class_device_attr_pkey,
 	&class_device_attr_dgid,
 	&class_device_attr_zero_req_lim,
+	&class_device_attr_local_ib_port,
+	&class_device_attr_local_ib_device,
 	NULL
 };
 
@@ -1705,8 +1714,6 @@ static ssize_t srp_create_target(struct class_device *class_dev,
 	target->scsi_host  = target_host;
 	target->srp_host   = host;
 
-	INIT_WORK(&target->work, srp_reconnect_work, target);
-
 	INIT_LIST_HEAD(&target->free_reqs);
 	INIT_LIST_HEAD(&target->req_queue);
 	for (i = 0; i < SRP_SQ_SIZE; ++i) {
@@ -1895,7 +1902,7 @@ static void srp_add_one(struct ib_device *device)
 	if (IS_ERR(srp_dev->fmr_pool))
 		srp_dev->fmr_pool = NULL;
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		s = 0;
 		e = 0;
 	} else {
@@ -1994,9 +2001,12 @@ static int __init srp_init_module(void)
 		return ret;
 	}
 
+	ib_sa_register_client(&srp_sa_client);
+
 	ret = ib_register_client(&srp_client);
 	if (ret) {
 		printk(KERN_ERR PFX "couldn't register IB client\n");
+		ib_sa_unregister_client(&srp_sa_client);
 		class_unregister(&srp_class);
 		return ret;
 	}
@@ -2007,6 +2017,7 @@ static int __init srp_init_module(void)
 static void __exit srp_cleanup_module(void)
 {
 	ib_unregister_client(&srp_client);
+	ib_sa_unregister_client(&srp_sa_client);
 	class_unregister(&srp_class);
 }