aboutsummaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/ipath
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/ipath')
-rw-r--r--drivers/infiniband/hw/ipath/Kconfig21
-rw-r--r--drivers/infiniband/hw/ipath/Makefile29
-rw-r--r--drivers/infiniband/hw/ipath/ipath_common.h73
-rw-r--r--drivers/infiniband/hw/ipath/ipath_cq.c209
-rw-r--r--drivers/infiniband/hw/ipath/ipath_debug.h2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_diag.c155
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c702
-rw-r--r--drivers/infiniband/hw/ipath/ipath_eeprom.c17
-rw-r--r--drivers/infiniband/hw/ipath/ipath_file_ops.c1009
-rw-r--r--drivers/infiniband/hw/ipath/ipath_fs.c30
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6110.c (renamed from drivers/infiniband/hw/ipath/ipath_ht400.c)190
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6120.c (renamed from drivers/infiniband/hw/ipath/ipath_pe800.c)341
-rw-r--r--drivers/infiniband/hw/ipath/ipath_init_chip.c77
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c304
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h169
-rw-r--r--drivers/infiniband/hw/ipath/ipath_keys.c15
-rw-r--r--drivers/infiniband/hw/ipath/ipath_layer.c1179
-rw-r--r--drivers/infiniband/hw/ipath/ipath_layer.h115
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mad.c353
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mmap.c122
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mr.c15
-rw-r--r--drivers/infiniband/hw/ipath/ipath_qp.c258
-rw-r--r--drivers/infiniband/hw/ipath/ipath_rc.c86
-rw-r--r--drivers/infiniband/hw/ipath/ipath_registers.h47
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ruc.c168
-rw-r--r--drivers/infiniband/hw/ipath/ipath_srq.c263
-rw-r--r--drivers/infiniband/hw/ipath/ipath_stats.c27
-rw-r--r--drivers/infiniband/hw/ipath/ipath_sysfs.c62
-rw-r--r--drivers/infiniband/hw/ipath/ipath_uc.c11
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ud.c182
-rw-r--r--drivers/infiniband/hw/ipath/ipath_user_pages.c56
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c706
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.h270
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs_mcast.c7
-rw-r--r--drivers/infiniband/hw/ipath/ipath_wc_ppc64.c62
-rw-r--r--drivers/infiniband/hw/ipath/ipath_wc_x86_64.c13
-rw-r--r--drivers/infiniband/hw/ipath/verbs_debug.h108
37 files changed, 4350 insertions, 3103 deletions
diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig
index 1db9489f1e8..574a678e7fd 100644
--- a/drivers/infiniband/hw/ipath/Kconfig
+++ b/drivers/infiniband/hw/ipath/Kconfig
@@ -1,16 +1,9 @@
-config IPATH_CORE
- tristate "QLogic InfiniPath Driver"
- depends on 64BIT && PCI_MSI && NET
- ---help---
- This is a low-level driver for QLogic InfiniPath host channel
- adapters (HCAs) based on the HT-400 and PE-800 chips.
-
config INFINIBAND_IPATH
- tristate "QLogic InfiniPath Verbs Driver"
- depends on IPATH_CORE && INFINIBAND
+ tristate "QLogic InfiniPath Driver"
+ depends on PCI_MSI && 64BIT && INFINIBAND
---help---
- This is a driver that provides InfiniBand verbs support for
- QLogic InfiniPath host channel adapters (HCAs). This
- allows these devices to be used with both kernel upper level
- protocols such as IP-over-InfiniBand as well as with userspace
- applications (in conjunction with InfiniBand userspace access).
+ This is a driver for QLogic InfiniPath host channel adapters,
+ including InfiniBand verbs support. This driver allows these
+ devices to be used with both kernel upper level protocols such
+ as IP-over-InfiniBand as well as with userspace applications
+ (in conjunction with InfiniBand userspace access).
diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile
index b0bf7286413..5e29cb0095e 100644
--- a/drivers/infiniband/hw/ipath/Makefile
+++ b/drivers/infiniband/hw/ipath/Makefile
@@ -1,36 +1,35 @@
EXTRA_CFLAGS += -DIPATH_IDSTR='"QLogic kernel.org driver"' \
-DIPATH_KERN_TYPE=0
-obj-$(CONFIG_IPATH_CORE) += ipath_core.o
obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
-ipath_core-y := \
+ib_ipath-y := \
+ ipath_cq.o \
ipath_diag.o \
ipath_driver.o \
ipath_eeprom.o \
ipath_file_ops.o \
ipath_fs.o \
- ipath_ht400.o \
+ ipath_iba6110.o \
+ ipath_iba6120.o \
ipath_init_chip.o \
ipath_intr.o \
- ipath_layer.o \
- ipath_pe800.o \
- ipath_stats.o \
- ipath_sysfs.o \
- ipath_user_pages.o
-
-ipath_core-$(CONFIG_X86_64) += ipath_wc_x86_64.o
-
-ib_ipath-y := \
- ipath_cq.o \
ipath_keys.o \
+ ipath_layer.o \
ipath_mad.o \
+ ipath_mmap.o \
ipath_mr.o \
ipath_qp.o \
ipath_rc.o \
ipath_ruc.o \
ipath_srq.o \
+ ipath_stats.o \
+ ipath_sysfs.o \
ipath_uc.o \
ipath_ud.o \
- ipath_verbs.o \
- ipath_verbs_mcast.o
+ ipath_user_pages.o \
+ ipath_verbs_mcast.o \
+ ipath_verbs.o
+
+ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
+ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 062bd392e7e..54139d39818 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -106,9 +106,9 @@ struct infinipath_stats {
__u64 sps_ether_spkts;
/* number of "ethernet" packets received by driver */
__u64 sps_ether_rpkts;
- /* number of SMA packets sent by driver */
+ /* number of SMA packets sent by driver. Obsolete. */
__u64 sps_sma_spkts;
- /* number of SMA packets received by driver */
+ /* number of SMA packets received by driver. Obsolete. */
__u64 sps_sma_rpkts;
/* number of times all ports rcvhdrq was full and packet dropped */
__u64 sps_hdrqfull;
@@ -138,11 +138,12 @@ struct infinipath_stats {
__u64 sps_pageunlocks;
/*
* Number of packets dropped in kernel other than errors (ether
- * packets if ipath not configured, sma/mad, etc.)
+ * packets if ipath not configured, etc.)
*/
__u64 sps_krdrops;
+ __u64 sps_txeparity; /* PIO buffer parity error, recovered */
/* pad for future growth */
- __u64 __sps_pad[46];
+ __u64 __sps_pad[45];
};
/*
@@ -153,8 +154,6 @@ struct infinipath_stats {
#define IPATH_STATUS_DISABLED 0x2 /* hardware disabled */
/* Device has been disabled via admin request */
#define IPATH_STATUS_ADMIN_DISABLED 0x4
-#define IPATH_STATUS_OIB_SMA 0x8 /* ipath_mad kernel SMA running */
-#define IPATH_STATUS_SMA 0x10 /* user SMA running */
/* Chip has been found and initted */
#define IPATH_STATUS_CHIP_PRESENT 0x20
/* IB link is at ACTIVE, usable for data traffic */
@@ -187,6 +186,9 @@ typedef enum _ipath_ureg {
#define IPATH_RUNTIME_PCIE 0x2
#define IPATH_RUNTIME_FORCE_WC_ORDER 0x4
#define IPATH_RUNTIME_RCVHDR_COPY 0x8
+#define IPATH_RUNTIME_MASTER 0x10
+#define IPATH_RUNTIME_PBC_REWRITE 0x20
+#define IPATH_RUNTIME_LOOSE_DMA_ALIGN 0x40
/*
* This structure is returned by ipath_userinit() immediately after
@@ -204,7 +206,8 @@ struct ipath_base_info {
/* version of software, for feature checking. */
__u32 spi_sw_version;
/* InfiniPath port assigned, goes into sent packets */
- __u32 spi_port;
+ __u16 spi_port;
+ __u16 spi_subport;
/*
* IB MTU, packets IB data must be less than this.
* The MTU is in bytes, and will be a multiple of 4 bytes.
@@ -220,7 +223,7 @@ struct ipath_base_info {
__u32 spi_tidcnt;
/* size of the TID Eager list in infinipath, in entries */
__u32 spi_tidegrcnt;
- /* size of a single receive header queue entry. */
+ /* size of a single receive header queue entry in words. */
__u32 spi_rcvhdrent_size;
/*
* Count of receive header queue entries allocated.
@@ -312,6 +315,12 @@ struct ipath_base_info {
__u32 spi_filler_for_align;
/* address of readonly memory copy of the rcvhdrq tail register. */
__u64 spi_rcvhdr_tailaddr;
+
+ /* shared memory pages for subports if IPATH_RUNTIME_MASTER is set */
+ __u64 spi_subport_uregbase;
+ __u64 spi_subport_rcvegrbuf;
+ __u64 spi_subport_rcvhdr_base;
+
} __attribute__ ((aligned(8)));
@@ -330,12 +339,12 @@ struct ipath_base_info {
/*
* Minor version differences are always compatible
- * a within a major version, however if if user software is larger
+ * a within a major version, however if user software is larger
* than driver software, some new features and/or structure fields
* may not be implemented; the user code must deal with this if it
- * cares, or it must abort after initialization reports the difference
+ * cares, or it must abort after initialization reports the difference.
*/
-#define IPATH_USER_SWMINOR 2
+#define IPATH_USER_SWMINOR 3
#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
@@ -381,7 +390,16 @@ struct ipath_user_info {
*/
__u32 spu_rcvhdrsize;
- __u64 spu_unused; /* kept for compatible layout */
+ /*
+ * If two or more processes wish to share a port, each process
+ * must set the spu_subport_cnt and spu_subport_id to the same
+ * values. The only restriction on the spu_subport_id is that
+ * it be unique for a given node.
+ */
+ __u16 spu_subport_cnt;
+ __u16 spu_subport_id;
+
+ __u32 spu_unused; /* kept for compatible layout */
/*
* address of struct base_info to write to
@@ -394,19 +412,25 @@ struct ipath_user_info {
#define IPATH_CMD_MIN 16
-#define IPATH_CMD_USER_INIT 16 /* set up userspace */
+#define __IPATH_CMD_USER_INIT 16 /* old set up userspace (for old user code) */
#define IPATH_CMD_PORT_INFO 17 /* find out what resources we got */
#define IPATH_CMD_RECV_CTRL 18 /* control receipt of packets */
#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */
#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */
#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */
+#define IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes */
+#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */
+#define IPATH_CMD_USER_INIT 24 /* set up userspace */
-#define IPATH_CMD_MAX 21
+#define IPATH_CMD_MAX 24
struct ipath_port_info {
__u32 num_active; /* number of active units */
__u32 unit; /* unit (chip) assigned to caller */
- __u32 port; /* port on unit assigned to caller */
+ __u16 port; /* port on unit assigned to caller */
+ __u16 subport; /* subport on unit assigned to caller */
+ __u16 num_ports; /* number of ports available on unit */
+ __u16 num_subports; /* number of subport slaves opened on port */
};
struct ipath_tid_info {
@@ -437,6 +461,8 @@ struct ipath_cmd {
__u32 recv_ctrl;
/* partition key to set */
__u16 part_key;
+ /* user address of __u32 bitmask of active slaves */
+ __u64 slave_mask_addr;
} cmd;
};
@@ -465,12 +491,11 @@ struct __ipath_sendpkt {
struct ipath_iovec sps_iov[4];
};
-/* Passed into SMA special file's ->read and ->write methods. */
-struct ipath_sma_pkt
-{
- __u32 unit; /* unit on which to send packet */
- __u64 data; /* address of payload in userspace */
- __u32 len; /* length of payload */
+/* Passed into diag data special file's ->write method. */
+struct ipath_diag_pkt {
+ __u32 unit;
+ __u64 data;
+ __u32 len;
};
/*
@@ -599,6 +624,10 @@ struct infinipath_counters {
/* K_PktFlags bits */
#define INFINIPATH_KPF_INTR 0x1
+#define INFINIPATH_KPF_SUBPORT_MASK 0x3
+#define INFINIPATH_KPF_SUBPORT_SHIFT 1
+
+#define INFINIPATH_MAX_SUBPORT 4
/* SendPIO per-buffer control */
#define INFINIPATH_SP_TEST 0x40
@@ -613,7 +642,7 @@ struct ipath_header {
/*
* Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset -
* 14 bits before ECO change ~28 Dec 03. After that, Vers 4,
- * Port 3, TID 11, offset 14.
+ * Port 4, TID 11, offset 13.
*/
__le32 ver_port_tid_offset;
__le16 chksum;
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 3efee341c9b..87462e0cb4d 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -42,20 +42,29 @@
* @entry: work completion entry to add
* @sig: true if @entry is a solicitated entry
*
- * This may be called with one of the qp->s_lock or qp->r_rq.lock held.
+ * This may be called with qp->s_lock held.
*/
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
{
+ struct ipath_cq_wc *wc;
unsigned long flags;
+ u32 head;
u32 next;
spin_lock_irqsave(&cq->lock, flags);
- if (cq->head == cq->ibcq.cqe)
+ /*
+ * Note that the head pointer might be writable by user processes.
+ * Take care to verify it is a sane value.
+ */
+ wc = cq->queue;
+ head = wc->head;
+ if (head >= (unsigned) cq->ibcq.cqe) {
+ head = cq->ibcq.cqe;
next = 0;
- else
- next = cq->head + 1;
- if (unlikely(next == cq->tail)) {
+ } else
+ next = head + 1;
+ if (unlikely(next == wc->tail)) {
spin_unlock_irqrestore(&cq->lock, flags);
if (cq->ibcq.event_handler) {
struct ib_event ev;
@@ -67,8 +76,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
}
return;
}
- cq->queue[cq->head] = *entry;
- cq->head = next;
+ wc->queue[head] = *entry;
+ wc->head = next;
if (cq->notify == IB_CQ_NEXT_COMP ||
(cq->notify == IB_CQ_SOLICITED && solicited)) {
@@ -101,20 +110,27 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
struct ipath_cq *cq = to_icq(ibcq);
+ struct ipath_cq_wc *wc;
unsigned long flags;
int npolled;
+ u32 tail;
spin_lock_irqsave(&cq->lock, flags);
+ wc = cq->queue;
+ tail = wc->tail;
+ if (tail > (u32) cq->ibcq.cqe)
+ tail = (u32) cq->ibcq.cqe;
for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
- if (cq->tail == cq->head)
+ if (tail == wc->head)
break;
- *entry = cq->queue[cq->tail];
- if (cq->tail == cq->ibcq.cqe)
- cq->tail = 0;
+ *entry = wc->queue[tail];
+ if (tail >= cq->ibcq.cqe)
+ tail = 0;
else
- cq->tail++;
+ tail++;
}
+ wc->tail = tail;
spin_unlock_irqrestore(&cq->lock, flags);
@@ -160,38 +176,79 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
{
struct ipath_ibdev *dev = to_idev(ibdev);
struct ipath_cq *cq;
- struct ib_wc *wc;
+ struct ipath_cq_wc *wc;
struct ib_cq *ret;
- if (entries > ib_ipath_max_cqes) {
+ if (entries < 1 || entries > ib_ipath_max_cqes) {
ret = ERR_PTR(-EINVAL);
- goto bail;
+ goto done;
}
- if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
+ /* Allocate the completion queue structure. */
+ cq = kmalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq) {
ret = ERR_PTR(-ENOMEM);
- goto bail;
+ goto done;
}
/*
- * Need to use vmalloc() if we want to support large #s of
- * entries.
+ * Allocate the completion queue entries and head/tail pointers.
+ * This is allocated separately so that it can be resized and
+ * also mapped into user space.
+ * We need to use vmalloc() in order to support mmap and large
+ * numbers of entries.
*/
- cq = kmalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq) {
+ wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries);
+ if (!wc) {
ret = ERR_PTR(-ENOMEM);
- goto bail;
+ goto bail_cq;
}
/*
- * Need to use vmalloc() if we want to support large #s of entries.
+ * Return the address of the WC as the offset to mmap.
+ * See ipath_mmap() for details.
*/
- wc = vmalloc(sizeof(*wc) * (entries + 1));
- if (!wc) {
- kfree(cq);
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ struct ipath_mmap_info *ip;
+ __u64 offset = (__u64) wc;
+ int err;
+
+ err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_wc;
+ }
+
+ /* Allocate info for ipath_mmap(). */
+ ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+ if (!ip) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_wc;
+ }
+ cq->ip = ip;
+ ip->context = context;
+ ip->obj = wc;
+ kref_init(&ip->ref);
+ ip->mmap_cnt = 0;
+ ip->size = PAGE_ALIGN(sizeof(*wc) +
+ sizeof(struct ib_wc) * entries);
+ spin_lock_irq(&dev->pending_lock);
+ ip->next = dev->pending_mmaps;
+ dev->pending_mmaps = ip;
+ spin_unlock_irq(&dev->pending_lock);
+ } else
+ cq->ip = NULL;
+
+ spin_lock(&dev->n_cqs_lock);
+ if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
+ spin_unlock(&dev->n_cqs_lock);
ret = ERR_PTR(-ENOMEM);
- goto bail;
+ goto bail_wc;
}
+
+ dev->n_cqs_allocated++;
+ spin_unlock(&dev->n_cqs_lock);
+
/*
* ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
* The number of entries should be >= the number requested or return
@@ -202,15 +259,21 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
cq->triggered = 0;
spin_lock_init(&cq->lock);
tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
- cq->head = 0;
- cq->tail = 0;
+ wc->head = 0;
+ wc->tail = 0;
cq->queue = wc;
ret = &cq->ibcq;
- dev->n_cqs_allocated++;
+ goto done;
-bail:
+bail_wc:
+ vfree(wc);
+
+bail_cq:
+ kfree(cq);
+
+done:
return ret;
}
@@ -228,8 +291,13 @@ int ipath_destroy_cq(struct ib_cq *ibcq)
struct ipath_cq *cq = to_icq(ibcq);
tasklet_kill(&cq->comptask);
+ spin_lock(&dev->n_cqs_lock);
dev->n_cqs_allocated--;
- vfree(cq->queue);
+ spin_unlock(&dev->n_cqs_lock);
+ if (cq->ip)
+ kref_put(&cq->ip->ref, ipath_release_mmap_info);
+ else
+ vfree(cq->queue);
kfree(cq);
return 0;
@@ -253,7 +321,7 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
spin_lock_irqsave(&cq->lock, flags);
/*
* Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
- * any other transitions.
+ * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
*/
if (cq->notify != IB_CQ_NEXT_COMP)
cq->notify = notify;
@@ -261,49 +329,96 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
return 0;
}
+/**
+ * ipath_resize_cq - change the size of the CQ
+ * @ibcq: the completion queue
+ *
+ * Returns 0 for success.
+ */
int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
struct ipath_cq *cq = to_icq(ibcq);
- struct ib_wc *wc, *old_wc;
- u32 n;
+ struct ipath_cq_wc *old_wc;
+ struct ipath_cq_wc *wc;
+ u32 head, tail, n;
int ret;
+ if (cqe < 1 || cqe > ib_ipath_max_cqes) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
/*
* Need to use vmalloc() if we want to support large #s of entries.
*/
- wc = vmalloc(sizeof(*wc) * (cqe + 1));
+ wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe);
if (!wc) {
ret = -ENOMEM;
goto bail;
}
+ /*
+ * Return the address of the WC as the offset to mmap.
+ * See ipath_mmap() for details.
+ */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ __u64 offset = (__u64) wc;
+
+ ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
+ if (ret)
+ goto bail;
+ }
+
spin_lock_irq(&cq->lock);
- if (cq->head < cq->tail)
- n = cq->ibcq.cqe + 1 + cq->head - cq->tail;
+ /*
+ * Make sure head and tail are sane since they
+ * might be user writable.
+ */
+ old_wc = cq->queue;
+ head = old_wc->head;
+ if (head > (u32) cq->ibcq.cqe)
+ head = (u32) cq->ibcq.cqe;
+ tail = old_wc->tail;
+ if (tail > (u32) cq->ibcq.cqe)
+ tail = (u32) cq->ibcq.cqe;
+ if (head < tail)
+ n = cq->ibcq.cqe + 1 + head - tail;
else
- n = cq->head - cq->tail;
+ n = head - tail;
if (unlikely((u32)cqe < n)) {
spin_unlock_irq(&cq->lock);
vfree(wc);
ret = -EOVERFLOW;
goto bail;
}
- for (n = 0; cq->tail != cq->head; n++) {
- wc[n] = cq->queue[cq->tail];
- if (cq->tail == cq->ibcq.cqe)
- cq->tail = 0;
+ for (n = 0; tail != head; n++) {
+ wc->queue[n] = old_wc->queue[tail];
+ if (tail == (u32) cq->ibcq.cqe)
+ tail = 0;
else
- cq->tail++;
+ tail++;
}
cq->ibcq.cqe = cqe;
- cq->head = n;
- cq->tail = 0;
- old_wc = cq->queue;
+ wc->head = n;
+ wc->tail = 0;
cq->queue = wc;
spin_unlock_irq(&cq->lock);
vfree(old_wc);
+ if (cq->ip) {
+ struct ipath_ibdev *dev = to_idev(ibcq->device);
+ struct ipath_mmap_info *ip = cq->ip;
+
+ ip->obj = wc;
+ ip->size = PAGE_ALIGN(sizeof(*wc) +
+ sizeof(struct ib_wc) * cqe);
+ spin_lock_irq(&dev->pending_lock);
+ ip->next = dev->pending_mmaps;
+ dev->pending_mmaps = ip;
+ spin_unlock_irq(&dev->pending_lock);
+ }
+
ret = 0;
bail:
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index f415beda0d3..df69f0d80b8 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -60,7 +60,6 @@
#define __IPATH_USER_SEND 0x1000 /* use user mode send */
#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */
#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */
-#define __IPATH_SMADBG 0x8000 /* sma packet debug */
#define __IPATH_IPATHDBG 0x10000 /* Ethernet (IPATH) gen debug */
#define __IPATH_IPATHWARN 0x20000 /* Ethernet (IPATH) warnings */
#define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors */
@@ -84,7 +83,6 @@
/* print mmap/nopage stuff, not using VDBG any more */
#define __IPATH_MMDBG 0x0
#define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */
-#define __IPATH_SMADBG 0x0 /* process startup (init)/exit messages */
#define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) table dump on */
#define __IPATH_IPATHWARN 0x0 /* Ethernet (IPATH) warnings on */
#define __IPATH_IPATHERR 0x0 /* Ethernet (IPATH) errors on */
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 147dd89e21c..29958b6e021 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -41,11 +41,12 @@
* through the /sys/bus/pci resource mmap interface.
*/
+#include <linux/io.h>
#include <linux/pci.h>
+#include <linux/vmalloc.h>
#include <asm/uaccess.h>
#include "ipath_kernel.h"
-#include "ipath_layer.h"
#include "ipath_common.h"
int ipath_diag_inuse;
@@ -274,6 +275,158 @@ bail:
return ret;
}
+static ssize_t ipath_diagpkt_write(struct file *fp,
+ const char __user *data,
+ size_t count, loff_t *off);
+
+static struct file_operations diagpkt_file_ops = {
+ .owner = THIS_MODULE,
+ .write = ipath_diagpkt_write,
+};
+
+static struct cdev *diagpkt_cdev;
+static struct class_device *diagpkt_class_dev;
+
+int __init ipath_diagpkt_add(void)
+{
+ return ipath_cdev_init(IPATH_DIAGPKT_MINOR,
+ "ipath_diagpkt", &diagpkt_file_ops,
+ &diagpkt_cdev, &diagpkt_class_dev);
+}
+
+void __exit ipath_diagpkt_remove(void)
+{
+ ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_class_dev);
+}
+
+/**
+ * ipath_diagpkt_write - write an IB packet
+ * @fp: the diag data device file pointer
+ * @data: ipath_diag_pkt structure saying where to get the packet
+ * @count: size of data to write
+ * @off: unused by this code
+ */
+static ssize_t ipath_diagpkt_write(struct file *fp,
+ const char __user *data,
+ size_t count, loff_t *off)
+{
+ u32 __iomem *piobuf;
+ u32 plen, clen, pbufn;
+ struct ipath_diag_pkt dp;
+ u32 *tmpbuf = NULL;
+ struct ipath_devdata *dd;
+ ssize_t ret = 0;
+ u64 val;
+
+ if (count < sizeof(dp)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ if (copy_from_user(&dp, data, sizeof(dp))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+
+ /* send count must be an exact number of dwords */
+ if (dp.len & 3) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ clen = dp.len >> 2;
+
+ dd = ipath_lookup(dp.unit);
+ if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
+ !dd->ipath_kregbase) {
+ ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n",
+ dp.unit);
+ ret = -ENODEV;
+ goto bail;
+ }
+
+ if (ipath_diag_inuse && !diag_set_link &&
+ !(dd->ipath_flags & IPATH_LINKACTIVE)) {
+ diag_set_link = 1;
+ ipath_cdbg(VERBOSE, "Trying to set to set link active for "
+ "diag pkt\n");
+ ipath_set_linkstate(dd, IPATH_IB_LINKARM);
+ ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
+ }
+
+ if (!(dd->ipath_flags & IPATH_INITTED)) {
+ /* no hardware, freeze, etc. */
+ ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit);
+ ret = -ENODEV;
+ goto bail;
+ }
+ val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
+ if (val != IPATH_IBSTATE_INIT && val != IPATH_IBSTATE_ARM &&
+ val != IPATH_IBSTATE_ACTIVE) {
+ ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
+ dd->ipath_unit, (unsigned long long) val);
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ /* need total length before first word written */
+ /* +1 word is for the qword padding */
+ plen = sizeof(u32) + dp.len;
+
+ if ((plen + 4) > dd->ipath_ibmaxlen) {
+ ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
+ plen - 4, dd->ipath_ibmaxlen);
+ ret = -EINVAL;
+ goto bail; /* before writing pbc */
+ }
+ tmpbuf = vmalloc(plen);
+ if (!tmpbuf) {
+ dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
+ "failing\n");
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ if (copy_from_user(tmpbuf,
+ (const void __user *) (unsigned long) dp.data,
+ dp.len)) {
+ ret = -EFAULT;
+ goto bail;
+ }
+
+ piobuf = ipath_getpiobuf(dd, &pbufn);
+ if (!piobuf) {
+ ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n",
+ dd->ipath_unit);
+ ret = -EBUSY;
+ goto bail;
+ }
+
+ plen >>= 2; /* in dwords */
+
+ if (ipath_debug & __IPATH_PKTDBG)
+ ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
+ dd->ipath_unit, plen - 1, pbufn);
+
+ /* we have to flush after the PBC for correctness on some cpus
+ * or WC buffer can be written out of order */
+ writeq(plen, piobuf);
+ ipath_flush_wc();
+ /* copy all by the trigger word, then flush, so it's written
+ * to chip before trigger word, then write trigger word, then
+ * flush again, so packet is sent. */
+ __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
+ ipath_flush_wc();
+ __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+ ipath_flush_wc();
+
+ ret = sizeof(dp);
+
+bail:
+ vfree(tmpbuf);
+ return ret;
+}
+
static int ipath_diag_release(struct inode *in, struct file *fp)
{
mutex_lock(&ipath_mutex);
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index f98518d912b..12cefa658f3 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -39,7 +39,7 @@
#include <linux/vmalloc.h>
#include "ipath_kernel.h"
-#include "ipath_layer.h"
+#include "ipath_verbs.h"
#include "ipath_common.h"
static void ipath_update_pio_bufs(struct ipath_devdata *);
@@ -51,8 +51,6 @@ const char *ipath_get_unit_name(int unit)
return iname;
}
-EXPORT_SYMBOL_GPL(ipath_get_unit_name);
-
#define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
#define PFX IPATH_DRV_NAME ": "
@@ -60,13 +58,13 @@ EXPORT_SYMBOL_GPL(ipath_get_unit_name);
* The size has to be longer than this string, so we can append
* board/chip information to it in the init code.
*/
-const char ipath_core_version[] = IPATH_IDSTR "\n";
+const char ib_ipath_version[] = IPATH_IDSTR "\n";
static struct idr unit_table;
DEFINE_SPINLOCK(ipath_devs_lock);
LIST_HEAD(ipath_dev_list);
-wait_queue_head_t ipath_sma_state_wait;
+wait_queue_head_t ipath_state_wait;
unsigned ipath_debug = __IPATH_INFO;
@@ -97,16 +95,6 @@ const char *ipath_ibcstatus_str[] = {
"RecovIdle",
};
-/*
- * These variables are initialized in the chip-specific files
- * but are defined here.
- */
-u16 ipath_gpio_sda_num, ipath_gpio_scl_num;
-u64 ipath_gpio_sda, ipath_gpio_scl;
-u64 infinipath_i_bitsextant;
-ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
-u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
-
static void __devexit ipath_remove_one(struct pci_dev *);
static int __devinit ipath_init_one(struct pci_dev *,
const struct pci_device_id *);
@@ -403,10 +391,10 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
/* setup the chip-specific functions, as early as possible. */
switch (ent->device) {
case PCI_DEVICE_ID_INFINIPATH_HT:
- ipath_init_ht400_funcs(dd);
+ ipath_init_iba6110_funcs(dd);
break;
case PCI_DEVICE_ID_INFINIPATH_PE800:
- ipath_init_pe800_funcs(dd);
+ ipath_init_iba6120_funcs(dd);
break;
default:
ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
@@ -440,7 +428,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
}
dd->ipath_pcirev = rev;
+#if defined(__powerpc__)
+ /* There isn't a generic way to specify writethrough mappings */
+ dd->ipath_kregbase = __ioremap(addr, len,
+ (_PAGE_NO_CACHE|_PAGE_WRITETHRU));
+#else
dd->ipath_kregbase = ioremap_nocache(addr, len);
+#endif
if (!dd->ipath_kregbase) {
ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
@@ -503,7 +497,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
ipathfs_add_device(dd);
ipath_user_add(dd);
ipath_diag_add(dd);
- ipath_layer_add(dd);
+ ipath_register_ib_device(dd);
goto bail;
@@ -523,28 +517,146 @@ bail:
return ret;
}
+static void __devexit cleanup_device(struct ipath_devdata *dd)
+{
+ int port;
+
+ ipath_shutdown_device(dd);
+
+ if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
+ /* can't do anything more with chip; needs re-init */
+ *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
+ if (dd->ipath_kregbase) {
+ /*
+ * if we haven't already cleaned up before these are
+ * to ensure any register reads/writes "fail" until
+ * re-init
+ */
+ dd->ipath_kregbase = NULL;
+ dd->ipath_uregbase = 0;
+ dd->ipath_sregbase = 0;
+ dd->ipath_cregbase = 0;
+ dd->ipath_kregsize = 0;
+ }
+ ipath_disable_wc(dd);
+ }
+
+ if (dd->ipath_pioavailregs_dma) {
+ dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
+ (void *) dd->ipath_pioavailregs_dma,
+ dd->ipath_pioavailregs_phys);
+ dd->ipath_pioavailregs_dma = NULL;
+ }
+ if (dd->ipath_dummy_hdrq) {
+ dma_free_coherent(&dd->pcidev->dev,
+ dd->ipath_pd[0]->port_rcvhdrq_size,
+ dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
+ dd->ipath_dummy_hdrq = NULL;
+ }
+
+ if (dd->ipath_pageshadow) {
+ struct page **tmpp = dd->ipath_pageshadow;
+ dma_addr_t *tmpd = dd->ipath_physshadow;
+ int i, cnt = 0;
+
+ ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
+ "locked\n");
+ for (port = 0; port < dd->ipath_cfgports; port++) {
+ int port_tidbase = port * dd->ipath_rcvtidcnt;
+ int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
+ for (i = port_tidbase; i < maxtid; i++) {
+ if (!tmpp[i])
+ continue;
+ pci_unmap_page(dd->pcidev, tmpd[i],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ ipath_release_user_pages(&tmpp[i], 1);
+ tmpp[i] = NULL;
+ cnt++;
+ }
+ }
+ if (cnt) {
+ ipath_stats.sps_pageunlocks += cnt;
+ ipath_cdbg(VERBOSE, "There were still %u expTID "
+ "entries locked\n", cnt);
+ }
+ if (ipath_stats.sps_pagelocks ||
+ ipath_stats.sps_pageunlocks)
+ ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
+ "unlocked via ipath_m{un}lock\n",
+ (unsigned long long)
+ ipath_stats.sps_pagelocks,
+ (unsigned long long)
+ ipath_stats.sps_pageunlocks);
+
+ ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
+ dd->ipath_pageshadow);
+ vfree(dd->ipath_pageshadow);
+ dd->ipath_pageshadow = NULL;
+ }
+
+ /*
+ * free any resources still in use (usually just kernel ports)
+ * at unload; we do for portcnt, not cfgports, because cfgports
+ * could have changed while we were loaded.
+ */
+ for (port = 0; port < dd->ipath_portcnt; port++) {
+ struct ipath_portdata *pd = dd->ipath_pd[port];
+ dd->ipath_pd[port] = NULL;
+ ipath_free_pddata(dd, pd);
+ }
+ kfree(dd->ipath_pd);
+ /*
+ * debuggability, in case some cleanup path tries to use it
+ * after this
+ */
+ dd->ipath_pd = NULL;
+}
+
static void __devexit ipath_remove_one(struct pci_dev *pdev)
{
- struct ipath_devdata *dd;
+ struct ipath_devdata *dd = pci_get_drvdata(pdev);
- ipath_cdbg(VERBOSE, "removing, pdev=%p\n", pdev);
- if (!pdev)
- return;
+ ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
+
+ if (dd->verbs_dev)
+ ipath_unregister_ib_device(dd->verbs_dev);
- dd = pci_get_drvdata(pdev);
- ipath_layer_remove(dd);
ipath_diag_remove(dd);
ipath_user_remove(dd);
ipathfs_remove_device(dd);
ipath_device_remove_group(&pdev->dev, dd);
+
ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
"unit %u\n", dd, (u32) dd->ipath_unit);
- if (dd->ipath_kregbase) {
- ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n",
- dd->ipath_kregbase);
- iounmap((volatile void __iomem *) dd->ipath_kregbase);
- dd->ipath_kregbase = NULL;
- }
+
+ cleanup_device(dd);
+
+ /*
+ * turn off rcv, send, and interrupts for all ports, all drivers
+ * should also hard reset the chip here?
+ * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
+ * for all versions of the driver, if they were allocated
+ */
+ if (pdev->irq) {
+ ipath_cdbg(VERBOSE,
+ "unit %u free_irq of irq %x\n",
+ dd->ipath_unit, pdev->irq);
+ free_irq(pdev->irq, dd);
+ } else
+ ipath_dbg("irq is 0, not doing free_irq "
+ "for unit %u\n", dd->ipath_unit);
+ /*
+ * we check for NULL here, because it's outside
+ * the kregbase check, and we need to call it
+ * after the free_irq. Thus it's possible that
+ * the function pointers were never initialized.
+ */
+ if (dd->ipath_f_cleanup)
+ /* clean up chip-specific stuff */
+ dd->ipath_f_cleanup(dd);
+
+ ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
+ iounmap((volatile void __iomem *) dd->ipath_kregbase);
pci_release_regions(pdev);
ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
pci_disable_device(pdev);
@@ -607,21 +719,23 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
*
* wait up to msecs milliseconds for IB link state change to occur for
* now, take the easy polling route. Currently used only by
- * ipath_layer_set_linkstate. Returns 0 if state reached, otherwise
+ * ipath_set_linkstate. Returns 0 if state reached, otherwise
* -ETIMEDOUT state can have multiple states set, for any of several
* transitions.
*/
-int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
+static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
+ int msecs)
{
- dd->ipath_sma_state_wanted = state;
- wait_event_interruptible_timeout(ipath_sma_state_wait,
+ dd->ipath_state_wanted = state;
+ wait_event_interruptible_timeout(ipath_state_wait,
(dd->ipath_flags & state),
msecs_to_jiffies(msecs));
- dd->ipath_sma_state_wanted = 0;
+ dd->ipath_state_wanted = 0;
if (!(dd->ipath_flags & state)) {
u64 val;
- ipath_cdbg(SMA, "Didn't reach linkstate %s within %u ms\n",
+ ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
+ " ms\n",
/* test INIT ahead of DOWN, both can be set */
(state & IPATH_LINKINIT) ? "INIT" :
((state & IPATH_LINKDOWN) ? "DOWN" :
@@ -754,8 +868,8 @@ static void get_rhf_errstring(u32 err, char *msg, size_t len)
static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum,
int err)
{
- return dd->ipath_port0_skbs ?
- (void *)dd->ipath_port0_skbs[bufnum]->data : NULL;
+ return dd->ipath_port0_skbinfo ?
+ (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
}
/**
@@ -777,88 +891,39 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
*/
/*
- * We need 4 extra bytes for unaligned transfer copying
+ * We need 2 extra bytes for ipath_ether data sent in the
+ * key header. In order to keep everything dword aligned,
+ * we'll reserve 4 bytes.
*/
+ len = dd->ipath_ibmaxlen + 4;
+
if (dd->ipath_flags & IPATH_4BYTE_TID) {
- /* we need a 4KB multiple alignment, and there is no way
+ /* We need a 2KB multiple alignment, and there is no way
* to do it except to allocate extra and then skb_reserve
* enough to bring it up to the right alignment.
*/
- len = dd->ipath_ibmaxlen + 4 + (1 << 11) - 1;
+ len += 2047;
}
- else
- len = dd->ipath_ibmaxlen + 4;
+
skb = __dev_alloc_skb(len, gfp_mask);
if (!skb) {
ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
len);
goto bail;
}
+
+ skb_reserve(skb, 4);
+
if (dd->ipath_flags & IPATH_4BYTE_TID) {
- u32 una = ((1 << 11) - 1) & (unsigned long)(skb->data + 4);
+ u32 una = (unsigned long)skb->data & 2047;
if (una)
- skb_reserve(skb, 4 + (1 << 11) - una);
- else
- skb_reserve(skb, 4);
- } else
- skb_reserve(skb, 4);
+ skb_reserve(skb, 2048 - una);
+ }
bail:
return skb;
}
-/**
- * ipath_rcv_layer - receive a packet for the layered (ethernet) driver
- * @dd: the infinipath device
- * @etail: the sk_buff number
- * @tlen: the total packet length
- * @hdr: the ethernet header
- *
- * Separate routine for better overall optimization
- */
-static void ipath_rcv_layer(struct ipath_devdata *dd, u32 etail,
- u32 tlen, struct ether_header *hdr)
-{
- u32 elen;
- u8 pad, *bthbytes;
- struct sk_buff *skb, *nskb;
-
- if (dd->ipath_port0_skbs &&
- hdr->sub_opcode == IPATH_ITH4X_OPCODE_ENCAP) {
- /*
- * Allocate a new sk_buff to replace the one we give
- * to the network stack.
- */
- nskb = ipath_alloc_skb(dd, GFP_ATOMIC);
- if (!nskb) {
- /* count OK packets that we drop */
- ipath_stats.sps_krdrops++;
- return;
- }
-
- bthbytes = (u8 *) hdr->bth;
- pad = (bthbytes[1] >> 4) & 3;
- /* +CRC32 */
- elen = tlen - (sizeof(*hdr) + pad + sizeof(u32));
-
- skb = dd->ipath_port0_skbs[etail];
- dd->ipath_port0_skbs[etail] = nskb;
- skb_put(skb, elen);
-
- dd->ipath_f_put_tid(dd, etail + (u64 __iomem *)
- ((char __iomem *) dd->ipath_kregbase
- + dd->ipath_rcvegrbase), 0,
- virt_to_phys(nskb->data));
-
- __ipath_layer_rcv(dd, hdr, skb);
-
- /* another ether packet received */
- ipath_stats.sps_ether_rpkts++;
- }
- else if (hdr->sub_opcode == IPATH_ITH4X_OPCODE_LID_ARP)
- __ipath_layer_rcv_lid(dd, hdr);
-}
-
static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
u32 eflags,
u32 l,
@@ -972,26 +1037,17 @@ reloop:
if (unlikely(eflags))
ipath_rcv_hdrerr(dd, eflags, l, etail, rc);
else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
- int ret = __ipath_verbs_rcv(dd, rc + 1,
- ebuf, tlen);
- if (ret == -ENODEV)
- ipath_cdbg(VERBOSE,
- "received IB packet, "
- "not SMA (QP=%x)\n", qp);
- if (dd->ipath_lli_counter)
- dd->ipath_lli_counter--;
-
- } else if (etype == RCVHQ_RCV_TYPE_EAGER) {
- if (qp == IPATH_KD_QP &&
- bthbytes[0] == ipath_layer_rcv_opcode &&
- ebuf)
- ipath_rcv_layer(dd, etail, tlen,
- (struct ether_header *)hdr);
- else
- ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
- "qp=%x), len %x; ignored\n",
- etype, bthbytes[0], qp, tlen);
+ ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, tlen);
+ if (dd->ipath_lli_counter)
+ dd->ipath_lli_counter--;
+ ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
+ "qp=%x), len %x; ignored\n",
+ etype, bthbytes[0], qp, tlen);
}
+ else if (etype == RCVHQ_RCV_TYPE_EAGER)
+ ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
+ "qp=%x), len %x; ignored\n",
+ etype, bthbytes[0], qp, tlen);
else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
be32_to_cpu(hdr->bth[0]) & 0xff);
@@ -1024,7 +1080,8 @@ reloop:
*/
if (l == hdrqtail || (i && !(i&0xf))) {
u64 lval;
- if (l == hdrqtail) /* PE-800 interrupt only on last */
+ if (l == hdrqtail)
+ /* request IBA6120 interrupt only on last */
lval = dd->ipath_rhdrhead_intr_off | l;
else
lval = l;
@@ -1038,7 +1095,7 @@ reloop:
}
if (!dd->ipath_rhdrhead_intr_off && !reloop) {
- /* HT-400 workaround; we can have a race clearing chip
+ /* IBA6110 workaround; we can have a race clearing chip
* interrupt with another interrupt about to be delivered,
* and can clear it before it is delivered on the GPIO
* workaround. By doing the extra check here for the
@@ -1211,7 +1268,7 @@ int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
*
* do appropriate marking as busy, etc.
* returns buffer number if one found (>=0), negative number is error.
- * Used by ipath_sma_send_pkt and ipath_layer_send
+ * Used by ipath_layer_send
*/
u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
{
@@ -1317,13 +1374,6 @@ rescan:
goto bail;
}
- if (updated)
- /*
- * ran out of bufs, now some (at least this one we just
- * got) are now available, so tell the layered driver.
- */
- __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
-
/*
* set next starting place. Since it's just an optimization,
* it doesn't matter who wins on this, so no locking
@@ -1387,6 +1437,9 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd,
"for port %u rcvhdrqtailaddr failed\n",
pd->port_port);
ret = -ENOMEM;
+ dma_free_coherent(&dd->pcidev->dev, amt,
+ pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
+ pd->port_rcvhdrq = NULL;
goto bail;
}
pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
@@ -1408,12 +1461,13 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd,
ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
"hdrtailaddr@%p %llx physical\n",
pd->port_port, pd->port_rcvhdrq,
- pd->port_rcvhdrq_phys, pd->port_rcvhdrtail_kvaddr,
- (unsigned long long)pd->port_rcvhdrqtailaddr_phys);
+ (unsigned long long) pd->port_rcvhdrq_phys,
+ pd->port_rcvhdrtail_kvaddr, (unsigned long long)
+ pd->port_rcvhdrqtailaddr_phys);
/* clear for security and sanity on each use */
memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
- memset((void *)pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
+ memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
/*
* tell chip each time we init it, even if we are re-using previous
@@ -1500,7 +1554,7 @@ int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd)
return ret;
}
-void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
+static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
{
static const char *what[4] = {
[0] = "DOWN",
@@ -1511,7 +1565,7 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) &
INFINIPATH_IBCC_LINKCMD_MASK;
- ipath_cdbg(SMA, "Trying to move unit %u to %s, current ltstate "
+ ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate "
"is %s\n", dd->ipath_unit,
what[linkcmd],
ipath_ibcstatus_str[
@@ -1520,7 +1574,7 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
/* flush all queued sends when going to DOWN or INIT, to be sure that
- * they don't block SMA and other MAD packets */
+ * they don't block MAD packets */
if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) {
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
INFINIPATH_S_ABORT);
@@ -1534,6 +1588,180 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
dd->ipath_ibcctrl | which);
}
+int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
+{
+ u32 lstate;
+ int ret;
+
+ switch (newstate) {
+ case IPATH_IB_LINKDOWN:
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
+ INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
+ case IPATH_IB_LINKDOWN_SLEEP:
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
+ INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
+ case IPATH_IB_LINKDOWN_DISABLE:
+ ipath_set_ib_lstate(dd,
+ INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
+ INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
+ case IPATH_IB_LINKINIT:
+ if (dd->ipath_flags & IPATH_LINKINIT) {
+ ret = 0;
+ goto bail;
+ }
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
+ INFINIPATH_IBCC_LINKCMD_SHIFT);
+ lstate = IPATH_LINKINIT;
+ break;
+
+ case IPATH_IB_LINKARM:
+ if (dd->ipath_flags & IPATH_LINKARMED) {
+ ret = 0;
+ goto bail;
+ }
+ if (!(dd->ipath_flags &
+ (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
+ INFINIPATH_IBCC_LINKCMD_SHIFT);
+ /*
+ * Since the port can transition to ACTIVE by receiving
+ * a non VL 15 packet, wait for either state.
+ */
+ lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
+ break;
+
+ case IPATH_IB_LINKACTIVE:
+ if (dd->ipath_flags & IPATH_LINKACTIVE) {
+ ret = 0;
+ goto bail;
+ }
+ if (!(dd->ipath_flags & IPATH_LINKARMED)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
+ INFINIPATH_IBCC_LINKCMD_SHIFT);
+ lstate = IPATH_LINKACTIVE;
+ break;
+
+ default:
+ ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
+ ret = -EINVAL;
+ goto bail;
+ }
+ ret = ipath_wait_linkstate(dd, lstate, 2000);
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_set_mtu - set the MTU
+ * @dd: the infinipath device
+ * @arg: the new MTU
+ *
+ * we can handle "any" incoming size, the issue here is whether we
+ * need to restrict our outgoing size. For now, we don't do any
+ * sanity checking on this, and we don't deal with what happens to
+ * programs that are already running when the size changes.
+ * NOTE: changing the MTU will usually cause the IBC to go back to
+ * link initialize (IPATH_IBSTATE_INIT) state...
+ */
+int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
+{
+ u32 piosize;
+ int changed = 0;
+ int ret;
+
+ /*
+ * mtu is IB data payload max. It's the largest power of 2 less
+ * than piosize (or even larger, since it only really controls the
+ * largest we can receive; we can send the max of the mtu and
+ * piosize). We check that it's one of the valid IB sizes.
+ */
+ if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
+ arg != 4096) {
+ ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
+ ret = -EINVAL;
+ goto bail;
+ }
+ if (dd->ipath_ibmtu == arg) {
+ ret = 0; /* same as current */
+ goto bail;
+ }
+
+ piosize = dd->ipath_ibmaxlen;
+ dd->ipath_ibmtu = arg;
+
+ if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
+ /* Only if it's not the initial value (or reset to it) */
+ if (piosize != dd->ipath_init_ibmaxlen) {
+ dd->ipath_ibmaxlen = piosize;
+ changed = 1;
+ }
+ } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
+ piosize = arg + IPATH_PIO_MAXIBHDR;
+ ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
+ "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
+ arg);
+ dd->ipath_ibmaxlen = piosize;
+ changed = 1;
+ }
+
+ if (changed) {
+ /*
+ * set the IBC maxpktlength to the size of our pio
+ * buffers in words
+ */
+ u64 ibc = dd->ipath_ibcctrl;
+ ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
+ INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
+
+ piosize = piosize - 2 * sizeof(u32); /* ignore pbc */
+ dd->ipath_ibmaxlen = piosize;
+ piosize /= sizeof(u32); /* in words */
+ /*
+ * for ICRC, which we only send in diag test pkt mode, and
+ * we don't need to worry about that for mtu
+ */
+ piosize += 1;
+
+ ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
+ dd->ipath_ibcctrl = ibc;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+ dd->ipath_f_tidtemplate(dd);
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
+{
+ dd->ipath_lid = arg;
+ dd->ipath_lmc = lmc;
+
+ return 0;
+}
+
/**
* ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
* @dd: the infinipath device
@@ -1637,13 +1865,6 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
INFINIPATH_IBCC_LINKINITCMD_SHIFT);
- /*
- * we are shutting down, so tell the layered driver. We don't do
- * this on just a link state change, much like ethernet, a cable
- * unplug, etc. doesn't change driver state
- */
- ipath_layer_intr(dd, IPATH_LAYER_INT_IF_DOWN);
-
/* disable IBC */
dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
@@ -1699,7 +1920,7 @@ void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
pd->port_rcvhdrq = NULL;
if (pd->port_rcvhdrtail_kvaddr) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
- (void *)pd->port_rcvhdrtail_kvaddr,
+ pd->port_rcvhdrtail_kvaddr,
pd->port_rcvhdrqtailaddr_phys);
pd->port_rcvhdrtail_kvaddr = NULL;
}
@@ -1718,24 +1939,32 @@ void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
dma_free_coherent(&dd->pcidev->dev, size,
base, pd->port_rcvegrbuf_phys[e]);
}
- vfree(pd->port_rcvegrbuf);
+ kfree(pd->port_rcvegrbuf);
pd->port_rcvegrbuf = NULL;
- vfree(pd->port_rcvegrbuf_phys);
+ kfree(pd->port_rcvegrbuf_phys);
pd->port_rcvegrbuf_phys = NULL;
pd->port_rcvegrbuf_chunks = 0;
- } else if (pd->port_port == 0 && dd->ipath_port0_skbs) {
+ } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
unsigned e;
- struct sk_buff **skbs = dd->ipath_port0_skbs;
+ struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
- dd->ipath_port0_skbs = NULL;
- ipath_cdbg(VERBOSE, "free closed port %d ipath_port0_skbs "
- "@ %p\n", pd->port_port, skbs);
+ dd->ipath_port0_skbinfo = NULL;
+ ipath_cdbg(VERBOSE, "free closed port %d "
+ "ipath_port0_skbinfo @ %p\n", pd->port_port,
+ skbinfo);
for (e = 0; e < dd->ipath_rcvegrcnt; e++)
- if (skbs[e])
- dev_kfree_skb(skbs[e]);
- vfree(skbs);
+ if (skbinfo[e].skb) {
+ pci_unmap_single(dd->pcidev, skbinfo[e].phys,
+ dd->ipath_ibmaxlen,
+ PCI_DMA_FROMDEVICE);
+ dev_kfree_skb(skbinfo[e].skb);
+ }
+ vfree(skbinfo);
}
kfree(pd->port_tid_pg_list);
+ vfree(pd->subport_uregbase);
+ vfree(pd->subport_rcvegrbuf);
+ vfree(pd->subport_rcvhdr_base);
kfree(pd);
}
@@ -1743,7 +1972,7 @@ static int __init infinipath_init(void)
{
int ret;
- ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ipath_core_version);
+ ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
/*
* These must be called before the driver is registered with
@@ -1776,8 +2005,18 @@ static int __init infinipath_init(void)
goto bail_group;
}
+ ret = ipath_diagpkt_add();
+ if (ret < 0) {
+ printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
+ "diag data device: error %d\n", -ret);
+ goto bail_ipathfs;
+ }
+
goto bail;
+bail_ipathfs:
+ ipath_exit_ipathfs();
+
bail_group:
ipath_driver_remove_group(&ipath_driver.driver);
@@ -1791,148 +2030,12 @@ bail:
return ret;
}
-static void cleanup_device(struct ipath_devdata *dd)
-{
- int port;
-
- ipath_shutdown_device(dd);
-
- if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
- /* can't do anything more with chip; needs re-init */
- *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
- if (dd->ipath_kregbase) {
- /*
- * if we haven't already cleaned up before these are
- * to ensure any register reads/writes "fail" until
- * re-init
- */
- dd->ipath_kregbase = NULL;
- dd->ipath_uregbase = 0;
- dd->ipath_sregbase = 0;
- dd->ipath_cregbase = 0;
- dd->ipath_kregsize = 0;
- }
- ipath_disable_wc(dd);
- }
-
- if (dd->ipath_pioavailregs_dma) {
- dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
- (void *) dd->ipath_pioavailregs_dma,
- dd->ipath_pioavailregs_phys);
- dd->ipath_pioavailregs_dma = NULL;
- }
- if (dd->ipath_dummy_hdrq) {
- dma_free_coherent(&dd->pcidev->dev,
- dd->ipath_pd[0]->port_rcvhdrq_size,
- dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
- dd->ipath_dummy_hdrq = NULL;
- }
-
- if (dd->ipath_pageshadow) {
- struct page **tmpp = dd->ipath_pageshadow;
- int i, cnt = 0;
-
- ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
- "locked\n");
- for (port = 0; port < dd->ipath_cfgports; port++) {
- int port_tidbase = port * dd->ipath_rcvtidcnt;
- int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
- for (i = port_tidbase; i < maxtid; i++) {
- if (!tmpp[i])
- continue;
- ipath_release_user_pages(&tmpp[i], 1);
- tmpp[i] = NULL;
- cnt++;
- }
- }
- if (cnt) {
- ipath_stats.sps_pageunlocks += cnt;
- ipath_cdbg(VERBOSE, "There were still %u expTID "
- "entries locked\n", cnt);
- }
- if (ipath_stats.sps_pagelocks ||
- ipath_stats.sps_pageunlocks)
- ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
- "unlocked via ipath_m{un}lock\n",
- (unsigned long long)
- ipath_stats.sps_pagelocks,
- (unsigned long long)
- ipath_stats.sps_pageunlocks);
-
- ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
- dd->ipath_pageshadow);
- vfree(dd->ipath_pageshadow);
- dd->ipath_pageshadow = NULL;
- }
-
- /*
- * free any resources still in use (usually just kernel ports)
- * at unload; we do for portcnt, not cfgports, because cfgports
- * could have changed while we were loaded.
- */
- for (port = 0; port < dd->ipath_portcnt; port++) {
- struct ipath_portdata *pd = dd->ipath_pd[port];
- dd->ipath_pd[port] = NULL;
- ipath_free_pddata(dd, pd);
- }
- kfree(dd->ipath_pd);
- /*
- * debuggability, in case some cleanup path tries to use it
- * after this
- */
- dd->ipath_pd = NULL;
-}
-
static void __exit infinipath_cleanup(void)
{
- struct ipath_devdata *dd, *tmp;
- unsigned long flags;
-
ipath_exit_ipathfs();
ipath_driver_remove_group(&ipath_driver.driver);
- spin_lock_irqsave(&ipath_devs_lock, flags);
-
- /*
- * turn off rcv, send, and interrupts for all ports, all drivers
- * should also hard reset the chip here?
- * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
- * for all versions of the driver, if they were allocated
- */
- list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
- if (dd->ipath_kregbase)
- cleanup_device(dd);
-
- if (dd->pcidev) {
- if (dd->pcidev->irq) {
- ipath_cdbg(VERBOSE,
- "unit %u free_irq of irq %x\n",
- dd->ipath_unit, dd->pcidev->irq);
- free_irq(dd->pcidev->irq, dd);
- } else
- ipath_dbg("irq is 0, not doing free_irq "
- "for unit %u\n", dd->ipath_unit);
-
- /*
- * we check for NULL here, because it's outside
- * the kregbase check, and we need to call it
- * after the free_irq. Thus it's possible that
- * the function pointers were never initialized.
- */
- if (dd->ipath_f_cleanup)
- /* clean up chip-specific stuff */
- dd->ipath_f_cleanup(dd);
-
- dd->pcidev = NULL;
- }
- spin_lock_irqsave(&ipath_devs_lock, flags);
- }
-
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
pci_unregister_driver(&ipath_driver);
@@ -1998,5 +2101,22 @@ bail:
return ret;
}
+int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
+{
+ u64 val;
+ if ( new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK ) {
+ return -1;
+ }
+ if ( dd->ipath_rx_pol_inv != new_pol_inv ) {
+ dd->ipath_rx_pol_inv = new_pol_inv;
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+ val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+ INFINIPATH_XGXS_RX_POL_SHIFT);
+ val |= ((u64)dd->ipath_rx_pol_inv) <<
+ INFINIPATH_XGXS_RX_POL_SHIFT;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+ }
+ return 0;
+}
module_init(infinipath_init);
module_exit(infinipath_cleanup);
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index 3313356ab93..a4019a6b756 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -100,9 +100,9 @@ static int i2c_gpio_set(struct ipath_devdata *dd,
gpioval = &dd->ipath_gpio_out;
read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
if (line == i2c_line_scl)
- mask = ipath_gpio_scl;
+ mask = dd->ipath_gpio_scl;
else
- mask = ipath_gpio_sda;
+ mask = dd->ipath_gpio_sda;
if (new_line_state == i2c_line_high)
/* tri-state the output rather than force high */
@@ -119,12 +119,12 @@ static int i2c_gpio_set(struct ipath_devdata *dd,
write_val = 0x0UL;
if (line == i2c_line_scl) {
- write_val <<= ipath_gpio_scl_num;
- *gpioval = *gpioval & ~(1UL << ipath_gpio_scl_num);
+ write_val <<= dd->ipath_gpio_scl_num;
+ *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_scl_num);
*gpioval |= write_val;
} else {
- write_val <<= ipath_gpio_sda_num;
- *gpioval = *gpioval & ~(1UL << ipath_gpio_sda_num);
+ write_val <<= dd->ipath_gpio_sda_num;
+ *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_sda_num);
*gpioval |= write_val;
}
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
@@ -157,9 +157,9 @@ static int i2c_gpio_get(struct ipath_devdata *dd,
read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
/* config line to be an input */
if (line == i2c_line_scl)
- mask = ipath_gpio_scl;
+ mask = dd->ipath_gpio_scl;
else
- mask = ipath_gpio_sda;
+ mask = dd->ipath_gpio_sda;
write_val = read_val & ~mask;
ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
@@ -187,6 +187,7 @@ bail:
static void i2c_wait_for_writes(struct ipath_devdata *dd)
{
(void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ rmb();
}
static void scl_out(struct ipath_devdata *dd, u8 bit)
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index bbaa70e57db..a9ddc6911f6 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -39,9 +39,14 @@
#include <asm/pgtable.h>
#include "ipath_kernel.h"
-#include "ipath_layer.h"
#include "ipath_common.h"
+/*
+ * mmap64 doesn't allow all 64 bits for 32-bit applications
+ * so only use the low 43 bits.
+ */
+#define MMAP64_MASK 0x7FFFFFFFFFFUL
+
static int ipath_open(struct inode *, struct file *);
static int ipath_close(struct inode *, struct file *);
static ssize_t ipath_write(struct file *, const char __user *, size_t,
@@ -58,18 +63,35 @@ static struct file_operations ipath_file_ops = {
.mmap = ipath_mmap
};
-static int ipath_get_base_info(struct ipath_portdata *pd,
+static int ipath_get_base_info(struct file *fp,
void __user *ubase, size_t ubase_size)
{
+ struct ipath_portdata *pd = port_fp(fp);
int ret = 0;
struct ipath_base_info *kinfo = NULL;
struct ipath_devdata *dd = pd->port_dd;
+ unsigned subport_cnt;
+ int shared, master;
+ size_t sz;
+
+ subport_cnt = pd->port_subport_cnt;
+ if (!subport_cnt) {
+ shared = 0;
+ master = 0;
+ subport_cnt = 1;
+ } else {
+ shared = 1;
+ master = !subport_fp(fp);
+ }
- if (ubase_size < sizeof(*kinfo)) {
+ sz = sizeof(*kinfo);
+ /* If port sharing is not requested, allow the old size structure */
+ if (!shared)
+ sz -= 3 * sizeof(u64);
+ if (ubase_size < sz) {
ipath_cdbg(PROC,
- "Base size %lu, need %lu (version mismatch?)\n",
- (unsigned long) ubase_size,
- (unsigned long) sizeof(*kinfo));
+ "Base size %zu, need %zu (version mismatch?)\n",
+ ubase_size, sz);
ret = -EINVAL;
goto bail;
}
@@ -96,7 +118,9 @@ static int ipath_get_base_info(struct ipath_portdata *pd,
kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
pd->port_rcvegrbuf_chunks;
- kinfo->spi_tidcnt = dd->ipath_rcvtidcnt;
+ kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt;
+ if (master)
+ kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt;
/*
* for this use, may be ipath_cfgports summed over all chips that
* are are configured and present
@@ -119,31 +143,75 @@ static int ipath_get_base_info(struct ipath_portdata *pd,
* page_address() macro worked, but in 2.6.11, even that returns the
* full 64 bit address (upper bits all 1's). So far, using the
* physical addresses (or chip offsets, for chip mapping) works, but
- * no doubt some future kernel release will chang that, and we'll be
- * on to yet another method of dealing with this
+ * no doubt some future kernel release will change that, and we'll be
+ * on to yet another method of dealing with this.
*/
kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
- kinfo->spi_rcvhdr_tailaddr = (u64)pd->port_rcvhdrqtailaddr_phys;
+ kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys;
kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
(void *) dd->ipath_statusp -
(void *) dd->ipath_pioavailregs_dma;
- kinfo->spi_piobufbase = (u64) pd->port_piobufs;
- kinfo->__spi_uregbase =
- dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
+ if (!shared) {
+ kinfo->spi_piocnt = dd->ipath_pbufsport;
+ kinfo->spi_piobufbase = (u64) pd->port_piobufs;
+ kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
+ dd->ipath_palign * pd->port_port;
+ } else if (master) {
+ kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) +
+ (dd->ipath_pbufsport % subport_cnt);
+ /* Master's PIO buffers are after all the slave's */
+ kinfo->spi_piobufbase = (u64) pd->port_piobufs +
+ dd->ipath_palign *
+ (dd->ipath_pbufsport - kinfo->spi_piocnt);
+ kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
+ dd->ipath_palign * pd->port_port;
+ } else {
+ unsigned slave = subport_fp(fp) - 1;
+
+ kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
+ kinfo->spi_piobufbase = (u64) pd->port_piobufs +
+ dd->ipath_palign * kinfo->spi_piocnt * slave;
+ kinfo->__spi_uregbase = ((u64) pd->subport_uregbase +
+ PAGE_SIZE * slave) & MMAP64_MASK;
- kinfo->spi_pioindex = dd->ipath_pbufsport * (pd->port_port - 1);
- kinfo->spi_piocnt = dd->ipath_pbufsport;
+ kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * slave) & MMAP64_MASK;
+ kinfo->spi_rcvhdr_tailaddr =
+ (u64) pd->port_rcvhdrqtailaddr_phys & MMAP64_MASK;
+ kinfo->spi_rcv_egrbufs = ((u64) pd->subport_rcvegrbuf +
+ dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) &
+ MMAP64_MASK;
+ }
+
+ kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
+ dd->ipath_palign;
kinfo->spi_pioalign = dd->ipath_palign;
kinfo->spi_qpair = IPATH_KD_QP;
kinfo->spi_piosize = dd->ipath_ibmaxlen;
kinfo->spi_mtu = dd->ipath_ibmaxlen; /* maxlen, not ibmtu */
kinfo->spi_port = pd->port_port;
+ kinfo->spi_subport = subport_fp(fp);
kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
kinfo->spi_hw_version = dd->ipath_revision;
+ if (master) {
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
+ kinfo->spi_subport_uregbase =
+ (u64) pd->subport_uregbase & MMAP64_MASK;
+ kinfo->spi_subport_rcvegrbuf =
+ (u64) pd->subport_rcvegrbuf & MMAP64_MASK;
+ kinfo->spi_subport_rcvhdr_base =
+ (u64) pd->subport_rcvhdr_base & MMAP64_MASK;
+ ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
+ kinfo->spi_port, kinfo->spi_runtime_flags,
+ (unsigned long long) kinfo->spi_subport_uregbase,
+ (unsigned long long) kinfo->spi_subport_rcvegrbuf,
+ (unsigned long long) kinfo->spi_subport_rcvhdr_base);
+ }
+
if (copy_to_user(ubase, kinfo, sizeof(*kinfo)))
ret = -EFAULT;
@@ -155,6 +223,7 @@ bail:
/**
* ipath_tid_update - update a port TID
* @pd: the port
+ * @fp: the ipath device file
* @ti: the TID information
*
* The new implementation as of Oct 2004 is that the driver assigns
@@ -177,11 +246,11 @@ bail:
* virtually contiguous pages, that should change to improve
* performance.
*/
-static int ipath_tid_update(struct ipath_portdata *pd,
+static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
const struct ipath_tid_info *ti)
{
int ret = 0, ntids;
- u32 tid, porttid, cnt, i, tidcnt;
+ u32 tid, porttid, cnt, i, tidcnt, tidoff;
u16 *tidlist;
struct ipath_devdata *dd = pd->port_dd;
u64 physaddr;
@@ -189,6 +258,7 @@ static int ipath_tid_update(struct ipath_portdata *pd,
u64 __iomem *tidbase;
unsigned long tidmap[8];
struct page **pagep = NULL;
+ unsigned subport = subport_fp(fp);
if (!dd->ipath_pageshadow) {
ret = -ENOMEM;
@@ -205,20 +275,34 @@ static int ipath_tid_update(struct ipath_portdata *pd,
ret = -EFAULT;
goto done;
}
- tidcnt = dd->ipath_rcvtidcnt;
- if (cnt >= tidcnt) {
+ porttid = pd->port_port * dd->ipath_rcvtidcnt;
+ if (!pd->port_subport_cnt) {
+ tidcnt = dd->ipath_rcvtidcnt;
+ tid = pd->port_tidcursor;
+ tidoff = 0;
+ } else if (!subport) {
+ tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
+ (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
+ tidoff = dd->ipath_rcvtidcnt - tidcnt;
+ porttid += tidoff;
+ tid = tidcursor_fp(fp);
+ } else {
+ tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
+ tidoff = tidcnt * (subport - 1);
+ porttid += tidoff;
+ tid = tidcursor_fp(fp);
+ }
+ if (cnt > tidcnt) {
/* make sure it all fits in port_tid_pg_list */
dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
"TIDs, only trying max (%u)\n", cnt, tidcnt);
cnt = tidcnt;
}
- pagep = (struct page **)pd->port_tid_pg_list;
- tidlist = (u16 *) (&pagep[cnt]);
+ pagep = &((struct page **) pd->port_tid_pg_list)[tidoff];
+ tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff];
memset(tidmap, 0, sizeof(tidmap));
- tid = pd->port_tidcursor;
/* before decrement; chip actual # */
- porttid = pd->port_port * tidcnt;
ntids = tidcnt;
tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
dd->ipath_rcvtidbase +
@@ -275,16 +359,19 @@ static int ipath_tid_update(struct ipath_portdata *pd,
ret = -ENOMEM;
break;
}
- tidlist[i] = tid;
+ tidlist[i] = tid + tidoff;
ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
- "vaddr %lx\n", i, tid, vaddr);
+ "vaddr %lx\n", i, tid + tidoff, vaddr);
/* we "know" system pages and TID pages are same size */
dd->ipath_pageshadow[porttid + tid] = pagep[i];
+ dd->ipath_physshadow[porttid + tid] = ipath_map_page(
+ dd->pcidev, pagep[i], 0, PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
/*
* don't need atomic or it's overhead
*/
__set_bit(tid, tidmap);
- physaddr = page_to_phys(pagep[i]);
+ physaddr = dd->ipath_physshadow[porttid + tid];
ipath_stats.sps_pagelocks++;
ipath_cdbg(VERBOSE,
"TID %u, vaddr %lx, physaddr %llx pgp %p\n",
@@ -318,6 +405,9 @@ static int ipath_tid_update(struct ipath_portdata *pd,
tid);
dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
dd->ipath_tidinvalid);
+ pci_unmap_page(dd->pcidev,
+ dd->ipath_physshadow[porttid + tid],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
dd->ipath_pageshadow[porttid + tid] = NULL;
ipath_stats.sps_pageunlocks++;
}
@@ -342,7 +432,10 @@ static int ipath_tid_update(struct ipath_portdata *pd,
}
if (tid == tidcnt)
tid = 0;
- pd->port_tidcursor = tid;
+ if (!pd->port_subport_cnt)
+ pd->port_tidcursor = tid;
+ else
+ tidcursor_fp(fp) = tid;
}
done:
@@ -355,6 +448,7 @@ done:
/**
* ipath_tid_free - free a port TID
* @pd: the port
+ * @subport: the subport
* @ti: the TID info
*
* right now we are unlocking one page at a time, but since
@@ -368,7 +462,7 @@ done:
* they pass in to us.
*/
-static int ipath_tid_free(struct ipath_portdata *pd,
+static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
const struct ipath_tid_info *ti)
{
int ret = 0;
@@ -389,11 +483,20 @@ static int ipath_tid_free(struct ipath_portdata *pd,
}
porttid = pd->port_port * dd->ipath_rcvtidcnt;
+ if (!pd->port_subport_cnt)
+ tidcnt = dd->ipath_rcvtidcnt;
+ else if (!subport) {
+ tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
+ (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
+ porttid += dd->ipath_rcvtidcnt - tidcnt;
+ } else {
+ tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
+ porttid += tidcnt * (subport - 1);
+ }
tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
dd->ipath_rcvtidbase +
porttid * sizeof(*tidbase));
- tidcnt = dd->ipath_rcvtidcnt;
limit = sizeof(tidmap) * BITS_PER_BYTE;
if (limit > tidcnt)
/* just in case size changes in future */
@@ -418,6 +521,9 @@ static int ipath_tid_free(struct ipath_portdata *pd,
pd->port_pid, tid);
dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
dd->ipath_tidinvalid);
+ pci_unmap_page(dd->pcidev,
+ dd->ipath_physshadow[porttid + tid],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
ipath_release_user_pages(
&dd->ipath_pageshadow[porttid + tid], 1);
dd->ipath_pageshadow[porttid + tid] = NULL;
@@ -582,20 +688,24 @@ bail:
/**
* ipath_manage_rcvq - manage a port's receive queue
* @pd: the port
+ * @subport: the subport
* @start_stop: action to carry out
*
* start_stop == 0 disables receive on the port, for use in queue
* overflow conditions. start_stop==1 re-enables, to be used to
* re-init the software copy of the head register
*/
-static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
+static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
+ int start_stop)
{
struct ipath_devdata *dd = pd->port_dd;
u64 tval;
- ipath_cdbg(PROC, "%sabling rcv for unit %u port %u\n",
+ ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n",
start_stop ? "en" : "dis", dd->ipath_unit,
- pd->port_port);
+ pd->port_port, subport);
+ if (subport)
+ goto bail;
/* atomically clear receive enable port. */
if (start_stop) {
/*
@@ -610,7 +720,7 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
* updated and correct itself, even in the face of software
* bugs.
*/
- *pd->port_rcvhdrtail_kvaddr = 0;
+ *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0;
set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
&dd->ipath_rcvctrl);
} else
@@ -631,6 +741,7 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
tval = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
}
/* always; new head should be equal to new tail; see above */
+bail:
return 0;
}
@@ -688,6 +799,36 @@ static void ipath_clean_part_key(struct ipath_portdata *pd,
}
}
+/*
+ * Initialize the port data with the receive buffer sizes
+ * so this can be done while the master port is locked.
+ * Otherwise, there is a race with a slave opening the port
+ * and seeing these fields uninitialized.
+ */
+static void init_user_egr_sizes(struct ipath_portdata *pd)
+{
+ struct ipath_devdata *dd = pd->port_dd;
+ unsigned egrperchunk, egrcnt, size;
+
+ /*
+ * to avoid wasting a lot of memory, we allocate 32KB chunks of
+ * physically contiguous memory, advance through it until used up
+ * and then allocate more. Of course, we need memory to store those
+ * extra pointers, now. Started out with 256KB, but under heavy
+ * memory pressure (creating large files and then copying them over
+ * NFS while doing lots of MPI jobs), we hit some allocation
+ * failures, even though we can sleep... (2.6.10) Still get
+ * failures at 64K. 32K is the lowest we can go without wasting
+ * additional memory.
+ */
+ size = 0x8000;
+ egrperchunk = size / dd->ipath_rcvegrbufsize;
+ egrcnt = dd->ipath_rcvegrcnt;
+ pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk;
+ pd->port_rcvegrbufs_perchunk = egrperchunk;
+ pd->port_rcvegrbuf_size = size;
+}
+
/**
* ipath_create_user_egr - allocate eager TID buffers
* @pd: the port to allocate TID buffers for
@@ -703,7 +844,7 @@ static void ipath_clean_part_key(struct ipath_portdata *pd,
static int ipath_create_user_egr(struct ipath_portdata *pd)
{
struct ipath_devdata *dd = pd->port_dd;
- unsigned e, egrcnt, alloced, egrperchunk, chunk, egrsize, egroff;
+ unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
size_t size;
int ret;
gfp_t gfp_flags;
@@ -723,31 +864,18 @@ static int ipath_create_user_egr(struct ipath_portdata *pd)
ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
"offset %x, egrsize %u\n", egrcnt, egroff, egrsize);
- /*
- * to avoid wasting a lot of memory, we allocate 32KB chunks of
- * physically contiguous memory, advance through it until used up
- * and then allocate more. Of course, we need memory to store those
- * extra pointers, now. Started out with 256KB, but under heavy
- * memory pressure (creating large files and then copying them over
- * NFS while doing lots of MPI jobs), we hit some allocation
- * failures, even though we can sleep... (2.6.10) Still get
- * failures at 64K. 32K is the lowest we can go without wasting
- * additional memory.
- */
- size = 0x8000;
- alloced = ALIGN(egrsize * egrcnt, size);
- egrperchunk = size / egrsize;
- chunk = (egrcnt + egrperchunk - 1) / egrperchunk;
- pd->port_rcvegrbuf_chunks = chunk;
- pd->port_rcvegrbufs_perchunk = egrperchunk;
- pd->port_rcvegrbuf_size = size;
- pd->port_rcvegrbuf = vmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]));
+ chunk = pd->port_rcvegrbuf_chunks;
+ egrperchunk = pd->port_rcvegrbufs_perchunk;
+ size = pd->port_rcvegrbuf_size;
+ pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]),
+ GFP_KERNEL);
if (!pd->port_rcvegrbuf) {
ret = -ENOMEM;
goto bail;
}
pd->port_rcvegrbuf_phys =
- vmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]));
+ kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]),
+ GFP_KERNEL);
if (!pd->port_rcvegrbuf_phys) {
ret = -ENOMEM;
goto bail_rcvegrbuf;
@@ -792,105 +920,23 @@ bail_rcvegrbuf_phys:
pd->port_rcvegrbuf_phys[e]);
}
- vfree(pd->port_rcvegrbuf_phys);
+ kfree(pd->port_rcvegrbuf_phys);
pd->port_rcvegrbuf_phys = NULL;
bail_rcvegrbuf:
- vfree(pd->port_rcvegrbuf);
+ kfree(pd->port_rcvegrbuf);
pd->port_rcvegrbuf = NULL;
bail:
return ret;
}
-static int ipath_do_user_init(struct ipath_portdata *pd,
- const struct ipath_user_info *uinfo)
-{
- int ret = 0;
- struct ipath_devdata *dd = pd->port_dd;
- u32 head32;
-
- /* for now, if major version is different, bail */
- if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
- dev_info(&dd->pcidev->dev,
- "User major version %d not same as driver "
- "major %d\n", uinfo->spu_userversion >> 16,
- IPATH_USER_SWMAJOR);
- ret = -ENODEV;
- goto done;
- }
-
- if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
- ipath_dbg("User minor version %d not same as driver "
- "minor %d\n", uinfo->spu_userversion & 0xffff,
- IPATH_USER_SWMINOR);
-
- if (uinfo->spu_rcvhdrsize) {
- ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
- if (ret)
- goto done;
- }
-
- /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
-
- /* for right now, kernel piobufs are at end, so port 1 is at 0 */
- pd->port_piobufs = dd->ipath_piobufbase +
- dd->ipath_pbufsport * (pd->port_port -
- 1) * dd->ipath_palign;
- ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
- pd->port_port, pd->port_piobufs);
-
- /*
- * Now allocate the rcvhdr Q and eager TIDs; skip the TID
- * array for time being. If pd->port_port > chip-supported,
- * we need to do extra stuff here to handle by handling overflow
- * through port 0, someday
- */
- ret = ipath_create_rcvhdrq(dd, pd);
- if (!ret)
- ret = ipath_create_user_egr(pd);
- if (ret)
- goto done;
-
- /*
- * set the eager head register for this port to the current values
- * of the tail pointers, since we don't know if they were
- * updated on last use of the port.
- */
- head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
- ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
- dd->ipath_lastegrheads[pd->port_port] = -1;
- dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
- ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
- pd->port_port, head32);
- pd->port_tidcursor = 0; /* start at beginning after open */
- /*
- * now enable the port; the tail registers will be written to memory
- * by the chip as soon as it sees the write to
- * dd->ipath_kregs->kr_rcvctrl. The update only happens on
- * transition from 0 to 1, so clear it first, then set it as part of
- * enabling the port. This will (very briefly) affect any other
- * open ports, but it shouldn't be long enough to be an issue.
- * We explictly set the in-memory copy to 0 beforehand, so we don't
- * have to wait to be sure the DMA update has happened.
- */
- *pd->port_rcvhdrtail_kvaddr = 0ULL;
- set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
- &dd->ipath_rcvctrl);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
- dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
- dd->ipath_rcvctrl);
-done:
- return ret;
-}
-
/* common code for the mappings on dma_alloc_coherent mem */
static int ipath_mmap_mem(struct vm_area_struct *vma,
- struct ipath_portdata *pd, unsigned len,
- int write_ok, dma_addr_t addr, char *what)
+ struct ipath_portdata *pd, unsigned len, int write_ok,
+ void *kvaddr, char *what)
{
struct ipath_devdata *dd = pd->port_dd;
- unsigned pfn = (unsigned long)addr >> PAGE_SHIFT;
+ unsigned long pfn;
int ret;
if ((vma->vm_end - vma->vm_start) > len) {
@@ -913,17 +959,17 @@ static int ipath_mmap_mem(struct vm_area_struct *vma,
vma->vm_flags &= ~VM_MAYWRITE;
}
+ pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
ret = remap_pfn_range(vma, vma->vm_start, pfn,
len, vma->vm_page_prot);
if (ret)
- dev_info(&dd->pcidev->dev,
- "%s port%u mmap of %lx, %x bytes r%c failed: %d\n",
- what, pd->port_port, (unsigned long)addr, len,
- write_ok?'w':'o', ret);
+ dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x "
+ "bytes r%c failed: %d\n", what, pd->port_port,
+ pfn, len, write_ok?'w':'o', ret);
else
- ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes r%c\n",
- what, pd->port_port, (unsigned long)addr, len,
- write_ok?'w':'o');
+ ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes "
+ "r%c\n", what, pd->port_port, pfn, len,
+ write_ok?'w':'o');
bail:
return ret;
}
@@ -958,7 +1004,8 @@ static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
static int mmap_piobufs(struct vm_area_struct *vma,
struct ipath_devdata *dd,
- struct ipath_portdata *pd)
+ struct ipath_portdata *pd,
+ unsigned piobufs, unsigned piocnt)
{
unsigned long phys;
int ret;
@@ -969,31 +1016,32 @@ static int mmap_piobufs(struct vm_area_struct *vma,
* process data, and catches users who might try to read the i/o
* space due to a bug.
*/
- if ((vma->vm_end - vma->vm_start) >
- (dd->ipath_pbufsport * dd->ipath_palign)) {
+ if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) {
dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
"reqlen %lx > PAGE\n",
vma->vm_end - vma->vm_start);
- ret = -EFAULT;
+ ret = -EINVAL;
goto bail;
}
- phys = dd->ipath_physaddr + pd->port_piobufs;
+ phys = dd->ipath_physaddr + piobufs;
/*
* Don't mark this as non-cached, or we don't get the
* write combining behavior we want on the PIO buffers!
*/
- if (vma->vm_flags & VM_READ) {
- dev_info(&dd->pcidev->dev,
- "Can't map piobufs as readable (flags=%lx)\n",
- vma->vm_flags);
- ret = -EPERM;
- goto bail;
- }
+#if defined(__powerpc__)
+ /* There isn't a generic way to specify writethrough mappings */
+ pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+ pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
+ pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
+#endif
- /* don't allow them to later change to readable with mprotect */
+ /*
+ * don't allow them to later change to readable with mprotect (for when
+ * not initially mapped readable, as is normally the case)
+ */
vma->vm_flags &= ~VM_MAYREAD;
vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
@@ -1010,7 +1058,7 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
struct ipath_devdata *dd = pd->port_dd;
unsigned long start, size;
size_t total_size, i;
- dma_addr_t *phys;
+ unsigned long pfn;
int ret;
size = pd->port_rcvegrbuf_size;
@@ -1020,7 +1068,7 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
"reqlen %lx > actual %lx\n",
vma->vm_end - vma->vm_start,
(unsigned long) total_size);
- ret = -EFAULT;
+ ret = -EINVAL;
goto bail;
}
@@ -1034,11 +1082,11 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
vma->vm_flags &= ~VM_MAYWRITE;
start = vma->vm_start;
- phys = pd->port_rcvegrbuf_phys;
for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
- ret = remap_pfn_range(vma, start, phys[i] >> PAGE_SHIFT,
- size, vma->vm_page_prot);
+ pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT;
+ ret = remap_pfn_range(vma, start, pfn, size,
+ vma->vm_page_prot);
if (ret < 0)
goto bail;
}
@@ -1048,6 +1096,122 @@ bail:
return ret;
}
+/*
+ * ipath_file_vma_nopage - handle a VMA page fault.
+ */
+static struct page *ipath_file_vma_nopage(struct vm_area_struct *vma,
+ unsigned long address, int *type)
+{
+ unsigned long offset = address - vma->vm_start;
+ struct page *page = NOPAGE_SIGBUS;
+ void *pageptr;
+
+ /*
+ * Convert the vmalloc address into a struct page.
+ */
+ pageptr = (void *)(offset + (vma->vm_pgoff << PAGE_SHIFT));
+ page = vmalloc_to_page(pageptr);
+ if (!page)
+ goto out;
+
+ /* Increment the reference count. */
+ get_page(page);
+ if (type)
+ *type = VM_FAULT_MINOR;
+out:
+ return page;
+}
+
+static struct vm_operations_struct ipath_file_vm_ops = {
+ .nopage = ipath_file_vma_nopage,
+};
+
+static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
+ struct ipath_portdata *pd, unsigned subport)
+{
+ unsigned long len;
+ struct ipath_devdata *dd;
+ void *addr;
+ size_t size;
+ int ret;
+
+ /* If the port is not shared, all addresses should be physical */
+ if (!pd->port_subport_cnt) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ dd = pd->port_dd;
+ size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
+
+ /*
+ * Master has all the slave uregbase, rcvhdrq, and
+ * rcvegrbufs mmapped.
+ */
+ if (subport == 0) {
+ unsigned num_slaves = pd->port_subport_cnt - 1;
+
+ if (pgaddr == ((u64) pd->subport_uregbase & MMAP64_MASK)) {
+ addr = pd->subport_uregbase;
+ size = PAGE_SIZE * num_slaves;
+ } else if (pgaddr == ((u64) pd->subport_rcvhdr_base &
+ MMAP64_MASK)) {
+ addr = pd->subport_rcvhdr_base;
+ size = pd->port_rcvhdrq_size * num_slaves;
+ } else if (pgaddr == ((u64) pd->subport_rcvegrbuf &
+ MMAP64_MASK)) {
+ addr = pd->subport_rcvegrbuf;
+ size *= num_slaves;
+ } else {
+ ret = -EINVAL;
+ goto bail;
+ }
+ } else if (pgaddr == (((u64) pd->subport_uregbase +
+ PAGE_SIZE * (subport - 1)) & MMAP64_MASK)) {
+ addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1);
+ size = PAGE_SIZE;
+ } else if (pgaddr == (((u64) pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * (subport - 1)) &
+ MMAP64_MASK)) {
+ addr = pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * (subport - 1);
+ size = pd->port_rcvhdrq_size;
+ } else if (pgaddr == (((u64) pd->subport_rcvegrbuf +
+ size * (subport - 1)) & MMAP64_MASK)) {
+ addr = pd->subport_rcvegrbuf + size * (subport - 1);
+ /* rcvegrbufs are read-only on the slave */
+ if (vma->vm_flags & VM_WRITE) {
+ dev_info(&dd->pcidev->dev,
+ "Can't map eager buffers as "
+ "writable (flags=%lx)\n", vma->vm_flags);
+ ret = -EPERM;
+ goto bail;
+ }
+ /*
+ * Don't allow permission to later change to writeable
+ * with mprotect.
+ */
+ vma->vm_flags &= ~VM_MAYWRITE;
+ } else {
+ ret = -EINVAL;
+ goto bail;
+ }
+ len = vma->vm_end - vma->vm_start;
+ if (len > size) {
+ ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
+ vma->vm_ops = &ipath_file_vm_ops;
+ vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+ ret = 0;
+
+bail:
+ return ret;
+}
+
/**
* ipath_mmap - mmap various structures into user space
* @fp: the file pointer
@@ -1063,73 +1227,99 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
struct ipath_portdata *pd;
struct ipath_devdata *dd;
u64 pgaddr, ureg;
+ unsigned piobufs, piocnt;
int ret;
pd = port_fp(fp);
+ if (!pd) {
+ ret = -EINVAL;
+ goto bail;
+ }
dd = pd->port_dd;
/*
* This is the ipath_do_user_init() code, mapping the shared buffers
* into the user process. The address referred to by vm_pgoff is the
- * virtual, not physical, address; we only do one mmap for each
- * space mapped.
+ * file offset passed via mmap(). For shared ports, this is the
+ * kernel vmalloc() address of the pages to share with the master.
+ * For non-shared or master ports, this is a physical address.
+ * We only do one mmap for each space mapped.
*/
pgaddr = vma->vm_pgoff << PAGE_SHIFT;
/*
- * Must fit in 40 bits for our hardware; some checked elsewhere,
- * but we'll be paranoid. Check for 0 is mostly in case one of the
- * allocations failed, but user called mmap anyway. We want to catch
- * that before it can match.
+ * Check for 0 in case one of the allocations failed, but user
+ * called mmap anyway.
*/
- if (!pgaddr || pgaddr >= (1ULL<<40)) {
- ipath_dev_err(dd, "Bad phys addr %llx, start %lx, end %lx\n",
- (unsigned long long)pgaddr, vma->vm_start, vma->vm_end);
- return -EINVAL;
+ if (!pgaddr) {
+ ret = -EINVAL;
+ goto bail;
}
- /* just the offset of the port user registers, not physical addr */
- ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
-
- ipath_cdbg(MM, "ushare: pgaddr %llx vm_start=%lx, vmlen %lx\n",
+ ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n",
(unsigned long long) pgaddr, vma->vm_start,
- vma->vm_end - vma->vm_start);
+ vma->vm_end - vma->vm_start, dd->ipath_unit,
+ pd->port_port, subport_fp(fp));
- if (vma->vm_start & (PAGE_SIZE-1)) {
- ipath_dev_err(dd,
- "vm_start not aligned: %lx, end=%lx phys %lx\n",
- vma->vm_start, vma->vm_end, (unsigned long)pgaddr);
- ret = -EINVAL;
+ /*
+ * Physical addresses must fit in 40 bits for our hardware.
+ * Check for kernel virtual addresses first, anything else must
+ * match a HW or memory address.
+ */
+ if (pgaddr >= (1ULL<<40)) {
+ ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
+ goto bail;
+ }
+
+ if (!pd->port_subport_cnt) {
+ /* port is not shared */
+ ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
+ piocnt = dd->ipath_pbufsport;
+ piobufs = pd->port_piobufs;
+ } else if (!subport_fp(fp)) {
+ /* caller is the master */
+ ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
+ piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) +
+ (dd->ipath_pbufsport % pd->port_subport_cnt);
+ piobufs = pd->port_piobufs +
+ dd->ipath_palign * (dd->ipath_pbufsport - piocnt);
+ } else {
+ unsigned slave = subport_fp(fp) - 1;
+
+ /* caller is a slave */
+ ureg = 0;
+ piocnt = dd->ipath_pbufsport / pd->port_subport_cnt;
+ piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
}
- else if (pgaddr == ureg)
+
+ if (pgaddr == ureg)
ret = mmap_ureg(vma, dd, ureg);
- else if (pgaddr == pd->port_piobufs)
- ret = mmap_piobufs(vma, dd, pd);
- else if (pgaddr == (u64) pd->port_rcvegr_phys)
+ else if (pgaddr == piobufs)
+ ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt);
+ else if (pgaddr == dd->ipath_pioavailregs_phys)
+ /* in-memory copy of pioavail registers */
+ ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
+ (void *) dd->ipath_pioavailregs_dma,
+ "pioavail registers");
+ else if (subport_fp(fp))
+ /* Subports don't mmap the physical receive buffers */
+ ret = -EINVAL;
+ else if (pgaddr == pd->port_rcvegr_phys)
ret = mmap_rcvegrbufs(vma, pd);
- else if (pgaddr == (u64) pd->port_rcvhdrq_phys) {
+ else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
/*
- * The rcvhdrq itself; readonly except on HT-400 (so have
+ * The rcvhdrq itself; readonly except on HT (so have
* to allow writable mapping), multiple pages, contiguous
* from an i/o perspective.
*/
- unsigned total_size =
- ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize
- * sizeof(u32), PAGE_SIZE);
- ret = ipath_mmap_mem(vma, pd, total_size, 1,
- pd->port_rcvhdrq_phys,
+ ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1,
+ pd->port_rcvhdrq,
"rcvhdrq");
- }
- else if (pgaddr == (u64)pd->port_rcvhdrqtailaddr_phys)
+ else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys)
/* in-memory copy of rcvhdrq tail register */
ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
- pd->port_rcvhdrqtailaddr_phys,
+ pd->port_rcvhdrtail_kvaddr,
"rcvhdrq tail");
- else if (pgaddr == dd->ipath_pioavailregs_phys)
- /* in-memory copy of pioavail registers */
- ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
- dd->ipath_pioavailregs_phys,
- "pioavail registers");
else
ret = -EINVAL;
@@ -1137,9 +1327,10 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
if (ret < 0)
dev_info(&dd->pcidev->dev,
- "Failure %d on addr %lx, off %lx\n",
- -ret, vma->vm_start, vma->vm_pgoff);
-
+ "Failure %d on off %llx len %lx\n",
+ -ret, (unsigned long long)pgaddr,
+ vma->vm_end - vma->vm_start);
+bail:
return ret;
}
@@ -1149,9 +1340,12 @@ static unsigned int ipath_poll(struct file *fp,
struct ipath_portdata *pd;
u32 head, tail;
int bit;
+ unsigned pollflag = 0;
struct ipath_devdata *dd;
pd = port_fp(fp);
+ if (!pd)
+ goto bail;
dd = pd->port_dd;
bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT;
@@ -1174,7 +1368,7 @@ static unsigned int ipath_poll(struct file *fp,
if (tail == head) {
set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
- if(dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
+ if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
(void)ipath_write_ureg(dd, ur_rcvhdrhead,
dd->ipath_rhdrhead_intr_off
| head, pd->port_port);
@@ -1185,9 +1379,12 @@ static unsigned int ipath_poll(struct file *fp,
clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
pd->port_rcvwait_to++;
}
+ else
+ pollflag = POLLIN | POLLRDNORM;
}
else {
/* it's already happened; don't do wait_event overhead */
+ pollflag = POLLIN | POLLRDNORM;
pd->port_rcvnowait++;
}
@@ -1195,18 +1392,80 @@ static unsigned int ipath_poll(struct file *fp,
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
- return 0;
+bail:
+ return pollflag;
+}
+
+static int init_subports(struct ipath_devdata *dd,
+ struct ipath_portdata *pd,
+ const struct ipath_user_info *uinfo)
+{
+ int ret = 0;
+ unsigned num_slaves;
+ size_t size;
+
+ /* Old user binaries don't know about subports */
+ if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
+ goto bail;
+ /*
+ * If the user is requesting zero or one port,
+ * skip the subport allocation.
+ */
+ if (uinfo->spu_subport_cnt <= 1)
+ goto bail;
+ if (uinfo->spu_subport_cnt > 4) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ num_slaves = uinfo->spu_subport_cnt - 1;
+ pd->subport_uregbase = vmalloc(PAGE_SIZE * num_slaves);
+ if (!pd->subport_uregbase) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+ /* Note: pd->port_rcvhdrq_size isn't initialized yet. */
+ size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
+ sizeof(u32), PAGE_SIZE) * num_slaves;
+ pd->subport_rcvhdr_base = vmalloc(size);
+ if (!pd->subport_rcvhdr_base) {
+ ret = -ENOMEM;
+ goto bail_ureg;
+ }
+
+ pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks *
+ pd->port_rcvegrbuf_size *
+ num_slaves);
+ if (!pd->subport_rcvegrbuf) {
+ ret = -ENOMEM;
+ goto bail_rhdr;
+ }
+
+ pd->port_subport_cnt = uinfo->spu_subport_cnt;
+ pd->port_subport_id = uinfo->spu_subport_id;
+ pd->active_slaves = 1;
+ goto bail;
+
+bail_rhdr:
+ vfree(pd->subport_rcvhdr_base);
+bail_ureg:
+ vfree(pd->subport_uregbase);
+ pd->subport_uregbase = NULL;
+bail:
+ return ret;
}
static int try_alloc_port(struct ipath_devdata *dd, int port,
- struct file *fp)
+ struct file *fp,
+ const struct ipath_user_info *uinfo)
{
+ struct ipath_portdata *pd;
int ret;
- if (!dd->ipath_pd[port]) {
- void *p, *ptmp;
+ if (!(pd = dd->ipath_pd[port])) {
+ void *ptmp;
- p = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
+ pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
/*
* Allocate memory for use in ipath_tid_update() just once
@@ -1216,34 +1475,36 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
dd->ipath_rcvtidcnt * sizeof(struct page **),
GFP_KERNEL);
- if (!p || !ptmp) {
+ if (!pd || !ptmp) {
ipath_dev_err(dd, "Unable to allocate portdata "
"memory, failing open\n");
ret = -ENOMEM;
- kfree(p);
+ kfree(pd);
kfree(ptmp);
goto bail;
}
- dd->ipath_pd[port] = p;
+ dd->ipath_pd[port] = pd;
dd->ipath_pd[port]->port_port = port;
dd->ipath_pd[port]->port_dd = dd;
dd->ipath_pd[port]->port_tid_pg_list = ptmp;
init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
}
- if (!dd->ipath_pd[port]->port_cnt) {
- dd->ipath_pd[port]->port_cnt = 1;
- fp->private_data = (void *) dd->ipath_pd[port];
+ if (!pd->port_cnt) {
+ pd->userversion = uinfo->spu_userversion;
+ init_user_egr_sizes(pd);
+ if ((ret = init_subports(dd, pd, uinfo)) != 0)
+ goto bail;
ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
current->comm, current->pid, dd->ipath_unit,
port);
- dd->ipath_pd[port]->port_pid = current->pid;
- strncpy(dd->ipath_pd[port]->port_comm, current->comm,
- sizeof(dd->ipath_pd[port]->port_comm));
+ pd->port_cnt = 1;
+ port_fp(fp) = pd;
+ pd->port_pid = current->pid;
+ strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
ipath_stats.sps_ports++;
ret = 0;
- goto bail;
- }
- ret = -EBUSY;
+ } else
+ ret = -EBUSY;
bail:
return ret;
@@ -1259,7 +1520,8 @@ static inline int usable(struct ipath_devdata *dd)
| IPATH_LINKUNK));
}
-static int find_free_port(int unit, struct file *fp)
+static int find_free_port(int unit, struct file *fp,
+ const struct ipath_user_info *uinfo)
{
struct ipath_devdata *dd = ipath_lookup(unit);
int ret, i;
@@ -1274,8 +1536,8 @@ static int find_free_port(int unit, struct file *fp)
goto bail;
}
- for (i = 0; i < dd->ipath_cfgports; i++) {
- ret = try_alloc_port(dd, i, fp);
+ for (i = 1; i < dd->ipath_cfgports; i++) {
+ ret = try_alloc_port(dd, i, fp, uinfo);
if (ret != -EBUSY)
goto bail;
}
@@ -1285,26 +1547,27 @@ bail:
return ret;
}
-static int find_best_unit(struct file *fp)
+static int find_best_unit(struct file *fp,
+ const struct ipath_user_info *uinfo)
{
int ret = 0, i, prefunit = -1, devmax;
int maxofallports, npresent, nup;
int ndev;
- (void) ipath_count_units(&npresent, &nup, &maxofallports);
+ devmax = ipath_count_units(&npresent, &nup, &maxofallports);
/*
* This code is present to allow a knowledgeable person to
* specify the layout of processes to processors before opening
* this driver, and then we'll assign the process to the "closest"
- * HT-400 to that processor (we assume reasonable connectivity,
+ * InfiniPath chip to that processor (we assume reasonable connectivity,
* for now). This code assumes that if affinity has been set
* before this point, that at most one cpu is set; for now this
* is reasonable. I check for both cpus_empty() and cpus_full(),
* in case some kernel variant sets none of the bits when no
* affinity is set. 2.6.11 and 12 kernels have all present
* cpus set. Some day we'll have to fix it up further to handle
- * a cpu subset. This algorithm fails for two HT-400's connected
+ * a cpu subset. This algorithm fails for two HT chips connected
* in tunnel fashion. Eventually this needs real topology
* information. There may be some issues with dual core numbering
* as well. This needs more work prior to release.
@@ -1338,8 +1601,6 @@ static int find_best_unit(struct file *fp)
if (prefunit != -1)
devmax = prefunit + 1;
- else
- devmax = ipath_count_units(NULL, NULL, NULL);
recheck:
for (i = 1; i < maxofallports; i++) {
for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
@@ -1354,7 +1615,7 @@ recheck:
* next.
*/
continue;
- ret = try_alloc_port(dd, i, fp);
+ ret = try_alloc_port(dd, i, fp, uinfo);
if (!ret)
goto done;
}
@@ -1390,22 +1651,183 @@ done:
return ret;
}
+static int find_shared_port(struct file *fp,
+ const struct ipath_user_info *uinfo)
+{
+ int devmax, ndev, i;
+ int ret = 0;
+
+ devmax = ipath_count_units(NULL, NULL, NULL);
+
+ for (ndev = 0; ndev < devmax; ndev++) {
+ struct ipath_devdata *dd = ipath_lookup(ndev);
+
+ if (!dd)
+ continue;
+ for (i = 1; i < dd->ipath_cfgports; i++) {
+ struct ipath_portdata *pd = dd->ipath_pd[i];
+
+ /* Skip ports which are not yet open */
+ if (!pd || !pd->port_cnt)
+ continue;
+ /* Skip port if it doesn't match the requested one */
+ if (pd->port_subport_id != uinfo->spu_subport_id)
+ continue;
+ /* Verify the sharing process matches the master */
+ if (pd->port_subport_cnt != uinfo->spu_subport_cnt ||
+ pd->userversion != uinfo->spu_userversion ||
+ pd->port_cnt >= pd->port_subport_cnt) {
+ ret = -EINVAL;
+ goto done;
+ }
+ port_fp(fp) = pd;
+ subport_fp(fp) = pd->port_cnt++;
+ tidcursor_fp(fp) = 0;
+ pd->active_slaves |= 1 << subport_fp(fp);
+ ipath_cdbg(PROC,
+ "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
+ current->comm, current->pid,
+ subport_fp(fp),
+ pd->port_comm, pd->port_pid,
+ dd->ipath_unit, pd->port_port);
+ ret = 1;
+ goto done;
+ }
+ }
+
+done:
+ return ret;
+}
+
static int ipath_open(struct inode *in, struct file *fp)
{
- int ret, user_minor;
+ /* The real work is performed later in ipath_assign_port() */
+ fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
+ return fp->private_data ? 0 : -ENOMEM;
+}
+
+
+/* Get port early, so can set affinity prior to memory allocation */
+static int ipath_assign_port(struct file *fp,
+ const struct ipath_user_info *uinfo)
+{
+ int ret;
+ int i_minor;
+ unsigned swminor;
+
+ /* Check to be sure we haven't already initialized this file */
+ if (port_fp(fp)) {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ /* for now, if major version is different, bail */
+ if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
+ ipath_dbg("User major version %d not same as driver "
+ "major %d\n", uinfo->spu_userversion >> 16,
+ IPATH_USER_SWMAJOR);
+ ret = -ENODEV;
+ goto done;
+ }
+
+ swminor = uinfo->spu_userversion & 0xffff;
+ if (swminor != IPATH_USER_SWMINOR)
+ ipath_dbg("User minor version %d not same as driver "
+ "minor %d\n", swminor, IPATH_USER_SWMINOR);
mutex_lock(&ipath_mutex);
- user_minor = iminor(in) - IPATH_USER_MINOR_BASE;
+ if (swminor == IPATH_USER_SWMINOR && uinfo->spu_subport_cnt &&
+ (ret = find_shared_port(fp, uinfo))) {
+ mutex_unlock(&ipath_mutex);
+ if (ret > 0)
+ ret = 0;
+ goto done;
+ }
+
+ i_minor = iminor(fp->f_dentry->d_inode) - IPATH_USER_MINOR_BASE;
ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
- (long)in->i_rdev, user_minor);
+ (long)fp->f_dentry->d_inode->i_rdev, i_minor);
- if (user_minor)
- ret = find_free_port(user_minor - 1, fp);
+ if (i_minor)
+ ret = find_free_port(i_minor - 1, fp, uinfo);
else
- ret = find_best_unit(fp);
+ ret = find_best_unit(fp, uinfo);
mutex_unlock(&ipath_mutex);
+
+done:
+ return ret;
+}
+
+
+static int ipath_do_user_init(struct file *fp,
+ const struct ipath_user_info *uinfo)
+{
+ int ret;
+ struct ipath_portdata *pd;
+ struct ipath_devdata *dd;
+ u32 head32;
+
+ pd = port_fp(fp);
+ dd = pd->port_dd;
+
+ if (uinfo->spu_rcvhdrsize) {
+ ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
+ if (ret)
+ goto done;
+ }
+
+ /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
+
+ /* for right now, kernel piobufs are at end, so port 1 is at 0 */
+ pd->port_piobufs = dd->ipath_piobufbase +
+ dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign;
+ ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
+ pd->port_port, pd->port_piobufs);
+
+ /*
+ * Now allocate the rcvhdr Q and eager TIDs; skip the TID
+ * array for time being. If pd->port_port > chip-supported,
+ * we need to do extra stuff here to handle by handling overflow
+ * through port 0, someday
+ */
+ ret = ipath_create_rcvhdrq(dd, pd);
+ if (!ret)
+ ret = ipath_create_user_egr(pd);
+ if (ret)
+ goto done;
+
+ /*
+ * set the eager head register for this port to the current values
+ * of the tail pointers, since we don't know if they were
+ * updated on last use of the port.
+ */
+ head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
+ ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
+ dd->ipath_lastegrheads[pd->port_port] = -1;
+ dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
+ ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
+ pd->port_port, head32);
+ pd->port_tidcursor = 0; /* start at beginning after open */
+ /*
+ * now enable the port; the tail registers will be written to memory
+ * by the chip as soon as it sees the write to
+ * dd->ipath_kregs->kr_rcvctrl. The update only happens on
+ * transition from 0 to 1, so clear it first, then set it as part of
+ * enabling the port. This will (very briefly) affect any other
+ * open ports, but it shouldn't be long enough to be an issue.
+ * We explictly set the in-memory copy to 0 beforehand, so we don't
+ * have to wait to be sure the DMA update has happened.
+ */
+ *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0ULL;
+ set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
+ &dd->ipath_rcvctrl);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+done:
return ret;
}
@@ -1428,6 +1850,8 @@ static void unlock_expected_tids(struct ipath_portdata *pd)
if (!dd->ipath_pageshadow[i])
continue;
+ pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
ipath_release_user_pages_on_close(&dd->ipath_pageshadow[i],
1);
dd->ipath_pageshadow[i] = NULL;
@@ -1448,6 +1872,7 @@ static void unlock_expected_tids(struct ipath_portdata *pd)
static int ipath_close(struct inode *in, struct file *fp)
{
int ret = 0;
+ struct ipath_filedata *fd;
struct ipath_portdata *pd;
struct ipath_devdata *dd;
unsigned port;
@@ -1457,9 +1882,24 @@ static int ipath_close(struct inode *in, struct file *fp)
mutex_lock(&ipath_mutex);
- pd = port_fp(fp);
- port = pd->port_port;
+ fd = (struct ipath_filedata *) fp->private_data;
fp->private_data = NULL;
+ pd = fd->pd;
+ if (!pd) {
+ mutex_unlock(&ipath_mutex);
+ goto bail;
+ }
+ if (--pd->port_cnt) {
+ /*
+ * XXX If the master closes the port before the slave(s),
+ * revoke the mmap for the eager receive queue so
+ * the slave(s) don't wait for receive data forever.
+ */
+ pd->active_slaves &= ~(1 << fd->subport);
+ mutex_unlock(&ipath_mutex);
+ goto bail;
+ }
+ port = pd->port_port;
dd = pd->port_dd;
if (pd->port_hdrqfull) {
@@ -1498,8 +1938,6 @@ static int ipath_close(struct inode *in, struct file *fp)
/* clean up the pkeys for this port user */
ipath_clean_part_key(pd, dd);
-
-
/*
* be paranoid, and never write 0's to these, just use an
* unused part of the port 0 tail page. Of course,
@@ -1518,39 +1956,49 @@ static int ipath_close(struct inode *in, struct file *fp)
i = dd->ipath_pbufsport * (port - 1);
ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport);
+ dd->ipath_f_clear_tids(dd, pd->port_port);
+
if (dd->ipath_pageshadow)
unlock_expected_tids(pd);
ipath_stats.sps_ports--;
ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
pd->port_comm, pd->port_pid,
dd->ipath_unit, port);
-
- dd->ipath_f_clear_tids(dd, pd->port_port);
}
- pd->port_cnt = 0;
pd->port_pid = 0;
-
dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
mutex_unlock(&ipath_mutex);
ipath_free_pddata(dd, pd); /* after releasing the mutex */
+bail:
+ kfree(fd);
return ret;
}
-static int ipath_port_info(struct ipath_portdata *pd,
+static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
struct ipath_port_info __user *uinfo)
{
struct ipath_port_info info;
int nup;
int ret;
+ size_t sz;
(void) ipath_count_units(NULL, &nup, NULL);
info.num_active = nup;
info.unit = pd->port_dd->ipath_unit;
info.port = pd->port_port;
+ info.subport = subport;
+ /* Don't return new fields if old library opened the port. */
+ if ((pd->userversion & 0xffff) == IPATH_USER_SWMINOR) {
+ /* Number of user ports available for this device. */
+ info.num_ports = pd->port_dd->ipath_cfgports - 1;
+ info.num_subports = pd->port_subport_cnt;
+ sz = sizeof(info);
+ } else
+ sz = sizeof(info) - 2 * sizeof(u16);
- if (copy_to_user(uinfo, &info, sizeof(info))) {
+ if (copy_to_user(uinfo, &info, sz)) {
ret = -EFAULT;
goto bail;
}
@@ -1560,6 +2008,16 @@ bail:
return ret;
}
+static int ipath_get_slave_info(struct ipath_portdata *pd,
+ void __user *slave_mask_addr)
+{
+ int ret = 0;
+
+ if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32)))
+ ret = -EFAULT;
+ return ret;
+}
+
static ssize_t ipath_write(struct file *fp, const char __user *data,
size_t count, loff_t *off)
{
@@ -1586,6 +2044,8 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
consumed = sizeof(cmd.type);
switch (cmd.type) {
+ case IPATH_CMD_ASSIGN_PORT:
+ case __IPATH_CMD_USER_INIT:
case IPATH_CMD_USER_INIT:
copy = sizeof(cmd.cmd.user_info);
dest = &cmd.cmd.user_info;
@@ -1612,6 +2072,11 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
dest = &cmd.cmd.part_key;
src = &ucmd->cmd.part_key;
break;
+ case IPATH_CMD_SLAVE_INFO:
+ copy = sizeof(cmd.cmd.slave_mask_addr);
+ dest = &cmd.cmd.slave_mask_addr;
+ src = &ucmd->cmd.slave_mask_addr;
+ break;
default:
ret = -EINVAL;
goto bail;
@@ -1629,34 +2094,55 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
consumed += copy;
pd = port_fp(fp);
+ if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
+ cmd.type != IPATH_CMD_ASSIGN_PORT) {
+ ret = -EINVAL;
+ goto bail;
+ }
switch (cmd.type) {
+ case IPATH_CMD_ASSIGN_PORT:
+ ret = ipath_assign_port(fp, &cmd.cmd.user_info);
+ if (ret)
+ goto bail;
+ break;
+ case __IPATH_CMD_USER_INIT:
+ /* backwards compatibility, get port first */
+ ret = ipath_assign_port(fp, &cmd.cmd.user_info);
+ if (ret)
+ goto bail;
+ /* and fall through to current version. */
case IPATH_CMD_USER_INIT:
- ret = ipath_do_user_init(pd, &cmd.cmd.user_info);
- if (ret < 0)
+ ret = ipath_do_user_init(fp, &cmd.cmd.user_info);
+ if (ret)
goto bail;
ret = ipath_get_base_info(
- pd, (void __user *) (unsigned long)
+ fp, (void __user *) (unsigned long)
cmd.cmd.user_info.spu_base_info,
cmd.cmd.user_info.spu_base_info_size);
break;
case IPATH_CMD_RECV_CTRL:
- ret = ipath_manage_rcvq(pd, cmd.cmd.recv_ctrl);
+ ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl);
break;
case IPATH_CMD_PORT_INFO:
- ret = ipath_port_info(pd,
+ ret = ipath_port_info(pd, subport_fp(fp),
(struct ipath_port_info __user *)
(unsigned long) cmd.cmd.port_info);
break;
case IPATH_CMD_TID_UPDATE:
- ret = ipath_tid_update(pd, &cmd.cmd.tid_info);
+ ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info);
break;
case IPATH_CMD_TID_FREE:
- ret = ipath_tid_free(pd, &cmd.cmd.tid_info);
+ ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info);
break;
case IPATH_CMD_SET_PART_KEY:
ret = ipath_set_part_key(pd, cmd.cmd.part_key);
break;
+ case IPATH_CMD_SLAVE_INFO:
+ ret = ipath_get_slave_info(pd,
+ (void __user *) (unsigned long)
+ cmd.cmd.slave_mask_addr);
+ break;
}
if (ret >= 0)
@@ -1815,7 +2301,7 @@ int ipath_user_add(struct ipath_devdata *dd)
if (ret < 0) {
ipath_dev_err(dd, "Could not create wildcard "
"minor: error %d\n", -ret);
- goto bail_sma;
+ goto bail_user;
}
atomic_set(&user_setup, 1);
@@ -1831,7 +2317,7 @@ int ipath_user_add(struct ipath_devdata *dd)
goto bail;
-bail_sma:
+bail_user:
user_cleanup();
bail:
return ret;
@@ -1853,4 +2339,3 @@ void ipath_user_remove(struct ipath_devdata *dd)
bail:
return;
}
-
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 0936d8e8d70..d9ff283f725 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -61,14 +61,13 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
inode->i_mode = mode;
inode->i_uid = 0;
inode->i_gid = 0;
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- inode->u.generic_ip = data;
+ inode->i_private = data;
if ((mode & S_IFMT) == S_IFDIR) {
inode->i_op = &simple_dir_inode_operations;
- inode->i_nlink++;
- dir->i_nlink++;
+ inc_nlink(inode);
+ inc_nlink(dir);
}
inode->i_fop = fops;
@@ -119,7 +118,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf,
u16 i;
struct ipath_devdata *dd;
- dd = file->f_dentry->d_inode->u.generic_ip;
+ dd = file->f_dentry->d_inode->i_private;
for (i = 0; i < NUM_COUNTERS; i++)
counters[i] = ipath_snap_cntr(dd, i);
@@ -139,7 +138,7 @@ static ssize_t atomic_node_info_read(struct file *file, char __user *buf,
struct ipath_devdata *dd;
u64 guid;
- dd = file->f_dentry->d_inode->u.generic_ip;
+ dd = file->f_dentry->d_inode->i_private;
guid = be64_to_cpu(dd->ipath_guid);
@@ -178,7 +177,7 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
u32 tmp, tmp2;
struct ipath_devdata *dd;
- dd = file->f_dentry->d_inode->u.generic_ip;
+ dd = file->f_dentry->d_inode->i_private;
/* so we only initialize non-zero fields. */
memset(portinfo, 0, sizeof portinfo);
@@ -191,8 +190,8 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
portinfo[4] = (dd->ipath_lid << 16);
/*
- * Notimpl yet SMLID (should we store this in the driver, in case
- * SMA dies?) CapabilityMask is 0, we don't support any of these
+ * Notimpl yet SMLID.
+ * CapabilityMask is 0, we don't support any of these
* DiagCode is 0; we don't store any diag info for now Notimpl yet
* M_KeyLeasePeriod (we don't support M_Key)
*/
@@ -325,7 +324,7 @@ static ssize_t flash_read(struct file *file, char __user *buf,
goto bail;
}
- dd = file->f_dentry->d_inode->u.generic_ip;
+ dd = file->f_dentry->d_inode->i_private;
if (ipath_eeprom_read(dd, pos, tmp, count)) {
ipath_dev_err(dd, "failed to read from flash\n");
ret = -ENXIO;
@@ -357,19 +356,16 @@ static ssize_t flash_write(struct file *file, const char __user *buf,
pos = *ppos;
- if ( pos < 0) {
+ if (pos != 0) {
ret = -EINVAL;
goto bail;
}
- if (pos >= sizeof(struct ipath_flash)) {
- ret = 0;
+ if (count != sizeof(struct ipath_flash)) {
+ ret = -EINVAL;
goto bail;
}
- if (count > sizeof(struct ipath_flash) - pos)
- count = sizeof(struct ipath_flash) - pos;
-
tmp = kmalloc(count, GFP_KERNEL);
if (!tmp) {
ret = -ENOMEM;
@@ -381,7 +377,7 @@ static ssize_t flash_write(struct file *file, const char __user *buf,
goto bail_tmp;
}
- dd = file->f_dentry->d_inode->u.generic_ip;
+ dd = file->f_dentry->d_inode->i_private;
if (ipath_eeprom_write(dd, pos, tmp, count)) {
ret = -ENXIO;
ipath_dev_err(dd, "failed to write to flash\n");
diff --git a/drivers/infiniband/hw/ipath/ipath_ht400.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 3db015da6e7..9e4e8d4c6e2 100644
--- a/drivers/infiniband/hw/ipath/ipath_ht400.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -33,7 +33,7 @@
/*
* This file contains all of the code that is specific to the InfiniPath
- * HT-400 chip.
+ * HT chip.
*/
#include <linux/pci.h>
@@ -43,7 +43,7 @@
#include "ipath_registers.h"
/*
- * This lists the InfiniPath HT400 registers, in the actual chip layout.
+ * This lists the InfiniPath registers, in the actual chip layout.
* This structure should never be directly accessed.
*
* The names are in InterCap form because they're taken straight from
@@ -252,8 +252,8 @@ static const struct ipath_cregs ipath_ht_cregs = {
};
/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK 0x1FF
-#define INFINIPATH_I_RCVAVAIL_MASK 0x1FF
+#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1)
+#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1)
/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
#define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0
@@ -338,7 +338,7 @@ static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
if (crcbits) {
u16 ctrl0, ctrl1;
snprintf(bitsmsg, sizeof bitsmsg,
- "[HT%s lane %s CRC (%llx); ignore till reload]",
+ "[HT%s lane %s CRC (%llx); powercycle to completely clear]",
!(crcbits & _IPATH_HTLINK1_CRCBITS) ?
"0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS)
? "1 (B)" : "0+1 (A+B)"),
@@ -389,17 +389,28 @@ static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
_IPATH_HTLINK1_CRCBITS)));
}
+/* 6110 specific hardware errors... */
+static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
+ INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"),
+ INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"),
+ INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"),
+ INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"),
+ INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"),
+ INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"),
+ INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
+ INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
+};
+
/**
- * ipath_ht_handle_hwerrors - display hardware errors
+ * ipath_ht_handle_hwerrors - display hardware errors.
* @dd: the infinipath device
* @msg: the output buffer
* @msgl: the size of the output buffer
*
- * Use same msg buffer as regular errors to avoid
- * excessive stack use. Most hardware errors are catastrophic, but for
- * right now, we'll print them and continue.
- * We reuse the same message buffer as ipath_handle_errors() to avoid
- * excessive stack usage.
+ * Use same msg buffer as regular errors to avoid excessive stack
+ * use. Most hardware errors are catastrophic, but for right now,
+ * we'll print them and continue. We reuse the same message buffer as
+ * ipath_handle_errors() to avoid excessive stack usage.
*/
static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
size_t msgl)
@@ -440,19 +451,49 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
* make sure we get this much out, unless told to be quiet,
* or it's occurred within the last 5 seconds
*/
- if ((hwerrs & ~dd->ipath_lasthwerror) ||
+ if ((hwerrs & ~(dd->ipath_lasthwerror |
+ ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
(ipath_debug & __IPATH_VERBDBG))
dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
"(cleared)\n", (unsigned long long) hwerrs);
dd->ipath_lasthwerror |= hwerrs;
- if (hwerrs & ~infinipath_hwe_bitsextant)
+ if (hwerrs & ~dd->ipath_hwe_bitsextant)
ipath_dev_err(dd, "hwerror interrupt with unknown errors "
"%llx set\n", (unsigned long long)
- (hwerrs & ~infinipath_hwe_bitsextant));
+ (hwerrs & ~dd->ipath_hwe_bitsextant));
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
if (ctrl & INFINIPATH_C_FREEZEMODE) {
+ /*
+ * parity errors in send memory are recoverable,
+ * just cancel the send (if indicated in * sendbuffererror),
+ * count the occurrence, unfreeze (if no other handled
+ * hardware error bits are set), and continue. They can
+ * occur if a processor speculative read is done to the PIO
+ * buffer while we are sending a packet, for example.
+ */
+ if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
+ ipath_stats.sps_txeparity++;
+ ipath_dbg("Recovering from TXE parity error (%llu), "
+ "hwerrstatus=%llx\n",
+ (unsigned long long) ipath_stats.sps_txeparity,
+ (unsigned long long) hwerrs);
+ ipath_disarm_senderrbufs(dd);
+ hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
+ if (!hwerrs) { /* else leave in freeze mode */
+ ipath_write_kreg(dd,
+ dd->ipath_kregs->kr_control,
+ dd->ipath_control);
+ return;
+ }
+ }
if (hwerrs) {
/*
* if any set that we aren't ignoring; only
@@ -461,8 +502,9 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
* times.
*/
if (dd->ipath_flags & IPATH_INITTED) {
- ipath_dev_err(dd, "Fatal Error (freeze "
- "mode), no longer usable\n");
+ ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+ "mode), no longer usable, SN %.16s\n",
+ dd->ipath_serial);
isfatal = 1;
}
*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
@@ -498,46 +540,18 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
bits);
strlcat(msg, bitsmsg, msgl);
}
- if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
- << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
- bits);
- strlcat(msg, bitsmsg, msgl);
- }
- if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
- bits);
- strlcat(msg, bitsmsg, msgl);
- }
- if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
- strlcat(msg, "[IB2IPATH Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
- strlcat(msg, "[IPATH2IB Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCBUSIREQPARITYERR)
- strlcat(msg, "[HTC Ireq Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCBUSTREQPARITYERR)
- strlcat(msg, "[HTC Treq Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCBUSTRESPPARITYERR)
- strlcat(msg, "[HTC Tresp Parity]", msgl);
+
+ ipath_format_hwerrors(hwerrs,
+ ipath_6110_hwerror_msgs,
+ sizeof(ipath_6110_hwerror_msgs) /
+ sizeof(ipath_6110_hwerror_msgs[0]),
+ msg, msgl);
if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
hwerr_crcbits(dd, hwerrs, msg, msgl);
- if (hwerrs & INFINIPATH_HWE_HTCMISCERR5)
- strlcat(msg, "[HT core Misc5]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCMISCERR6)
- strlcat(msg, "[HT core Misc6]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCMISCERR7)
- strlcat(msg, "[HT core Misc7]", msgl);
if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
- strlcat(msg, "[Memory BIST test failed, HT-400 unusable]",
+ strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
msgl);
/* ignore from now on, so disable until driver reloaded */
dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
@@ -553,7 +567,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
if (hwerrs & _IPATH_PLL_FAIL) {
snprintf(bitsmsg, sizeof bitsmsg,
- "[PLL failed (%llx), HT-400 unusable]",
+ "[PLL failed (%llx), InfiniPath hardware unusable]",
(unsigned long long) (hwerrs & _IPATH_PLL_FAIL));
strlcat(msg, bitsmsg, msgl);
/* ignore from now on, so disable until driver reloaded */
@@ -572,11 +586,6 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
dd->ipath_hwerrmask);
}
- if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
- strlcat(msg, "[Rx Dsync]", msgl);
- if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
- strlcat(msg, "[SerDes PLL]", msgl);
-
ipath_dev_err(dd, "%s hardware error\n", msg);
if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
/*
@@ -610,18 +619,18 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
break;
case 5:
/*
- * HT-460 original production board; two production levels, with
+ * original production board; two production levels, with
* different serial number ranges. See ipath_ht_early_init() for
* case where we enable IPATH_GPIO_INTR for later serial # range.
*/
- n = "InfiniPath_HT-460";
+ n = "InfiniPath_QHT7040";
break;
case 6:
n = "OEM_Board_3";
break;
case 7:
- /* HT-460 small form factor production board */
- n = "InfiniPath_HT-465";
+ /* small form factor production board */
+ n = "InfiniPath_QHT7140";
break;
case 8:
n = "LS/X-1";
@@ -633,7 +642,7 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
n = "OEM_Board_2";
break;
case 11:
- n = "InfiniPath_HT-470";
+ n = "InfiniPath_HT-470"; /* obsoleted */
break;
case 12:
n = "OEM_Board_4";
@@ -641,7 +650,7 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
default: /* don't know, just print the number */
ipath_dev_err(dd, "Don't yet know about board "
"with ID %u\n", boardrev);
- snprintf(name, namelen, "Unknown_InfiniPath_HT-4xx_%u",
+ snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
boardrev);
break;
}
@@ -650,11 +659,10 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) {
/*
- * This version of the driver only supports the HT-400
- * Rev 3.2
+ * This version of the driver only supports Rev 3.2 and 3.3
*/
ipath_dev_err(dd,
- "Unsupported HT-400 revision %u.%u!\n",
+ "Unsupported InfiniPath hardware revision %u.%u!\n",
dd->ipath_majrev, dd->ipath_minrev);
ret = 1;
goto bail;
@@ -738,11 +746,10 @@ static void ipath_check_htlink(struct ipath_devdata *dd)
static int ipath_setup_ht_reset(struct ipath_devdata *dd)
{
- ipath_dbg("No reset possible for HT-400\n");
+ ipath_dbg("No reset possible for this InfiniPath hardware\n");
return 0;
}
-#define HT_CAPABILITY_ID 0x08 /* HT capabilities not defined in kernel */
#define HT_INTR_DISC_CONFIG 0x80 /* HT interrupt and discovery cap */
#define HT_INTR_REG_INDEX 2 /* intconfig requires indirect accesses */
@@ -925,7 +932,7 @@ static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev,
/*
* kernels with CONFIG_PCI_MSI set the vector in the irq field of
- * struct pci_device, so we use that to program the HT-400 internal
+ * struct pci_device, so we use that to program the internal
* interrupt register (not config space) with that value. The BIOS
* must still have done the basic MSI setup.
*/
@@ -973,7 +980,7 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd,
* do this early, before we ever enable errors or hardware errors,
* mostly to avoid causing the chip to enter freeze mode.
*/
- pos = pci_find_capability(pdev, HT_CAPABILITY_ID);
+ pos = pci_find_capability(pdev, PCI_CAP_ID_HT);
if (!pos) {
ipath_dev_err(dd, "Couldn't find HyperTransport "
"capability; no interrupts\n");
@@ -996,7 +1003,7 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd,
else if (cap_type == HT_INTR_DISC_CONFIG)
ihandler = set_int_handler(dd, pdev, pos);
} while ((pos = pci_find_next_capability(pdev, pos,
- HT_CAPABILITY_ID)));
+ PCI_CAP_ID_HT)));
if (!ihandler) {
ipath_dev_err(dd, "Couldn't find interrupt handler in "
@@ -1013,7 +1020,7 @@ bail:
* @dd: the infinipath device
*
* Called during driver unload.
- * This is currently a nop for the HT-400, not for all chips
+ * This is currently a nop for the HT chip, not for all chips
*/
static void ipath_setup_ht_cleanup(struct ipath_devdata *dd)
{
@@ -1081,21 +1088,21 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
}
-static void ipath_init_ht_variables(void)
+static void ipath_init_ht_variables(struct ipath_devdata *dd)
{
- ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
- ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
- ipath_gpio_sda = IPATH_GPIO_SDA;
- ipath_gpio_scl = IPATH_GPIO_SCL;
+ dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
+ dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
+ dd->ipath_gpio_sda = IPATH_GPIO_SDA;
+ dd->ipath_gpio_scl = IPATH_GPIO_SCL;
- infinipath_i_bitsextant =
+ dd->ipath_i_bitsextant =
(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
(INFINIPATH_I_RCVAVAIL_MASK <<
INFINIPATH_I_RCVAVAIL_SHIFT) |
INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
- infinipath_e_bitsextant =
+ dd->ipath_e_bitsextant =
INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
@@ -1113,7 +1120,7 @@ static void ipath_init_ht_variables(void)
INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
INFINIPATH_E_HARDWARE;
- infinipath_hwe_bitsextant =
+ dd->ipath_hwe_bitsextant =
(INFINIPATH_HWE_HTCMEMPARITYERR_MASK <<
INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) |
(INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
@@ -1142,8 +1149,8 @@ static void ipath_init_ht_variables(void)
INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
- infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
- infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+ dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
+ dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
}
/**
@@ -1290,6 +1297,15 @@ static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
val &= ~INFINIPATH_XGXS_RESET;
change = 1;
}
+ if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
+ INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
+ /* need to compensate for Tx inversion in partner */
+ val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+ INFINIPATH_XGXS_RX_POL_SHIFT);
+ val |= dd->ipath_rx_pol_inv <<
+ INFINIPATH_XGXS_RX_POL_SHIFT;
+ change = 1;
+ }
if (change)
ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
@@ -1470,7 +1486,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
/*
- * For HT-400, we allocate a somewhat overly large eager buffer,
+ * For HT, we allocate a somewhat overly large eager buffer,
* such that we can guarantee that we can receive the largest
* packet that we can send out. To truly support a 4KB MTU,
* we need to bump this to a large value. To date, other than
@@ -1531,7 +1547,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
/*
- * Later production HT-460 has same changes as HT-465, so
+ * Later production QHT7040 has same changes as QHT7140, so
* can use GPIO interrupts. They have serial #'s starting
* with 128, rather than 112.
*/
@@ -1560,13 +1576,13 @@ static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
}
/**
- * ipath_init_ht400_funcs - set up the chip-specific function pointers
+ * ipath_init_iba6110_funcs - set up the chip-specific function pointers
* @dd: the infinipath device
*
* This is global, and is called directly at init to set up the
* chip-specific function pointers for later use.
*/
-void ipath_init_ht400_funcs(struct ipath_devdata *dd)
+void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
{
dd->ipath_f_intrsetup = ipath_ht_intconfig;
dd->ipath_f_bus = ipath_setup_ht_config;
@@ -1599,5 +1615,5 @@ void ipath_init_ht400_funcs(struct ipath_devdata *dd)
* do very early init that is needed before ipath_f_bus is
* called
*/
- ipath_init_ht_variables();
+ ipath_init_ht_variables(dd);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_pe800.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index b83f66d8262..a72ab9de386 100644
--- a/drivers/infiniband/hw/ipath/ipath_pe800.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -32,7 +32,7 @@
*/
/*
* This file contains all of the code that is specific to the
- * InfiniPath PE-800 chip.
+ * InfiniPath PCIe chip.
*/
#include <linux/interrupt.h>
@@ -45,9 +45,9 @@
/*
* This file contains all the chip-specific register information and
- * access functions for the QLogic InfiniPath PE800, the PCI-Express chip.
+ * access functions for the QLogic InfiniPath PCI-Express chip.
*
- * This lists the InfiniPath PE800 registers, in the actual chip layout.
+ * This lists the InfiniPath registers, in the actual chip layout.
* This structure should never be directly accessed.
*/
struct _infinipath_do_not_use_kernel_regs {
@@ -213,7 +213,6 @@ static const struct ipath_kregs ipath_pe_kregs = {
.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
- /* This group is pe-800-specific; and used only in this file */
/* The rcvpktled register controls one of the debug port signals, so
* a packet activity LED can be connected to it. */
.kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt),
@@ -264,8 +263,8 @@ static const struct ipath_cregs ipath_pe_cregs = {
};
/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK 0x1F
-#define INFINIPATH_I_RCVAVAIL_MASK 0x1F
+#define INFINIPATH_I_RCVURG_MASK ((1U<<5)-1)
+#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<5)-1)
/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
#define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK 0x000000000000003fULL
@@ -295,6 +294,33 @@ static const struct ipath_cregs ipath_pe_cregs = {
#define IPATH_GPIO_SCL (1ULL << \
(_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
+/*
+ * Rev2 silicon allows suppressing check for ArmLaunch errors.
+ * this can speed up short packet sends on systems that do
+ * not guaranteee write-order.
+ */
+#define INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR (1ULL<<63)
+
+/* 6120 specific hardware errors... */
+static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
+ INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"),
+ INFINIPATH_HWE_MSG(PCIECPLTIMEOUT, "PCIe completion timeout"),
+ /*
+ * In practice, it's unlikely wthat we'll see PCIe PLL, or bus
+ * parity or memory parity error failures, because most likely we
+ * won't be able to talk to the core of the chip. Nonetheless, we
+ * might see them, if they are in parts of the PCIe core that aren't
+ * essential.
+ */
+ INFINIPATH_HWE_MSG(PCIE1PLLFAILED, "PCIePLL1"),
+ INFINIPATH_HWE_MSG(PCIE0PLLFAILED, "PCIePLL0"),
+ INFINIPATH_HWE_MSG(PCIEBUSPARITYXTLH, "PCIe XTLH core parity"),
+ INFINIPATH_HWE_MSG(PCIEBUSPARITYXADM, "PCIe ADM TX core parity"),
+ INFINIPATH_HWE_MSG(PCIEBUSPARITYRADM, "PCIe ADM RX core parity"),
+ INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
+ INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
+};
+
/**
* ipath_pe_handle_hwerrors - display hardware errors.
* @dd: the infinipath device
@@ -344,19 +370,49 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
* make sure we get this much out, unless told to be quiet,
* or it's occurred within the last 5 seconds
*/
- if ((hwerrs & ~dd->ipath_lasthwerror) ||
+ if ((hwerrs & ~(dd->ipath_lasthwerror |
+ ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
(ipath_debug & __IPATH_VERBDBG))
dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
"(cleared)\n", (unsigned long long) hwerrs);
dd->ipath_lasthwerror |= hwerrs;
- if (hwerrs & ~infinipath_hwe_bitsextant)
+ if (hwerrs & ~dd->ipath_hwe_bitsextant)
ipath_dev_err(dd, "hwerror interrupt with unknown errors "
"%llx set\n", (unsigned long long)
- (hwerrs & ~infinipath_hwe_bitsextant));
+ (hwerrs & ~dd->ipath_hwe_bitsextant));
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
if (ctrl & INFINIPATH_C_FREEZEMODE) {
+ /*
+ * parity errors in send memory are recoverable,
+ * just cancel the send (if indicated in * sendbuffererror),
+ * count the occurrence, unfreeze (if no other handled
+ * hardware error bits are set), and continue. They can
+ * occur if a processor speculative read is done to the PIO
+ * buffer while we are sending a packet, for example.
+ */
+ if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
+ ipath_stats.sps_txeparity++;
+ ipath_dbg("Recovering from TXE parity error (%llu), "
+ "hwerrstatus=%llx\n",
+ (unsigned long long) ipath_stats.sps_txeparity,
+ (unsigned long long) hwerrs);
+ ipath_disarm_senderrbufs(dd);
+ hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
+ if (!hwerrs) { /* else leave in freeze mode */
+ ipath_write_kreg(dd,
+ dd->ipath_kregs->kr_control,
+ dd->ipath_control);
+ return;
+ }
+ }
if (hwerrs) {
/*
* if any set that we aren't ignoring only make the
@@ -364,8 +420,9 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
* and we get here multiple times
*/
if (dd->ipath_flags & IPATH_INITTED) {
- ipath_dev_err(dd, "Fatal Error (freeze "
- "mode), no longer usable\n");
+ ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+ "mode), no longer usable, SN %.16s\n",
+ dd->ipath_serial);
isfatal = 1;
}
/*
@@ -379,16 +436,15 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
} else {
ipath_dbg("Clearing freezemode on ignored hardware "
"error\n");
- ctrl &= ~INFINIPATH_C_FREEZEMODE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
- ctrl);
+ dd->ipath_control);
}
}
*msg = '\0';
if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
- strlcat(msg, "[Memory BIST test failed, PE-800 unusable]",
+ strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
msgl);
/* ignore from now on, so disable until driver reloaded */
*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
@@ -396,24 +452,13 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
dd->ipath_hwerrmask);
}
- if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
- << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
- bits);
- strlcat(msg, bitsmsg, msgl);
- }
- if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
- bits);
- strlcat(msg, bitsmsg, msgl);
- }
+
+ ipath_format_hwerrors(hwerrs,
+ ipath_6120_hwerror_msgs,
+ sizeof(ipath_6120_hwerror_msgs)/
+ sizeof(ipath_6120_hwerror_msgs[0]),
+ msg, msgl);
+
if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK
<< INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) {
bits = (u32) ((hwerrs >>
@@ -423,17 +468,13 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
"[PCIe Mem Parity Errs %x] ", bits);
strlcat(msg, bitsmsg, msgl);
}
- if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
- strlcat(msg, "[IB2IPATH Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
- strlcat(msg, "[IPATH2IB Parity]", msgl);
#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \
INFINIPATH_HWE_COREPLL_RFSLIP )
if (hwerrs & _IPATH_PLL_FAIL) {
snprintf(bitsmsg, sizeof bitsmsg,
- "[PLL failed (%llx), PE-800 unusable]",
+ "[PLL failed (%llx), InfiniPath hardware unusable]",
(unsigned long long) hwerrs & _IPATH_PLL_FAIL);
strlcat(msg, bitsmsg, msgl);
/* ignore from now on, so disable until driver reloaded */
@@ -452,34 +493,6 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
dd->ipath_hwerrmask);
}
- if (hwerrs & INFINIPATH_HWE_PCIEPOISONEDTLP)
- strlcat(msg, "[PCIe Poisoned TLP]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIECPLTIMEOUT)
- strlcat(msg, "[PCIe completion timeout]", msgl);
-
- /*
- * In practice, it's unlikely wthat we'll see PCIe PLL, or bus
- * parity or memory parity error failures, because most likely we
- * won't be able to talk to the core of the chip. Nonetheless, we
- * might see them, if they are in parts of the PCIe core that aren't
- * essential.
- */
- if (hwerrs & INFINIPATH_HWE_PCIE1PLLFAILED)
- strlcat(msg, "[PCIePLL1]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIE0PLLFAILED)
- strlcat(msg, "[PCIePLL0]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXTLH)
- strlcat(msg, "[PCIe XTLH core parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXADM)
- strlcat(msg, "[PCIe ADM TX core parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYRADM)
- strlcat(msg, "[PCIe ADM RX core parity]", msgl);
-
- if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
- strlcat(msg, "[Rx Dsync]", msgl);
- if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
- strlcat(msg, "[SerDes PLL]", msgl);
-
ipath_dev_err(dd, "%s hardware error\n", msg);
if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) {
/*
@@ -511,22 +524,28 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
n = "InfiniPath_Emulation";
break;
case 1:
- n = "InfiniPath_PE-800-Bringup";
+ n = "InfiniPath_QLE7140-Bringup";
break;
case 2:
- n = "InfiniPath_PE-880";
+ n = "InfiniPath_QLE7140";
break;
case 3:
- n = "InfiniPath_PE-850";
+ n = "InfiniPath_QMI7140";
break;
case 4:
- n = "InfiniPath_PE-860";
+ n = "InfiniPath_QEM7140";
+ break;
+ case 5:
+ n = "InfiniPath_QMH7140";
+ break;
+ case 6:
+ n = "InfiniPath_QLE7142";
break;
default:
ipath_dev_err(dd,
"Don't yet know about board with ID %u\n",
boardrev);
- snprintf(name, namelen, "Unknown_InfiniPath_PE-8xx_%u",
+ snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u",
boardrev);
break;
}
@@ -534,7 +553,7 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
snprintf(name, namelen, "%s", n);
if (dd->ipath_majrev != 4 || !dd->ipath_minrev || dd->ipath_minrev>2) {
- ipath_dev_err(dd, "Unsupported PE-800 revision %u.%u!\n",
+ ipath_dev_err(dd, "Unsupported InfiniPath hardware revision %u.%u!\n",
dd->ipath_majrev, dd->ipath_minrev);
ret = 1;
} else
@@ -568,9 +587,12 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
if (!dd->ipath_boardrev) // no PLL for Emulator
val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
- /* workaround bug 9460 in internal interface bus parity checking */
- val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM;
-
+ if (dd->ipath_minrev < 2) {
+ /* workaround bug 9460 in internal interface bus parity
+ * checking. Fixed (HW bug 9490) in Rev2.
+ */
+ val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM;
+ }
dd->ipath_hwerrmask = val;
}
@@ -580,8 +602,8 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
*/
static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
{
- u64 val, tmp, config1;
- int ret = 0, change = 0;
+ u64 val, tmp, config1, prev_val;
+ int ret = 0;
ipath_dbg("Trying to bringup serdes\n");
@@ -638,6 +660,7 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+ prev_val = val;
if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) &
INFINIPATH_XGXS_MDIOADDR_MASK) != 3) {
val &=
@@ -645,13 +668,30 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
INFINIPATH_XGXS_MDIOADDR_SHIFT);
/* MDIO address 3 */
val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT;
- change = 1;
}
if (val & INFINIPATH_XGXS_RESET) {
val &= ~INFINIPATH_XGXS_RESET;
- change = 1;
}
- if (change)
+ if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
+ INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
+ /* need to compensate for Tx inversion in partner */
+ val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+ INFINIPATH_XGXS_RX_POL_SHIFT);
+ val |= dd->ipath_rx_pol_inv <<
+ INFINIPATH_XGXS_RX_POL_SHIFT;
+ }
+ if (dd->ipath_minrev >= 2) {
+ /* Rev 2. can tolerate multiple writes to PBC, and
+ * allowing them can provide lower latency on some
+ * CPUs, but this feature is off by default, only
+ * turned on by setting D63 of XGXSconfig reg.
+ * May want to make this conditional more
+ * fine-grained in future. This is not exactly
+ * related to XGXS, but where the bit ended up.
+ */
+ val |= INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR;
+ }
+ if (val != prev_val)
ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
@@ -705,9 +745,25 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
}
-/* this is not yet needed on the PE800, so just return 0. */
static int ipath_pe_intconfig(struct ipath_devdata *dd)
{
+ u64 val;
+ u32 chiprev;
+
+ /*
+ * If the chip supports added error indication via GPIO pins,
+ * enable interrupts on those bits so the interrupt routine
+ * can count the events. Also set flag so interrupt routine
+ * can know they are expected.
+ */
+ chiprev = dd->ipath_revision >> INFINIPATH_R_CHIPREVMINOR_SHIFT;
+ if ((chiprev & INFINIPATH_R_CHIPREVMINOR_MASK) > 1) {
+ /* Rev2+ reports extra errors via internal GPIO pins */
+ dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
+ val |= IPATH_GPIO_ERRINTR_MASK;
+ ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val);
+ }
return 0;
}
@@ -759,8 +815,8 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
*
* This is called during driver unload.
* We do the pci_disable_msi here, not in generic code, because it
- * isn't used for the HT-400. If we do end up needing pci_enable_msi
- * at some point in the future for HT-400, we'll move the call back
+ * isn't used for the HT chips. If we do end up needing pci_enable_msi
+ * at some point in the future for HT, we'll move the call back
* into the main init_one code.
*/
static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
@@ -780,10 +836,10 @@ static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
* late in 2.6.16).
* All that can be done is to edit the kernel source to remove the quirk
* check until that is fixed.
- * We do not need to call enable_msi() for our HyperTransport chip (HT-400),
- * even those it uses MSI, and we want to avoid the quirk warning, so
- * So we call enable_msi only for the PE-800. If we do end up needing
- * pci_enable_msi at some point in the future for HT-400, we'll move the
+ * We do not need to call enable_msi() for our HyperTransport chip,
+ * even though it uses MSI, and we want to avoid the quirk warning, so
+ * So we call enable_msi only for PCIe. If we do end up needing
+ * pci_enable_msi at some point in the future for HT, we'll move the
* call back into the main init_one code.
* We save the msi lo and hi values, so we can restore them after
* chip reset (the kernel PCI infrastructure doesn't yet handle that
@@ -841,21 +897,23 @@ static int ipath_setup_pe_config(struct ipath_devdata *dd,
return 0;
}
-static void ipath_init_pe_variables(void)
+static void ipath_init_pe_variables(struct ipath_devdata *dd)
{
/*
* bits for selecting i2c direction and values,
* used for I2C serial flash
*/
- ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
- ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
- ipath_gpio_sda = IPATH_GPIO_SDA;
- ipath_gpio_scl = IPATH_GPIO_SCL;
+ dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
+ dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
+ dd->ipath_gpio_sda = IPATH_GPIO_SDA;
+ dd->ipath_gpio_scl = IPATH_GPIO_SCL;
/* variables for sanity checking interrupt and errors */
- infinipath_hwe_bitsextant =
+ dd->ipath_hwe_bitsextant =
(INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
+ (INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
(INFINIPATH_HWE_PCIEMEMPARITYERR_MASK <<
INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) |
INFINIPATH_HWE_PCIE1PLLFAILED |
@@ -871,13 +929,13 @@ static void ipath_init_pe_variables(void)
INFINIPATH_HWE_SERDESPLLFAILED |
INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
- infinipath_i_bitsextant =
+ dd->ipath_i_bitsextant =
(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
(INFINIPATH_I_RCVAVAIL_MASK <<
INFINIPATH_I_RCVAVAIL_SHIFT) |
INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
- infinipath_e_bitsextant =
+ dd->ipath_e_bitsextant =
INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
@@ -895,8 +953,8 @@ static void ipath_init_pe_variables(void)
INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
INFINIPATH_E_HARDWARE;
- infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
- infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+ dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
+ dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
}
/* setup the MSI stuff again after a reset. I'd like to just call
@@ -971,8 +1029,7 @@ static int ipath_setup_pe_reset(struct ipath_devdata *dd)
int ret;
/* Use ERROR so it shows up in logs, etc. */
- ipath_dev_err(dd, "Resetting PE-800 unit %u\n",
- dd->ipath_unit);
+ ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit);
/* keep chip from being accessed in a few places */
dd->ipath_flags &= ~(IPATH_INITTED|IPATH_PRESENT);
val = dd->ipath_control | INFINIPATH_C_RESET;
@@ -1071,6 +1128,45 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
mmiowb();
spin_unlock_irqrestore(&dd->ipath_tid_lock, flags);
}
+/**
+ * ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher
+ * @dd: the infinipath device
+ * @tidptr: pointer to the expected TID (in chip) to udpate
+ * @tidtype: 0 for eager, 1 for expected
+ * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
+ *
+ * This exists as a separate routine to allow for selection of the
+ * appropriate "flavor". The static calls in cleanup just use the
+ * revision-agnostic form, as they are not performance critical.
+ */
+static void ipath_pe_put_tid_2(struct ipath_devdata *dd, u64 __iomem *tidptr,
+ u32 type, unsigned long pa)
+{
+ u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
+
+ if (pa != dd->ipath_tidinvalid) {
+ if (pa & ((1U << 11) - 1)) {
+ dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
+ "not 2KB aligned!\n", pa);
+ return;
+ }
+ pa >>= 11;
+ /* paranoia check */
+ if (pa & (7<<29))
+ ipath_dev_err(dd,
+ "BUG: Physical page address 0x%lx "
+ "has bits set in 31-29\n", pa);
+
+ if (type == 0)
+ pa |= dd->ipath_tidtemplate;
+ else /* for now, always full 4KB page */
+ pa |= 2 << 29;
+ }
+ if (dd->ipath_kregbase)
+ writel(pa, tidp32);
+ mmiowb();
+}
+
/**
* ipath_pe_clear_tid - clear all TID entries for a port, expected and eager
@@ -1078,7 +1174,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
* @port: the port
*
* clear all TID entries for a port, expected and eager.
- * Used from ipath_close(). On PE800, TIDs are only 32 bits,
+ * Used from ipath_close(). On this chip, TIDs are only 32 bits,
* not 64, but they are still on 64 bit boundaries, so tidbase
* is declared as u64 * for the pointer math, even though we write 32 bits
*/
@@ -1148,9 +1244,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
dd->ipath_flags |= IPATH_4BYTE_TID;
/*
- * For openib, we need to be able to handle an IB header of 96 bytes
- * or 24 dwords. HT-400 has arbitrary sized receive buffers, so we
- * made them the same size as the PIO buffers. The PE-800 does not
+ * For openfabrics, we need to be able to handle an IB header of
+ * 24 dwords. HT chip has arbitrary sized receive buffers, so we
+ * made them the same size as the PIO buffers. This chip does not
* handle arbitrary size buffers, so we need the header large enough
* to handle largest IB header, but still have room for a 2KB MTU
* standard IB packet.
@@ -1158,11 +1254,10 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
dd->ipath_rcvhdrentsize = 24;
dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
- /* For HT-400, we allocate a somewhat overly large eager buffer,
- * such that we can guarantee that we can receive the largest packet
- * that we can send out. To truly support a 4KB MTU, we need to
- * bump this to a larger value. We'll do this when I get around to
- * testing 4KB sends on the PE-800, which I have not yet done.
+ /*
+ * To truly support a 4KB MTU (for usermode), we need to
+ * bump this to a larger value. For now, we use them for
+ * the kernel only.
*/
dd->ipath_rcvegrbufsize = 2048;
/*
@@ -1175,9 +1270,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
/*
- * For PE-800, we can request a receive interrupt for 1 or
+ * We can request a receive interrupt for 1 or
* more packets from current offset. For now, we set this
- * up for a single packet, to match the HT-400 behavior.
+ * up for a single packet.
*/
dd->ipath_rhdrhead_intr_off = 1ULL<<32;
@@ -1193,7 +1288,7 @@ int __attribute__((weak)) ipath_unordered_wc(void)
/**
* ipath_init_pe_get_base_info - set chip-specific flags for user code
- * @dd: the infinipath device
+ * @pd: the infinipath port
* @kbase: ipath_base_info pointer
*
* We set the PCIE flag because the lower bandwidth on PCIe vs
@@ -1202,6 +1297,7 @@ int __attribute__((weak)) ipath_unordered_wc(void)
static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
{
struct ipath_base_info *kinfo = kbase;
+ struct ipath_devdata *dd;
if (ipath_unordered_wc()) {
kinfo->spi_runtime_flags |= IPATH_RUNTIME_FORCE_WC_ORDER;
@@ -1210,19 +1306,31 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
else
ipath_cdbg(PROC, "Not Intel processor, WC ordered\n");
- kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
+ if (pd == NULL)
+ goto done;
+ dd = pd->port_dd;
+
+ if (dd != NULL && dd->ipath_minrev >= 2) {
+ ipath_cdbg(PROC, "IBA6120 Rev2, allow multiple PBC write\n");
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_PBC_REWRITE;
+ ipath_cdbg(PROC, "IBA6120 Rev2, allow loose DMA alignment\n");
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_LOOSE_DMA_ALIGN;
+ }
+
+done:
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
return 0;
}
/**
- * ipath_init_pe800_funcs - set up the chip-specific function pointers
+ * ipath_init_iba6120_funcs - set up the chip-specific function pointers
* @dd: the infinipath device
*
* This is global, and is called directly at init to set up the
* chip-specific function pointers for later use.
*/
-void ipath_init_pe800_funcs(struct ipath_devdata *dd)
+void ipath_init_iba6120_funcs(struct ipath_devdata *dd)
{
dd->ipath_f_intrsetup = ipath_pe_intconfig;
dd->ipath_f_bus = ipath_setup_pe_config;
@@ -1234,7 +1342,10 @@ void ipath_init_pe800_funcs(struct ipath_devdata *dd)
dd->ipath_f_quiet_serdes = ipath_pe_quiet_serdes;
dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes;
dd->ipath_f_clear_tids = ipath_pe_clear_tids;
- dd->ipath_f_put_tid = ipath_pe_put_tid;
+ if (dd->ipath_minrev >= 2)
+ dd->ipath_f_put_tid = ipath_pe_put_tid_2;
+ else
+ dd->ipath_f_put_tid = ipath_pe_put_tid;
dd->ipath_f_cleanup = ipath_setup_pe_cleanup;
dd->ipath_f_setextled = ipath_setup_pe_setextled;
dd->ipath_f_get_base_info = ipath_pe_get_base_info;
@@ -1249,6 +1360,6 @@ void ipath_init_pe800_funcs(struct ipath_devdata *dd)
dd->ipath_kregs = &ipath_pe_kregs;
dd->ipath_cregs = &ipath_pe_cregs;
- ipath_init_pe_variables();
+ ipath_init_pe_variables(dd);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 414cdd1d80a..d819cca524c 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -53,8 +53,8 @@ module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO);
MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
/*
- * Number of buffers reserved for driver (layered drivers and SMA
- * send). Reserved at end of buffer list. Initialized based on
+ * Number of buffers reserved for driver (verbs and layered drivers.)
+ * Reserved at end of buffer list. Initialized based on
* number of PIO buffers if not set via module interface.
* The problem with this is that it's global, but we'll use different
* numbers for different chip types. So the default value is not
@@ -80,7 +80,7 @@ MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
*
* Allocate the eager TID buffers and program them into infinipath.
* We use the network layer alloc_skb() allocator to allocate the
- * memory, and either use the buffers as is for things like SMA
+ * memory, and either use the buffers as is for things like verbs
* packets, or pass the buffers up to the ipath layered driver and
* thence the network layer, replacing them as we do so (see
* ipath_rcv_layer()).
@@ -88,13 +88,13 @@ MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
static int create_port0_egr(struct ipath_devdata *dd)
{
unsigned e, egrcnt;
- struct sk_buff **skbs;
+ struct ipath_skbinfo *skbinfo;
int ret;
egrcnt = dd->ipath_rcvegrcnt;
- skbs = vmalloc(sizeof(*dd->ipath_port0_skbs) * egrcnt);
- if (skbs == NULL) {
+ skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt);
+ if (skbinfo == NULL) {
ipath_dev_err(dd, "allocation error for eager TID "
"skb array\n");
ret = -ENOMEM;
@@ -109,13 +109,13 @@ static int create_port0_egr(struct ipath_devdata *dd)
* 4 bytes so that the data buffer stays word aligned.
* See ipath_kreceive() for more details.
*/
- skbs[e] = ipath_alloc_skb(dd, GFP_KERNEL);
- if (!skbs[e]) {
+ skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL);
+ if (!skbinfo[e].skb) {
ipath_dev_err(dd, "SKB allocation error for "
"eager TID %u\n", e);
while (e != 0)
- dev_kfree_skb(skbs[--e]);
- vfree(skbs);
+ dev_kfree_skb(skbinfo[--e].skb);
+ vfree(skbinfo);
ret = -ENOMEM;
goto bail;
}
@@ -124,14 +124,17 @@ static int create_port0_egr(struct ipath_devdata *dd)
* After loop above, so we can test non-NULL to see if ready
* to use at receive, etc.
*/
- dd->ipath_port0_skbs = skbs;
+ dd->ipath_port0_skbinfo = skbinfo;
for (e = 0; e < egrcnt; e++) {
- unsigned long phys =
- virt_to_phys(dd->ipath_port0_skbs[e]->data);
+ dd->ipath_port0_skbinfo[e].phys =
+ ipath_map_single(dd->pcidev,
+ dd->ipath_port0_skbinfo[e].skb->data,
+ dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE);
dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
((char __iomem *) dd->ipath_kregbase +
- dd->ipath_rcvegrbase), 0, phys);
+ dd->ipath_rcvegrbase), 0,
+ dd->ipath_port0_skbinfo[e].phys);
}
ret = 0;
@@ -240,7 +243,11 @@ static int init_chip_first(struct ipath_devdata *dd,
"only supports %u\n", ipath_cfgports,
dd->ipath_portcnt);
}
- dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_cfgports,
+ /*
+ * Allocate full portcnt array, rather than just cfgports, because
+ * cleanup iterates across all possible ports.
+ */
+ dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_portcnt,
GFP_KERNEL);
if (!dd->ipath_pd) {
@@ -428,16 +435,33 @@ done:
*/
static void init_shadow_tids(struct ipath_devdata *dd)
{
- dd->ipath_pageshadow = (struct page **)
- vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+ struct page **pages;
+ dma_addr_t *addrs;
+
+ pages = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
sizeof(struct page *));
- if (!dd->ipath_pageshadow)
+ if (!pages) {
ipath_dev_err(dd, "failed to allocate shadow page * "
"array, no expected sends!\n");
- else
- memset(dd->ipath_pageshadow, 0,
- dd->ipath_cfgports * dd->ipath_rcvtidcnt *
- sizeof(struct page *));
+ dd->ipath_pageshadow = NULL;
+ return;
+ }
+
+ addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+ sizeof(dma_addr_t));
+ if (!addrs) {
+ ipath_dev_err(dd, "failed to allocate shadow dma handle "
+ "array, no expected sends!\n");
+ vfree(dd->ipath_pageshadow);
+ dd->ipath_pageshadow = NULL;
+ return;
+ }
+
+ memset(pages, 0, dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+ sizeof(struct page *));
+
+ dd->ipath_pageshadow = pages;
+ dd->ipath_physshadow = addrs;
}
static void enable_chip(struct ipath_devdata *dd,
@@ -446,9 +470,9 @@ static void enable_chip(struct ipath_devdata *dd,
u32 val;
int i;
- if (!reinit) {
- init_waitqueue_head(&ipath_sma_state_wait);
- }
+ if (!reinit)
+ init_waitqueue_head(&ipath_state_wait);
+
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
@@ -687,7 +711,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2)
/ (sizeof(u64) * BITS_PER_BYTE / 2);
if (ipath_kpiobufs == 0) {
- /* not set by user, or set explictly to default */
+ /* not set by user (this is default) */
if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128)
kpiobufs = 32;
else
@@ -946,6 +970,7 @@ static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp)
dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val;
}
+ ipath_kpiobufs = val;
ret = 0;
bail:
spin_unlock_irqrestore(&ipath_devs_lock, flags);
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 280e732660a..6bee53ce5f3 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -34,9 +34,53 @@
#include <linux/pci.h>
#include "ipath_kernel.h"
-#include "ipath_layer.h"
+#include "ipath_verbs.h"
#include "ipath_common.h"
+/*
+ * Called when we might have an error that is specific to a particular
+ * PIO buffer, and may need to cancel that buffer, so it can be re-used.
+ */
+void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
+{
+ u32 piobcnt;
+ unsigned long sbuf[4];
+ /*
+ * it's possible that sendbuffererror could have bits set; might
+ * have already done this as a result of hardware error handling
+ */
+ piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+ /* read these before writing errorclear */
+ sbuf[0] = ipath_read_kreg64(
+ dd, dd->ipath_kregs->kr_sendbuffererror);
+ sbuf[1] = ipath_read_kreg64(
+ dd, dd->ipath_kregs->kr_sendbuffererror + 1);
+ if (piobcnt > 128) {
+ sbuf[2] = ipath_read_kreg64(
+ dd, dd->ipath_kregs->kr_sendbuffererror + 2);
+ sbuf[3] = ipath_read_kreg64(
+ dd, dd->ipath_kregs->kr_sendbuffererror + 3);
+ }
+
+ if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
+ int i;
+ if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG)) {
+ __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
+ "SendbufErrs %lx %lx", sbuf[0],
+ sbuf[1]);
+ if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
+ printk(" %lx %lx ", sbuf[2], sbuf[3]);
+ printk("\n");
+ }
+
+ for (i = 0; i < piobcnt; i++)
+ if (test_bit(i, sbuf))
+ ipath_disarm_piobufs(dd, i, 1);
+ dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */
+ }
+}
+
+
/* These are all rcv-related errors which we want to count for stats */
#define E_SUM_PKTERRS \
(INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
@@ -68,53 +112,9 @@
static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
{
- unsigned long sbuf[4];
u64 ignore_this_time = 0;
- u32 piobcnt;
-
- /* if possible that sendbuffererror could be valid */
- piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
- /* read these before writing errorclear */
- sbuf[0] = ipath_read_kreg64(
- dd, dd->ipath_kregs->kr_sendbuffererror);
- sbuf[1] = ipath_read_kreg64(
- dd, dd->ipath_kregs->kr_sendbuffererror + 1);
- if (piobcnt > 128) {
- sbuf[2] = ipath_read_kreg64(
- dd, dd->ipath_kregs->kr_sendbuffererror + 2);
- sbuf[3] = ipath_read_kreg64(
- dd, dd->ipath_kregs->kr_sendbuffererror + 3);
- }
-
- if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
- int i;
-
- ipath_cdbg(PKT, "SendbufErrs %lx %lx ", sbuf[0], sbuf[1]);
- if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
- printk("%lx %lx ", sbuf[2], sbuf[3]);
- for (i = 0; i < piobcnt; i++) {
- if (test_bit(i, sbuf)) {
- u32 __iomem *piobuf;
- if (i < dd->ipath_piobcnt2k)
- piobuf = (u32 __iomem *)
- (dd->ipath_pio2kbase +
- i * dd->ipath_palign);
- else
- piobuf = (u32 __iomem *)
- (dd->ipath_pio4kbase +
- (i - dd->ipath_piobcnt2k) *
- dd->ipath_4kalign);
-
- ipath_cdbg(PKT,
- "PIObuf[%u] @%p pbc is %x; ",
- i, piobuf, readl(piobuf));
- ipath_disarm_piobufs(dd, i, 1);
- }
- }
- if (ipath_debug & __IPATH_PKTDBG)
- printk("\n");
- }
+ ipath_disarm_senderrbufs(dd);
if ((errs & E_SUM_LINK_PKTERRS) &&
!(dd->ipath_flags & IPATH_LINKACTIVE)) {
/*
@@ -132,6 +132,82 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
return ignore_this_time;
}
+/* generic hw error messages... */
+#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \
+ { \
+ .mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a << \
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ), \
+ .msg = "TXE " #a " Memory Parity" \
+ }
+#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \
+ { \
+ .mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a << \
+ INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ), \
+ .msg = "RXE " #a " Memory Parity" \
+ }
+
+static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = {
+ INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"),
+ INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"),
+
+ INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF),
+ INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC),
+ INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO),
+
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO),
+};
+
+/**
+ * ipath_format_hwmsg - format a single hwerror message
+ * @msg message buffer
+ * @msgl length of message buffer
+ * @hwmsg message to add to message buffer
+ */
+static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
+{
+ strlcat(msg, "[", msgl);
+ strlcat(msg, hwmsg, msgl);
+ strlcat(msg, "]", msgl);
+}
+
+/**
+ * ipath_format_hwerrors - format hardware error messages for display
+ * @hwerrs hardware errors bit vector
+ * @hwerrmsgs hardware error descriptions
+ * @nhwerrmsgs number of hwerrmsgs
+ * @msg message buffer
+ * @msgl message buffer length
+ */
+void ipath_format_hwerrors(u64 hwerrs,
+ const struct ipath_hwerror_msgs *hwerrmsgs,
+ size_t nhwerrmsgs,
+ char *msg, size_t msgl)
+{
+ int i;
+ const int glen =
+ sizeof(ipath_generic_hwerror_msgs) /
+ sizeof(ipath_generic_hwerror_msgs[0]);
+
+ for (i=0; i<glen; i++) {
+ if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
+ ipath_format_hwmsg(msg, msgl,
+ ipath_generic_hwerror_msgs[i].msg);
+ }
+ }
+
+ for (i=0; i<nhwerrmsgs; i++) {
+ if (hwerrs & hwerrmsgs[i].mask) {
+ ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
+ }
+ }
+}
+
/* return the strings for the most common link states */
static char *ib_linkstate(u32 linkstate)
{
@@ -201,7 +277,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
ib_linkstate(lstate));
}
else
- ipath_cdbg(SMA, "Unit %u link state %s, last "
+ ipath_cdbg(VERBOSE, "Unit %u link state %s, last "
"was %s\n", dd->ipath_unit,
ib_linkstate(lstate),
ib_linkstate((unsigned)
@@ -213,7 +289,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
if (lstate == IPATH_IBSTATE_INIT ||
lstate == IPATH_IBSTATE_ARM ||
lstate == IPATH_IBSTATE_ACTIVE)
- ipath_cdbg(SMA, "Unit %u link state down"
+ ipath_cdbg(VERBOSE, "Unit %u link state down"
" (state 0x%x), from %s\n",
dd->ipath_unit,
(u32)val & IPATH_IBSTATE_MASK,
@@ -269,7 +345,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
INFINIPATH_IBCS_LINKSTATE_MASK)
== INFINIPATH_IBCS_L_STATE_ACTIVE)
/* if from up to down be more vocal */
- ipath_cdbg(SMA,
+ ipath_cdbg(VERBOSE,
"Unit %u link now down (%s)\n",
dd->ipath_unit,
ipath_ibcstatus_str[ltstate]);
@@ -289,8 +365,6 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
*dd->ipath_statusp |=
IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
dd->ipath_f_setextled(dd, lstate, ltstate);
-
- __ipath_layer_intr(dd, IPATH_LAYER_INT_IF_UP);
} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
/*
* set INIT and DOWN. Down is checked by most of the other
@@ -406,10 +480,10 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
}
- if (!noprint && (errs & ~infinipath_e_bitsextant))
+ if (!noprint && (errs & ~dd->ipath_e_bitsextant))
ipath_dev_err(dd, "error interrupt with unknown errors "
"%llx set\n", (unsigned long long)
- (errs & ~infinipath_e_bitsextant));
+ (errs & ~dd->ipath_e_bitsextant));
if (errs & E_SUM_ERRS)
ignore_this_time = handle_e_sum_errs(dd, errs);
@@ -480,6 +554,14 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
~(INFINIPATH_E_HARDWARE |
INFINIPATH_E_IBSTATUSCHANGED);
}
+
+ /* likely due to cancel, so suppress */
+ if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
+ dd->ipath_lastcancel > jiffies) {
+ ipath_dbg("Suppressed armlaunch/spktlen after error send cancel\n");
+ errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
+ }
+
if (!errs)
return 0;
@@ -531,7 +613,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
* don't report same point multiple times,
* except kernel
*/
- tl = (u32) * pd->port_rcvhdrtail_kvaddr;
+ tl = *(u64 *) pd->port_rcvhdrtail_kvaddr;
if (tl == dd->ipath_lastrcvhdrqtails[i])
continue;
hd = ipath_read_ureg32(dd, ur_rcvhdrhead,
@@ -598,11 +680,11 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
if (!noprint && *msg)
ipath_dev_err(dd, "%s error\n", msg);
- if (dd->ipath_sma_state_wanted & dd->ipath_flags) {
- ipath_cdbg(VERBOSE, "sma wanted state %x, iflags now %x, "
- "waking\n", dd->ipath_sma_state_wanted,
+ if (dd->ipath_state_wanted & dd->ipath_flags) {
+ ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
+ "waking\n", dd->ipath_state_wanted,
dd->ipath_flags);
- wake_up_interruptible(&ipath_sma_state_wait);
+ wake_up_interruptible(&ipath_state_wait);
}
return chkerrpkts;
@@ -708,11 +790,7 @@ static void handle_layer_pioavail(struct ipath_devdata *dd)
{
int ret;
- ret = __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
- if (ret > 0)
- goto set;
-
- ret = __ipath_verbs_piobufavail(dd);
+ ret = ipath_ib_piobufavail(dd->verbs_dev);
if (ret > 0)
goto set;
@@ -735,9 +813,9 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
int rcvdint = 0;
portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) &
- infinipath_i_rcvavail_mask)
+ dd->ipath_i_rcvavail_mask)
| ((istat >> INFINIPATH_I_RCVURG_SHIFT) &
- infinipath_i_rcvurg_mask);
+ dd->ipath_i_rcvurg_mask);
for (i = 1; i < dd->ipath_cfgports; i++) {
struct ipath_portdata *pd = dd->ipath_pd[i];
if (portr & (1 << i) && pd && pd->port_cnt &&
@@ -814,7 +892,7 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
if (oldhead != curtail) {
if (dd->ipath_flags & IPATH_GPIO_INTR) {
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
- (u64) (1 << 2));
+ (u64) (1 << IPATH_GPIO_PORT0_BIT));
istat = port0rbits | INFINIPATH_I_GPIO;
}
else
@@ -844,10 +922,10 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
if (unexpected)
unexpected = 0;
- if (unlikely(istat & ~infinipath_i_bitsextant))
+ if (unlikely(istat & ~dd->ipath_i_bitsextant))
ipath_dev_err(dd,
"interrupt with unknown interrupts %x set\n",
- istat & (u32) ~ infinipath_i_bitsextant);
+ istat & (u32) ~ dd->ipath_i_bitsextant);
else
ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat);
@@ -873,26 +951,80 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
if (istat & INFINIPATH_I_GPIO) {
/*
- * Packets are available in the port 0 rcv queue.
- * Eventually this needs to be generalized to check
- * IPATH_GPIO_INTR, and the specific GPIO bit, if
- * GPIO interrupts are used for anything else.
+ * GPIO interrupts fall in two broad classes:
+ * GPIO_2 indicates (on some HT4xx boards) that a packet
+ * has arrived for Port 0. Checking for this
+ * is controlled by flag IPATH_GPIO_INTR.
+ * GPIO_3..5 on IBA6120 Rev2 chips indicate errors
+ * that we need to count. Checking for this
+ * is controlled by flag IPATH_GPIO_ERRINTRS.
*/
- if (unlikely(!(dd->ipath_flags & IPATH_GPIO_INTR))) {
- u32 gpiostatus;
- gpiostatus = ipath_read_kreg32(
- dd, dd->ipath_kregs->kr_gpio_status);
- ipath_dbg("Unexpected GPIO interrupt bits %x\n",
- gpiostatus);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
- gpiostatus);
+ u32 gpiostatus;
+ u32 to_clear = 0;
+
+ gpiostatus = ipath_read_kreg32(
+ dd, dd->ipath_kregs->kr_gpio_status);
+ /* First the error-counter case.
+ */
+ if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) &&
+ (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) {
+ /* want to clear the bits we see asserted. */
+ to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK);
+
+ /*
+ * Count appropriately, clear bits out of our copy,
+ * as they have been "handled".
+ */
+ if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) {
+ ipath_dbg("FlowCtl on UnsupVL\n");
+ dd->ipath_rxfc_unsupvl_errs++;
+ }
+ if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) {
+ ipath_dbg("Overrun Threshold exceeded\n");
+ dd->ipath_overrun_thresh_errs++;
+ }
+ if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) {
+ ipath_dbg("Local Link Integrity error\n");
+ dd->ipath_lli_errs++;
+ }
+ gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK;
}
- else {
- /* Clear GPIO status bit 2 */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
- (u64) (1 << 2));
+ /* Now the Port0 Receive case */
+ if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) &&
+ (dd->ipath_flags & IPATH_GPIO_INTR)) {
+ /*
+ * GPIO status bit 2 is set, and we expected it.
+ * clear it and indicate in p0bits.
+ * This probably only happens if a Port0 pkt
+ * arrives at _just_ the wrong time, and we
+ * handle that by seting chk0rcv;
+ */
+ to_clear |= (1 << IPATH_GPIO_PORT0_BIT);
+ gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
chk0rcv = 1;
}
+ if (unlikely(gpiostatus)) {
+ /*
+ * Some unexpected bits remain. If they could have
+ * caused the interrupt, complain and clear.
+ * MEA: this is almost certainly non-ideal.
+ * we should look into auto-disable of unexpected
+ * GPIO interrupts, possibly on a "three strikes"
+ * basis.
+ */
+ u32 mask;
+ mask = ipath_read_kreg32(
+ dd, dd->ipath_kregs->kr_gpio_mask);
+ if (mask & gpiostatus) {
+ ipath_dbg("Unexpected GPIO IRQ bits %x\n",
+ gpiostatus & mask);
+ to_clear |= (gpiostatus & mask);
+ }
+ }
+ if (to_clear) {
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
+ (u64) to_clear);
+ }
}
chk0rcv |= istat & port0rbits;
@@ -917,9 +1049,9 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
istat &= ~port0rbits;
}
- if (istat & ((infinipath_i_rcvavail_mask <<
+ if (istat & ((dd->ipath_i_rcvavail_mask <<
INFINIPATH_I_RCVAVAIL_SHIFT)
- | (infinipath_i_rcvurg_mask <<
+ | (dd->ipath_i_rcvurg_mask <<
INFINIPATH_I_RCVURG_SHIFT)))
handle_urcv(dd, istat);
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index e9f374fb641..d7540b71b45 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -39,6 +39,8 @@
*/
#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
#include <asm/io.h>
#include "ipath_common.h"
@@ -62,7 +64,7 @@ struct ipath_portdata {
/* rcvhdrq base, needs mmap before useful */
void *port_rcvhdrq;
/* kernel virtual address where hdrqtail is updated */
- volatile __le64 *port_rcvhdrtail_kvaddr;
+ void *port_rcvhdrtail_kvaddr;
/*
* temp buffer for expected send setup, allocated at open, instead
* of each setup call
@@ -79,8 +81,8 @@ struct ipath_portdata {
dma_addr_t port_rcvhdrq_phys;
dma_addr_t port_rcvhdrqtailaddr_phys;
/*
- * number of opens on this instance (0 or 1; ignoring forks, dup,
- * etc. for now)
+ * number of opens (including slave subports) on this instance
+ * (ignoring forks, dup, etc. for now)
*/
int port_cnt;
/*
@@ -89,6 +91,10 @@ struct ipath_portdata {
*/
/* instead of calculating it */
unsigned port_port;
+ /* non-zero if port is being shared. */
+ u16 port_subport_cnt;
+ /* non-zero if port is being shared. */
+ u16 port_subport_id;
/* chip offset of PIO buffers for this port */
u32 port_piobufs;
/* how many alloc_pages() chunks in port_rcvegrbuf_pages */
@@ -121,6 +127,16 @@ struct ipath_portdata {
u16 port_pkeys[4];
/* so file ops can get at unit */
struct ipath_devdata *port_dd;
+ /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
+ void *subport_uregbase;
+ /* An array of pages for the eager receive buffers * N */
+ void *subport_rcvegrbuf;
+ /* An array of pages for the eager header queue entries * N */
+ void *subport_rcvhdr_base;
+ /* The version of the library which opened this port */
+ u32 userversion;
+ /* Bitmask of active slaves */
+ u32 active_slaves;
};
struct sk_buff;
@@ -132,10 +148,9 @@ struct _ipath_layer {
void *l_arg;
};
-/* Verbs layer interface */
-struct _verbs_layer {
- void *l_arg;
- struct timer_list l_timer;
+struct ipath_skbinfo {
+ struct sk_buff *skb;
+ dma_addr_t phys;
};
struct ipath_devdata {
@@ -160,7 +175,7 @@ struct ipath_devdata {
/* ipath_cfgports pointers */
struct ipath_portdata **ipath_pd;
/* sk_buffs used by port 0 eager receive queue */
- struct sk_buff **ipath_port0_skbs;
+ struct ipath_skbinfo *ipath_port0_skbinfo;
/* kvirt address of 1st 2k pio buffer */
void __iomem *ipath_pio2kbase;
/* kvirt address of 1st 4k pio buffer */
@@ -198,7 +213,8 @@ struct ipath_devdata {
void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64);
/* fill out chip-specific fields */
int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
- struct _verbs_layer verbs_layer;
+ struct ipath_ibdev *verbs_dev;
+ struct timer_list verbs_timer;
/* total dwords sent (summed from counter) */
u64 ipath_sword;
/* total dwords rcvd (summed from counter) */
@@ -241,7 +257,7 @@ struct ipath_devdata {
u64 ipath_tidtemplate;
/* value to write to free TIDs */
u64 ipath_tidinvalid;
- /* PE-800 rcv interrupt setup */
+ /* IBA6120 rcv interrupt setup */
u64 ipath_rhdrhead_intr_off;
/* size of memory at ipath_kregbase */
@@ -250,8 +266,8 @@ struct ipath_devdata {
u32 ipath_pioavregs;
/* IPATH_POLL, etc. */
u32 ipath_flags;
- /* ipath_flags sma is waiting for */
- u32 ipath_sma_state_wanted;
+ /* ipath_flags driver is waiting for */
+ u32 ipath_state_wanted;
/* last buffer for user use, first buf for kernel use is this
* index. */
u32 ipath_lastport_piobuf;
@@ -311,10 +327,6 @@ struct ipath_devdata {
u32 ipath_pcibar0;
/* so we can rewrite it after a chip reset */
u32 ipath_pcibar1;
- /* sequential tries for SMA send and no bufs */
- u32 ipath_nosma_bufs;
- /* duration (seconds) ipath_nosma_bufs set */
- u32 ipath_nosma_secs;
/* HT/PCI Vendor ID (here for NodeInfo) */
u16 ipath_vendorid;
@@ -324,12 +336,16 @@ struct ipath_devdata {
u8 ipath_ht_slave_off;
/* for write combining settings */
unsigned long ipath_wc_cookie;
+ unsigned long ipath_wc_base;
+ unsigned long ipath_wc_len;
/* ref count for each pkey */
atomic_t ipath_pkeyrefs[4];
/* shadow copy of all exptids physaddr; used only by funcsim */
u64 *ipath_tidsimshadow;
/* shadow copy of struct page *'s for exp tid pages */
struct page **ipath_pageshadow;
+ /* shadow copy of dma handles for exp tid pages */
+ dma_addr_t *ipath_physshadow;
/* lock to workaround chip bug 9437 */
spinlock_t ipath_tid_lock;
@@ -411,6 +427,9 @@ struct ipath_devdata {
unsigned long ipath_rcvctrl;
/* shadow kr_sendctrl */
unsigned long ipath_sendctrl;
+ /* ports waiting for PIOavail intr */
+ unsigned long ipath_portpiowait;
+ unsigned long ipath_lastcancel; /* to not count armlaunch after cancel */
/* value we put in kr_rcvhdrcnt */
u32 ipath_rcvhdrcnt;
@@ -474,8 +493,6 @@ struct ipath_devdata {
u32 ipath_htwidth;
/* HT speed (200,400,800,1000) from HT config */
u32 ipath_htspeed;
- /* ports waiting for PIOavail intr */
- unsigned long ipath_portpiowait;
/*
* number of sequential ibcstatus change for polling active/quiet
* (i.e., link not coming up).
@@ -512,34 +529,64 @@ struct ipath_devdata {
u8 ipath_pci_cacheline;
/* LID mask control */
u8 ipath_lmc;
+ /* Rx Polarity inversion (compensate for ~tx on partner) */
+ u8 ipath_rx_pol_inv;
/* local link integrity counter */
u32 ipath_lli_counter;
/* local link integrity errors */
u32 ipath_lli_errors;
+ /*
+ * Above counts only cases where _successive_ LocalLinkIntegrity
+ * errors were seen in the receive headers of kern-packets.
+ * Below are the three (monotonically increasing) counters
+ * maintained via GPIO interrupts on iba6120-rev2.
+ */
+ u32 ipath_rxfc_unsupvl_errs;
+ u32 ipath_overrun_thresh_errs;
+ u32 ipath_lli_errs;
+
+ /*
+ * Not all devices managed by a driver instance are the same
+ * type, so these fields must be per-device.
+ */
+ u64 ipath_i_bitsextant;
+ ipath_err_t ipath_e_bitsextant;
+ ipath_err_t ipath_hwe_bitsextant;
+
+ /*
+ * Below should be computable from number of ports,
+ * since they are never modified.
+ */
+ u32 ipath_i_rcvavail_mask;
+ u32 ipath_i_rcvurg_mask;
+
+ /*
+ * Register bits for selecting i2c direction and values, used for
+ * I2C serial flash.
+ */
+ u16 ipath_gpio_sda_num;
+ u16 ipath_gpio_scl_num;
+ u64 ipath_gpio_sda;
+ u64 ipath_gpio_scl;
};
+/* Private data for file operations */
+struct ipath_filedata {
+ struct ipath_portdata *pd;
+ unsigned subport;
+ unsigned tidcursor;
+};
extern struct list_head ipath_dev_list;
extern spinlock_t ipath_devs_lock;
extern struct ipath_devdata *ipath_lookup(int unit);
-extern u16 ipath_layer_rcv_opcode;
-extern int __ipath_layer_intr(struct ipath_devdata *, u32);
-extern int ipath_layer_intr(struct ipath_devdata *, u32);
-extern int __ipath_layer_rcv(struct ipath_devdata *, void *,
- struct sk_buff *);
-extern int __ipath_layer_rcv_lid(struct ipath_devdata *, void *);
-extern int __ipath_verbs_piobufavail(struct ipath_devdata *);
-extern int __ipath_verbs_rcv(struct ipath_devdata *, void *, void *, u32);
-
-void ipath_layer_add(struct ipath_devdata *);
-void ipath_layer_remove(struct ipath_devdata *);
-
int ipath_init_chip(struct ipath_devdata *, int);
int ipath_enable_wc(struct ipath_devdata *dd);
void ipath_disable_wc(struct ipath_devdata *dd);
int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
void ipath_shutdown_device(struct ipath_devdata *);
+void ipath_disarm_senderrbufs(struct ipath_devdata *);
struct file_operations;
int ipath_cdev_init(int minor, char *name, struct file_operations *fops,
@@ -549,9 +596,8 @@ void ipath_cdev_cleanup(struct cdev **cdevp,
int ipath_diag_add(struct ipath_devdata *);
void ipath_diag_remove(struct ipath_devdata *);
-void ipath_diag_bringup_link(struct ipath_devdata *);
-extern wait_queue_head_t ipath_sma_state_wait;
+extern wait_queue_head_t ipath_state_wait;
int ipath_user_add(struct ipath_devdata *dd);
void ipath_user_remove(struct ipath_devdata *dd);
@@ -582,15 +628,21 @@ void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
int ipath_parse_ushort(const char *str, unsigned short *valp);
-int ipath_wait_linkstate(struct ipath_devdata *, u32, int);
-void ipath_set_ib_lstate(struct ipath_devdata *, int);
void ipath_kreceive(struct ipath_devdata *);
int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
int ipath_reset_device(int);
void ipath_get_faststats(unsigned long);
+int ipath_set_linkstate(struct ipath_devdata *, u8);
+int ipath_set_mtu(struct ipath_devdata *, u16);
+int ipath_set_lid(struct ipath_devdata *, u32, u8);
+int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
/* for use in system calls, where we want to know device type, etc. */
-#define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data)
+#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
+#define subport_fp(fp) \
+ ((struct ipath_filedata *)(fp)->private_data)->subport
+#define tidcursor_fp(fp) \
+ ((struct ipath_filedata *)(fp)->private_data)->tidcursor
/*
* values for ipath_flags
@@ -630,6 +682,15 @@ void ipath_get_faststats(unsigned long);
/* can miss port0 rx interrupts */
#define IPATH_POLL_RX_INTR 0x40000
#define IPATH_DISABLED 0x80000 /* administratively disabled */
+ /* Use GPIO interrupts for new counters */
+#define IPATH_GPIO_ERRINTRS 0x100000
+
+/* Bits in GPIO for the added interrupts */
+#define IPATH_GPIO_PORT0_BIT 2
+#define IPATH_GPIO_RXUVL_BIT 3
+#define IPATH_GPIO_OVRUN_BIT 4
+#define IPATH_GPIO_LLI_BIT 5
+#define IPATH_GPIO_ERRINTR_MASK 0x38
/* portdata flag bit offsets */
/* waiting for a packet to arrive */
@@ -642,10 +703,8 @@ void ipath_free_data(struct ipath_portdata *dd);
int ipath_waitfor_mdio_cmdready(struct ipath_devdata *);
int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *);
u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
-/* init PE-800-specific func */
-void ipath_init_pe800_funcs(struct ipath_devdata *);
-/* init HT-400-specific func */
-void ipath_init_ht400_funcs(struct ipath_devdata *);
+void ipath_init_iba6120_funcs(struct ipath_devdata *);
+void ipath_init_iba6110_funcs(struct ipath_devdata *);
void ipath_get_eeprom_info(struct ipath_devdata *);
u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
@@ -801,7 +860,7 @@ static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,
struct device_driver;
-extern const char ipath_core_version[];
+extern const char ib_ipath_version[];
int ipath_driver_create_group(struct device_driver *);
void ipath_driver_remove_group(struct device_driver *);
@@ -810,12 +869,22 @@ int ipath_device_create_group(struct device *, struct ipath_devdata *);
void ipath_device_remove_group(struct device *, struct ipath_devdata *);
int ipath_expose_reset(struct device *);
+int ipath_diagpkt_add(void);
+void ipath_diagpkt_remove(void);
+
int ipath_init_ipathfs(void);
void ipath_exit_ipathfs(void);
int ipathfs_add_device(struct ipath_devdata *);
int ipathfs_remove_device(struct ipath_devdata *);
/*
+ * dma_addr wrappers - all 0's invalid for hw
+ */
+dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long,
+ size_t, int);
+dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
+
+/*
* Flush write combining store buffers (if present) and perform a write
* barrier.
*/
@@ -831,10 +900,10 @@ const char *ipath_get_unit_name(int unit);
extern struct mutex ipath_mutex;
-#define IPATH_DRV_NAME "ipath_core"
+#define IPATH_DRV_NAME "ib_ipath"
#define IPATH_MAJOR 233
#define IPATH_USER_MINOR_BASE 0
-#define IPATH_SMA_MINOR 128
+#define IPATH_DIAGPKT_MINOR 127
#define IPATH_DIAG_MINOR_BASE 129
#define IPATH_NMINORS 255
@@ -872,4 +941,20 @@ extern struct mutex ipath_mutex;
#endif /* _IPATH_DEBUGGING */
+/*
+ * this is used for formatting hw error messages...
+ */
+struct ipath_hwerror_msgs {
+ u64 mask;
+ const char *msg;
+};
+
+#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b }
+
+/* in ipath_intr.c... */
+void ipath_format_hwerrors(u64 hwerrs,
+ const struct ipath_hwerror_msgs *hwerrmsgs,
+ size_t nhwerrmsgs,
+ char *msg, size_t lmsg);
+
#endif /* _IPATH_KERNEL_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index a5ca279370a..9a6cbd05adc 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -34,6 +34,7 @@
#include <asm/io.h>
#include "ipath_verbs.h"
+#include "ipath_kernel.h"
/**
* ipath_alloc_lkey - allocate an lkey
@@ -60,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
r = (r + 1) & (rkt->max - 1);
if (r == n) {
spin_unlock_irqrestore(&rkt->lock, flags);
- _VERBS_INFO("LKEY table full\n");
+ ipath_dbg(KERN_INFO "LKEY table full\n");
ret = 0;
goto bail;
}
@@ -117,9 +118,10 @@ void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey)
* Check the IB SGE for validity and initialize our internal version
* of it.
*/
-int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
+int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
struct ib_sge *sge, int acc)
{
+ struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
struct ipath_mregion *mr;
unsigned n, m;
size_t off;
@@ -139,7 +141,8 @@ int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
goto bail;
}
mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))];
- if (unlikely(mr == NULL || mr->lkey != sge->lkey)) {
+ if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
+ qp->ibqp.pd != mr->pd)) {
ret = 0;
goto bail;
}
@@ -187,9 +190,10 @@ bail:
*
* Return 1 if successful, otherwise 0.
*/
-int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
+int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
u32 len, u64 vaddr, u32 rkey, int acc)
{
+ struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_lkey_table *rkt = &dev->lk_table;
struct ipath_sge *sge = &ss->sge;
struct ipath_mregion *mr;
@@ -213,7 +217,8 @@ int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
}
mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))];
- if (unlikely(mr == NULL || mr->lkey != rkey)) {
+ if (unlikely(mr == NULL || mr->lkey != rkey ||
+ qp->ibqp.pd != mr->pd)) {
ret = 0;
goto bail;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c
index b28c6f81c73..e46aa4ed2a7 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.c
+++ b/drivers/infiniband/hw/ipath/ipath_layer.c
@@ -42,26 +42,20 @@
#include "ipath_kernel.h"
#include "ipath_layer.h"
+#include "ipath_verbs.h"
#include "ipath_common.h"
/* Acquire before ipath_devs_lock. */
static DEFINE_MUTEX(ipath_layer_mutex);
-static int ipath_verbs_registered;
-
u16 ipath_layer_rcv_opcode;
static int (*layer_intr)(void *, u32);
static int (*layer_rcv)(void *, void *, struct sk_buff *);
static int (*layer_rcv_lid)(void *, void *);
-static int (*verbs_piobufavail)(void *);
-static void (*verbs_rcv)(void *, void *, void *, u32);
static void *(*layer_add_one)(int, struct ipath_devdata *);
static void (*layer_remove_one)(void *);
-static void *(*verbs_add_one)(int, struct ipath_devdata *);
-static void (*verbs_remove_one)(void *);
-static void (*verbs_timer_cb)(void *);
int __ipath_layer_intr(struct ipath_devdata *dd, u32 arg)
{
@@ -107,302 +101,16 @@ int __ipath_layer_rcv_lid(struct ipath_devdata *dd, void *hdr)
return ret;
}
-int __ipath_verbs_piobufavail(struct ipath_devdata *dd)
-{
- int ret = -ENODEV;
-
- if (dd->verbs_layer.l_arg && verbs_piobufavail)
- ret = verbs_piobufavail(dd->verbs_layer.l_arg);
-
- return ret;
-}
-
-int __ipath_verbs_rcv(struct ipath_devdata *dd, void *rc, void *ebuf,
- u32 tlen)
-{
- int ret = -ENODEV;
-
- if (dd->verbs_layer.l_arg && verbs_rcv) {
- verbs_rcv(dd->verbs_layer.l_arg, rc, ebuf, tlen);
- ret = 0;
- }
-
- return ret;
-}
-
-int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 newstate)
+void ipath_layer_lid_changed(struct ipath_devdata *dd)
{
- u32 lstate;
- int ret;
-
- switch (newstate) {
- case IPATH_IB_LINKDOWN:
- ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
- INFINIPATH_IBCC_LINKINITCMD_SHIFT);
- /* don't wait */
- ret = 0;
- goto bail;
-
- case IPATH_IB_LINKDOWN_SLEEP:
- ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
- INFINIPATH_IBCC_LINKINITCMD_SHIFT);
- /* don't wait */
- ret = 0;
- goto bail;
-
- case IPATH_IB_LINKDOWN_DISABLE:
- ipath_set_ib_lstate(dd,
- INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
- INFINIPATH_IBCC_LINKINITCMD_SHIFT);
- /* don't wait */
- ret = 0;
- goto bail;
-
- case IPATH_IB_LINKINIT:
- if (dd->ipath_flags & IPATH_LINKINIT) {
- ret = 0;
- goto bail;
- }
- ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
- INFINIPATH_IBCC_LINKCMD_SHIFT);
- lstate = IPATH_LINKINIT;
- break;
-
- case IPATH_IB_LINKARM:
- if (dd->ipath_flags & IPATH_LINKARMED) {
- ret = 0;
- goto bail;
- }
- if (!(dd->ipath_flags &
- (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
- ret = -EINVAL;
- goto bail;
- }
- ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
- INFINIPATH_IBCC_LINKCMD_SHIFT);
- /*
- * Since the port can transition to ACTIVE by receiving
- * a non VL 15 packet, wait for either state.
- */
- lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
- break;
-
- case IPATH_IB_LINKACTIVE:
- if (dd->ipath_flags & IPATH_LINKACTIVE) {
- ret = 0;
- goto bail;
- }
- if (!(dd->ipath_flags & IPATH_LINKARMED)) {
- ret = -EINVAL;
- goto bail;
- }
- ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
- INFINIPATH_IBCC_LINKCMD_SHIFT);
- lstate = IPATH_LINKACTIVE;
- break;
-
- default:
- ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
- ret = -EINVAL;
- goto bail;
- }
- ret = ipath_wait_linkstate(dd, lstate, 2000);
-
-bail:
- return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_linkstate);
-
-/**
- * ipath_layer_set_mtu - set the MTU
- * @dd: the infinipath device
- * @arg: the new MTU
- *
- * we can handle "any" incoming size, the issue here is whether we
- * need to restrict our outgoing size. For now, we don't do any
- * sanity checking on this, and we don't deal with what happens to
- * programs that are already running when the size changes.
- * NOTE: changing the MTU will usually cause the IBC to go back to
- * link initialize (IPATH_IBSTATE_INIT) state...
- */
-int ipath_layer_set_mtu(struct ipath_devdata *dd, u16 arg)
-{
- u32 piosize;
- int changed = 0;
- int ret;
-
- /*
- * mtu is IB data payload max. It's the largest power of 2 less
- * than piosize (or even larger, since it only really controls the
- * largest we can receive; we can send the max of the mtu and
- * piosize). We check that it's one of the valid IB sizes.
- */
- if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
- arg != 4096) {
- ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
- ret = -EINVAL;
- goto bail;
- }
- if (dd->ipath_ibmtu == arg) {
- ret = 0; /* same as current */
- goto bail;
- }
-
- piosize = dd->ipath_ibmaxlen;
- dd->ipath_ibmtu = arg;
-
- if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
- /* Only if it's not the initial value (or reset to it) */
- if (piosize != dd->ipath_init_ibmaxlen) {
- dd->ipath_ibmaxlen = piosize;
- changed = 1;
- }
- } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
- piosize = arg + IPATH_PIO_MAXIBHDR;
- ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
- "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
- arg);
- dd->ipath_ibmaxlen = piosize;
- changed = 1;
- }
-
- if (changed) {
- /*
- * set the IBC maxpktlength to the size of our pio
- * buffers in words
- */
- u64 ibc = dd->ipath_ibcctrl;
- ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
- INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
-
- piosize = piosize - 2 * sizeof(u32); /* ignore pbc */
- dd->ipath_ibmaxlen = piosize;
- piosize /= sizeof(u32); /* in words */
- /*
- * for ICRC, which we only send in diag test pkt mode, and
- * we don't need to worry about that for mtu
- */
- piosize += 1;
-
- ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
- dd->ipath_ibcctrl = ibc;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
- dd->ipath_ibcctrl);
- dd->ipath_f_tidtemplate(dd);
- }
-
- ret = 0;
-
-bail:
- return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_mtu);
-
-int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
-{
- dd->ipath_lid = arg;
- dd->ipath_lmc = lmc;
-
mutex_lock(&ipath_layer_mutex);
if (dd->ipath_layer.l_arg && layer_intr)
layer_intr(dd->ipath_layer.l_arg, IPATH_LAYER_INT_LID);
mutex_unlock(&ipath_layer_mutex);
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_set_lid);
-
-int ipath_layer_set_guid(struct ipath_devdata *dd, __be64 guid)
-{
- /* XXX - need to inform anyone who cares this just happened. */
- dd->ipath_guid = guid;
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_guid);
-
-__be64 ipath_layer_get_guid(struct ipath_devdata *dd)
-{
- return dd->ipath_guid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_guid);
-
-u32 ipath_layer_get_nguid(struct ipath_devdata *dd)
-{
- return dd->ipath_nguid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_nguid);
-
-u32 ipath_layer_get_majrev(struct ipath_devdata *dd)
-{
- return dd->ipath_majrev;
}
-EXPORT_SYMBOL_GPL(ipath_layer_get_majrev);
-
-u32 ipath_layer_get_minrev(struct ipath_devdata *dd)
-{
- return dd->ipath_minrev;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_minrev);
-
-u32 ipath_layer_get_pcirev(struct ipath_devdata *dd)
-{
- return dd->ipath_pcirev;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_pcirev);
-
-u32 ipath_layer_get_flags(struct ipath_devdata *dd)
-{
- return dd->ipath_flags;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_flags);
-
-struct device *ipath_layer_get_device(struct ipath_devdata *dd)
-{
- return &dd->pcidev->dev;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_device);
-
-u16 ipath_layer_get_deviceid(struct ipath_devdata *dd)
-{
- return dd->ipath_deviceid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_deviceid);
-
-u32 ipath_layer_get_vendorid(struct ipath_devdata *dd)
-{
- return dd->ipath_vendorid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_vendorid);
-
-u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd)
-{
- return dd->ipath_lastibcstat;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_lastibcstat);
-
-u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd)
-{
- return dd->ipath_ibmtu;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_ibmtu);
-
void ipath_layer_add(struct ipath_devdata *dd)
{
mutex_lock(&ipath_layer_mutex);
@@ -411,10 +119,6 @@ void ipath_layer_add(struct ipath_devdata *dd)
dd->ipath_layer.l_arg =
layer_add_one(dd->ipath_unit, dd);
- if (verbs_add_one)
- dd->verbs_layer.l_arg =
- verbs_add_one(dd->ipath_unit, dd);
-
mutex_unlock(&ipath_layer_mutex);
}
@@ -427,11 +131,6 @@ void ipath_layer_remove(struct ipath_devdata *dd)
dd->ipath_layer.l_arg = NULL;
}
- if (dd->verbs_layer.l_arg && verbs_remove_one) {
- verbs_remove_one(dd->verbs_layer.l_arg);
- dd->verbs_layer.l_arg = NULL;
- }
-
mutex_unlock(&ipath_layer_mutex);
}
@@ -463,9 +162,6 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
if (dd->ipath_layer.l_arg)
continue;
- if (!(*dd->ipath_statusp & IPATH_STATUS_SMA))
- *dd->ipath_statusp |= IPATH_STATUS_OIB_SMA;
-
spin_unlock_irqrestore(&ipath_devs_lock, flags);
dd->ipath_layer.l_arg = l_add(dd->ipath_unit, dd);
spin_lock_irqsave(&ipath_devs_lock, flags);
@@ -509,107 +205,6 @@ void ipath_layer_unregister(void)
EXPORT_SYMBOL_GPL(ipath_layer_unregister);
-static void __ipath_verbs_timer(unsigned long arg)
-{
- struct ipath_devdata *dd = (struct ipath_devdata *) arg;
-
- /*
- * If port 0 receive packet interrupts are not available, or
- * can be missed, poll the receive queue
- */
- if (dd->ipath_flags & IPATH_POLL_RX_INTR)
- ipath_kreceive(dd);
-
- /* Handle verbs layer timeouts. */
- if (dd->verbs_layer.l_arg && verbs_timer_cb)
- verbs_timer_cb(dd->verbs_layer.l_arg);
-
- mod_timer(&dd->verbs_layer.l_timer, jiffies + 1);
-}
-
-/**
- * ipath_verbs_register - verbs layer registration
- * @l_piobufavail: callback for when PIO buffers become available
- * @l_rcv: callback for receiving a packet
- * @l_timer_cb: timer callback
- * @ipath_devdata: device data structure is put here
- */
-int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *),
- void (*l_remove)(void *arg),
- int (*l_piobufavail) (void *arg),
- void (*l_rcv) (void *arg, void *rhdr,
- void *data, u32 tlen),
- void (*l_timer_cb) (void *arg))
-{
- struct ipath_devdata *dd, *tmp;
- unsigned long flags;
-
- mutex_lock(&ipath_layer_mutex);
-
- verbs_add_one = l_add;
- verbs_remove_one = l_remove;
- verbs_piobufavail = l_piobufavail;
- verbs_rcv = l_rcv;
- verbs_timer_cb = l_timer_cb;
-
- spin_lock_irqsave(&ipath_devs_lock, flags);
-
- list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
- if (!(dd->ipath_flags & IPATH_INITTED))
- continue;
-
- if (dd->verbs_layer.l_arg)
- continue;
-
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
- dd->verbs_layer.l_arg = l_add(dd->ipath_unit, dd);
- spin_lock_irqsave(&ipath_devs_lock, flags);
- }
-
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
- mutex_unlock(&ipath_layer_mutex);
-
- ipath_verbs_registered = 1;
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_verbs_register);
-
-void ipath_verbs_unregister(void)
-{
- struct ipath_devdata *dd, *tmp;
- unsigned long flags;
-
- mutex_lock(&ipath_layer_mutex);
- spin_lock_irqsave(&ipath_devs_lock, flags);
-
- list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
- *dd->ipath_statusp &= ~IPATH_STATUS_OIB_SMA;
-
- if (dd->verbs_layer.l_arg && verbs_remove_one) {
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
- verbs_remove_one(dd->verbs_layer.l_arg);
- spin_lock_irqsave(&ipath_devs_lock, flags);
- dd->verbs_layer.l_arg = NULL;
- }
- }
-
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
- verbs_add_one = NULL;
- verbs_remove_one = NULL;
- verbs_piobufavail = NULL;
- verbs_rcv = NULL;
- verbs_timer_cb = NULL;
-
- ipath_verbs_registered = 0;
-
- mutex_unlock(&ipath_layer_mutex);
-}
-
-EXPORT_SYMBOL_GPL(ipath_verbs_unregister);
-
int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax)
{
int ret;
@@ -698,390 +293,6 @@ u16 ipath_layer_get_bcast(struct ipath_devdata *dd)
EXPORT_SYMBOL_GPL(ipath_layer_get_bcast);
-u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd)
-{
- return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_cr_errpkey);
-
-static void update_sge(struct ipath_sge_state *ss, u32 length)
-{
- struct ipath_sge *sge = &ss->sge;
-
- sge->vaddr += length;
- sge->length -= length;
- sge->sge_length -= length;
- if (sge->sge_length == 0) {
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr != NULL) {
- if (++sge->n >= IPATH_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- return;
- sge->n = 0;
- }
- sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
- }
-}
-
-#ifdef __LITTLE_ENDIAN
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
- return data >> shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
- return data << shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
- data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
- data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
- return data;
-}
-#else
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
- return data << shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
- return data >> shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
- data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
- data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
- return data;
-}
-#endif
-
-static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
- u32 length)
-{
- u32 extra = 0;
- u32 data = 0;
- u32 last;
-
- while (1) {
- u32 len = ss->sge.length;
- u32 off;
-
- BUG_ON(len == 0);
- if (len > length)
- len = length;
- if (len > ss->sge.sge_length)
- len = ss->sge.sge_length;
- /* If the source address is not aligned, try to align it. */
- off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
- if (off) {
- u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
- ~(sizeof(u32) - 1));
- u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
- u32 y;
-
- y = sizeof(u32) - off;
- if (len > y)
- len = y;
- if (len + extra >= sizeof(u32)) {
- data |= set_upper_bits(v, extra *
- BITS_PER_BYTE);
- len = sizeof(u32) - extra;
- if (len == length) {
- last = data;
- break;
- }
- __raw_writel(data, piobuf);
- piobuf++;
- extra = 0;
- data = 0;
- } else {
- /* Clear unused upper bytes */
- data |= clear_upper_bytes(v, len, extra);
- if (len == length) {
- last = data;
- break;
- }
- extra += len;
- }
- } else if (extra) {
- /* Source address is aligned. */
- u32 *addr = (u32 *) ss->sge.vaddr;
- int shift = extra * BITS_PER_BYTE;
- int ushift = 32 - shift;
- u32 l = len;
-
- while (l >= sizeof(u32)) {
- u32 v = *addr;
-
- data |= set_upper_bits(v, shift);
- __raw_writel(data, piobuf);
- data = get_upper_bits(v, ushift);
- piobuf++;
- addr++;
- l -= sizeof(u32);
- }
- /*
- * We still have 'extra' number of bytes leftover.
- */
- if (l) {
- u32 v = *addr;
-
- if (l + extra >= sizeof(u32)) {
- data |= set_upper_bits(v, shift);
- len -= l + extra - sizeof(u32);
- if (len == length) {
- last = data;
- break;
- }
- __raw_writel(data, piobuf);
- piobuf++;
- extra = 0;
- data = 0;
- } else {
- /* Clear unused upper bytes */
- data |= clear_upper_bytes(v, l,
- extra);
- if (len == length) {
- last = data;
- break;
- }
- extra += l;
- }
- } else if (len == length) {
- last = data;
- break;
- }
- } else if (len == length) {
- u32 w;
-
- /*
- * Need to round up for the last dword in the
- * packet.
- */
- w = (len + 3) >> 2;
- __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
- piobuf += w - 1;
- last = ((u32 *) ss->sge.vaddr)[w - 1];
- break;
- } else {
- u32 w = len >> 2;
-
- __iowrite32_copy(piobuf, ss->sge.vaddr, w);
- piobuf += w;
-
- extra = len & (sizeof(u32) - 1);
- if (extra) {
- u32 v = ((u32 *) ss->sge.vaddr)[w];
-
- /* Clear unused upper bytes */
- data = clear_upper_bytes(v, extra, 0);
- }
- }
- update_sge(ss, len);
- length -= len;
- }
- /* Update address before sending packet. */
- update_sge(ss, length);
- /* must flush early everything before trigger word */
- ipath_flush_wc();
- __raw_writel(last, piobuf);
- /* be sure trigger word is written */
- ipath_flush_wc();
-}
-
-/**
- * ipath_verbs_send - send a packet from the verbs layer
- * @dd: the infinipath device
- * @hdrwords: the number of words in the header
- * @hdr: the packet header
- * @len: the length of the packet in bytes
- * @ss: the SGE to send
- *
- * This is like ipath_sma_send_pkt() in that we need to be able to send
- * packets after the chip is initialized (MADs) but also like
- * ipath_layer_send_hdr() since its used by the verbs layer.
- */
-int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
- u32 *hdr, u32 len, struct ipath_sge_state *ss)
-{
- u32 __iomem *piobuf;
- u32 plen;
- int ret;
-
- /* +1 is for the qword padding of pbc */
- plen = hdrwords + ((len + 3) >> 2) + 1;
- if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
- ipath_dbg("packet len 0x%x too long, failing\n", plen);
- ret = -EINVAL;
- goto bail;
- }
-
- /* Get a PIO buffer to use. */
- piobuf = ipath_getpiobuf(dd, NULL);
- if (unlikely(piobuf == NULL)) {
- ret = -EBUSY;
- goto bail;
- }
-
- /*
- * Write len to control qword, no flags.
- * We have to flush after the PBC for correctness on some cpus
- * or WC buffer can be written out of order.
- */
- writeq(plen, piobuf);
- ipath_flush_wc();
- piobuf += 2;
- if (len == 0) {
- /*
- * If there is just the header portion, must flush before
- * writing last word of header for correctness, and after
- * the last header word (trigger word).
- */
- __iowrite32_copy(piobuf, hdr, hdrwords - 1);
- ipath_flush_wc();
- __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
- ipath_flush_wc();
- ret = 0;
- goto bail;
- }
-
- __iowrite32_copy(piobuf, hdr, hdrwords);
- piobuf += hdrwords;
-
- /* The common case is aligned and contained in one segment. */
- if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
- !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
- u32 w;
- u32 *addr = (u32 *) ss->sge.vaddr;
-
- /* Update address before sending packet. */
- update_sge(ss, len);
- /* Need to round up for the last dword in the packet. */
- w = (len + 3) >> 2;
- __iowrite32_copy(piobuf, addr, w - 1);
- /* must flush early everything before trigger word */
- ipath_flush_wc();
- __raw_writel(addr[w - 1], piobuf + w - 1);
- /* be sure trigger word is written */
- ipath_flush_wc();
- ret = 0;
- goto bail;
- }
- copy_io(piobuf, ss, len);
- ret = 0;
-
-bail:
- return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_verbs_send);
-
-int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
- u64 *rwords, u64 *spkts, u64 *rpkts,
- u64 *xmit_wait)
-{
- int ret;
-
- if (!(dd->ipath_flags & IPATH_INITTED)) {
- /* no hardware, freeze, etc. */
- ipath_dbg("unit %u not usable\n", dd->ipath_unit);
- ret = -EINVAL;
- goto bail;
- }
- *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
- *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
- *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
- *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
- *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
-
- ret = 0;
-
-bail:
- return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_snapshot_counters);
-
-/**
- * ipath_layer_get_counters - get various chip counters
- * @dd: the infinipath device
- * @cntrs: counters are placed here
- *
- * Return the counters needed by recv_pma_get_portcounters().
- */
-int ipath_layer_get_counters(struct ipath_devdata *dd,
- struct ipath_layer_counters *cntrs)
-{
- int ret;
-
- if (!(dd->ipath_flags & IPATH_INITTED)) {
- /* no hardware, freeze, etc. */
- ipath_dbg("unit %u not usable\n", dd->ipath_unit);
- ret = -EINVAL;
- goto bail;
- }
- cntrs->symbol_error_counter =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
- cntrs->link_error_recovery_counter =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
- /*
- * The link downed counter counts when the other side downs the
- * connection. We add in the number of times we downed the link
- * due to local link integrity errors to compensate.
- */
- cntrs->link_downed_counter =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
- cntrs->port_rcv_errors =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
- cntrs->port_rcv_remphys_errors =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
- cntrs->port_xmit_discards =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt);
- cntrs->port_xmit_data =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
- cntrs->port_rcv_data =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
- cntrs->port_xmit_packets =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
- cntrs->port_rcv_packets =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
- cntrs->local_link_integrity_errors = dd->ipath_lli_errors;
- cntrs->excessive_buffer_overrun_errors = 0; /* XXX */
-
- ret = 0;
-
-bail:
- return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_counters);
-
-int ipath_layer_want_buffer(struct ipath_devdata *dd)
-{
- set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- dd->ipath_sendctrl);
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_want_buffer);
-
int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr)
{
int ret = 0;
@@ -1153,389 +364,3 @@ int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd)
}
EXPORT_SYMBOL_GPL(ipath_layer_set_piointbufavail_int);
-
-int ipath_layer_enable_timer(struct ipath_devdata *dd)
-{
- /*
- * HT-400 has a design flaw where the chip and kernel idea
- * of the tail register don't always agree, and therefore we won't
- * get an interrupt on the next packet received.
- * If the board supports per packet receive interrupts, use it.
- * Otherwise, the timer function periodically checks for packets
- * to cover this case.
- * Either way, the timer is needed for verbs layer related
- * processing.
- */
- if (dd->ipath_flags & IPATH_GPIO_INTR) {
- ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
- 0x2074076542310ULL);
- /* Enable GPIO bit 2 interrupt */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
- (u64) (1 << 2));
- }
-
- init_timer(&dd->verbs_layer.l_timer);
- dd->verbs_layer.l_timer.function = __ipath_verbs_timer;
- dd->verbs_layer.l_timer.data = (unsigned long)dd;
- dd->verbs_layer.l_timer.expires = jiffies + 1;
- add_timer(&dd->verbs_layer.l_timer);
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_enable_timer);
-
-int ipath_layer_disable_timer(struct ipath_devdata *dd)
-{
- /* Disable GPIO bit 2 interrupt */
- if (dd->ipath_flags & IPATH_GPIO_INTR)
- ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
-
- del_timer_sync(&dd->verbs_layer.l_timer);
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_disable_timer);
-
-/**
- * ipath_layer_set_verbs_flags - set the verbs layer flags
- * @dd: the infinipath device
- * @flags: the flags to set
- */
-int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags)
-{
- struct ipath_devdata *ss;
- unsigned long lflags;
-
- spin_lock_irqsave(&ipath_devs_lock, lflags);
-
- list_for_each_entry(ss, &ipath_dev_list, ipath_list) {
- if (!(ss->ipath_flags & IPATH_INITTED))
- continue;
- if ((flags & IPATH_VERBS_KERNEL_SMA) &&
- !(*ss->ipath_statusp & IPATH_STATUS_SMA))
- *ss->ipath_statusp |= IPATH_STATUS_OIB_SMA;
- else
- *ss->ipath_statusp &= ~IPATH_STATUS_OIB_SMA;
- }
-
- spin_unlock_irqrestore(&ipath_devs_lock, lflags);
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_verbs_flags);
-
-/**
- * ipath_layer_get_npkeys - return the size of the PKEY table for port 0
- * @dd: the infinipath device
- */
-unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd)
-{
- return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_npkeys);
-
-/**
- * ipath_layer_get_pkey - return the indexed PKEY from the port 0 PKEY table
- * @dd: the infinipath device
- * @index: the PKEY index
- */
-unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index)
-{
- unsigned ret;
-
- if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
- ret = 0;
- else
- ret = dd->ipath_pd[0]->port_pkeys[index];
-
- return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_pkey);
-
-/**
- * ipath_layer_get_pkeys - return the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the pkey table is placed here
- */
-int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
-{
- struct ipath_portdata *pd = dd->ipath_pd[0];
-
- memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_pkeys);
-
-/**
- * rm_pkey - decrecment the reference count for the given PKEY
- * @dd: the infinipath device
- * @key: the PKEY index
- *
- * Return true if this was the last reference and the hardware table entry
- * needs to be changed.
- */
-static int rm_pkey(struct ipath_devdata *dd, u16 key)
-{
- int i;
- int ret;
-
- for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
- if (dd->ipath_pkeys[i] != key)
- continue;
- if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
- dd->ipath_pkeys[i] = 0;
- ret = 1;
- goto bail;
- }
- break;
- }
-
- ret = 0;
-
-bail:
- return ret;
-}
-
-/**
- * add_pkey - add the given PKEY to the hardware table
- * @dd: the infinipath device
- * @key: the PKEY
- *
- * Return an error code if unable to add the entry, zero if no change,
- * or 1 if the hardware PKEY register needs to be updated.
- */
-static int add_pkey(struct ipath_devdata *dd, u16 key)
-{
- int i;
- u16 lkey = key & 0x7FFF;
- int any = 0;
- int ret;
-
- if (lkey == 0x7FFF) {
- ret = 0;
- goto bail;
- }
-
- /* Look for an empty slot or a matching PKEY. */
- for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
- if (!dd->ipath_pkeys[i]) {
- any++;
- continue;
- }
- /* If it matches exactly, try to increment the ref count */
- if (dd->ipath_pkeys[i] == key) {
- if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
- ret = 0;
- goto bail;
- }
- /* Lost the race. Look for an empty slot below. */
- atomic_dec(&dd->ipath_pkeyrefs[i]);
- any++;
- }
- /*
- * It makes no sense to have both the limited and unlimited
- * PKEY set at the same time since the unlimited one will
- * disable the limited one.
- */
- if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
- ret = -EEXIST;
- goto bail;
- }
- }
- if (!any) {
- ret = -EBUSY;
- goto bail;
- }
- for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
- if (!dd->ipath_pkeys[i] &&
- atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
- /* for ipathstats, etc. */
- ipath_stats.sps_pkeys[i] = lkey;
- dd->ipath_pkeys[i] = key;
- ret = 1;
- goto bail;
- }
- }
- ret = -EBUSY;
-
-bail:
- return ret;
-}
-
-/**
- * ipath_layer_set_pkeys - set the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the PKEY table
- */
-int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 * pkeys)
-{
- struct ipath_portdata *pd;
- int i;
- int changed = 0;
-
- pd = dd->ipath_pd[0];
-
- for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
- u16 key = pkeys[i];
- u16 okey = pd->port_pkeys[i];
-
- if (key == okey)
- continue;
- /*
- * The value of this PKEY table entry is changing.
- * Remove the old entry in the hardware's array of PKEYs.
- */
- if (okey & 0x7FFF)
- changed |= rm_pkey(dd, okey);
- if (key & 0x7FFF) {
- int ret = add_pkey(dd, key);
-
- if (ret < 0)
- key = 0;
- else
- changed |= ret;
- }
- pd->port_pkeys[i] = key;
- }
- if (changed) {
- u64 pkey;
-
- pkey = (u64) dd->ipath_pkeys[0] |
- ((u64) dd->ipath_pkeys[1] << 16) |
- ((u64) dd->ipath_pkeys[2] << 32) |
- ((u64) dd->ipath_pkeys[3] << 48);
- ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
- (unsigned long long) pkey);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
- pkey);
- }
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_pkeys);
-
-/**
- * ipath_layer_get_linkdowndefaultstate - get the default linkdown state
- * @dd: the infinipath device
- *
- * Returns zero if the default is POLL, 1 if the default is SLEEP.
- */
-int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd)
-{
- return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_linkdowndefaultstate);
-
-/**
- * ipath_layer_set_linkdowndefaultstate - set the default linkdown state
- * @dd: the infinipath device
- * @sleep: the new state
- *
- * Note that this will only take effect when the link state changes.
- */
-int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
- int sleep)
-{
- if (sleep)
- dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
- else
- dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
- dd->ipath_ibcctrl);
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_linkdowndefaultstate);
-
-int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd)
-{
- return (dd->ipath_ibcctrl >>
- INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
- INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_phyerrthreshold);
-
-/**
- * ipath_layer_set_phyerrthreshold - set the physical error threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
-{
- unsigned v;
-
- v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
- INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
- if (v != n) {
- dd->ipath_ibcctrl &=
- ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
- INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
- dd->ipath_ibcctrl |=
- (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
- dd->ipath_ibcctrl);
- }
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_phyerrthreshold);
-
-int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd)
-{
- return (dd->ipath_ibcctrl >>
- INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
- INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_overrunthreshold);
-
-/**
- * ipath_layer_set_overrunthreshold - set the overrun threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
-{
- unsigned v;
-
- v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
- INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
- if (v != n) {
- dd->ipath_ibcctrl &=
- ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
- INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
- dd->ipath_ibcctrl |=
- (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
- dd->ipath_ibcctrl);
- }
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_overrunthreshold);
-
-int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
- size_t namelen)
-{
- return dd->ipath_f_get_boardname(dd, name, namelen);
-}
-EXPORT_SYMBOL_GPL(ipath_layer_get_boardname);
-
-u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd)
-{
- return dd->ipath_rcvhdrentsize;
-}
-EXPORT_SYMBOL_GPL(ipath_layer_get_rcvhdrentsize);
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h
index 71485096fca..3854a4eae68 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.h
+++ b/drivers/infiniband/hw/ipath/ipath_layer.h
@@ -40,73 +40,9 @@
*/
struct sk_buff;
-struct ipath_sge_state;
struct ipath_devdata;
struct ether_header;
-struct ipath_layer_counters {
- u64 symbol_error_counter;
- u64 link_error_recovery_counter;
- u64 link_downed_counter;
- u64 port_rcv_errors;
- u64 port_rcv_remphys_errors;
- u64 port_xmit_discards;
- u64 port_xmit_data;
- u64 port_rcv_data;
- u64 port_xmit_packets;
- u64 port_rcv_packets;
- u32 local_link_integrity_errors;
- u32 excessive_buffer_overrun_errors;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * XXX Maybe we should use phys addr here and kmap()/kunmap().
- * Used by the verbs layer.
- */
-struct ipath_seg {
- void *vaddr;
- size_t length;
-};
-
-/* The number of ipath_segs that fit in a page. */
-#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg))
-
-struct ipath_segarray {
- struct ipath_seg segs[IPATH_SEGSZ];
-};
-
-struct ipath_mregion {
- u64 user_base; /* User's address for this region */
- u64 iova; /* IB start address of this region */
- size_t length;
- u32 lkey;
- u32 offset; /* offset (bytes) to start of region */
- int access_flags;
- u32 max_segs; /* number of ipath_segs in all the arrays */
- u32 mapsz; /* size of the map array */
- struct ipath_segarray *map[0]; /* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct ipath_sge {
- struct ipath_mregion *mr;
- void *vaddr; /* current pointer into the segment */
- u32 sge_length; /* length of the SGE */
- u32 length; /* remaining length of the segment */
- u16 m; /* current index: mr->map[m] */
- u16 n; /* current index: mr->map[m]->segs[n] */
-};
-
-struct ipath_sge_state {
- struct ipath_sge *sg_list; /* next SGE to be used if any */
- struct ipath_sge sge; /* progress state for the current SGE */
- u8 num_sge;
-};
-
int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
void (*l_remove)(void *),
int (*l_intr)(void *, u32),
@@ -114,62 +50,14 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
struct sk_buff *),
u16 rcv_opcode,
int (*l_rcv_lid)(void *, void *));
-int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *),
- void (*l_remove)(void *arg),
- int (*l_piobufavail)(void *arg),
- void (*l_rcv)(void *arg, void *rhdr,
- void *data, u32 tlen),
- void (*l_timer_cb)(void *arg));
void ipath_layer_unregister(void);
-void ipath_verbs_unregister(void);
int ipath_layer_open(struct ipath_devdata *, u32 * pktmax);
u16 ipath_layer_get_lid(struct ipath_devdata *dd);
int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *);
u16 ipath_layer_get_bcast(struct ipath_devdata *dd);
-u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd);
-int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 state);
-int ipath_layer_set_mtu(struct ipath_devdata *, u16);
-int ipath_set_lid(struct ipath_devdata *, u32, u8);
int ipath_layer_send_hdr(struct ipath_devdata *dd,
struct ether_header *hdr);
-int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
- u32 * hdr, u32 len, struct ipath_sge_state *ss);
int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd);
-int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
- size_t namelen);
-int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
- u64 *rwords, u64 *spkts, u64 *rpkts,
- u64 *xmit_wait);
-int ipath_layer_get_counters(struct ipath_devdata *dd,
- struct ipath_layer_counters *cntrs);
-int ipath_layer_want_buffer(struct ipath_devdata *dd);
-int ipath_layer_set_guid(struct ipath_devdata *, __be64 guid);
-__be64 ipath_layer_get_guid(struct ipath_devdata *);
-u32 ipath_layer_get_nguid(struct ipath_devdata *);
-u32 ipath_layer_get_majrev(struct ipath_devdata *);
-u32 ipath_layer_get_minrev(struct ipath_devdata *);
-u32 ipath_layer_get_pcirev(struct ipath_devdata *);
-u32 ipath_layer_get_flags(struct ipath_devdata *dd);
-struct device *ipath_layer_get_device(struct ipath_devdata *dd);
-u16 ipath_layer_get_deviceid(struct ipath_devdata *dd);
-u32 ipath_layer_get_vendorid(struct ipath_devdata *);
-u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd);
-u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd);
-int ipath_layer_enable_timer(struct ipath_devdata *dd);
-int ipath_layer_disable_timer(struct ipath_devdata *dd);
-int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags);
-unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd);
-unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index);
-int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 *pkeys);
-int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 *pkeys);
-int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd);
-int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
- int sleep);
-int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd);
-int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n);
-int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd);
-int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n);
-u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd);
/* ipath_ether interrupt values */
#define IPATH_LAYER_INT_IF_UP 0x2
@@ -178,9 +66,6 @@ u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd);
#define IPATH_LAYER_INT_SEND_CONTINUE 0x10
#define IPATH_LAYER_INT_BCAST 0x40
-/* _verbs_layer.l_flags */
-#define IPATH_VERBS_KERNEL_SMA 0x1
-
extern unsigned ipath_debug; /* debugging bit mask */
#endif /* _IPATH_LAYER_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index d3402341b7d..25908b02fbe 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -87,7 +87,8 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
struct ipath_devdata *dd = to_idev(ibdev)->dd;
u32 vendor, majrev, minrev;
- if (smp->attr_mod)
+ /* GUID 0 is illegal */
+ if (smp->attr_mod || (dd->ipath_guid == 0))
smp->status |= IB_SMP_INVALID_FIELD;
nip->base_version = 1;
@@ -101,15 +102,15 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
nip->num_ports = ibdev->phys_port_cnt;
/* This is already in network order */
nip->sys_guid = to_idev(ibdev)->sys_image_guid;
- nip->node_guid = ipath_layer_get_guid(dd);
+ nip->node_guid = dd->ipath_guid;
nip->port_guid = nip->sys_guid;
- nip->partition_cap = cpu_to_be16(ipath_layer_get_npkeys(dd));
- nip->device_id = cpu_to_be16(ipath_layer_get_deviceid(dd));
- majrev = ipath_layer_get_majrev(dd);
- minrev = ipath_layer_get_minrev(dd);
+ nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
+ nip->device_id = cpu_to_be16(dd->ipath_deviceid);
+ majrev = dd->ipath_majrev;
+ minrev = dd->ipath_minrev;
nip->revision = cpu_to_be32((majrev << 16) | minrev);
nip->local_port_num = port;
- vendor = ipath_layer_get_vendorid(dd);
+ vendor = dd->ipath_vendorid;
nip->vendor_id[0] = 0;
nip->vendor_id[1] = vendor >> 8;
nip->vendor_id[2] = vendor;
@@ -131,15 +132,96 @@ static int recv_subn_get_guidinfo(struct ib_smp *smp,
* We only support one GUID for now. If this changes, the
* portinfo.guid_cap field needs to be updated too.
*/
- if (startgx == 0)
- /* The first is a copy of the read-only HW GUID. */
- *p = ipath_layer_get_guid(to_idev(ibdev)->dd);
- else
+ if (startgx == 0) {
+ __be64 g = to_idev(ibdev)->dd->ipath_guid;
+ if (g == 0)
+ /* GUID 0 is illegal */
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else
+ /* The first is a copy of the read-only HW GUID. */
+ *p = g;
+ } else
smp->status |= IB_SMP_INVALID_FIELD;
return reply(smp);
}
+
+static int get_overrunthreshold(struct ipath_devdata *dd)
+{
+ return (dd->ipath_ibcctrl >>
+ INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
+ INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
+}
+
+/**
+ * set_overrunthreshold - set the overrun threshold
+ * @dd: the infinipath device
+ * @n: the new threshold
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
+{
+ unsigned v;
+
+ v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
+ INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
+ if (v != n) {
+ dd->ipath_ibcctrl &=
+ ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
+ INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
+ dd->ipath_ibcctrl |=
+ (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+ }
+ return 0;
+}
+
+static int get_phyerrthreshold(struct ipath_devdata *dd)
+{
+ return (dd->ipath_ibcctrl >>
+ INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+ INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+}
+
+/**
+ * set_phyerrthreshold - set the physical error threshold
+ * @dd: the infinipath device
+ * @n: the new threshold
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
+{
+ unsigned v;
+
+ v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+ INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+ if (v != n) {
+ dd->ipath_ibcctrl &=
+ ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
+ INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
+ dd->ipath_ibcctrl |=
+ (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+ }
+ return 0;
+}
+
+/**
+ * get_linkdowndefaultstate - get the default linkdown state
+ * @dd: the infinipath device
+ *
+ * Returns zero if the default is POLL, 1 if the default is SLEEP.
+ */
+static int get_linkdowndefaultstate(struct ipath_devdata *dd)
+{
+ return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
+}
+
static int recv_subn_get_portinfo(struct ib_smp *smp,
struct ib_device *ibdev, u8 port)
{
@@ -166,7 +248,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
(dev->mkeyprot_resv_lmc >> 6) == 0)
pip->mkey = dev->mkey;
pip->gid_prefix = dev->gid_prefix;
- lid = ipath_layer_get_lid(dev->dd);
+ lid = dev->dd->ipath_lid;
pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
pip->sm_lid = cpu_to_be16(dev->sm_lid);
pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
@@ -177,14 +259,14 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
pip->link_width_supported = 3; /* 1x or 4x */
pip->link_width_active = 2; /* 4x */
pip->linkspeed_portstate = 0x10; /* 2.5Gbps */
- ibcstat = ipath_layer_get_lastibcstat(dev->dd);
+ ibcstat = dev->dd->ipath_lastibcstat;
pip->linkspeed_portstate |= ((ibcstat >> 4) & 0x3) + 1;
pip->portphysstate_linkdown =
(ipath_cvt_physportstate[ibcstat & 0xf] << 4) |
- (ipath_layer_get_linkdowndefaultstate(dev->dd) ? 1 : 2);
+ (get_linkdowndefaultstate(dev->dd) ? 1 : 2);
pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc;
pip->linkspeedactive_enabled = 0x11; /* 2.5Gbps, 2.5Gbps */
- switch (ipath_layer_get_ibmtu(dev->dd)) {
+ switch (dev->dd->ipath_ibmtu) {
case 4096:
mtu = IB_MTU_4096;
break;
@@ -217,7 +299,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
/* P_KeyViolations are counted by hardware. */
pip->pkey_violations =
- cpu_to_be16((ipath_layer_get_cr_errpkey(dev->dd) -
+ cpu_to_be16((ipath_get_cr_errpkey(dev->dd) -
dev->z_pkey_violations) & 0xFFFF);
pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
/* Only the hardware GUID is supported for now */
@@ -226,8 +308,8 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
/* 32.768 usec. response time (guessing) */
pip->resv_resptimevalue = 3;
pip->localphyerrors_overrunerrors =
- (ipath_layer_get_phyerrthreshold(dev->dd) << 4) |
- ipath_layer_get_overrunthreshold(dev->dd);
+ (get_phyerrthreshold(dev->dd) << 4) |
+ get_overrunthreshold(dev->dd);
/* pip->max_credit_hint; */
/* pip->link_roundtrip_latency[3]; */
@@ -237,6 +319,20 @@ bail:
return ret;
}
+/**
+ * get_pkeys - return the PKEY table for port 0
+ * @dd: the infinipath device
+ * @pkeys: the pkey table is placed here
+ */
+static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
+{
+ struct ipath_portdata *pd = dd->ipath_pd[0];
+
+ memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
+
+ return 0;
+}
+
static int recv_subn_get_pkeytable(struct ib_smp *smp,
struct ib_device *ibdev)
{
@@ -249,9 +345,9 @@ static int recv_subn_get_pkeytable(struct ib_smp *smp,
memset(smp->data, 0, sizeof(smp->data));
if (startpx == 0) {
struct ipath_ibdev *dev = to_idev(ibdev);
- unsigned i, n = ipath_layer_get_npkeys(dev->dd);
+ unsigned i, n = ipath_get_npkeys(dev->dd);
- ipath_layer_get_pkeys(dev->dd, p);
+ get_pkeys(dev->dd, p);
for (i = 0; i < n; i++)
q[i] = cpu_to_be16(p[i]);
@@ -269,6 +365,24 @@ static int recv_subn_set_guidinfo(struct ib_smp *smp,
}
/**
+ * set_linkdowndefaultstate - set the default linkdown state
+ * @dd: the infinipath device
+ * @sleep: the new state
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep)
+{
+ if (sleep)
+ dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
+ else
+ dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+ return 0;
+}
+
+/**
* recv_subn_set_portinfo - set port information
* @smp: the incoming SM packet
* @ibdev: the infiniband device
@@ -290,7 +404,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
u8 state;
u16 lstate;
u32 mtu;
- int ret;
+ int ret, ore;
if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt)
goto err;
@@ -304,7 +418,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
lid = be16_to_cpu(pip->lid);
- if (lid != ipath_layer_get_lid(dev->dd)) {
+ if (lid != dev->dd->ipath_lid) {
/* Must be a valid unicast LID address. */
if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
goto err;
@@ -342,11 +456,11 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
case 0: /* NOP */
break;
case 1: /* SLEEP */
- if (ipath_layer_set_linkdowndefaultstate(dev->dd, 1))
+ if (set_linkdowndefaultstate(dev->dd, 1))
goto err;
break;
case 2: /* POLL */
- if (ipath_layer_set_linkdowndefaultstate(dev->dd, 0))
+ if (set_linkdowndefaultstate(dev->dd, 0))
goto err;
break;
default:
@@ -376,7 +490,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
/* XXX We have already partially updated our state! */
goto err;
}
- ipath_layer_set_mtu(dev->dd, mtu);
+ ipath_set_mtu(dev->dd, mtu);
dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
@@ -392,20 +506,16 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
* later.
*/
if (pip->pkey_violations == 0)
- dev->z_pkey_violations =
- ipath_layer_get_cr_errpkey(dev->dd);
+ dev->z_pkey_violations = ipath_get_cr_errpkey(dev->dd);
if (pip->qkey_violations == 0)
dev->qkey_violations = 0;
- if (ipath_layer_set_phyerrthreshold(
- dev->dd,
- (pip->localphyerrors_overrunerrors >> 4) & 0xF))
+ ore = pip->localphyerrors_overrunerrors;
+ if (set_phyerrthreshold(dev->dd, (ore >> 4) & 0xF))
goto err;
- if (ipath_layer_set_overrunthreshold(
- dev->dd,
- (pip->localphyerrors_overrunerrors & 0xF)))
+ if (set_overrunthreshold(dev->dd, (ore & 0xF)))
goto err;
dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
@@ -423,7 +533,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
* is down or is being set to down.
*/
state = pip->linkspeed_portstate & 0xF;
- flags = ipath_layer_get_flags(dev->dd);
+ flags = dev->dd->ipath_flags;
lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
goto err;
@@ -439,7 +549,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
/* FALLTHROUGH */
case IB_PORT_DOWN:
if (lstate == 0)
- if (ipath_layer_get_linkdowndefaultstate(dev->dd))
+ if (get_linkdowndefaultstate(dev->dd))
lstate = IPATH_IB_LINKDOWN_SLEEP;
else
lstate = IPATH_IB_LINKDOWN;
@@ -451,7 +561,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
lstate = IPATH_IB_LINKDOWN_DISABLE;
else
goto err;
- ipath_layer_set_linkstate(dev->dd, lstate);
+ ipath_set_linkstate(dev->dd, lstate);
if (flags & IPATH_LINKACTIVE) {
event.event = IB_EVENT_PORT_ERR;
ib_dispatch_event(&event);
@@ -460,7 +570,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
case IB_PORT_ARMED:
if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE)))
break;
- ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKARM);
+ ipath_set_linkstate(dev->dd, IPATH_IB_LINKARM);
if (flags & IPATH_LINKACTIVE) {
event.event = IB_EVENT_PORT_ERR;
ib_dispatch_event(&event);
@@ -469,7 +579,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
case IB_PORT_ACTIVE:
if (!(flags & IPATH_LINKARMED))
break;
- ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
+ ipath_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
event.event = IB_EVENT_PORT_ACTIVE;
ib_dispatch_event(&event);
break;
@@ -493,6 +603,152 @@ done:
return ret;
}
+/**
+ * rm_pkey - decrecment the reference count for the given PKEY
+ * @dd: the infinipath device
+ * @key: the PKEY index
+ *
+ * Return true if this was the last reference and the hardware table entry
+ * needs to be changed.
+ */
+static int rm_pkey(struct ipath_devdata *dd, u16 key)
+{
+ int i;
+ int ret;
+
+ for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+ if (dd->ipath_pkeys[i] != key)
+ continue;
+ if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
+ dd->ipath_pkeys[i] = 0;
+ ret = 1;
+ goto bail;
+ }
+ break;
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * add_pkey - add the given PKEY to the hardware table
+ * @dd: the infinipath device
+ * @key: the PKEY
+ *
+ * Return an error code if unable to add the entry, zero if no change,
+ * or 1 if the hardware PKEY register needs to be updated.
+ */
+static int add_pkey(struct ipath_devdata *dd, u16 key)
+{
+ int i;
+ u16 lkey = key & 0x7FFF;
+ int any = 0;
+ int ret;
+
+ if (lkey == 0x7FFF) {
+ ret = 0;
+ goto bail;
+ }
+
+ /* Look for an empty slot or a matching PKEY. */
+ for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+ if (!dd->ipath_pkeys[i]) {
+ any++;
+ continue;
+ }
+ /* If it matches exactly, try to increment the ref count */
+ if (dd->ipath_pkeys[i] == key) {
+ if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
+ ret = 0;
+ goto bail;
+ }
+ /* Lost the race. Look for an empty slot below. */
+ atomic_dec(&dd->ipath_pkeyrefs[i]);
+ any++;
+ }
+ /*
+ * It makes no sense to have both the limited and unlimited
+ * PKEY set at the same time since the unlimited one will
+ * disable the limited one.
+ */
+ if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
+ ret = -EEXIST;
+ goto bail;
+ }
+ }
+ if (!any) {
+ ret = -EBUSY;
+ goto bail;
+ }
+ for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+ if (!dd->ipath_pkeys[i] &&
+ atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
+ /* for ipathstats, etc. */
+ ipath_stats.sps_pkeys[i] = lkey;
+ dd->ipath_pkeys[i] = key;
+ ret = 1;
+ goto bail;
+ }
+ }
+ ret = -EBUSY;
+
+bail:
+ return ret;
+}
+
+/**
+ * set_pkeys - set the PKEY table for port 0
+ * @dd: the infinipath device
+ * @pkeys: the PKEY table
+ */
+static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
+{
+ struct ipath_portdata *pd;
+ int i;
+ int changed = 0;
+
+ pd = dd->ipath_pd[0];
+
+ for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
+ u16 key = pkeys[i];
+ u16 okey = pd->port_pkeys[i];
+
+ if (key == okey)
+ continue;
+ /*
+ * The value of this PKEY table entry is changing.
+ * Remove the old entry in the hardware's array of PKEYs.
+ */
+ if (okey & 0x7FFF)
+ changed |= rm_pkey(dd, okey);
+ if (key & 0x7FFF) {
+ int ret = add_pkey(dd, key);
+
+ if (ret < 0)
+ key = 0;
+ else
+ changed |= ret;
+ }
+ pd->port_pkeys[i] = key;
+ }
+ if (changed) {
+ u64 pkey;
+
+ pkey = (u64) dd->ipath_pkeys[0] |
+ ((u64) dd->ipath_pkeys[1] << 16) |
+ ((u64) dd->ipath_pkeys[2] << 32) |
+ ((u64) dd->ipath_pkeys[3] << 48);
+ ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
+ (unsigned long long) pkey);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
+ pkey);
+ }
+ return 0;
+}
+
static int recv_subn_set_pkeytable(struct ib_smp *smp,
struct ib_device *ibdev)
{
@@ -500,13 +756,12 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp,
__be16 *p = (__be16 *) smp->data;
u16 *q = (u16 *) smp->data;
struct ipath_ibdev *dev = to_idev(ibdev);
- unsigned i, n = ipath_layer_get_npkeys(dev->dd);
+ unsigned i, n = ipath_get_npkeys(dev->dd);
for (i = 0; i < n; i++)
q[i] = be16_to_cpu(p[i]);
- if (startpx != 0 ||
- ipath_layer_set_pkeys(dev->dd, q) != 0)
+ if (startpx != 0 || set_pkeys(dev->dd, q) != 0)
smp->status |= IB_SMP_INVALID_FIELD;
return recv_subn_get_pkeytable(smp, ibdev);
@@ -844,10 +1099,10 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
pmp->data;
struct ipath_ibdev *dev = to_idev(ibdev);
- struct ipath_layer_counters cntrs;
+ struct ipath_verbs_counters cntrs;
u8 port_select = p->port_select;
- ipath_layer_get_counters(dev->dd, &cntrs);
+ ipath_get_counters(dev->dd, &cntrs);
/* Adjust counters for any resets done. */
cntrs.symbol_error_counter -= dev->z_symbol_error_counter;
@@ -944,8 +1199,8 @@ static int recv_pma_get_portcounters_ext(struct ib_perf *pmp,
u64 swords, rwords, spkts, rpkts, xwait;
u8 port_select = p->port_select;
- ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
- &rpkts, &xwait);
+ ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
+ &rpkts, &xwait);
/* Adjust counters for any resets done. */
swords -= dev->z_port_xmit_data;
@@ -978,13 +1233,13 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp,
struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
pmp->data;
struct ipath_ibdev *dev = to_idev(ibdev);
- struct ipath_layer_counters cntrs;
+ struct ipath_verbs_counters cntrs;
/*
* Since the HW doesn't support clearing counters, we save the
* current count and subtract it from future responses.
*/
- ipath_layer_get_counters(dev->dd, &cntrs);
+ ipath_get_counters(dev->dd, &cntrs);
if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR)
dev->z_symbol_error_counter = cntrs.symbol_error_counter;
@@ -1041,8 +1296,8 @@ static int recv_pma_set_portcounters_ext(struct ib_perf *pmp,
struct ipath_ibdev *dev = to_idev(ibdev);
u64 swords, rwords, spkts, rpkts, xwait;
- ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
- &rpkts, &xwait);
+ ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
+ &rpkts, &xwait);
if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA)
dev->z_port_xmit_data = swords;
diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c
new file mode 100644
index 00000000000..11b7378ff21
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_mmap.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <asm/pgtable.h>
+
+#include "ipath_verbs.h"
+
+/**
+ * ipath_release_mmap_info - free mmap info structure
+ * @ref: a pointer to the kref within struct ipath_mmap_info
+ */
+void ipath_release_mmap_info(struct kref *ref)
+{
+ struct ipath_mmap_info *ip =
+ container_of(ref, struct ipath_mmap_info, ref);
+
+ vfree(ip->obj);
+ kfree(ip);
+}
+
+/*
+ * open and close keep track of how many times the CQ is mapped,
+ * to avoid releasing it.
+ */
+static void ipath_vma_open(struct vm_area_struct *vma)
+{
+ struct ipath_mmap_info *ip = vma->vm_private_data;
+
+ kref_get(&ip->ref);
+ ip->mmap_cnt++;
+}
+
+static void ipath_vma_close(struct vm_area_struct *vma)
+{
+ struct ipath_mmap_info *ip = vma->vm_private_data;
+
+ ip->mmap_cnt--;
+ kref_put(&ip->ref, ipath_release_mmap_info);
+}
+
+static struct vm_operations_struct ipath_vm_ops = {
+ .open = ipath_vma_open,
+ .close = ipath_vma_close,
+};
+
+/**
+ * ipath_mmap - create a new mmap region
+ * @context: the IB user context of the process making the mmap() call
+ * @vma: the VMA to be initialized
+ * Return zero if the mmap is OK. Otherwise, return an errno.
+ */
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ struct ipath_ibdev *dev = to_idev(context->device);
+ unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+ unsigned long size = vma->vm_end - vma->vm_start;
+ struct ipath_mmap_info *ip, **pp;
+ int ret = -EINVAL;
+
+ /*
+ * Search the device's list of objects waiting for a mmap call.
+ * Normally, this list is very short since a call to create a
+ * CQ, QP, or SRQ is soon followed by a call to mmap().
+ */
+ spin_lock_irq(&dev->pending_lock);
+ for (pp = &dev->pending_mmaps; (ip = *pp); pp = &ip->next) {
+ /* Only the creator is allowed to mmap the object */
+ if (context != ip->context || (void *) offset != ip->obj)
+ continue;
+ /* Don't allow a mmap larger than the object. */
+ if (size > ip->size)
+ break;
+
+ *pp = ip->next;
+ spin_unlock_irq(&dev->pending_lock);
+
+ ret = remap_vmalloc_range(vma, ip->obj, 0);
+ if (ret)
+ goto done;
+ vma->vm_ops = &ipath_vm_ops;
+ vma->vm_private_data = ip;
+ ipath_vma_open(vma);
+ goto done;
+ }
+ spin_unlock_irq(&dev->pending_lock);
+done:
+ return ret;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 4ac31a5da33..a0673c1eef7 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -36,6 +36,18 @@
#include "ipath_verbs.h"
+/* Fast memory region */
+struct ipath_fmr {
+ struct ib_fmr ibfmr;
+ u8 page_shift;
+ struct ipath_mregion mr; /* must be last */
+};
+
+static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
+{
+ return container_of(ibfmr, struct ipath_fmr, ibfmr);
+}
+
/**
* ipath_get_dma_mr - get a DMA memory region
* @pd: protection domain for this memory region
@@ -126,6 +138,7 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
goto bail;
}
+ mr->mr.pd = pd;
mr->mr.user_base = *iova_start;
mr->mr.iova = *iova_start;
mr->mr.length = 0;
@@ -185,6 +198,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
goto bail;
}
+ mr->mr.pd = pd;
mr->mr.user_base = region->user_base;
mr->mr.iova = region->virt_base;
mr->mr.length = region->length;
@@ -277,6 +291,7 @@ struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
* Resources are allocated but no valid mapping (RKEY can't be
* used).
*/
+ fmr->mr.pd = pd;
fmr->mr.user_base = 0;
fmr->mr.iova = 0;
fmr->mr.length = 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 83e557be591..46c1c89bf6a 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -35,7 +35,7 @@
#include <linux/vmalloc.h>
#include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE)
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
@@ -44,19 +44,6 @@
#define find_next_offset(map, off) find_next_zero_bit((map)->page, \
BITS_PER_PAGE, off)
-#define TRANS_INVALID 0
-#define TRANS_ANY2RST 1
-#define TRANS_RST2INIT 2
-#define TRANS_INIT2INIT 3
-#define TRANS_INIT2RTR 4
-#define TRANS_RTR2RTS 5
-#define TRANS_RTS2RTS 6
-#define TRANS_SQERR2RTS 7
-#define TRANS_ANY2ERR 8
-#define TRANS_RTS2SQD 9 /* XXX Wait for expected ACKs & signal event */
-#define TRANS_SQD2SQD 10 /* error if not drained & parameter change */
-#define TRANS_SQD2RTS 11 /* error if not drained */
-
/*
* Convert the AETH credit code into the number of credits.
*/
@@ -287,7 +274,7 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt)
free_qpn(qpt, qp->ibqp.qp_num);
if (!atomic_dec_and_test(&qp->refcount) ||
!ipath_destroy_qp(&qp->ibqp))
- _VERBS_INFO("QP memory leak!\n");
+ ipath_dbg(KERN_INFO "QP memory leak!\n");
qp = nqp;
}
}
@@ -348,6 +335,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
qp->r_nak_state = 0;
+ qp->r_wrid_valid = 0;
qp->s_rnr_timeout = 0;
qp->s_head = 0;
qp->s_tail = 0;
@@ -355,26 +343,30 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->s_last = 0;
qp->s_ssn = 1;
qp->s_lsn = 0;
- qp->r_rq.head = 0;
- qp->r_rq.tail = 0;
+ qp->s_wait_credit = 0;
+ if (qp->r_rq.wq) {
+ qp->r_rq.wq->head = 0;
+ qp->r_rq.wq->tail = 0;
+ }
qp->r_reuse_sge = 0;
}
/**
* ipath_error_qp - put a QP into an error state
* @qp: the QP to put into an error state
+ * @err: the receive completion error to signal if a RWQE is active
*
* Flushes both send and receive work queues.
* QP s_lock should be held and interrupts disabled.
*/
-void ipath_error_qp(struct ipath_qp *qp)
+void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ib_wc wc;
- _VERBS_INFO("QP%d/%d in error state\n",
- qp->ibqp.qp_num, qp->remote_qpn);
+ ipath_dbg(KERN_INFO "QP%d/%d in error state\n",
+ qp->ibqp.qp_num, qp->remote_qpn);
spin_lock(&dev->pending_lock);
/* XXX What if its already removed by the timeout code? */
@@ -384,7 +376,6 @@ void ipath_error_qp(struct ipath_qp *qp)
list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock);
- wc.status = IB_WC_WR_FLUSH_ERR;
wc.vendor_err = 0;
wc.byte_len = 0;
wc.imm_data = 0;
@@ -396,6 +387,12 @@ void ipath_error_qp(struct ipath_qp *qp)
wc.sl = 0;
wc.dlid_path_bits = 0;
wc.port_num = 0;
+ if (qp->r_wrid_valid) {
+ qp->r_wrid_valid = 0;
+ wc.status = err;
+ ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
+ }
+ wc.status = IB_WC_WR_FLUSH_ERR;
while (qp->s_last != qp->s_head) {
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
@@ -410,15 +407,32 @@ void ipath_error_qp(struct ipath_qp *qp)
qp->s_hdrwords = 0;
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
- wc.opcode = IB_WC_RECV;
- spin_lock(&qp->r_rq.lock);
- while (qp->r_rq.tail != qp->r_rq.head) {
- wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id;
- if (++qp->r_rq.tail >= qp->r_rq.size)
- qp->r_rq.tail = 0;
- ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+ if (qp->r_rq.wq) {
+ struct ipath_rwq *wq;
+ u32 head;
+ u32 tail;
+
+ spin_lock(&qp->r_rq.lock);
+
+ /* sanity check pointers before trusting them */
+ wq = qp->r_rq.wq;
+ head = wq->head;
+ if (head >= qp->r_rq.size)
+ head = 0;
+ tail = wq->tail;
+ if (tail >= qp->r_rq.size)
+ tail = 0;
+ wc.opcode = IB_WC_RECV;
+ while (tail != head) {
+ wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
+ if (++tail >= qp->r_rq.size)
+ tail = 0;
+ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+ }
+ wq->tail = tail;
+
+ spin_unlock(&qp->r_rq.lock);
}
- spin_unlock(&qp->r_rq.lock);
}
/**
@@ -426,11 +440,12 @@ void ipath_error_qp(struct ipath_qp *qp)
* @ibqp: the queue pair who's attributes we're modifying
* @attr: the new attributes
* @attr_mask: the mask of attributes to modify
+ * @udata: user data for ipathverbs.so
*
* Returns 0 on success, otherwise returns an errno.
*/
int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask)
+ int attr_mask, struct ib_udata *udata)
{
struct ipath_ibdev *dev = to_idev(ibqp->device);
struct ipath_qp *qp = to_iqp(ibqp);
@@ -448,26 +463,53 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr_mask))
goto inval;
- if (attr_mask & IB_QP_AV)
+ if (attr_mask & IB_QP_AV) {
if (attr->ah_attr.dlid == 0 ||
attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE)
goto inval;
+ if ((attr->ah_attr.ah_flags & IB_AH_GRH) &&
+ (attr->ah_attr.grh.sgid_index > 1))
+ goto inval;
+ }
+
if (attr_mask & IB_QP_PKEY_INDEX)
- if (attr->pkey_index >= ipath_layer_get_npkeys(dev->dd))
+ if (attr->pkey_index >= ipath_get_npkeys(dev->dd))
goto inval;
if (attr_mask & IB_QP_MIN_RNR_TIMER)
if (attr->min_rnr_timer > 31)
goto inval;
+ if (attr_mask & IB_QP_PORT)
+ if (attr->port_num == 0 ||
+ attr->port_num > ibqp->device->phys_port_cnt)
+ goto inval;
+
+ if (attr_mask & IB_QP_PATH_MTU)
+ if (attr->path_mtu > IB_MTU_4096)
+ goto inval;
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ if (attr->max_dest_rd_atomic > 1)
+ goto inval;
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+ if (attr->max_rd_atomic > 1)
+ goto inval;
+
+ if (attr_mask & IB_QP_PATH_MIG_STATE)
+ if (attr->path_mig_state != IB_MIG_MIGRATED &&
+ attr->path_mig_state != IB_MIG_REARM)
+ goto inval;
+
switch (new_state) {
case IB_QPS_RESET:
ipath_reset_qp(qp);
break;
case IB_QPS_ERR:
- ipath_error_qp(qp);
+ ipath_error_qp(qp, IB_WC_GENERAL_ERR);
break;
default:
@@ -482,7 +524,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
qp->remote_qpn = attr->dest_qp_num;
if (attr_mask & IB_QP_SQ_PSN) {
- qp->s_next_psn = attr->sq_psn;
+ qp->s_psn = qp->s_next_psn = attr->sq_psn;
qp->s_last_psn = qp->s_next_psn - 1;
}
@@ -511,6 +553,9 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_MIN_RNR_TIMER)
qp->r_min_rnr_timer = attr->min_rnr_timer;
+ if (attr_mask & IB_QP_TIMEOUT)
+ qp->timeout = attr->timeout;
+
if (attr_mask & IB_QP_QKEY)
qp->qkey = attr->qkey;
@@ -543,7 +588,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->dest_qp_num = qp->remote_qpn;
attr->qp_access_flags = qp->qp_access_flags;
attr->cap.max_send_wr = qp->s_size - 1;
- attr->cap.max_recv_wr = qp->r_rq.size - 1;
+ attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
attr->cap.max_send_sge = qp->s_max_sge;
attr->cap.max_recv_sge = qp->r_rq.max_sge;
attr->cap.max_inline_data = 0;
@@ -557,7 +602,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->max_dest_rd_atomic = 1;
attr->min_rnr_timer = qp->r_min_rnr_timer;
attr->port_num = 1;
- attr->timeout = 0;
+ attr->timeout = qp->timeout;
attr->retry_cnt = qp->s_retry_cnt;
attr->rnr_retry = qp->s_rnr_retry;
attr->alt_port_num = 0;
@@ -569,9 +614,10 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
init_attr->recv_cq = qp->ibqp.recv_cq;
init_attr->srq = qp->ibqp.srq;
init_attr->cap = attr->cap;
- init_attr->sq_sig_type =
- (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
- ? IB_SIGNAL_REQ_WR : 0;
+ if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
+ init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
+ else
+ init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
init_attr->qp_type = qp->ibqp.qp_type;
init_attr->port_num = 1;
return 0;
@@ -596,13 +642,23 @@ __be32 ipath_compute_aeth(struct ipath_qp *qp)
} else {
u32 min, max, x;
u32 credits;
-
+ struct ipath_rwq *wq = qp->r_rq.wq;
+ u32 head;
+ u32 tail;
+
+ /* sanity check pointers before trusting them */
+ head = wq->head;
+ if (head >= qp->r_rq.size)
+ head = 0;
+ tail = wq->tail;
+ if (tail >= qp->r_rq.size)
+ tail = 0;
/*
* Compute the number of credits available (RWQEs).
* XXX Not holding the r_rq.lock here so there is a small
* chance that the pair of reads are not atomic.
*/
- credits = qp->r_rq.head - qp->r_rq.tail;
+ credits = head - tail;
if ((int)credits < 0)
credits += qp->r_rq.size;
/*
@@ -679,27 +735,37 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
case IB_QPT_UD:
case IB_QPT_SMI:
case IB_QPT_GSI:
- qp = kmalloc(sizeof(*qp), GFP_KERNEL);
+ sz = sizeof(*qp);
+ if (init_attr->srq) {
+ struct ipath_srq *srq = to_isrq(init_attr->srq);
+
+ sz += sizeof(*qp->r_sg_list) *
+ srq->rq.max_sge;
+ } else
+ sz += sizeof(*qp->r_sg_list) *
+ init_attr->cap.max_recv_sge;
+ qp = kmalloc(sz, GFP_KERNEL);
if (!qp) {
- vfree(swq);
ret = ERR_PTR(-ENOMEM);
- goto bail;
+ goto bail_swq;
}
if (init_attr->srq) {
+ sz = 0;
qp->r_rq.size = 0;
qp->r_rq.max_sge = 0;
qp->r_rq.wq = NULL;
+ init_attr->cap.max_recv_wr = 0;
+ init_attr->cap.max_recv_sge = 0;
} else {
qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
- sz = (sizeof(struct ipath_sge) * qp->r_rq.max_sge) +
+ sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
sizeof(struct ipath_rwqe);
- qp->r_rq.wq = vmalloc(qp->r_rq.size * sz);
+ qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) +
+ qp->r_rq.size * sz);
if (!qp->r_rq.wq) {
- kfree(qp);
- vfree(swq);
ret = ERR_PTR(-ENOMEM);
- goto bail;
+ goto bail_qp;
}
}
@@ -719,24 +785,19 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
qp->s_wq = swq;
qp->s_size = init_attr->cap.max_send_wr + 1;
qp->s_max_sge = init_attr->cap.max_send_sge;
- qp->s_flags = init_attr->sq_sig_type == IB_SIGNAL_REQ_WR ?
- 1 << IPATH_S_SIGNAL_REQ_WR : 0;
+ if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
+ qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR;
+ else
+ qp->s_flags = 0;
dev = to_idev(ibpd->device);
err = ipath_alloc_qpn(&dev->qp_table, qp,
init_attr->qp_type);
if (err) {
- vfree(swq);
- vfree(qp->r_rq.wq);
- kfree(qp);
ret = ERR_PTR(err);
- goto bail;
+ goto bail_rwq;
}
+ qp->ip = NULL;
ipath_reset_qp(qp);
-
- /* Tell the core driver that the kernel SMA is present. */
- if (init_attr->qp_type == IB_QPT_SMI)
- ipath_layer_set_verbs_flags(dev->dd,
- IPATH_VERBS_KERNEL_SMA);
break;
default:
@@ -747,8 +808,63 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
init_attr->cap.max_inline_data = 0;
+ /*
+ * Return the address of the RWQ as the offset to mmap.
+ * See ipath_mmap() for details.
+ */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ struct ipath_mmap_info *ip;
+ __u64 offset = (__u64) qp->r_rq.wq;
+ int err;
+
+ err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_rwq;
+ }
+
+ if (qp->r_rq.wq) {
+ /* Allocate info for ipath_mmap(). */
+ ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+ if (!ip) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_rwq;
+ }
+ qp->ip = ip;
+ ip->context = ibpd->uobject->context;
+ ip->obj = qp->r_rq.wq;
+ kref_init(&ip->ref);
+ ip->mmap_cnt = 0;
+ ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+ qp->r_rq.size * sz);
+ spin_lock_irq(&dev->pending_lock);
+ ip->next = dev->pending_mmaps;
+ dev->pending_mmaps = ip;
+ spin_unlock_irq(&dev->pending_lock);
+ }
+ }
+
+ spin_lock(&dev->n_qps_lock);
+ if (dev->n_qps_allocated == ib_ipath_max_qps) {
+ spin_unlock(&dev->n_qps_lock);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_ip;
+ }
+
+ dev->n_qps_allocated++;
+ spin_unlock(&dev->n_qps_lock);
+
ret = &qp->ibqp;
+ goto bail;
+bail_ip:
+ kfree(qp->ip);
+bail_rwq:
+ vfree(qp->r_rq.wq);
+bail_qp:
+ kfree(qp);
+bail_swq:
+ vfree(swq);
bail:
return ret;
}
@@ -768,15 +884,12 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
struct ipath_ibdev *dev = to_idev(ibqp->device);
unsigned long flags;
- /* Tell the core driver that the kernel SMA is gone. */
- if (qp->ibqp.qp_type == IB_QPT_SMI)
- ipath_layer_set_verbs_flags(dev->dd, 0);
-
- spin_lock_irqsave(&qp->r_rq.lock, flags);
- spin_lock(&qp->s_lock);
+ spin_lock_irqsave(&qp->s_lock, flags);
qp->state = IB_QPS_ERR;
- spin_unlock(&qp->s_lock);
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ spin_lock(&dev->n_qps_lock);
+ dev->n_qps_allocated--;
+ spin_unlock(&dev->n_qps_lock);
/* Stop the sending tasklet. */
tasklet_kill(&qp->s_task);
@@ -797,8 +910,11 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
if (atomic_read(&qp->refcount) != 0)
ipath_free_qp(&dev->qp_table, qp);
+ if (qp->ip)
+ kref_put(&qp->ip->ref, ipath_release_mmap_info);
+ else
+ vfree(qp->r_rq.wq);
vfree(qp->s_wq);
- vfree(qp->r_rq.wq);
kfree(qp);
return 0;
}
@@ -850,8 +966,8 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
- _VERBS_INFO("Send queue error on QP%d/%d: err: %d\n",
- qp->ibqp.qp_num, qp->remote_qpn, wc->status);
+ ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n",
+ qp->ibqp.qp_num, qp->remote_qpn, wc->status);
spin_lock(&dev->pending_lock);
/* XXX What if its already removed by the timeout code? */
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 774d1615ce2..a504cf67f27 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -32,7 +32,7 @@
*/
#include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x
@@ -201,6 +201,18 @@ int ipath_make_rc_req(struct ipath_qp *qp,
qp->s_rnr_timeout)
goto done;
+ /* Limit the number of packets sent without an ACK. */
+ if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
+ qp->s_wait_credit = 1;
+ dev->n_rc_stalls++;
+ spin_lock(&dev->pending_lock);
+ if (list_empty(&qp->timerwait))
+ list_add_tail(&qp->timerwait,
+ &dev->pending[dev->pending_index]);
+ spin_unlock(&dev->pending_lock);
+ goto done;
+ }
+
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
bth0 = 0;
@@ -221,7 +233,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
/* Check if send work queue is empty. */
if (qp->s_tail == qp->s_head)
goto done;
- qp->s_psn = wqe->psn = qp->s_next_psn;
+ wqe->psn = qp->s_next_psn;
newreq = 1;
}
/*
@@ -393,12 +405,6 @@ int ipath_make_rc_req(struct ipath_qp *qp,
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
- /*
- * Request an ACK every 1/2 MB to avoid retransmit
- * timeouts.
- */
- if (((wqe->length - len) % (512 * 1024)) == 0)
- bth2 |= 1 << 31;
len = pmtu;
break;
}
@@ -435,12 +441,6 @@ int ipath_make_rc_req(struct ipath_qp *qp,
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
- /*
- * Request an ACK every 1/2 MB to avoid retransmit
- * timeouts.
- */
- if (((wqe->length - len) % (512 * 1024)) == 0)
- bth2 |= 1 << 31;
len = pmtu;
break;
}
@@ -498,6 +498,8 @@ int ipath_make_rc_req(struct ipath_qp *qp,
*/
goto done;
}
+ if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
+ bth2 |= 1 << 31; /* Request ACK. */
qp->s_len -= len;
qp->s_hdrwords = hwords;
qp->s_cur_sge = ss;
@@ -540,7 +542,7 @@ static void send_rc_ack(struct ipath_qp *qp)
lrh0 = IPATH_LRH_GRH;
}
/* read pkey_index w/o lock (its atomic) */
- bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+ bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index);
if (qp->r_nak_state)
ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
(qp->r_nak_state <<
@@ -557,7 +559,7 @@ static void send_rc_ack(struct ipath_qp *qp)
hdr.lrh[0] = cpu_to_be16(lrh0);
hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
- hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
+ hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
@@ -737,6 +739,15 @@ bail:
return;
}
+static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
+{
+ if (qp->s_wait_credit) {
+ qp->s_wait_credit = 0;
+ tasklet_hi_schedule(&qp->s_task);
+ }
+ qp->s_last_psn = psn;
+}
+
/**
* do_rc_ack - process an incoming RC ACK
* @qp: the QP the ACK came in on
@@ -805,7 +816,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
* The last valid PSN seen is the previous
* request's.
*/
- qp->s_last_psn = wqe->psn - 1;
+ update_last_psn(qp, wqe->psn - 1);
/* Retry this request. */
ipath_restart_rc(qp, wqe->psn, &wc);
/*
@@ -864,7 +875,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
ipath_get_credit(qp, aeth);
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
qp->s_retry = qp->s_retry_cnt;
- qp->s_last_psn = psn;
+ update_last_psn(qp, psn);
ret = 1;
goto bail;
@@ -883,7 +894,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
goto bail;
/* The last valid PSN is the previous PSN. */
- qp->s_last_psn = psn - 1;
+ update_last_psn(qp, psn - 1);
dev->n_rc_resends += (int)qp->s_psn - (int)psn;
@@ -898,7 +909,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
case 3: /* NAK */
/* The last valid PSN seen is the previous request's. */
if (qp->s_last != qp->s_tail)
- qp->s_last_psn = wqe->psn - 1;
+ update_last_psn(qp, wqe->psn - 1);
switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
IPATH_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */
@@ -1071,7 +1082,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
* since we don't want s_sge modified.
*/
qp->s_len -= pmtu;
- qp->s_last_psn = psn;
+ update_last_psn(qp, psn);
spin_unlock_irqrestore(&qp->s_lock, flags);
ipath_copy_sge(&qp->s_sge, data, pmtu);
goto bail;
@@ -1223,7 +1234,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
* Address range must be a subset of the original
* request and start on pmtu boundaries.
*/
- ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
+ ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
qp->s_rdma_len, vaddr, rkey,
IB_ACCESS_REMOTE_READ);
if (unlikely(!ok)) {
@@ -1282,6 +1293,14 @@ done:
return 1;
}
+static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
+{
+ spin_lock_irq(&qp->s_lock);
+ qp->state = IB_QPS_ERR;
+ ipath_error_qp(qp, err);
+ spin_unlock_irq(&qp->s_lock);
+}
+
/**
* ipath_rc_rcv - process an incoming RC packet
* @dev: the device this packet came in on
@@ -1309,6 +1328,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
struct ib_reth *reth;
int header_in_data;
+ /* Validate the SLID. See Ch. 9.6.1.5 */
+ if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
+ goto done;
+
/* Check for GRH */
if (!has_grh) {
ohdr = &hdr->u.oth;
@@ -1323,8 +1346,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
* the eager header buffer size to 56 bytes so the last 4
* bytes of the BTH header (PSN) is in the data buffer.
*/
- header_in_data =
- ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+ header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
if (header_in_data) {
psn = be32_to_cpu(((__be32 *) data)[0]);
data += sizeof(__be32);
@@ -1371,8 +1393,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
*/
if (qp->r_ack_state >= OP(COMPARE_SWAP))
goto send_ack;
- /* XXX Flush WQEs */
- qp->state = IB_QPS_ERR;
+ ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
qp->r_ack_state = OP(SEND_ONLY);
qp->r_nak_state = IB_NAK_INVALID_REQUEST;
qp->r_ack_psn = qp->r_psn;
@@ -1478,9 +1499,9 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto nack_inv;
ipath_copy_sge(&qp->r_sge, data, tlen);
qp->r_msn++;
- if (opcode == OP(RDMA_WRITE_LAST) ||
- opcode == OP(RDMA_WRITE_ONLY))
+ if (!qp->r_wrid_valid)
break;
+ qp->r_wrid_valid = 0;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
@@ -1518,7 +1539,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int ok;
/* Check rkey & NAK */
- ok = ipath_rkey_ok(dev, &qp->r_sge,
+ ok = ipath_rkey_ok(qp, &qp->r_sge,
qp->r_len, vaddr, rkey,
IB_ACCESS_REMOTE_WRITE);
if (unlikely(!ok))
@@ -1560,7 +1581,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int ok;
/* Check rkey & NAK */
- ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
+ ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
qp->s_rdma_len, vaddr, rkey,
IB_ACCESS_REMOTE_READ);
if (unlikely(!ok)) {
@@ -1619,7 +1640,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto nack_inv;
rkey = be32_to_cpu(ateth->rkey);
/* Check rkey & NAK */
- if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge,
+ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
sizeof(u64), vaddr, rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto nack_acc;
@@ -1671,8 +1692,7 @@ nack_acc:
* is pending though.
*/
if (qp->r_ack_state < OP(COMPARE_SWAP)) {
- /* XXX Flush WQEs */
- qp->state = IB_QPS_ERR;
+ ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
qp->r_ack_state = OP(RDMA_WRITE_ONLY);
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
qp->r_ack_psn = qp->r_psn;
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index 89df8f5ea99..dffc76016d3 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -36,8 +36,7 @@
/*
* This file should only be included by kernel source, and by the diags. It
- * defines the registers, and their contents, for the InfiniPath HT-400
- * chip.
+ * defines the registers, and their contents, for InfiniPath chips.
*/
/*
@@ -135,10 +134,24 @@
#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44
-#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL
-#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL
#define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL
#define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL
+/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF 0x1ULL
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC 0x2ULL
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL
+/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
+#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x04ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x08ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL
+/* waldo specific -- find the rest in ipath_6110.c */
+#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL
+/* monty specific -- find the rest in ipath_6120.c */
+#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL
/* kr_hwdiagctrl bits */
#define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL
@@ -210,9 +223,9 @@
/* combination link status states that we use with some frequency */
#define IPATH_IBSTATE_MASK ((INFINIPATH_IBCS_LINKTRAININGSTATE_MASK \
- << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
+ << INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | \
(INFINIPATH_IBCS_LINKSTATE_MASK \
- <<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
+ <<INFINIPATH_IBCS_LINKSTATE_SHIFT))
#define IPATH_IBSTATE_INIT ((INFINIPATH_IBCS_L_STATE_INIT \
<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
(INFINIPATH_IBCS_LT_STATE_LINKUP \
@@ -283,10 +296,12 @@
#define INFINIPATH_XGXS_RESET 0x7ULL
#define INFINIPATH_XGXS_MDIOADDR_MASK 0xfULL
#define INFINIPATH_XGXS_MDIOADDR_SHIFT 4
+#define INFINIPATH_XGXS_RX_POL_SHIFT 19
+#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
-/* TID entries (memory), HT400-only */
+/* TID entries (memory), HT-only */
#define INFINIPATH_RT_VALID 0x8000000000000000ULL
#define INFINIPATH_RT_ADDR_SHIFT 0
#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF
@@ -301,6 +316,17 @@
typedef u64 ipath_err_t;
+/* The following change with the type of device, so
+ * need to be part of the ipath_devdata struct, or
+ * we could have problems plugging in devices of
+ * different types (e.g. one HT, one PCIE)
+ * in one system, to be managed by one driver.
+ * On the other hand, this file is may also be included
+ * by other code, so leave the declarations here
+ * temporarily. Minor footprint issue if common-model
+ * linker used, none if C89+ linker used.
+ */
+
/* mask of defined bits for various registers */
extern u64 infinipath_i_bitsextant;
extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
@@ -309,13 +335,6 @@ extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
/*
- * register bits for selecting i2c direction and values, used for I2C serial
- * flash
- */
-extern u16 ipath_gpio_sda_num, ipath_gpio_scl_num;
-extern u64 ipath_gpio_sda, ipath_gpio_scl;
-
-/*
* These are the infinipath general register numbers (not offsets).
* The kernel registers are used directly, those beyond the kernel
* registers are calculated from one of the base registers. The use of
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 772bc59fb85..f7530512045 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -32,7 +32,7 @@
*/
#include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
/*
* Convert the AETH RNR timeout code into the number of milliseconds.
@@ -106,6 +106,52 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
spin_unlock_irqrestore(&dev->pending_lock, flags);
}
+static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
+{
+ int user = to_ipd(qp->ibqp.pd)->user;
+ int i, j, ret;
+ struct ib_wc wc;
+
+ qp->r_len = 0;
+ for (i = j = 0; i < wqe->num_sge; i++) {
+ if (wqe->sg_list[i].length == 0)
+ continue;
+ /* Check LKEY */
+ if ((user && wqe->sg_list[i].lkey == 0) ||
+ !ipath_lkey_ok(qp, &qp->r_sg_list[j], &wqe->sg_list[i],
+ IB_ACCESS_LOCAL_WRITE))
+ goto bad_lkey;
+ qp->r_len += wqe->sg_list[i].length;
+ j++;
+ }
+ qp->r_sge.sge = qp->r_sg_list[0];
+ qp->r_sge.sg_list = qp->r_sg_list + 1;
+ qp->r_sge.num_sge = j;
+ ret = 1;
+ goto bail;
+
+bad_lkey:
+ wc.wr_id = wqe->wr_id;
+ wc.status = IB_WC_LOC_PROT_ERR;
+ wc.opcode = IB_WC_RECV;
+ wc.vendor_err = 0;
+ wc.byte_len = 0;
+ wc.imm_data = 0;
+ wc.qp_num = qp->ibqp.qp_num;
+ wc.src_qp = 0;
+ wc.wc_flags = 0;
+ wc.pkey_index = 0;
+ wc.slid = 0;
+ wc.sl = 0;
+ wc.dlid_path_bits = 0;
+ wc.port_num = 0;
+ /* Signal solicited completion event. */
+ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+ ret = 0;
+bail:
+ return ret;
+}
+
/**
* ipath_get_rwqe - copy the next RWQE into the QP's RWQE
* @qp: the QP
@@ -119,71 +165,72 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
{
unsigned long flags;
struct ipath_rq *rq;
+ struct ipath_rwq *wq;
struct ipath_srq *srq;
struct ipath_rwqe *wqe;
- int ret = 1;
+ void (*handler)(struct ib_event *, void *);
+ u32 tail;
+ int ret;
- if (!qp->ibqp.srq) {
+ if (qp->ibqp.srq) {
+ srq = to_isrq(qp->ibqp.srq);
+ handler = srq->ibsrq.event_handler;
+ rq = &srq->rq;
+ } else {
+ srq = NULL;
+ handler = NULL;
rq = &qp->r_rq;
- spin_lock_irqsave(&rq->lock, flags);
-
- if (unlikely(rq->tail == rq->head)) {
- ret = 0;
- goto done;
- }
- wqe = get_rwqe_ptr(rq, rq->tail);
- qp->r_wr_id = wqe->wr_id;
- if (!wr_id_only) {
- qp->r_sge.sge = wqe->sg_list[0];
- qp->r_sge.sg_list = wqe->sg_list + 1;
- qp->r_sge.num_sge = wqe->num_sge;
- qp->r_len = wqe->length;
- }
- if (++rq->tail >= rq->size)
- rq->tail = 0;
- goto done;
}
- srq = to_isrq(qp->ibqp.srq);
- rq = &srq->rq;
spin_lock_irqsave(&rq->lock, flags);
-
- if (unlikely(rq->tail == rq->head)) {
- ret = 0;
- goto done;
- }
- wqe = get_rwqe_ptr(rq, rq->tail);
+ wq = rq->wq;
+ tail = wq->tail;
+ /* Validate tail before using it since it is user writable. */
+ if (tail >= rq->size)
+ tail = 0;
+ do {
+ if (unlikely(tail == wq->head)) {
+ spin_unlock_irqrestore(&rq->lock, flags);
+ ret = 0;
+ goto bail;
+ }
+ wqe = get_rwqe_ptr(rq, tail);
+ if (++tail >= rq->size)
+ tail = 0;
+ } while (!wr_id_only && !init_sge(qp, wqe));
qp->r_wr_id = wqe->wr_id;
- if (!wr_id_only) {
- qp->r_sge.sge = wqe->sg_list[0];
- qp->r_sge.sg_list = wqe->sg_list + 1;
- qp->r_sge.num_sge = wqe->num_sge;
- qp->r_len = wqe->length;
- }
- if (++rq->tail >= rq->size)
- rq->tail = 0;
- if (srq->ibsrq.event_handler) {
- struct ib_event ev;
+ wq->tail = tail;
+
+ ret = 1;
+ if (handler) {
u32 n;
- if (rq->head < rq->tail)
- n = rq->size + rq->head - rq->tail;
+ /*
+ * validate head pointer value and compute
+ * the number of remaining WQEs.
+ */
+ n = wq->head;
+ if (n >= rq->size)
+ n = 0;
+ if (n < tail)
+ n += rq->size - tail;
else
- n = rq->head - rq->tail;
+ n -= tail;
if (n < srq->limit) {
+ struct ib_event ev;
+
srq->limit = 0;
spin_unlock_irqrestore(&rq->lock, flags);
ev.device = qp->ibqp.device;
ev.element.srq = qp->ibqp.srq;
ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
- srq->ibsrq.event_handler(&ev,
- srq->ibsrq.srq_context);
+ handler(&ev, srq->ibsrq.srq_context);
goto bail;
}
}
-
-done:
spin_unlock_irqrestore(&rq->lock, flags);
+ qp->r_wrid_valid = 1;
+
bail:
return ret;
}
@@ -278,7 +325,7 @@ again:
case IB_WR_RDMA_WRITE:
if (wqe->length == 0)
break;
- if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, wqe->length,
+ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_WRITE))) {
@@ -302,7 +349,7 @@ again:
break;
case IB_WR_RDMA_READ:
- if (unlikely(!ipath_rkey_ok(dev, &sqp->s_sge, wqe->length,
+ if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_READ)))
@@ -317,7 +364,7 @@ again:
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64),
+ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
@@ -422,6 +469,15 @@ done:
wake_up(&qp->wait);
}
+static int want_buffer(struct ipath_devdata *dd)
+{
+ set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+
+ return 0;
+}
+
/**
* ipath_no_bufs_available - tell the layer driver we need buffers
* @qp: the QP that caused the problem
@@ -438,7 +494,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
list_add_tail(&qp->piowait, &dev->piowait);
spin_unlock_irqrestore(&dev->pending_lock, flags);
/*
- * Note that as soon as ipath_layer_want_buffer() is called and
+ * Note that as soon as want_buffer() is called and
* possibly before it returns, ipath_ib_piobufavail()
* could be called. If we are still in the tasklet function,
* tasklet_hi_schedule() will not call us until the next time
@@ -448,7 +504,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
*/
clear_bit(IPATH_S_BUSY, &qp->s_flags);
tasklet_unlock(&qp->s_task);
- ipath_layer_want_buffer(dev->dd);
+ want_buffer(dev->dd);
dev->n_piowait++;
}
@@ -518,8 +574,7 @@ int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
}
if (wr->sg_list[i].length == 0)
continue;
- if (!ipath_lkey_ok(&to_idev(qp->ibqp.device)->lk_table,
- &wqe->sg_list[j], &wr->sg_list[i],
+ if (!ipath_lkey_ok(qp, &wqe->sg_list[j], &wr->sg_list[i],
acc)) {
spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EINVAL;
@@ -563,7 +618,7 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
hdr->hop_limit = grh->hop_limit;
/* The SGID is 32-bit aligned. */
hdr->sgid.global.subnet_prefix = dev->gid_prefix;
- hdr->sgid.global.interface_id = ipath_layer_get_guid(dev->dd);
+ hdr->sgid.global.interface_id = dev->dd->ipath_guid;
hdr->dgid = grh->dgid;
/* GRH header size in 32-bit words. */
@@ -595,8 +650,7 @@ void ipath_do_ruc_send(unsigned long data)
if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
goto bail;
- if (unlikely(qp->remote_ah_attr.dlid ==
- ipath_layer_get_lid(dev->dd))) {
+ if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
ipath_ruc_loopback(qp);
goto clear;
}
@@ -663,8 +717,8 @@ again:
qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
SIZE_OF_CRC);
- qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
- bth0 |= ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+ qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
+ bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
bth0 |= extra_bytes << 20;
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
index f760434660b..94033503400 100644
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -48,66 +48,39 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr)
{
struct ipath_srq *srq = to_isrq(ibsrq);
- struct ipath_ibdev *dev = to_idev(ibsrq->device);
+ struct ipath_rwq *wq;
unsigned long flags;
int ret;
for (; wr; wr = wr->next) {
struct ipath_rwqe *wqe;
u32 next;
- int i, j;
+ int i;
- if (wr->num_sge > srq->rq.max_sge) {
+ if ((unsigned) wr->num_sge > srq->rq.max_sge) {
*bad_wr = wr;
ret = -ENOMEM;
goto bail;
}
spin_lock_irqsave(&srq->rq.lock, flags);
- next = srq->rq.head + 1;
+ wq = srq->rq.wq;
+ next = wq->head + 1;
if (next >= srq->rq.size)
next = 0;
- if (next == srq->rq.tail) {
+ if (next == wq->tail) {
spin_unlock_irqrestore(&srq->rq.lock, flags);
*bad_wr = wr;
ret = -ENOMEM;
goto bail;
}
- wqe = get_rwqe_ptr(&srq->rq, srq->rq.head);
+ wqe = get_rwqe_ptr(&srq->rq, wq->head);
wqe->wr_id = wr->wr_id;
- wqe->sg_list[0].mr = NULL;
- wqe->sg_list[0].vaddr = NULL;
- wqe->sg_list[0].length = 0;
- wqe->sg_list[0].sge_length = 0;
- wqe->length = 0;
- for (i = 0, j = 0; i < wr->num_sge; i++) {
- /* Check LKEY */
- if (to_ipd(srq->ibsrq.pd)->user &&
- wr->sg_list[i].lkey == 0) {
- spin_unlock_irqrestore(&srq->rq.lock,
- flags);
- *bad_wr = wr;
- ret = -EINVAL;
- goto bail;
- }
- if (wr->sg_list[i].length == 0)
- continue;
- if (!ipath_lkey_ok(&dev->lk_table,
- &wqe->sg_list[j],
- &wr->sg_list[i],
- IB_ACCESS_LOCAL_WRITE)) {
- spin_unlock_irqrestore(&srq->rq.lock,
- flags);
- *bad_wr = wr;
- ret = -EINVAL;
- goto bail;
- }
- wqe->length += wr->sg_list[i].length;
- j++;
- }
- wqe->num_sge = j;
- srq->rq.head = next;
+ wqe->num_sge = wr->num_sge;
+ for (i = 0; i < wr->num_sge; i++)
+ wqe->sg_list[i] = wr->sg_list[i];
+ wq->head = next;
spin_unlock_irqrestore(&srq->rq.lock, flags);
}
ret = 0;
@@ -131,55 +104,99 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
u32 sz;
struct ib_srq *ret;
- if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
if (srq_init_attr->attr.max_wr == 0) {
ret = ERR_PTR(-EINVAL);
- goto bail;
+ goto done;
}
if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) ||
(srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) {
ret = ERR_PTR(-EINVAL);
- goto bail;
+ goto done;
}
srq = kmalloc(sizeof(*srq), GFP_KERNEL);
if (!srq) {
ret = ERR_PTR(-ENOMEM);
- goto bail;
+ goto done;
}
/*
* Need to use vmalloc() if we want to support large #s of entries.
*/
srq->rq.size = srq_init_attr->attr.max_wr + 1;
- sz = sizeof(struct ipath_sge) * srq_init_attr->attr.max_sge +
+ srq->rq.max_sge = srq_init_attr->attr.max_sge;
+ sz = sizeof(struct ib_sge) * srq->rq.max_sge +
sizeof(struct ipath_rwqe);
- srq->rq.wq = vmalloc(srq->rq.size * sz);
+ srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz);
if (!srq->rq.wq) {
- kfree(srq);
ret = ERR_PTR(-ENOMEM);
- goto bail;
+ goto bail_srq;
}
/*
+ * Return the address of the RWQ as the offset to mmap.
+ * See ipath_mmap() for details.
+ */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ struct ipath_mmap_info *ip;
+ __u64 offset = (__u64) srq->rq.wq;
+ int err;
+
+ err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_wq;
+ }
+
+ /* Allocate info for ipath_mmap(). */
+ ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+ if (!ip) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_wq;
+ }
+ srq->ip = ip;
+ ip->context = ibpd->uobject->context;
+ ip->obj = srq->rq.wq;
+ kref_init(&ip->ref);
+ ip->mmap_cnt = 0;
+ ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+ srq->rq.size * sz);
+ spin_lock_irq(&dev->pending_lock);
+ ip->next = dev->pending_mmaps;
+ dev->pending_mmaps = ip;
+ spin_unlock_irq(&dev->pending_lock);
+ } else
+ srq->ip = NULL;
+
+ /*
* ib_create_srq() will initialize srq->ibsrq.
*/
spin_lock_init(&srq->rq.lock);
- srq->rq.head = 0;
- srq->rq.tail = 0;
- srq->rq.max_sge = srq_init_attr->attr.max_sge;
+ srq->rq.wq->head = 0;
+ srq->rq.wq->tail = 0;
srq->limit = srq_init_attr->attr.srq_limit;
+ spin_lock(&dev->n_srqs_lock);
+ if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
+ spin_unlock(&dev->n_srqs_lock);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_wq;
+ }
+
+ dev->n_srqs_allocated++;
+ spin_unlock(&dev->n_srqs_lock);
+
ret = &srq->ibsrq;
+ goto done;
- dev->n_srqs_allocated++;
+bail_wq:
+ vfree(srq->rq.wq);
-bail:
+bail_srq:
+ kfree(srq);
+
+done:
return ret;
}
@@ -188,83 +205,130 @@ bail:
* @ibsrq: the SRQ to modify
* @attr: the new attributes of the SRQ
* @attr_mask: indicates which attributes to modify
+ * @udata: user data for ipathverbs.so
*/
int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
- enum ib_srq_attr_mask attr_mask)
+ enum ib_srq_attr_mask attr_mask,
+ struct ib_udata *udata)
{
struct ipath_srq *srq = to_isrq(ibsrq);
- unsigned long flags;
- int ret;
+ int ret = 0;
- if (attr_mask & IB_SRQ_MAX_WR)
- if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
- (attr->max_sge > srq->rq.max_sge)) {
- ret = -EINVAL;
- goto bail;
- }
+ if (attr_mask & IB_SRQ_MAX_WR) {
+ struct ipath_rwq *owq;
+ struct ipath_rwq *wq;
+ struct ipath_rwqe *p;
+ u32 sz, size, n, head, tail;
- if (attr_mask & IB_SRQ_LIMIT)
- if (attr->srq_limit >= srq->rq.size) {
+ /* Check that the requested sizes are below the limits. */
+ if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
+ ((attr_mask & IB_SRQ_LIMIT) ?
+ attr->srq_limit : srq->limit) > attr->max_wr) {
ret = -EINVAL;
goto bail;
}
- if (attr_mask & IB_SRQ_MAX_WR) {
- struct ipath_rwqe *wq, *p;
- u32 sz, size, n;
-
sz = sizeof(struct ipath_rwqe) +
- attr->max_sge * sizeof(struct ipath_sge);
+ srq->rq.max_sge * sizeof(struct ib_sge);
size = attr->max_wr + 1;
- wq = vmalloc(size * sz);
+ wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz);
if (!wq) {
ret = -ENOMEM;
goto bail;
}
- spin_lock_irqsave(&srq->rq.lock, flags);
- if (srq->rq.head < srq->rq.tail)
- n = srq->rq.size + srq->rq.head - srq->rq.tail;
+ /*
+ * Return the address of the RWQ as the offset to mmap.
+ * See ipath_mmap() for details.
+ */
+ if (udata && udata->inlen >= sizeof(__u64)) {
+ __u64 offset_addr;
+ __u64 offset = (__u64) wq;
+
+ ret = ib_copy_from_udata(&offset_addr, udata,
+ sizeof(offset_addr));
+ if (ret) {
+ vfree(wq);
+ goto bail;
+ }
+ udata->outbuf = (void __user *) offset_addr;
+ ret = ib_copy_to_udata(udata, &offset,
+ sizeof(offset));
+ if (ret) {
+ vfree(wq);
+ goto bail;
+ }
+ }
+
+ spin_lock_irq(&srq->rq.lock);
+ /*
+ * validate head pointer value and compute
+ * the number of remaining WQEs.
+ */
+ owq = srq->rq.wq;
+ head = owq->head;
+ if (head >= srq->rq.size)
+ head = 0;
+ tail = owq->tail;
+ if (tail >= srq->rq.size)
+ tail = 0;
+ n = head;
+ if (n < tail)
+ n += srq->rq.size - tail;
else
- n = srq->rq.head - srq->rq.tail;
- if (size <= n || size <= srq->limit) {
- spin_unlock_irqrestore(&srq->rq.lock, flags);
+ n -= tail;
+ if (size <= n) {
+ spin_unlock_irq(&srq->rq.lock);
vfree(wq);
ret = -EINVAL;
goto bail;
}
n = 0;
- p = wq;
- while (srq->rq.tail != srq->rq.head) {
+ p = wq->wq;
+ while (tail != head) {
struct ipath_rwqe *wqe;
int i;
- wqe = get_rwqe_ptr(&srq->rq, srq->rq.tail);
+ wqe = get_rwqe_ptr(&srq->rq, tail);
p->wr_id = wqe->wr_id;
- p->length = wqe->length;
p->num_sge = wqe->num_sge;
for (i = 0; i < wqe->num_sge; i++)
p->sg_list[i] = wqe->sg_list[i];
n++;
p = (struct ipath_rwqe *)((char *) p + sz);
- if (++srq->rq.tail >= srq->rq.size)
- srq->rq.tail = 0;
+ if (++tail >= srq->rq.size)
+ tail = 0;
}
- vfree(srq->rq.wq);
srq->rq.wq = wq;
srq->rq.size = size;
- srq->rq.head = n;
- srq->rq.tail = 0;
- srq->rq.max_sge = attr->max_sge;
- spin_unlock_irqrestore(&srq->rq.lock, flags);
- }
-
- if (attr_mask & IB_SRQ_LIMIT) {
- spin_lock_irqsave(&srq->rq.lock, flags);
- srq->limit = attr->srq_limit;
- spin_unlock_irqrestore(&srq->rq.lock, flags);
+ wq->head = n;
+ wq->tail = 0;
+ if (attr_mask & IB_SRQ_LIMIT)
+ srq->limit = attr->srq_limit;
+ spin_unlock_irq(&srq->rq.lock);
+
+ vfree(owq);
+
+ if (srq->ip) {
+ struct ipath_mmap_info *ip = srq->ip;
+ struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
+
+ ip->obj = wq;
+ ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+ size * sz);
+ spin_lock_irq(&dev->pending_lock);
+ ip->next = dev->pending_mmaps;
+ dev->pending_mmaps = ip;
+ spin_unlock_irq(&dev->pending_lock);
+ }
+ } else if (attr_mask & IB_SRQ_LIMIT) {
+ spin_lock_irq(&srq->rq.lock);
+ if (attr->srq_limit >= srq->rq.size)
+ ret = -EINVAL;
+ else
+ srq->limit = attr->srq_limit;
+ spin_unlock_irq(&srq->rq.lock);
}
- ret = 0;
bail:
return ret;
@@ -289,8 +353,13 @@ int ipath_destroy_srq(struct ib_srq *ibsrq)
struct ipath_srq *srq = to_isrq(ibsrq);
struct ipath_ibdev *dev = to_idev(ibsrq->device);
+ spin_lock(&dev->n_srqs_lock);
dev->n_srqs_allocated--;
- vfree(srq->rq.wq);
+ spin_unlock(&dev->n_srqs_lock);
+ if (srq->ip)
+ kref_put(&srq->ip->ref, ipath_release_mmap_info);
+ else
+ vfree(srq->rq.wq);
kfree(srq);
return 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index 70351b7e35c..30a825928fc 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -271,33 +271,6 @@ void ipath_get_faststats(unsigned long opaque)
}
}
- if (dd->ipath_nosma_bufs) {
- dd->ipath_nosma_secs += 5;
- if (dd->ipath_nosma_secs >= 30) {
- ipath_cdbg(SMA, "No SMA bufs avail %u seconds; "
- "cancelling pending sends\n",
- dd->ipath_nosma_secs);
- /*
- * issue an abort as well, in case we have a packet
- * stuck in launch fifo. This could corrupt an
- * outgoing user packet in the worst case,
- * but this is a pretty catastrophic, anyway.
- */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- INFINIPATH_S_ABORT);
- ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
- dd->ipath_piobcnt2k +
- dd->ipath_piobcnt4k -
- dd->ipath_lastport_piobuf);
- /* start again, if necessary */
- dd->ipath_nosma_secs = 0;
- } else
- ipath_cdbg(SMA, "No SMA bufs avail %u tries, "
- "after %u seconds\n",
- dd->ipath_nosma_bufs,
- dd->ipath_nosma_secs);
- }
-
done:
mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index b98821d7801..182de34f9f4 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -35,7 +35,6 @@
#include <linux/pci.h>
#include "ipath_kernel.h"
-#include "ipath_layer.h"
#include "ipath_common.h"
/**
@@ -76,7 +75,7 @@ bail:
static ssize_t show_version(struct device_driver *dev, char *buf)
{
/* The string printed here is already newline-terminated. */
- return scnprintf(buf, PAGE_SIZE, "%s", ipath_core_version);
+ return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version);
}
static ssize_t show_num_units(struct device_driver *dev, char *buf)
@@ -108,8 +107,8 @@ static const char *ipath_status_str[] = {
"Initted",
"Disabled",
"Admin_Disabled",
- "OIB_SMA",
- "SMA",
+ "", /* This used to be the old "OIB_SMA" status. */
+ "", /* This used to be the old "SMA" status. */
"Present",
"IB_link_up",
"IB_configured",
@@ -227,7 +226,6 @@ static ssize_t store_mlid(struct device *dev,
unit = dd->ipath_unit;
dd->ipath_mlid = mlid;
- ipath_layer_intr(dd, IPATH_LAYER_INT_BCAST);
goto bail;
invalid:
@@ -259,7 +257,7 @@ static ssize_t store_guid(struct device *dev,
struct ipath_devdata *dd = dev_get_drvdata(dev);
ssize_t ret;
unsigned short guid[8];
- __be64 nguid;
+ __be64 new_guid;
u8 *ng;
int i;
@@ -268,7 +266,7 @@ static ssize_t store_guid(struct device *dev,
&guid[4], &guid[5], &guid[6], &guid[7]) != 8)
goto invalid;
- ng = (u8 *) &nguid;
+ ng = (u8 *) &new_guid;
for (i = 0; i < 8; i++) {
if (guid[i] > 0xff)
@@ -276,7 +274,10 @@ static ssize_t store_guid(struct device *dev,
ng[i] = guid[i];
}
- dd->ipath_guid = nguid;
+ if (new_guid == 0)
+ goto invalid;
+
+ dd->ipath_guid = new_guid;
dd->ipath_nguid = 1;
ret = strlen(buf);
@@ -299,6 +300,16 @@ static ssize_t show_nguid(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid);
}
+static ssize_t show_nports(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+
+ /* Return the number of user ports available. */
+ return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1);
+}
+
static ssize_t show_serial(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -467,7 +478,7 @@ static ssize_t store_link_state(struct device *dev,
if (ret < 0)
goto invalid;
- r = ipath_layer_set_linkstate(dd, state);
+ r = ipath_set_linkstate(dd, state);
if (r < 0) {
ret = r;
goto bail;
@@ -502,7 +513,7 @@ static ssize_t store_mtu(struct device *dev,
if (ret < 0)
goto invalid;
- r = ipath_layer_set_mtu(dd, mtu);
+ r = ipath_set_mtu(dd, mtu);
if (r < 0)
ret = r;
@@ -563,6 +574,33 @@ bail:
return ret;
}
+static ssize_t store_rx_pol_inv(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret, r;
+ u16 val;
+
+ ret = ipath_parse_ushort(buf, &val);
+ if (ret < 0)
+ goto invalid;
+
+ r = ipath_set_rx_pol_inv(dd, val);
+ if (r < 0) {
+ ret = r;
+ goto bail;
+ }
+
+ goto bail;
+invalid:
+ ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n");
+bail:
+ return ret;
+}
+
+
static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
@@ -583,12 +621,14 @@ static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid);
static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu);
static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled);
static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL);
+static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL);
static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset);
static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
+static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
static struct attribute *dev_attributes[] = {
&dev_attr_guid.attr,
@@ -597,12 +637,14 @@ static struct attribute *dev_attributes[] = {
&dev_attr_mlid.attr,
&dev_attr_mtu.attr,
&dev_attr_nguid.attr,
+ &dev_attr_nports.attr,
&dev_attr_serial.attr,
&dev_attr_status.attr,
&dev_attr_status_str.attr,
&dev_attr_boardversion.attr,
&dev_attr_unit.attr,
&dev_attr_enabled.attr,
+ &dev_attr_rx_pol_inv.attr,
NULL
};
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index c33abea2d5a..e636cfd67a8 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -32,7 +32,7 @@
*/
#include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_UC_##x
@@ -246,6 +246,10 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
struct ib_reth *reth;
int header_in_data;
+ /* Validate the SLID. See Ch. 9.6.1.5 */
+ if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
+ goto done;
+
/* Check for GRH */
if (!has_grh) {
ohdr = &hdr->u.oth;
@@ -261,8 +265,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
* size to 56 bytes so the last 4 bytes of
* the BTH header (PSN) is in the data buffer.
*/
- header_in_data =
- ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+ header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
if (header_in_data) {
psn = be32_to_cpu(((__be32 *) data)[0]);
data += sizeof(__be32);
@@ -441,7 +444,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int ok;
/* Check rkey */
- ok = ipath_rkey_ok(dev, &qp->r_sge, qp->r_len,
+ ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len,
vaddr, rkey,
IB_ACCESS_REMOTE_WRITE);
if (unlikely(!ok)) {
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 3466129af80..49f1102af8b 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -34,7 +34,52 @@
#include <rdma/ib_smi.h>
#include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
+
+static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
+ u32 *lengthp, struct ipath_sge_state *ss)
+{
+ int user = to_ipd(qp->ibqp.pd)->user;
+ int i, j, ret;
+ struct ib_wc wc;
+
+ *lengthp = 0;
+ for (i = j = 0; i < wqe->num_sge; i++) {
+ if (wqe->sg_list[i].length == 0)
+ continue;
+ /* Check LKEY */
+ if ((user && wqe->sg_list[i].lkey == 0) ||
+ !ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
+ &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+ goto bad_lkey;
+ *lengthp += wqe->sg_list[i].length;
+ j++;
+ }
+ ss->num_sge = j;
+ ret = 1;
+ goto bail;
+
+bad_lkey:
+ wc.wr_id = wqe->wr_id;
+ wc.status = IB_WC_LOC_PROT_ERR;
+ wc.opcode = IB_WC_RECV;
+ wc.vendor_err = 0;
+ wc.byte_len = 0;
+ wc.imm_data = 0;
+ wc.qp_num = qp->ibqp.qp_num;
+ wc.src_qp = 0;
+ wc.wc_flags = 0;
+ wc.pkey_index = 0;
+ wc.slid = 0;
+ wc.sl = 0;
+ wc.dlid_path_bits = 0;
+ wc.port_num = 0;
+ /* Signal solicited completion event. */
+ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+ ret = 0;
+bail:
+ return ret;
+}
/**
* ipath_ud_loopback - handle send on loopback QPs
@@ -46,6 +91,8 @@
*
* This is called from ipath_post_ud_send() to forward a WQE addressed
* to the same HCA.
+ * Note that the receive interrupt handler may be calling ipath_ud_rcv()
+ * while this is being called.
*/
static void ipath_ud_loopback(struct ipath_qp *sqp,
struct ipath_sge_state *ss,
@@ -60,7 +107,11 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
struct ipath_srq *srq;
struct ipath_sge_state rsge;
struct ipath_sge *sge;
+ struct ipath_rwq *wq;
struct ipath_rwqe *wqe;
+ void (*handler)(struct ib_event *, void *);
+ u32 tail;
+ u32 rlen;
qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn);
if (!qp)
@@ -94,6 +145,13 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
wc->imm_data = 0;
}
+ if (wr->num_sge > 1) {
+ rsge.sg_list = kmalloc((wr->num_sge - 1) *
+ sizeof(struct ipath_sge),
+ GFP_ATOMIC);
+ } else
+ rsge.sg_list = NULL;
+
/*
* Get the next work request entry to find where to put the data.
* Note that it is safe to drop the lock after changing rq->tail
@@ -101,37 +159,52 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
*/
if (qp->ibqp.srq) {
srq = to_isrq(qp->ibqp.srq);
+ handler = srq->ibsrq.event_handler;
rq = &srq->rq;
} else {
srq = NULL;
+ handler = NULL;
rq = &qp->r_rq;
}
+
spin_lock_irqsave(&rq->lock, flags);
- if (rq->tail == rq->head) {
- spin_unlock_irqrestore(&rq->lock, flags);
- dev->n_pkt_drops++;
- goto done;
+ wq = rq->wq;
+ tail = wq->tail;
+ while (1) {
+ if (unlikely(tail == wq->head)) {
+ spin_unlock_irqrestore(&rq->lock, flags);
+ dev->n_pkt_drops++;
+ goto bail_sge;
+ }
+ wqe = get_rwqe_ptr(rq, tail);
+ if (++tail >= rq->size)
+ tail = 0;
+ if (init_sge(qp, wqe, &rlen, &rsge))
+ break;
+ wq->tail = tail;
}
/* Silently drop packets which are too big. */
- wqe = get_rwqe_ptr(rq, rq->tail);
- if (wc->byte_len > wqe->length) {
+ if (wc->byte_len > rlen) {
spin_unlock_irqrestore(&rq->lock, flags);
dev->n_pkt_drops++;
- goto done;
+ goto bail_sge;
}
+ wq->tail = tail;
wc->wr_id = wqe->wr_id;
- rsge.sge = wqe->sg_list[0];
- rsge.sg_list = wqe->sg_list + 1;
- rsge.num_sge = wqe->num_sge;
- if (++rq->tail >= rq->size)
- rq->tail = 0;
- if (srq && srq->ibsrq.event_handler) {
+ if (handler) {
u32 n;
- if (rq->head < rq->tail)
- n = rq->size + rq->head - rq->tail;
+ /*
+ * validate head pointer value and compute
+ * the number of remaining WQEs.
+ */
+ n = wq->head;
+ if (n >= rq->size)
+ n = 0;
+ if (n < tail)
+ n += rq->size - tail;
else
- n = rq->head - rq->tail;
+ n -= tail;
if (n < srq->limit) {
struct ib_event ev;
@@ -140,12 +213,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
ev.device = qp->ibqp.device;
ev.element.srq = qp->ibqp.srq;
ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
- srq->ibsrq.event_handler(&ev,
- srq->ibsrq.srq_context);
+ handler(&ev, srq->ibsrq.srq_context);
} else
spin_unlock_irqrestore(&rq->lock, flags);
} else
spin_unlock_irqrestore(&rq->lock, flags);
+
ah_attr = &to_iah(wr->wr.ud.ah)->attr;
if (ah_attr->ah_flags & IB_AH_GRH) {
ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
@@ -186,7 +259,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
wc->src_qp = sqp->ibqp.qp_num;
/* XXX do we know which pkey matched? Only needed for GSI. */
wc->pkey_index = 0;
- wc->slid = ipath_layer_get_lid(dev->dd) |
+ wc->slid = dev->dd->ipath_lid |
(ah_attr->src_path_bits &
((1 << (dev->mkeyprot_resv_lmc & 7)) - 1));
wc->sl = ah_attr->sl;
@@ -196,6 +269,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
wr->send_flags & IB_SEND_SOLICITED);
+bail_sge:
+ kfree(rsge.sg_list);
done:
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
@@ -266,7 +341,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
if (wr->sg_list[i].length == 0)
continue;
- if (!ipath_lkey_ok(&dev->lk_table, ss.num_sge ?
+ if (!ipath_lkey_ok(qp, ss.num_sge ?
sg_list + ss.num_sge - 1 : &ss.sge,
&wr->sg_list[i], 0)) {
ret = -EINVAL;
@@ -276,7 +351,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
ss.num_sge++;
}
/* Check for invalid packet size. */
- if (len > ipath_layer_get_ibmtu(dev->dd)) {
+ if (len > dev->dd->ipath_ibmtu) {
ret = -EINVAL;
goto bail;
}
@@ -298,7 +373,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
dev->n_unicast_xmit++;
lid = ah_attr->dlid &
~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
- if (unlikely(lid == ipath_layer_get_lid(dev->dd))) {
+ if (unlikely(lid == dev->dd->ipath_lid)) {
/*
* Pass in an uninitialized ib_wc to save stack
* space.
@@ -327,7 +402,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
qp->s_hdr.u.l.grh.sgid.global.subnet_prefix =
dev->gid_prefix;
qp->s_hdr.u.l.grh.sgid.global.interface_id =
- ipath_layer_get_guid(dev->dd);
+ dev->dd->ipath_guid;
qp->s_hdr.u.l.grh.dgid = ah_attr->grh.dgid;
/*
* Don't worry about sending to locally attached multicast
@@ -357,7 +432,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC);
- lid = ipath_layer_get_lid(dev->dd);
+ lid = dev->dd->ipath_lid;
if (lid) {
lid |= ah_attr->src_path_bits &
((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
@@ -368,7 +443,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
bth0 |= 1 << 23;
bth0 |= extra_bytes << 20;
bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
- ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+ ipath_get_pkey(dev->dd, qp->s_pkey_index);
ohdr->bth[0] = cpu_to_be32(bth0);
/*
* Use the multicast QP if the destination LID is a multicast LID.
@@ -433,13 +508,9 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int opcode;
u32 hdrsize;
u32 pad;
- unsigned long flags;
struct ib_wc wc;
u32 qkey;
u32 src_qp;
- struct ipath_rq *rq;
- struct ipath_srq *srq;
- struct ipath_rwqe *wqe;
u16 dlid;
int header_in_data;
@@ -458,8 +529,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
* the eager header buffer size to 56 bytes so the last 12
* bytes of the IB header is in the data buffer.
*/
- header_in_data =
- ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+ header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
if (header_in_data) {
qkey = be32_to_cpu(((__be32 *) data)[1]);
src_qp = be32_to_cpu(((__be32 *) data)[2]);
@@ -547,19 +617,10 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
/*
* Get the next work request entry to find where to put the data.
- * Note that it is safe to drop the lock after changing rq->tail
- * since ipath_post_receive() won't fill the empty slot.
*/
- if (qp->ibqp.srq) {
- srq = to_isrq(qp->ibqp.srq);
- rq = &srq->rq;
- } else {
- srq = NULL;
- rq = &qp->r_rq;
- }
- spin_lock_irqsave(&rq->lock, flags);
- if (rq->tail == rq->head) {
- spin_unlock_irqrestore(&rq->lock, flags);
+ if (qp->r_reuse_sge)
+ qp->r_reuse_sge = 0;
+ else if (!ipath_get_rwqe(qp, 0)) {
/*
* Count VL15 packets dropped due to no receive buffer.
* Otherwise, count them as buffer overruns since usually,
@@ -573,39 +634,11 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto bail;
}
/* Silently drop packets which are too big. */
- wqe = get_rwqe_ptr(rq, rq->tail);
- if (wc.byte_len > wqe->length) {
- spin_unlock_irqrestore(&rq->lock, flags);
+ if (wc.byte_len > qp->r_len) {
+ qp->r_reuse_sge = 1;
dev->n_pkt_drops++;
goto bail;
}
- wc.wr_id = wqe->wr_id;
- qp->r_sge.sge = wqe->sg_list[0];
- qp->r_sge.sg_list = wqe->sg_list + 1;
- qp->r_sge.num_sge = wqe->num_sge;
- if (++rq->tail >= rq->size)
- rq->tail = 0;
- if (srq && srq->ibsrq.event_handler) {
- u32 n;
-
- if (rq->head < rq->tail)
- n = rq->size + rq->head - rq->tail;
- else
- n = rq->head - rq->tail;
- if (n < srq->limit) {
- struct ib_event ev;
-
- srq->limit = 0;
- spin_unlock_irqrestore(&rq->lock, flags);
- ev.device = qp->ibqp.device;
- ev.element.srq = qp->ibqp.srq;
- ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
- srq->ibsrq.event_handler(&ev,
- srq->ibsrq.srq_context);
- } else
- spin_unlock_irqrestore(&rq->lock, flags);
- } else
- spin_unlock_irqrestore(&rq->lock, flags);
if (has_grh) {
ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh,
sizeof(struct ib_grh));
@@ -614,6 +647,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
ipath_copy_sge(&qp->r_sge, data,
wc.byte_len - sizeof(struct ib_grh));
+ wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
wc.vendor_err = 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
index e32fca9faf8..413754b1d8a 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -90,6 +90,62 @@ bail:
}
/**
+ * ipath_map_page - a safety wrapper around pci_map_page()
+ *
+ * A dma_addr of all 0's is interpreted by the chip as "disabled".
+ * Unfortunately, it can also be a valid dma_addr returned on some
+ * architectures.
+ *
+ * The powerpc iommu assigns dma_addrs in ascending order, so we don't
+ * have to bother with retries or mapping a dummy page to insure we
+ * don't just get the same mapping again.
+ *
+ * I'm sure we won't be so lucky with other iommu's, so FIXME.
+ */
+dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page,
+ unsigned long offset, size_t size, int direction)
+{
+ dma_addr_t phys;
+
+ phys = pci_map_page(hwdev, page, offset, size, direction);
+
+ if (phys == 0) {
+ pci_unmap_page(hwdev, phys, size, direction);
+ phys = pci_map_page(hwdev, page, offset, size, direction);
+ /*
+ * FIXME: If we get 0 again, we should keep this page,
+ * map another, then free the 0 page.
+ */
+ }
+
+ return phys;
+}
+
+/**
+ * ipath_map_single - a safety wrapper around pci_map_single()
+ *
+ * Same idea as ipath_map_page().
+ */
+dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
+ int direction)
+{
+ dma_addr_t phys;
+
+ phys = pci_map_single(hwdev, ptr, size, direction);
+
+ if (phys == 0) {
+ pci_unmap_single(hwdev, phys, size, direction);
+ phys = pci_map_single(hwdev, ptr, size, direction);
+ /*
+ * FIXME: If we get 0 again, we should keep this page,
+ * map another, then free the 0 page.
+ */
+ }
+
+ return phys;
+}
+
+/**
* ipath_get_user_pages - lock user pages into memory
* @start_page: the start page
* @num_pages: the number of pages
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index d70a9b6b523..42eaed88c28 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -33,15 +33,13 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_user_verbs.h>
+#include <linux/io.h>
#include <linux/utsname.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
#include "ipath_common.h"
-/* Not static, because we don't want the compiler removing it */
-const char ipath_verbs_version[] = "ipath_verbs " IPATH_IDSTR;
-
static unsigned int ib_ipath_qp_table_size = 251;
module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");
@@ -52,10 +50,6 @@ module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
MODULE_PARM_DESC(lkey_table_size,
"LKEY table size in bits (2^n, 1 <= n <= 23)");
-unsigned int ib_ipath_debug; /* debug mask */
-module_param_named(debug, ib_ipath_debug, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(debug, "Verbs debug mask");
-
static unsigned int ib_ipath_max_pds = 0xFFFF;
module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_pds,
@@ -79,6 +73,10 @@ module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
+unsigned int ib_ipath_max_qps = 16384;
+module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
+
unsigned int ib_ipath_max_sges = 0x60;
module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
@@ -109,9 +107,9 @@ module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("QLogic <support@pathscale.com>");
-MODULE_DESCRIPTION("QLogic InfiniPath driver");
+static unsigned int ib_ipath_disable_sma;
+module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(ib_ipath_disable_sma, "Disable the SMA");
const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = 0,
@@ -125,6 +123,16 @@ const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
[IB_QPS_ERR] = 0,
};
+struct ipath_ucontext {
+ struct ib_ucontext ibucontext;
+};
+
+static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
+ *ibucontext)
+{
+ return container_of(ibucontext, struct ipath_ucontext, ibucontext);
+}
+
/*
* Translate ib_wr_opcode into ib_wc_opcode.
*/
@@ -277,11 +285,12 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr)
{
struct ipath_qp *qp = to_iqp(ibqp);
+ struct ipath_rwq *wq = qp->r_rq.wq;
unsigned long flags;
int ret;
/* Check that state is OK to post receive. */
- if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK)) {
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
*bad_wr = wr;
ret = -EINVAL;
goto bail;
@@ -290,59 +299,31 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
for (; wr; wr = wr->next) {
struct ipath_rwqe *wqe;
u32 next;
- int i, j;
+ int i;
- if (wr->num_sge > qp->r_rq.max_sge) {
+ if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
*bad_wr = wr;
ret = -ENOMEM;
goto bail;
}
spin_lock_irqsave(&qp->r_rq.lock, flags);
- next = qp->r_rq.head + 1;
+ next = wq->head + 1;
if (next >= qp->r_rq.size)
next = 0;
- if (next == qp->r_rq.tail) {
+ if (next == wq->tail) {
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
*bad_wr = wr;
ret = -ENOMEM;
goto bail;
}
- wqe = get_rwqe_ptr(&qp->r_rq, qp->r_rq.head);
+ wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
wqe->wr_id = wr->wr_id;
- wqe->sg_list[0].mr = NULL;
- wqe->sg_list[0].vaddr = NULL;
- wqe->sg_list[0].length = 0;
- wqe->sg_list[0].sge_length = 0;
- wqe->length = 0;
- for (i = 0, j = 0; i < wr->num_sge; i++) {
- /* Check LKEY */
- if (to_ipd(qp->ibqp.pd)->user &&
- wr->sg_list[i].lkey == 0) {
- spin_unlock_irqrestore(&qp->r_rq.lock,
- flags);
- *bad_wr = wr;
- ret = -EINVAL;
- goto bail;
- }
- if (wr->sg_list[i].length == 0)
- continue;
- if (!ipath_lkey_ok(
- &to_idev(qp->ibqp.device)->lk_table,
- &wqe->sg_list[j], &wr->sg_list[i],
- IB_ACCESS_LOCAL_WRITE)) {
- spin_unlock_irqrestore(&qp->r_rq.lock,
- flags);
- *bad_wr = wr;
- ret = -EINVAL;
- goto bail;
- }
- wqe->length += wr->sg_list[i].length;
- j++;
- }
- wqe->num_sge = j;
- qp->r_rq.head = next;
+ wqe->num_sge = wr->num_sge;
+ for (i = 0; i < wr->num_sge; i++)
+ wqe->sg_list[i] = wr->sg_list[i];
+ wq->head = next;
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
}
ret = 0;
@@ -377,6 +358,9 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev,
switch (qp->ibqp.qp_type) {
case IB_QPT_SMI:
case IB_QPT_GSI:
+ if (ib_ipath_disable_sma)
+ break;
+ /* FALLTHROUGH */
case IB_QPT_UD:
ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
break;
@@ -395,7 +379,7 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev,
}
/**
- * ipath_ib_rcv - process and incoming packet
+ * ipath_ib_rcv - process an incoming packet
* @arg: the device pointer
* @rhdr: the header of the packet
* @data: the packet data
@@ -404,9 +388,9 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev,
* This is called from ipath_kreceive() to process an incoming packet at
* interrupt level. Tlen is the length of the header + data + CRC in bytes.
*/
-static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
+void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
+ u32 tlen)
{
- struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
struct ipath_ib_header *hdr = rhdr;
struct ipath_other_headers *ohdr;
struct ipath_qp *qp;
@@ -427,7 +411,7 @@ static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
lid = be16_to_cpu(hdr->lrh[1]);
if (lid < IPATH_MULTICAST_LID_BASE) {
lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
- if (unlikely(lid != ipath_layer_get_lid(dev->dd))) {
+ if (unlikely(lid != dev->dd->ipath_lid)) {
dev->rcv_errors++;
goto bail;
}
@@ -495,9 +479,8 @@ bail:;
* This is called from ipath_do_rcv_timer() at interrupt level to check for
* QPs which need retransmits and to collect performance numbers.
*/
-static void ipath_ib_timer(void *arg)
+void ipath_ib_timer(struct ipath_ibdev *dev)
{
- struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
struct ipath_qp *resend = NULL;
struct list_head *last;
struct ipath_qp *qp;
@@ -539,19 +522,19 @@ static void ipath_ib_timer(void *arg)
if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
--dev->pma_sample_start == 0) {
dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
- ipath_layer_snapshot_counters(dev->dd, &dev->ipath_sword,
- &dev->ipath_rword,
- &dev->ipath_spkts,
- &dev->ipath_rpkts,
- &dev->ipath_xmit_wait);
+ ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
+ &dev->ipath_rword,
+ &dev->ipath_spkts,
+ &dev->ipath_rpkts,
+ &dev->ipath_xmit_wait);
}
if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
if (dev->pma_sample_interval == 0) {
u64 ta, tb, tc, td, te;
dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
- ipath_layer_snapshot_counters(dev->dd, &ta, &tb,
- &tc, &td, &te);
+ ipath_snapshot_counters(dev->dd, &ta, &tb,
+ &tc, &td, &te);
dev->ipath_sword = ta - dev->ipath_sword;
dev->ipath_rword = tb - dev->ipath_rword;
@@ -581,6 +564,365 @@ static void ipath_ib_timer(void *arg)
}
}
+static void update_sge(struct ipath_sge_state *ss, u32 length)
+{
+ struct ipath_sge *sge = &ss->sge;
+
+ sge->vaddr += length;
+ sge->length -= length;
+ sge->sge_length -= length;
+ if (sge->sge_length == 0) {
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr != NULL) {
+ if (++sge->n >= IPATH_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ return;
+ sge->n = 0;
+ }
+ sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+}
+
+#ifdef __LITTLE_ENDIAN
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+ return data >> shift;
+}
+
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+ return data << shift;
+}
+
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+ data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
+ data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
+ return data;
+}
+#else
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+ return data << shift;
+}
+
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+ return data >> shift;
+}
+
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+ data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
+ data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
+ return data;
+}
+#endif
+
+static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
+ u32 length)
+{
+ u32 extra = 0;
+ u32 data = 0;
+ u32 last;
+
+ while (1) {
+ u32 len = ss->sge.length;
+ u32 off;
+
+ BUG_ON(len == 0);
+ if (len > length)
+ len = length;
+ if (len > ss->sge.sge_length)
+ len = ss->sge.sge_length;
+ /* If the source address is not aligned, try to align it. */
+ off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
+ if (off) {
+ u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
+ ~(sizeof(u32) - 1));
+ u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
+ u32 y;
+
+ y = sizeof(u32) - off;
+ if (len > y)
+ len = y;
+ if (len + extra >= sizeof(u32)) {
+ data |= set_upper_bits(v, extra *
+ BITS_PER_BYTE);
+ len = sizeof(u32) - extra;
+ if (len == length) {
+ last = data;
+ break;
+ }
+ __raw_writel(data, piobuf);
+ piobuf++;
+ extra = 0;
+ data = 0;
+ } else {
+ /* Clear unused upper bytes */
+ data |= clear_upper_bytes(v, len, extra);
+ if (len == length) {
+ last = data;
+ break;
+ }
+ extra += len;
+ }
+ } else if (extra) {
+ /* Source address is aligned. */
+ u32 *addr = (u32 *) ss->sge.vaddr;
+ int shift = extra * BITS_PER_BYTE;
+ int ushift = 32 - shift;
+ u32 l = len;
+
+ while (l >= sizeof(u32)) {
+ u32 v = *addr;
+
+ data |= set_upper_bits(v, shift);
+ __raw_writel(data, piobuf);
+ data = get_upper_bits(v, ushift);
+ piobuf++;
+ addr++;
+ l -= sizeof(u32);
+ }
+ /*
+ * We still have 'extra' number of bytes leftover.
+ */
+ if (l) {
+ u32 v = *addr;
+
+ if (l + extra >= sizeof(u32)) {
+ data |= set_upper_bits(v, shift);
+ len -= l + extra - sizeof(u32);
+ if (len == length) {
+ last = data;
+ break;
+ }
+ __raw_writel(data, piobuf);
+ piobuf++;
+ extra = 0;
+ data = 0;
+ } else {
+ /* Clear unused upper bytes */
+ data |= clear_upper_bytes(v, l,
+ extra);
+ if (len == length) {
+ last = data;
+ break;
+ }
+ extra += l;
+ }
+ } else if (len == length) {
+ last = data;
+ break;
+ }
+ } else if (len == length) {
+ u32 w;
+
+ /*
+ * Need to round up for the last dword in the
+ * packet.
+ */
+ w = (len + 3) >> 2;
+ __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
+ piobuf += w - 1;
+ last = ((u32 *) ss->sge.vaddr)[w - 1];
+ break;
+ } else {
+ u32 w = len >> 2;
+
+ __iowrite32_copy(piobuf, ss->sge.vaddr, w);
+ piobuf += w;
+
+ extra = len & (sizeof(u32) - 1);
+ if (extra) {
+ u32 v = ((u32 *) ss->sge.vaddr)[w];
+
+ /* Clear unused upper bytes */
+ data = clear_upper_bytes(v, extra, 0);
+ }
+ }
+ update_sge(ss, len);
+ length -= len;
+ }
+ /* Update address before sending packet. */
+ update_sge(ss, length);
+ /* must flush early everything before trigger word */
+ ipath_flush_wc();
+ __raw_writel(last, piobuf);
+ /* be sure trigger word is written */
+ ipath_flush_wc();
+}
+
+/**
+ * ipath_verbs_send - send a packet
+ * @dd: the infinipath device
+ * @hdrwords: the number of words in the header
+ * @hdr: the packet header
+ * @len: the length of the packet in bytes
+ * @ss: the SGE to send
+ */
+int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
+ u32 *hdr, u32 len, struct ipath_sge_state *ss)
+{
+ u32 __iomem *piobuf;
+ u32 plen;
+ int ret;
+
+ /* +1 is for the qword padding of pbc */
+ plen = hdrwords + ((len + 3) >> 2) + 1;
+ if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
+ ipath_dbg("packet len 0x%x too long, failing\n", plen);
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ /* Get a PIO buffer to use. */
+ piobuf = ipath_getpiobuf(dd, NULL);
+ if (unlikely(piobuf == NULL)) {
+ ret = -EBUSY;
+ goto bail;
+ }
+
+ /*
+ * Write len to control qword, no flags.
+ * We have to flush after the PBC for correctness on some cpus
+ * or WC buffer can be written out of order.
+ */
+ writeq(plen, piobuf);
+ ipath_flush_wc();
+ piobuf += 2;
+ if (len == 0) {
+ /*
+ * If there is just the header portion, must flush before
+ * writing last word of header for correctness, and after
+ * the last header word (trigger word).
+ */
+ __iowrite32_copy(piobuf, hdr, hdrwords - 1);
+ ipath_flush_wc();
+ __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
+ ipath_flush_wc();
+ ret = 0;
+ goto bail;
+ }
+
+ __iowrite32_copy(piobuf, hdr, hdrwords);
+ piobuf += hdrwords;
+
+ /* The common case is aligned and contained in one segment. */
+ if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
+ !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
+ u32 w;
+ u32 *addr = (u32 *) ss->sge.vaddr;
+
+ /* Update address before sending packet. */
+ update_sge(ss, len);
+ /* Need to round up for the last dword in the packet. */
+ w = (len + 3) >> 2;
+ __iowrite32_copy(piobuf, addr, w - 1);
+ /* must flush early everything before trigger word */
+ ipath_flush_wc();
+ __raw_writel(addr[w - 1], piobuf + w - 1);
+ /* be sure trigger word is written */
+ ipath_flush_wc();
+ ret = 0;
+ goto bail;
+ }
+ copy_io(piobuf, ss, len);
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
+ u64 *rwords, u64 *spkts, u64 *rpkts,
+ u64 *xmit_wait)
+{
+ int ret;
+
+ if (!(dd->ipath_flags & IPATH_INITTED)) {
+ /* no hardware, freeze, etc. */
+ ipath_dbg("unit %u not usable\n", dd->ipath_unit);
+ ret = -EINVAL;
+ goto bail;
+ }
+ *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
+ *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+ *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
+ *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
+ *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_get_counters - get various chip counters
+ * @dd: the infinipath device
+ * @cntrs: counters are placed here
+ *
+ * Return the counters needed by recv_pma_get_portcounters().
+ */
+int ipath_get_counters(struct ipath_devdata *dd,
+ struct ipath_verbs_counters *cntrs)
+{
+ int ret;
+
+ if (!(dd->ipath_flags & IPATH_INITTED)) {
+ /* no hardware, freeze, etc. */
+ ipath_dbg("unit %u not usable\n", dd->ipath_unit);
+ ret = -EINVAL;
+ goto bail;
+ }
+ cntrs->symbol_error_counter =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
+ cntrs->link_error_recovery_counter =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
+ /*
+ * The link downed counter counts when the other side downs the
+ * connection. We add in the number of times we downed the link
+ * due to local link integrity errors to compensate.
+ */
+ cntrs->link_downed_counter =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
+ cntrs->port_rcv_errors =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt) +
+ dd->ipath_rxfc_unsupvl_errs;
+ cntrs->port_rcv_remphys_errors =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
+ cntrs->port_xmit_discards =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt);
+ cntrs->port_xmit_data =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
+ cntrs->port_rcv_data =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+ cntrs->port_xmit_packets =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
+ cntrs->port_rcv_packets =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
+ cntrs->local_link_integrity_errors =
+ (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
+ dd->ipath_lli_errs : dd->ipath_lli_errors;
+ cntrs->excessive_buffer_overrun_errors = dd->ipath_overrun_thresh_errs;
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
/**
* ipath_ib_piobufavail - callback when a PIO buffer is available
* @arg: the device pointer
@@ -591,9 +933,8 @@ static void ipath_ib_timer(void *arg)
* QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
* return zero).
*/
-static int ipath_ib_piobufavail(void *arg)
+int ipath_ib_piobufavail(struct ipath_ibdev *dev)
{
- struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
struct ipath_qp *qp;
unsigned long flags;
@@ -624,14 +965,14 @@ static int ipath_query_device(struct ib_device *ibdev,
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID;
props->page_size_cap = PAGE_SIZE;
- props->vendor_id = ipath_layer_get_vendorid(dev->dd);
- props->vendor_part_id = ipath_layer_get_deviceid(dev->dd);
- props->hw_ver = ipath_layer_get_pcirev(dev->dd);
+ props->vendor_id = dev->dd->ipath_vendorid;
+ props->vendor_part_id = dev->dd->ipath_deviceid;
+ props->hw_ver = dev->dd->ipath_pcirev;
props->sys_image_guid = dev->sys_image_guid;
props->max_mr_size = ~0ull;
- props->max_qp = dev->qp_table.max;
+ props->max_qp = ib_ipath_max_qps;
props->max_qp_wr = ib_ipath_max_qp_wrs;
props->max_sge = ib_ipath_max_sges;
props->max_cq = ib_ipath_max_cqs;
@@ -647,7 +988,7 @@ static int ipath_query_device(struct ib_device *ibdev,
props->max_srq_sge = ib_ipath_max_srq_sges;
/* props->local_ca_ack_delay */
props->atomic_cap = IB_ATOMIC_HCA;
- props->max_pkeys = ipath_layer_get_npkeys(dev->dd);
+ props->max_pkeys = ipath_get_npkeys(dev->dd);
props->max_mcast_grp = ib_ipath_max_mcast_grps;
props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
@@ -672,12 +1013,17 @@ const u8 ipath_cvt_physportstate[16] = {
[INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6,
};
+u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
+{
+ return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
+}
+
static int ipath_query_port(struct ib_device *ibdev,
u8 port, struct ib_port_attr *props)
{
struct ipath_ibdev *dev = to_idev(ibdev);
enum ib_mtu mtu;
- u16 lid = ipath_layer_get_lid(dev->dd);
+ u16 lid = dev->dd->ipath_lid;
u64 ibcstat;
memset(props, 0, sizeof(*props));
@@ -685,16 +1031,16 @@ static int ipath_query_port(struct ib_device *ibdev,
props->lmc = dev->mkeyprot_resv_lmc & 7;
props->sm_lid = dev->sm_lid;
props->sm_sl = dev->sm_sl;
- ibcstat = ipath_layer_get_lastibcstat(dev->dd);
+ ibcstat = dev->dd->ipath_lastibcstat;
props->state = ((ibcstat >> 4) & 0x3) + 1;
/* See phys_state_show() */
props->phys_state = ipath_cvt_physportstate[
- ipath_layer_get_lastibcstat(dev->dd) & 0xf];
+ dev->dd->ipath_lastibcstat & 0xf];
props->port_cap_flags = dev->port_cap_flags;
props->gid_tbl_len = 1;
props->max_msg_sz = 0x80000000;
- props->pkey_tbl_len = ipath_layer_get_npkeys(dev->dd);
- props->bad_pkey_cntr = ipath_layer_get_cr_errpkey(dev->dd) -
+ props->pkey_tbl_len = ipath_get_npkeys(dev->dd);
+ props->bad_pkey_cntr = ipath_get_cr_errpkey(dev->dd) -
dev->z_pkey_violations;
props->qkey_viol_cntr = dev->qkey_violations;
props->active_width = IB_WIDTH_4X;
@@ -704,7 +1050,7 @@ static int ipath_query_port(struct ib_device *ibdev,
props->init_type_reply = 0;
props->max_mtu = IB_MTU_4096;
- switch (ipath_layer_get_ibmtu(dev->dd)) {
+ switch (dev->dd->ipath_ibmtu) {
case 4096:
mtu = IB_MTU_4096;
break;
@@ -763,7 +1109,7 @@ static int ipath_modify_port(struct ib_device *ibdev,
dev->port_cap_flags |= props->set_port_cap_mask;
dev->port_cap_flags &= ~props->clr_port_cap_mask;
if (port_modify_mask & IB_PORT_SHUTDOWN)
- ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
+ ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
dev->qkey_violations = 0;
return 0;
@@ -780,7 +1126,7 @@ static int ipath_query_gid(struct ib_device *ibdev, u8 port,
goto bail;
}
gid->global.subnet_prefix = dev->gid_prefix;
- gid->global.interface_id = ipath_layer_get_guid(dev->dd);
+ gid->global.interface_id = dev->dd->ipath_guid;
ret = 0;
@@ -803,18 +1149,22 @@ static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
* we allow allocations of more than we report for this value.
*/
- if (dev->n_pds_allocated == ib_ipath_max_pds) {
+ pd = kmalloc(sizeof *pd, GFP_KERNEL);
+ if (!pd) {
ret = ERR_PTR(-ENOMEM);
goto bail;
}
- pd = kmalloc(sizeof *pd, GFP_KERNEL);
- if (!pd) {
+ spin_lock(&dev->n_pds_lock);
+ if (dev->n_pds_allocated == ib_ipath_max_pds) {
+ spin_unlock(&dev->n_pds_lock);
+ kfree(pd);
ret = ERR_PTR(-ENOMEM);
goto bail;
}
dev->n_pds_allocated++;
+ spin_unlock(&dev->n_pds_lock);
/* ib_alloc_pd() will initialize pd->ibpd. */
pd->user = udata != NULL;
@@ -830,7 +1180,9 @@ static int ipath_dealloc_pd(struct ib_pd *ibpd)
struct ipath_pd *pd = to_ipd(ibpd);
struct ipath_ibdev *dev = to_idev(ibpd->device);
+ spin_lock(&dev->n_pds_lock);
dev->n_pds_allocated--;
+ spin_unlock(&dev->n_pds_lock);
kfree(pd);
@@ -850,11 +1202,7 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
struct ipath_ah *ah;
struct ib_ah *ret;
struct ipath_ibdev *dev = to_idev(pd->device);
-
- if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
+ unsigned long flags;
/* A multicast address requires a GRH (see ch. 8.4.1). */
if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
@@ -881,7 +1229,16 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
goto bail;
}
+ spin_lock_irqsave(&dev->n_ahs_lock, flags);
+ if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+ kfree(ah);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
dev->n_ahs_allocated++;
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
/* ib_create_ah() will initialize ah->ibah. */
ah->attr = *ah_attr;
@@ -902,8 +1259,11 @@ static int ipath_destroy_ah(struct ib_ah *ibah)
{
struct ipath_ibdev *dev = to_idev(ibah->device);
struct ipath_ah *ah = to_iah(ibah);
+ unsigned long flags;
+ spin_lock_irqsave(&dev->n_ahs_lock, flags);
dev->n_ahs_allocated--;
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
kfree(ah);
@@ -919,25 +1279,50 @@ static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
return 0;
}
+/**
+ * ipath_get_npkeys - return the size of the PKEY table for port 0
+ * @dd: the infinipath device
+ */
+unsigned ipath_get_npkeys(struct ipath_devdata *dd)
+{
+ return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
+}
+
+/**
+ * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table
+ * @dd: the infinipath device
+ * @index: the PKEY index
+ */
+unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
+{
+ unsigned ret;
+
+ if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
+ ret = 0;
+ else
+ ret = dd->ipath_pd[0]->port_pkeys[index];
+
+ return ret;
+}
+
static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
u16 *pkey)
{
struct ipath_ibdev *dev = to_idev(ibdev);
int ret;
- if (index >= ipath_layer_get_npkeys(dev->dd)) {
+ if (index >= ipath_get_npkeys(dev->dd)) {
ret = -EINVAL;
goto bail;
}
- *pkey = ipath_layer_get_pkey(dev->dd, index);
+ *pkey = ipath_get_pkey(dev->dd, index);
ret = 0;
bail:
return ret;
}
-
/**
* ipath_alloc_ucontext - allocate a ucontest
* @ibdev: the infiniband device
@@ -970,26 +1355,102 @@ static int ipath_dealloc_ucontext(struct ib_ucontext *context)
static int ipath_verbs_register_sysfs(struct ib_device *dev);
+static void __verbs_timer(unsigned long arg)
+{
+ struct ipath_devdata *dd = (struct ipath_devdata *) arg;
+
+ /*
+ * If port 0 receive packet interrupts are not available, or
+ * can be missed, poll the receive queue
+ */
+ if (dd->ipath_flags & IPATH_POLL_RX_INTR)
+ ipath_kreceive(dd);
+
+ /* Handle verbs layer timeouts. */
+ ipath_ib_timer(dd->verbs_dev);
+
+ mod_timer(&dd->verbs_timer, jiffies + 1);
+}
+
+static int enable_timer(struct ipath_devdata *dd)
+{
+ /*
+ * Early chips had a design flaw where the chip and kernel idea
+ * of the tail register don't always agree, and therefore we won't
+ * get an interrupt on the next packet received.
+ * If the board supports per packet receive interrupts, use it.
+ * Otherwise, the timer function periodically checks for packets
+ * to cover this case.
+ * Either way, the timer is needed for verbs layer related
+ * processing.
+ */
+ if (dd->ipath_flags & IPATH_GPIO_INTR) {
+ u64 val;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
+ 0x2074076542310ULL);
+ /* Enable GPIO bit 2 interrupt */
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
+ val |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
+ ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val);
+ }
+
+ init_timer(&dd->verbs_timer);
+ dd->verbs_timer.function = __verbs_timer;
+ dd->verbs_timer.data = (unsigned long)dd;
+ dd->verbs_timer.expires = jiffies + 1;
+ add_timer(&dd->verbs_timer);
+
+ return 0;
+}
+
+static int disable_timer(struct ipath_devdata *dd)
+{
+ /* Disable GPIO bit 2 interrupt */
+ if (dd->ipath_flags & IPATH_GPIO_INTR) {
+ u64 val;
+ /* Disable GPIO bit 2 interrupt */
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
+ val &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
+ ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val);
+ /*
+ * We might want to undo changes to debugportselect,
+ * but how?
+ */
+ }
+
+ del_timer_sync(&dd->verbs_timer);
+
+ return 0;
+}
+
/**
* ipath_register_ib_device - register our device with the infiniband core
- * @unit: the device number to register
* @dd: the device data structure
* Return the allocated ipath_ibdev pointer or NULL on error.
*/
-static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
+int ipath_register_ib_device(struct ipath_devdata *dd)
{
- struct ipath_layer_counters cntrs;
+ struct ipath_verbs_counters cntrs;
struct ipath_ibdev *idev;
struct ib_device *dev;
int ret;
idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
- if (idev == NULL)
+ if (idev == NULL) {
+ ret = -ENOMEM;
goto bail;
+ }
dev = &idev->ibdev;
/* Only need to initialize non-zero fields. */
+ spin_lock_init(&idev->n_pds_lock);
+ spin_lock_init(&idev->n_ahs_lock);
+ spin_lock_init(&idev->n_cqs_lock);
+ spin_lock_init(&idev->n_qps_lock);
+ spin_lock_init(&idev->n_srqs_lock);
+ spin_lock_init(&idev->n_mcast_grps_lock);
+
spin_lock_init(&idev->qp_table.lock);
spin_lock_init(&idev->lk_table.lock);
idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
@@ -1030,7 +1491,7 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
idev->link_width_enabled = 3; /* 1x or 4x */
/* Snapshot current HW counters to "clear" them. */
- ipath_layer_get_counters(dd, &cntrs);
+ ipath_get_counters(dd, &cntrs);
idev->z_symbol_error_counter = cntrs.symbol_error_counter;
idev->z_link_error_recovery_counter =
cntrs.link_error_recovery_counter;
@@ -1054,14 +1515,14 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
* device types in the system, we can't be sure this is unique.
*/
if (!sys_image_guid)
- sys_image_guid = ipath_layer_get_guid(dd);
+ sys_image_guid = dd->ipath_guid;
idev->sys_image_guid = sys_image_guid;
- idev->ib_unit = unit;
+ idev->ib_unit = dd->ipath_unit;
idev->dd = dd;
strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
dev->owner = THIS_MODULE;
- dev->node_guid = ipath_layer_get_guid(dd);
+ dev->node_guid = dd->ipath_guid;
dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
dev->uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -1093,9 +1554,9 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
- dev->node_type = IB_NODE_CA;
+ dev->node_type = RDMA_NODE_IB_CA;
dev->phys_port_cnt = 1;
- dev->dma_device = ipath_layer_get_device(dd);
+ dev->dma_device = &dd->pcidev->dev;
dev->class_dev.dev = dev->dma_device;
dev->query_device = ipath_query_device;
dev->modify_device = ipath_modify_device;
@@ -1137,9 +1598,10 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
dev->attach_mcast = ipath_multicast_attach;
dev->detach_mcast = ipath_multicast_detach;
dev->process_mad = ipath_process_mad;
+ dev->mmap = ipath_mmap;
snprintf(dev->node_desc, sizeof(dev->node_desc),
- IPATH_IDSTR " %s kernel_SMA", system_utsname.nodename);
+ IPATH_IDSTR " %s", system_utsname.nodename);
ret = ib_register_device(dev);
if (ret)
@@ -1148,7 +1610,7 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
if (ipath_verbs_register_sysfs(dev))
goto err_class;
- ipath_layer_enable_timer(dd);
+ enable_timer(dd);
goto bail;
@@ -1160,37 +1622,32 @@ err_lk:
kfree(idev->qp_table.table);
err_qp:
ib_dealloc_device(dev);
- _VERBS_ERROR("ib_ipath%d cannot register verbs (%d)!\n",
- unit, -ret);
+ ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
idev = NULL;
bail:
- return idev;
+ dd->verbs_dev = idev;
+ return ret;
}
-static void ipath_unregister_ib_device(void *arg)
+void ipath_unregister_ib_device(struct ipath_ibdev *dev)
{
- struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
struct ib_device *ibdev = &dev->ibdev;
- ipath_layer_disable_timer(dev->dd);
+ disable_timer(dev->dd);
ib_unregister_device(ibdev);
if (!list_empty(&dev->pending[0]) ||
!list_empty(&dev->pending[1]) ||
!list_empty(&dev->pending[2]))
- _VERBS_ERROR("ipath%d pending list not empty!\n",
- dev->ib_unit);
+ ipath_dev_err(dev->dd, "pending list not empty!\n");
if (!list_empty(&dev->piowait))
- _VERBS_ERROR("ipath%d piowait list not empty!\n",
- dev->ib_unit);
+ ipath_dev_err(dev->dd, "piowait list not empty!\n");
if (!list_empty(&dev->rnrwait))
- _VERBS_ERROR("ipath%d rnrwait list not empty!\n",
- dev->ib_unit);
+ ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
if (!ipath_mcast_tree_empty())
- _VERBS_ERROR("ipath%d multicast table memory leak!\n",
- dev->ib_unit);
+ ipath_dev_err(dev->dd, "multicast table memory leak!\n");
/*
* Note that ipath_unregister_ib_device() can be called before all
* the QPs are destroyed!
@@ -1201,25 +1658,12 @@ static void ipath_unregister_ib_device(void *arg)
ib_dealloc_device(ibdev);
}
-static int __init ipath_verbs_init(void)
-{
- return ipath_verbs_register(ipath_register_ib_device,
- ipath_unregister_ib_device,
- ipath_ib_piobufavail, ipath_ib_rcv,
- ipath_ib_timer);
-}
-
-static void __exit ipath_verbs_cleanup(void)
-{
- ipath_verbs_unregister();
-}
-
static ssize_t show_rev(struct class_device *cdev, char *buf)
{
struct ipath_ibdev *dev =
container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
- return sprintf(buf, "%x\n", ipath_layer_get_pcirev(dev->dd));
+ return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
}
static ssize_t show_hca(struct class_device *cdev, char *buf)
@@ -1228,7 +1672,7 @@ static ssize_t show_hca(struct class_device *cdev, char *buf)
container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
int ret;
- ret = ipath_layer_get_boardname(dev->dd, buf, 128);
+ ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
if (ret < 0)
goto bail;
strcat(buf, "\n");
@@ -1255,6 +1699,7 @@ static ssize_t show_stats(struct class_device *cdev, char *buf)
"RC OTH NAKs %d\n"
"RC timeouts %d\n"
"RC RDMA dup %d\n"
+ "RC stalls %d\n"
"piobuf wait %d\n"
"no piobuf %d\n"
"PKT drops %d\n"
@@ -1262,7 +1707,7 @@ static ssize_t show_stats(struct class_device *cdev, char *buf)
dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
dev->n_other_naks, dev->n_timeouts,
- dev->n_rdma_dup_busy, dev->n_piowait,
+ dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait,
dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs);
for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
const struct ipath_opcode_stats *si = &dev->opstats[i];
@@ -1305,6 +1750,3 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev)
bail:
return ret;
}
-
-module_init(ipath_verbs_init);
-module_exit(ipath_verbs_cleanup);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 2df684727dc..8039f6e5f0c 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -38,10 +38,10 @@
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
+#include <linux/kref.h>
#include <rdma/ib_pack.h>
#include "ipath_layer.h"
-#include "verbs_debug.h"
#define QPN_MAX (1 << 24)
#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
@@ -50,7 +50,7 @@
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
-#define IPATH_UVERBS_ABI_VERSION 1
+#define IPATH_UVERBS_ABI_VERSION 2
/*
* Define an ib_cq_notify value that is not valid so we know when CQ
@@ -152,19 +152,6 @@ struct ipath_mcast {
int n_attached;
};
-/* Memory region */
-struct ipath_mr {
- struct ib_mr ibmr;
- struct ipath_mregion mr; /* must be last */
-};
-
-/* Fast memory region */
-struct ipath_fmr {
- struct ib_fmr ibfmr;
- u8 page_shift;
- struct ipath_mregion mr; /* must be last */
-};
-
/* Protection domain */
struct ipath_pd {
struct ib_pd ibpd;
@@ -178,58 +165,90 @@ struct ipath_ah {
};
/*
- * Quick description of our CQ/QP locking scheme:
- *
- * We have one global lock that protects dev->cq/qp_table. Each
- * struct ipath_cq/qp also has its own lock. An individual qp lock
- * may be taken inside of an individual cq lock. Both cqs attached to
- * a qp may be locked, with the send cq locked first. No other
- * nesting should be done.
- *
- * Each struct ipath_cq/qp also has an atomic_t ref count. The
- * pointer from the cq/qp_table to the struct counts as one reference.
- * This reference also is good for access through the consumer API, so
- * modifying the CQ/QP etc doesn't need to take another reference.
- * Access because of a completion being polled does need a reference.
- *
- * Finally, each struct ipath_cq/qp has a wait_queue_head_t for the
- * destroy function to sleep on.
- *
- * This means that access from the consumer API requires nothing but
- * taking the struct's lock.
- *
- * Access because of a completion event should go as follows:
- * - lock cq/qp_table and look up struct
- * - increment ref count in struct
- * - drop cq/qp_table lock
- * - lock struct, do your thing, and unlock struct
- * - decrement ref count; if zero, wake up waiters
- *
- * To destroy a CQ/QP, we can do the following:
- * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock
- * - decrement ref count
- * - wait_event until ref count is zero
- *
- * It is the consumer's responsibilty to make sure that no QP
- * operations (WQE posting or state modification) are pending when the
- * QP is destroyed. Also, the consumer must make sure that calls to
- * qp_modify are serialized.
- *
- * Possible optimizations (wait for profile data to see if/where we
- * have locks bouncing between CPUs):
- * - split cq/qp table lock into n separate (cache-aligned) locks,
- * indexed (say) by the page in the table
+ * This structure is used by ipath_mmap() to validate an offset
+ * when an mmap() request is made. The vm_area_struct then uses
+ * this as its vm_private_data.
*/
+struct ipath_mmap_info {
+ struct ipath_mmap_info *next;
+ struct ib_ucontext *context;
+ void *obj;
+ struct kref ref;
+ unsigned size;
+ unsigned mmap_cnt;
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and completion queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ */
+struct ipath_cq_wc {
+ u32 head; /* index of next entry to fill */
+ u32 tail; /* index of next ib_poll_cq() entry */
+ struct ib_wc queue[1]; /* this is actually size ibcq.cqe + 1 */
+};
+/*
+ * The completion queue structure.
+ */
struct ipath_cq {
struct ib_cq ibcq;
struct tasklet_struct comptask;
spinlock_t lock;
u8 notify;
u8 triggered;
- u32 head; /* new records added to the head */
- u32 tail; /* poll_cq() reads from here. */
- struct ib_wc *queue; /* this is actually ibcq.cqe + 1 */
+ struct ipath_cq_wc *queue;
+ struct ipath_mmap_info *ip;
+};
+
+/*
+ * A segment is a linear region of low physical memory.
+ * XXX Maybe we should use phys addr here and kmap()/kunmap().
+ * Used by the verbs layer.
+ */
+struct ipath_seg {
+ void *vaddr;
+ size_t length;
+};
+
+/* The number of ipath_segs that fit in a page. */
+#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg))
+
+struct ipath_segarray {
+ struct ipath_seg segs[IPATH_SEGSZ];
+};
+
+struct ipath_mregion {
+ struct ib_pd *pd; /* shares refcnt of ibmr.pd */
+ u64 user_base; /* User's address for this region */
+ u64 iova; /* IB start address of this region */
+ size_t length;
+ u32 lkey;
+ u32 offset; /* offset (bytes) to start of region */
+ int access_flags;
+ u32 max_segs; /* number of ipath_segs in all the arrays */
+ u32 mapsz; /* size of the map array */
+ struct ipath_segarray *map[0]; /* the segments */
+};
+
+/*
+ * These keep track of the copy progress within a memory region.
+ * Used by the verbs layer.
+ */
+struct ipath_sge {
+ struct ipath_mregion *mr;
+ void *vaddr; /* current pointer into the segment */
+ u32 sge_length; /* length of the SGE */
+ u32 length; /* remaining length of the segment */
+ u16 m; /* current index: mr->map[m] */
+ u16 n; /* current index: mr->map[m]->segs[n] */
+};
+
+/* Memory region */
+struct ipath_mr {
+ struct ib_mr ibmr;
+ struct ipath_mregion mr; /* must be last */
};
/*
@@ -248,32 +267,50 @@ struct ipath_swqe {
/*
* Receive work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->r_max_sge.
+ * The size of the sg_list is determined when the QP (or SRQ) is created
+ * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
*/
struct ipath_rwqe {
u64 wr_id;
- u32 length; /* total length of data in sg_list */
u8 num_sge;
- struct ipath_sge sg_list[0];
+ struct ib_sge sg_list[0];
};
-struct ipath_rq {
- spinlock_t lock;
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and receive work queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ * Note that the wq array elements are variable size so you can't
+ * just index into the array to get the N'th element;
+ * use get_rwqe_ptr() instead.
+ */
+struct ipath_rwq {
u32 head; /* new work requests posted to the head */
u32 tail; /* receives pull requests from here. */
+ struct ipath_rwqe wq[0];
+};
+
+struct ipath_rq {
+ struct ipath_rwq *wq;
+ spinlock_t lock;
u32 size; /* size of RWQE array */
u8 max_sge;
- struct ipath_rwqe *wq; /* RWQE array */
};
struct ipath_srq {
struct ib_srq ibsrq;
struct ipath_rq rq;
+ struct ipath_mmap_info *ip;
/* send signal when number of RWQEs < limit */
u32 limit;
};
+struct ipath_sge_state {
+ struct ipath_sge *sg_list; /* next SGE to be used if any */
+ struct ipath_sge sge; /* progress state for the current SGE */
+ u8 num_sge;
+};
+
/*
* Variables prefixed with s_ are for the requester (sender).
* Variables prefixed with r_ are for the responder (receiver).
@@ -293,6 +330,7 @@ struct ipath_qp {
atomic_t refcount;
wait_queue_head_t wait;
struct tasklet_struct s_task;
+ struct ipath_mmap_info *ip;
struct ipath_sge_state *s_cur_sge;
struct ipath_sge_state s_sge; /* current send request data */
/* current RDMA read send data */
@@ -327,13 +365,16 @@ struct ipath_qp {
u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
u8 r_reuse_sge; /* for UC receive errors */
u8 r_sge_inx; /* current index into sg_list */
+ u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */
u8 qp_access_flags;
u8 s_max_sge; /* size of s_wq->sg_list */
u8 s_retry_cnt; /* number of times to retry */
u8 s_rnr_retry_cnt;
u8 s_retry; /* requester retry counter */
u8 s_rnr_retry; /* requester RNR retry counter */
+ u8 s_wait_credit; /* limit number of unacked packets sent */
u8 s_pkey_index; /* PKEY index to use */
+ u8 timeout; /* Timeout for this QP */
enum ib_mtu path_mtu;
u32 remote_qpn;
u32 qkey; /* QKEY for this QP (for UD or RD) */
@@ -345,7 +386,8 @@ struct ipath_qp {
u32 s_ssn; /* SSN of tail entry */
u32 s_lsn; /* limit sequence number (credit) */
struct ipath_swqe *s_wq; /* send work queue */
- struct ipath_rq r_rq; /* receive work queue */
+ struct ipath_rq r_rq; /* receive work queue */
+ struct ipath_sge r_sg_list[0]; /* verified SGEs */
};
/*
@@ -354,6 +396,8 @@ struct ipath_qp {
#define IPATH_S_BUSY 0
#define IPATH_S_SIGNAL_REQ_WR 1
+#define IPATH_PSN_CREDIT 2048
+
/*
* Since struct ipath_swqe is not a fixed size, we can't simply index into
* struct ipath_qp.s_wq. This function does the array index computation.
@@ -369,15 +413,15 @@ static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp,
/*
* Since struct ipath_rwqe is not a fixed size, we can't simply index into
- * struct ipath_rq.wq. This function does the array index computation.
+ * struct ipath_rwq.wq. This function does the array index computation.
*/
static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
unsigned n)
{
return (struct ipath_rwqe *)
- ((char *) rq->wq +
+ ((char *) rq->wq->wq +
(sizeof(struct ipath_rwqe) +
- rq->max_sge * sizeof(struct ipath_sge)) * n);
+ rq->max_sge * sizeof(struct ib_sge)) * n);
}
/*
@@ -417,6 +461,7 @@ struct ipath_ibdev {
struct ib_device ibdev;
struct list_head dev_list;
struct ipath_devdata *dd;
+ struct ipath_mmap_info *pending_mmaps;
int ib_unit; /* This is the device number */
u16 sm_lid; /* in host order */
u8 sm_sl;
@@ -435,11 +480,20 @@ struct ipath_ibdev {
__be64 sys_image_guid; /* in network order */
__be64 gid_prefix; /* in network order */
__be64 mkey;
+
u32 n_pds_allocated; /* number of PDs allocated for device */
+ spinlock_t n_pds_lock;
u32 n_ahs_allocated; /* number of AHs allocated for device */
+ spinlock_t n_ahs_lock;
u32 n_cqs_allocated; /* number of CQs allocated for device */
+ spinlock_t n_cqs_lock;
+ u32 n_qps_allocated; /* number of QPs allocated for device */
+ spinlock_t n_qps_lock;
u32 n_srqs_allocated; /* number of SRQs allocated for device */
+ spinlock_t n_srqs_lock;
u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
+ spinlock_t n_mcast_grps_lock;
+
u64 ipath_sword; /* total dwords sent (sample result) */
u64 ipath_rword; /* total dwords received (sample result) */
u64 ipath_spkts; /* total packets sent (sample result) */
@@ -472,6 +526,7 @@ struct ipath_ibdev {
u32 n_rnr_naks;
u32 n_other_naks;
u32 n_timeouts;
+ u32 n_rc_stalls;
u32 n_pkt_drops;
u32 n_vl15_dropped;
u32 n_wqe_errs;
@@ -494,8 +549,19 @@ struct ipath_ibdev {
struct ipath_opcode_stats opstats[128];
};
-struct ipath_ucontext {
- struct ib_ucontext ibucontext;
+struct ipath_verbs_counters {
+ u64 symbol_error_counter;
+ u64 link_error_recovery_counter;
+ u64 link_downed_counter;
+ u64 port_rcv_errors;
+ u64 port_rcv_remphys_errors;
+ u64 port_xmit_discards;
+ u64 port_xmit_data;
+ u64 port_rcv_data;
+ u64 port_xmit_packets;
+ u64 port_rcv_packets;
+ u32 local_link_integrity_errors;
+ u32 excessive_buffer_overrun_errors;
};
static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
@@ -503,11 +569,6 @@ static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
return container_of(ibmr, struct ipath_mr, ibmr);
}
-static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
- return container_of(ibfmr, struct ipath_fmr, ibfmr);
-}
-
static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct ipath_pd, ibpd);
@@ -545,12 +606,6 @@ int ipath_process_mad(struct ib_device *ibdev,
struct ib_grh *in_grh,
struct ib_mad *in_mad, struct ib_mad *out_mad);
-static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
- *ibucontext)
-{
- return container_of(ibucontext, struct ipath_ucontext, ibucontext);
-}
-
/*
* Compare the lower 24 bits of the two values.
* Returns an integer <, ==, or > than zero.
@@ -562,6 +617,13 @@ static inline int ipath_cmp24(u32 a, u32 b)
struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid);
+int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
+ u64 *rwords, u64 *spkts, u64 *rpkts,
+ u64 *xmit_wait);
+
+int ipath_get_counters(struct ipath_devdata *dd,
+ struct ipath_verbs_counters *cntrs);
+
int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
@@ -578,8 +640,10 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
int ipath_destroy_qp(struct ib_qp *ibqp);
+void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
+
int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask);
+ int attr_mask, struct ib_udata *udata);
int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_qp_init_attr *init_attr);
@@ -592,13 +656,10 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc);
void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
+int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
+ u32 *hdr, u32 len, struct ipath_sge_state *ss);
-int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
- u32 len, u64 vaddr, u32 rkey, int acc);
-
-int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
- struct ib_sge *sge, int acc);
+void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
@@ -624,10 +685,10 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt,
void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey);
-int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
+int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
struct ib_sge *sge, int acc);
-int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
+int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
u32 len, u64 vaddr, u32 rkey, int acc);
int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
@@ -638,7 +699,8 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
struct ib_udata *udata);
int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
- enum ib_srq_attr_mask attr_mask);
+ enum ib_srq_attr_mask attr_mask,
+ struct ib_udata *udata);
int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
@@ -680,6 +742,10 @@ int ipath_unmap_fmr(struct list_head *fmr_list);
int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
+void ipath_release_mmap_info(struct kref *ref);
+
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev);
void ipath_insert_rnr_queue(struct ipath_qp *qp);
@@ -700,6 +766,22 @@ int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
int ipath_make_uc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
u32 pmtu, u32 *bth0p, u32 *bth2p);
+int ipath_register_ib_device(struct ipath_devdata *);
+
+void ipath_unregister_ib_device(struct ipath_ibdev *);
+
+void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32);
+
+int ipath_ib_piobufavail(struct ipath_ibdev *);
+
+void ipath_ib_timer(struct ipath_ibdev *);
+
+unsigned ipath_get_npkeys(struct ipath_devdata *);
+
+u32 ipath_get_cr_errpkey(struct ipath_devdata *);
+
+unsigned ipath_get_pkey(struct ipath_devdata *, unsigned);
+
extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
extern const u8 ipath_cvt_physportstate[];
@@ -714,6 +796,8 @@ extern unsigned int ib_ipath_max_cqs;
extern unsigned int ib_ipath_max_qp_wrs;
+extern unsigned int ib_ipath_max_qps;
+
extern unsigned int ib_ipath_max_sges;
extern unsigned int ib_ipath_max_mcast_grps;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
index ee0e1d96d72..085e28b939e 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -207,12 +207,17 @@ static int ipath_mcast_add(struct ipath_ibdev *dev,
goto bail;
}
+ spin_lock(&dev->n_mcast_grps_lock);
if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) {
+ spin_unlock(&dev->n_mcast_grps_lock);
ret = ENOMEM;
goto bail;
}
dev->n_mcast_grps_allocated++;
+ spin_unlock(&dev->n_mcast_grps_lock);
+
+ mcast->n_attached++;
list_add_tail_rcu(&mqp->list, &mcast->qp_list);
@@ -343,7 +348,9 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
atomic_dec(&mcast->refcount);
wait_event(mcast->wait, !atomic_read(&mcast->refcount));
ipath_mcast_free(mcast);
+ spin_lock(&dev->n_mcast_grps_lock);
dev->n_mcast_grps_allocated--;
+ spin_unlock(&dev->n_mcast_grps_lock);
}
ret = 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
new file mode 100644
index 00000000000..0095bb70f34
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file is conditionally built on PowerPC only. Otherwise weak symbol
+ * versions of the functions exported from here are used.
+ */
+
+#include "ipath_kernel.h"
+
+/**
+ * ipath_enable_wc - enable write combining for MMIO writes to the device
+ * @dd: infinipath device
+ *
+ * Nothing to do on PowerPC, so just return without error.
+ */
+int ipath_enable_wc(struct ipath_devdata *dd)
+{
+ return 0;
+}
+
+/**
+ * ipath_unordered_wc - indicate whether write combining is unordered
+ *
+ * Because our performance depends on our ability to do write
+ * combining mmio writes in the most efficient way, we need to
+ * know if we are on a processor that may reorder stores when
+ * write combining.
+ */
+int ipath_unordered_wc(void)
+{
+ return 1;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
index f8f9e2e8cbd..04696e62da8 100644
--- a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
+++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
@@ -123,6 +123,8 @@ int ipath_enable_wc(struct ipath_devdata *dd)
ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, "
"cookie is %d\n", cookie);
dd->ipath_wc_cookie = cookie;
+ dd->ipath_wc_base = (unsigned long) pioaddr;
+ dd->ipath_wc_len = (unsigned long) piolen;
}
}
@@ -136,9 +138,16 @@ int ipath_enable_wc(struct ipath_devdata *dd)
void ipath_disable_wc(struct ipath_devdata *dd)
{
if (dd->ipath_wc_cookie) {
+ int r;
ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n");
- mtrr_del(dd->ipath_wc_cookie, 0, 0);
- dd->ipath_wc_cookie = 0;
+ r = mtrr_del(dd->ipath_wc_cookie, dd->ipath_wc_base,
+ dd->ipath_wc_len);
+ if (r < 0)
+ dev_info(&dd->pcidev->dev,
+ "mtrr_del(%lx, %lx, %lx) failed: %d\n",
+ dd->ipath_wc_cookie, dd->ipath_wc_base,
+ dd->ipath_wc_len, r);
+ dd->ipath_wc_cookie = 0; /* even on failure */
}
}
diff --git a/drivers/infiniband/hw/ipath/verbs_debug.h b/drivers/infiniband/hw/ipath/verbs_debug.h
deleted file mode 100644
index 6186676f2a1..00000000000
--- a/drivers/infiniband/hw/ipath/verbs_debug.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _VERBS_DEBUG_H
-#define _VERBS_DEBUG_H
-
-/*
- * This file contains tracing code for the ib_ipath kernel module.
- */
-#ifndef _VERBS_DEBUGGING /* tracing enabled or not */
-#define _VERBS_DEBUGGING 1
-#endif
-
-extern unsigned ib_ipath_debug;
-
-#define _VERBS_ERROR(fmt,...) \
- do { \
- printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
- } while(0)
-
-#define _VERBS_UNIT_ERROR(unit,fmt,...) \
- do { \
- printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
- } while(0)
-
-#if _VERBS_DEBUGGING
-
-/*
- * Mask values for debugging. The scheme allows us to compile out any
- * of the debug tracing stuff, and if compiled in, to enable or
- * disable dynamically.
- * This can be set at modprobe time also:
- * modprobe ib_path ib_ipath_debug=3
- */
-
-#define __VERBS_INFO 0x1 /* generic low verbosity stuff */
-#define __VERBS_DBG 0x2 /* generic debug */
-#define __VERBS_VDBG 0x4 /* verbose debug */
-#define __VERBS_SMADBG 0x8000 /* sma packet debug */
-
-#define _VERBS_INFO(fmt,...) \
- do { \
- if (unlikely(ib_ipath_debug&__VERBS_INFO)) \
- printk(KERN_INFO "%s: " fmt,"ib_ipath", \
- ##__VA_ARGS__); \
- } while(0)
-
-#define _VERBS_DBG(fmt,...) \
- do { \
- if (unlikely(ib_ipath_debug&__VERBS_DBG)) \
- printk(KERN_DEBUG "%s: " fmt, __func__, \
- ##__VA_ARGS__); \
- } while(0)
-
-#define _VERBS_VDBG(fmt,...) \
- do { \
- if (unlikely(ib_ipath_debug&__VERBS_VDBG)) \
- printk(KERN_DEBUG "%s: " fmt, __func__, \
- ##__VA_ARGS__); \
- } while(0)
-
-#define _VERBS_SMADBG(fmt,...) \
- do { \
- if (unlikely(ib_ipath_debug&__VERBS_SMADBG)) \
- printk(KERN_DEBUG "%s: " fmt, __func__, \
- ##__VA_ARGS__); \
- } while(0)
-
-#else /* ! _VERBS_DEBUGGING */
-
-#define _VERBS_INFO(fmt,...)
-#define _VERBS_DBG(fmt,...)
-#define _VERBS_VDBG(fmt,...)
-#define _VERBS_SMADBG(fmt,...)
-
-#endif /* _VERBS_DEBUGGING */
-
-#endif /* _VERBS_DEBUG_H */