From 124b4dcb1dd3a6fb80051f1785117a732d785f70 Mon Sep 17 00:00:00 2001 From: Dave Olson Date: Wed, 16 Apr 2008 21:09:32 -0700 Subject: IB/ipath: add calls to new 7220 code and enable in build This patch adds the initialization calls into the new 7220 HCA files, changes the Makefile to compile and link the new files, and code to handle send DMA. Signed-off-by: Dave Olson Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/Makefile | 3 + drivers/infiniband/hw/ipath/ipath_common.h | 16 +- drivers/infiniband/hw/ipath/ipath_driver.c | 150 ++++++++-- drivers/infiniband/hw/ipath/ipath_file_ops.c | 97 ++++++- drivers/infiniband/hw/ipath/ipath_init_chip.c | 4 + drivers/infiniband/hw/ipath/ipath_intr.c | 239 +++++++++++----- drivers/infiniband/hw/ipath/ipath_kernel.h | 4 +- drivers/infiniband/hw/ipath/ipath_qp.c | 14 +- drivers/infiniband/hw/ipath/ipath_ruc.c | 18 +- drivers/infiniband/hw/ipath/ipath_sdma.c | 91 ++++-- drivers/infiniband/hw/ipath/ipath_stats.c | 4 +- drivers/infiniband/hw/ipath/ipath_ud.c | 1 + drivers/infiniband/hw/ipath/ipath_verbs.c | 391 +++++++++++++++++++++++++- 13 files changed, 896 insertions(+), 136 deletions(-) (limited to 'drivers/infiniband/hw/ipath') diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile index fe673882686..75a6c91944c 100644 --- a/drivers/infiniband/hw/ipath/Makefile +++ b/drivers/infiniband/hw/ipath/Makefile @@ -20,17 +20,20 @@ ib_ipath-y := \ ipath_qp.o \ ipath_rc.o \ ipath_ruc.o \ + ipath_sdma.o \ ipath_srq.o \ ipath_stats.o \ ipath_sysfs.o \ ipath_uc.o \ ipath_ud.o \ ipath_user_pages.o \ + ipath_user_sdma.o \ ipath_verbs_mcast.o \ ipath_verbs.o ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o ib_ipath-$(CONFIG_PCI_MSI) += ipath_iba6120.o +ib_ipath-$(CONFIG_PCI_MSI) += ipath_iba7220.o ipath_sd7220.o ipath_sd7220_img.o ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index 02fd310d1ef..2cf7cd2cb66 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -447,8 +447,9 @@ struct ipath_user_info { #define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */ #define IPATH_CMD_POLL_TYPE 28 /* set the kind of polling we want */ #define IPATH_CMD_ARMLAUNCH_CTRL 29 /* armlaunch detection control */ - -#define IPATH_CMD_MAX 29 +/* 30 is unused */ +#define IPATH_CMD_SDMA_INFLIGHT 31 /* sdma inflight counter request */ +#define IPATH_CMD_SDMA_COMPLETE 32 /* sdma completion counter request */ /* * Poll types @@ -486,6 +487,17 @@ struct ipath_cmd { union { struct ipath_tid_info tid_info; struct ipath_user_info user_info; + + /* + * address in userspace where we should put the sdma + * inflight counter + */ + __u64 sdma_inflight; + /* + * address in userspace where we should put the sdma + * completion counter + */ + __u64 sdma_complete; /* address in userspace of struct ipath_port_info to write result to */ __u64 port_info; diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index c94bc4730b2..ed7bdc93b9e 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -129,8 +129,10 @@ static int __devinit ipath_init_one(struct pci_dev *, /* Only needed for registration, nothing else needs this info */ #define PCI_VENDOR_ID_PATHSCALE 0x1fc1 +#define PCI_VENDOR_ID_QLOGIC 0x1077 #define PCI_DEVICE_ID_INFINIPATH_HT 0xd #define PCI_DEVICE_ID_INFINIPATH_PE800 0x10 +#define PCI_DEVICE_ID_INFINIPATH_7220 0x7220 /* Number of seconds before our card status check... */ #define STATUS_TIMEOUT 60 @@ -138,6 +140,7 @@ static int __devinit ipath_init_one(struct pci_dev *, static const struct pci_device_id ipath_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) }, { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) }, + { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_INFINIPATH_7220) }, { 0, } }; @@ -532,6 +535,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, "CONFIG_PCI_MSI is not enabled\n", ent->device); return -ENODEV; #endif + case PCI_DEVICE_ID_INFINIPATH_7220: +#ifndef CONFIG_PCI_MSI + ipath_dbg("CONFIG_PCI_MSI is not enabled, " + "using IntX for unit %u\n", dd->ipath_unit); +#endif + ipath_init_iba7220_funcs(dd); + break; default: ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " "failing\n", ent->device); @@ -887,13 +897,47 @@ int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs) return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT; } +static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err, + char *buf, size_t blen) +{ + static const struct { + ipath_err_t err; + const char *msg; + } errs[] = { + { INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" }, + { INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" }, + { INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" }, + { INFINIPATH_E_SDMABASE, "SDmaBase" }, + { INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" }, + { INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" }, + { INFINIPATH_E_SDMADWEN, "SDmaDwEn" }, + { INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" }, + { INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" }, + { INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" }, + { INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" }, + { INFINIPATH_E_SDMADISABLED, "SDmaDisabled" }, + }; + int i; + int expected; + size_t bidx = 0; + + for (i = 0; i < ARRAY_SIZE(errs); i++) { + expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 : + test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); + if ((err & errs[i].err) && !expected) + bidx += snprintf(buf + bidx, blen - bidx, + "%s ", errs[i].msg); + } +} + /* * Decode the error status into strings, deciding whether to always * print * it or not depending on "normal packet errors" vs everything * else. Return 1 if "real" errors, otherwise 0 if only packet * errors, so caller can decide what to print with the string. */ -int ipath_decode_err(char *buf, size_t blen, ipath_err_t err) +int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen, + ipath_err_t err) { int iserr = 1; *buf = '\0'; @@ -975,6 +1019,8 @@ int ipath_decode_err(char *buf, size_t blen, ipath_err_t err) strlcat(buf, "hardware ", blen); if (err & INFINIPATH_E_RESET) strlcat(buf, "reset ", blen); + if (err & INFINIPATH_E_SDMAERRS) + decode_sdma_errs(dd, err, buf, blen); if (err & INFINIPATH_E_INVALIDEEPCMD) strlcat(buf, "invalideepromcmd ", blen); done: @@ -1730,30 +1776,80 @@ bail: */ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) { + unsigned long flags; + if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) { ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n"); goto bail; } + /* + * If we have SDMA, and it's not disabled, we have to kick off the + * abort state machine, provided we aren't already aborting. + * If we are in the process of aborting SDMA (!DISABLED, but ABORTING), + * we skip the rest of this routine. It is already "in progress" + */ + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) { + int skip_cancel; + u64 *statp = &dd->ipath_sdma_status; + + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + skip_cancel = + !test_bit(IPATH_SDMA_DISABLED, statp) && + test_and_set_bit(IPATH_SDMA_ABORTING, statp); + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + if (skip_cancel) + goto bail; + } + ipath_dbg("Cancelling all in-progress send buffers\n"); /* skip armlaunch errs for a while */ dd->ipath_lastcancel = jiffies + HZ / 2; /* - * the abort bit is auto-clearing. We read scratch to be sure - * that cancels and the abort have taken effect in the chip. + * The abort bit is auto-clearing. We also don't want pioavail + * update happening during this, and we don't want any other + * sends going out, so turn those off for the duration. We read + * the scratch register to be sure that cancels and the abort + * have taken effect in the chip. Otherwise two parts are same + * as ipath_force_pio_avail_update() */ + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD + | INFINIPATH_S_PIOENABLE); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - INFINIPATH_S_ABORT); + dd->ipath_sendctrl | INFINIPATH_S_ABORT); ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + + /* disarm all send buffers */ ipath_disarm_piobufs(dd, 0, - (unsigned)(dd->ipath_piobcnt2k + dd->ipath_piobcnt4k)); - if (restore_sendctrl) /* else done by caller later */ + dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); + + if (restore_sendctrl) { + /* else done by caller later if needed */ + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD | + INFINIPATH_S_PIOENABLE; ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); + dd->ipath_sendctrl); + /* and again, be sure all have hit the chip */ + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + } - /* and again, be sure all have hit the chip */ - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) && + !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) && + test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) { + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + /* only wait so long for intr */ + dd->ipath_sdma_abort_intr_timeout = jiffies + HZ; + dd->ipath_sdma_reset_wait = 200; + __set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status); + if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) + tasklet_hi_schedule(&dd->ipath_sdma_abort_task); + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + } bail:; } @@ -1952,7 +2048,7 @@ bail: * sanity checking on this, and we don't deal with what happens to * programs that are already running when the size changes. * NOTE: changing the MTU will usually cause the IBC to go back to - * link initialize (IPATH_IBSTATE_INIT) state... + * link INIT state... */ int ipath_set_mtu(struct ipath_devdata *dd, u16 arg) { @@ -2092,9 +2188,8 @@ static void ipath_run_led_override(unsigned long opaque) * but leave that to per-chip functions. */ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); - ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & - dd->ibcs_lts_mask; - lstate = (val >> dd->ibcs_ls_shift) & INFINIPATH_IBCS_LINKSTATE_MASK; + ltstate = ipath_ib_linktrstate(dd, val); + lstate = ipath_ib_linkstate(dd, val); dd->ipath_f_setextled(dd, lstate, ltstate); mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff); @@ -2170,6 +2265,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd) ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + teardown_sdma(dd); + /* * gracefully stop all sends allowing any in progress to trickle out * first. @@ -2187,9 +2285,16 @@ void ipath_shutdown_device(struct ipath_devdata *dd) */ udelay(5); + dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */ + ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE); ipath_cancel_sends(dd, 0); + /* + * we are shutting down, so tell components that care. We don't do + * this on just a link state change, much like ethernet, a cable + * unplug, etc. doesn't change driver state + */ signal_ib_event(dd, IB_EVENT_PORT_ERR); /* disable IBC */ @@ -2214,6 +2319,10 @@ void ipath_shutdown_device(struct ipath_devdata *dd) del_timer_sync(&dd->ipath_intrchk_timer); dd->ipath_intrchk_timer.data = 0; } + if (atomic_read(&dd->ipath_led_override_timer_active)) { + del_timer_sync(&dd->ipath_led_override_timer); + atomic_set(&dd->ipath_led_override_timer_active, 0); + } /* * clear all interrupts and errors, so that the next time the driver @@ -2408,13 +2517,18 @@ int ipath_reset_device(int unit) } } + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + teardown_sdma(dd); + dd->ipath_flags &= ~IPATH_INITTED; + ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); ret = dd->ipath_f_reset(dd); - if (ret != 1) - ipath_dbg("reset was not successful\n"); - ipath_dbg("Trying to reinitialize unit %u after reset attempt\n", - unit); - ret = ipath_init_chip(dd, 1); + if (ret == 1) { + ipath_dbg("Reinitializing unit %u after reset attempt\n", + unit); + ret = ipath_init_chip(dd, 1); + } else + ret = -EAGAIN; if (ret) ipath_dev_err(dd, "Reinitialize unit %u after " "reset failed with %d\n", unit, ret); diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index b87d3126e4d..d38ba293323 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -36,21 +36,28 @@ #include #include #include +#include +#include +#include #include #include "ipath_kernel.h" #include "ipath_common.h" +#include "ipath_user_sdma.h" static int ipath_open(struct inode *, struct file *); static int ipath_close(struct inode *, struct file *); static ssize_t ipath_write(struct file *, const char __user *, size_t, loff_t *); +static ssize_t ipath_writev(struct kiocb *, const struct iovec *, + unsigned long , loff_t); static unsigned int ipath_poll(struct file *, struct poll_table_struct *); static int ipath_mmap(struct file *, struct vm_area_struct *); static const struct file_operations ipath_file_ops = { .owner = THIS_MODULE, .write = ipath_write, + .aio_write = ipath_writev, .open = ipath_open, .release = ipath_close, .poll = ipath_poll, @@ -1870,10 +1877,9 @@ static int ipath_assign_port(struct file *fp, if (ipath_compatible_subports(swmajor, swminor) && uinfo->spu_subport_cnt && (ret = find_shared_port(fp, uinfo))) { - mutex_unlock(&ipath_mutex); if (ret > 0) ret = 0; - goto done; + goto done_chk_sdma; } i_minor = iminor(fp->f_path.dentry->d_inode) - IPATH_USER_MINOR_BASE; @@ -1885,6 +1891,21 @@ static int ipath_assign_port(struct file *fp, else ret = find_best_unit(fp, uinfo); +done_chk_sdma: + if (!ret) { + struct ipath_filedata *fd = fp->private_data; + const struct ipath_portdata *pd = fd->pd; + const struct ipath_devdata *dd = pd->port_dd; + + fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev, + dd->ipath_unit, + pd->port_port, + fd->subport); + + if (!fd->pq) + ret = -ENOMEM; + } + mutex_unlock(&ipath_mutex); done: @@ -2042,6 +2063,13 @@ static int ipath_close(struct inode *in, struct file *fp) mutex_unlock(&ipath_mutex); goto bail; } + + dd = pd->port_dd; + + /* drain user sdma queue */ + ipath_user_sdma_queue_drain(dd, fd->pq); + ipath_user_sdma_queue_destroy(fd->pq); + if (--pd->port_cnt) { /* * XXX If the master closes the port before the slave(s), @@ -2054,7 +2082,6 @@ static int ipath_close(struct inode *in, struct file *fp) goto bail; } port = pd->port_port; - dd = pd->port_dd; if (pd->port_hdrqfull) { ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors " @@ -2176,6 +2203,35 @@ static int ipath_get_slave_info(struct ipath_portdata *pd, return ret; } +static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq, + u32 __user *inflightp) +{ + const u32 val = ipath_user_sdma_inflight_counter(pq); + + if (put_user(val, inflightp)) + return -EFAULT; + + return 0; +} + +static int ipath_sdma_get_complete(struct ipath_devdata *dd, + struct ipath_user_sdma_queue *pq, + u32 __user *completep) +{ + u32 val; + int err; + + err = ipath_user_sdma_make_progress(dd, pq); + if (err < 0) + return err; + + val = ipath_user_sdma_complete_counter(pq); + if (put_user(val, completep)) + return -EFAULT; + + return 0; +} + static ssize_t ipath_write(struct file *fp, const char __user *data, size_t count, loff_t *off) { @@ -2250,6 +2306,16 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, dest = &cmd.cmd.armlaunch_ctrl; src = &ucmd->cmd.armlaunch_ctrl; break; + case IPATH_CMD_SDMA_INFLIGHT: + copy = sizeof(cmd.cmd.sdma_inflight); + dest = &cmd.cmd.sdma_inflight; + src = &ucmd->cmd.sdma_inflight; + break; + case IPATH_CMD_SDMA_COMPLETE: + copy = sizeof(cmd.cmd.sdma_complete); + dest = &cmd.cmd.sdma_complete; + src = &ucmd->cmd.sdma_complete; + break; default: ret = -EINVAL; goto bail; @@ -2331,6 +2397,17 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, else ipath_disable_armlaunch(pd->port_dd); break; + case IPATH_CMD_SDMA_INFLIGHT: + ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp), + (u32 __user *) (unsigned long) + cmd.cmd.sdma_inflight); + break; + case IPATH_CMD_SDMA_COMPLETE: + ret = ipath_sdma_get_complete(pd->port_dd, + user_sdma_queue_fp(fp), + (u32 __user *) (unsigned long) + cmd.cmd.sdma_complete); + break; } if (ret >= 0) @@ -2340,6 +2417,20 @@ bail: return ret; } +static ssize_t ipath_writev(struct kiocb *iocb, const struct iovec *iov, + unsigned long dim, loff_t off) +{ + struct file *filp = iocb->ki_filp; + struct ipath_filedata *fp = filp->private_data; + struct ipath_portdata *pd = port_fp(filp); + struct ipath_user_sdma_queue *pq = fp->pq; + + if (!dim) + return -EINVAL; + + return ipath_user_sdma_writev(pd->port_dd, pq, iov, dim); +} + static struct class *ipath_class; static int init_cdev(int minor, char *name, const struct file_operations *fops, diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index c012e05649b..b43c2a10029 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -980,6 +980,10 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) dd->ipath_stats_timer_active = 1; } + /* Set up SendDMA if chip supports it */ + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + ret = setup_sdma(dd); + /* Set up HoL state */ init_timer(&dd->ipath_hol_timer); dd->ipath_hol_timer.function = ipath_hol_event; diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 90b972f6a84..d0088d5b9a4 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -433,6 +433,8 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT | IPATH_LINKDOWN | IPATH_LINKARMED | IPATH_NOCABLE); + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + ipath_restart_sdma(dd); signal_ib_event(dd, IB_EVENT_PORT_ACTIVE); /* LED active not handled in chip _f_updown */ dd->ipath_f_setextled(dd, lstate, ltstate); @@ -480,7 +482,7 @@ done: } static void handle_supp_msgs(struct ipath_devdata *dd, - unsigned supp_msgs, char *msg, int msgsz) + unsigned supp_msgs, char *msg, u32 msgsz) { /* * Print the message unless it's ibc status change only, which @@ -488,12 +490,19 @@ static void handle_supp_msgs(struct ipath_devdata *dd, */ if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) { int iserr; - iserr = ipath_decode_err(msg, msgsz, + ipath_err_t mask; + iserr = ipath_decode_err(dd, msg, msgsz, dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED); - if (dd->ipath_lasterror & - ~(INFINIPATH_E_RRCVEGRFULL | - INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS)) + + mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | + INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED; + + /* if we're in debug, then don't mask SDMADISABLED msgs */ + if (ipath_debug & __IPATH_DBG) + mask &= ~INFINIPATH_E_SDMADISABLED; + + if (dd->ipath_lasterror & ~mask) ipath_dev_err(dd, "Suppressed %u messages for " "fast-repeating errors (%s) (%llx)\n", supp_msgs, msg, @@ -520,7 +529,7 @@ static void handle_supp_msgs(struct ipath_devdata *dd, static unsigned handle_frequent_errors(struct ipath_devdata *dd, ipath_err_t errs, char *msg, - int msgsz, int *noprint) + u32 msgsz, int *noprint) { unsigned long nc; static unsigned long nextmsg_time; @@ -550,19 +559,125 @@ static unsigned handle_frequent_errors(struct ipath_devdata *dd, return supp_msgs; } +static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs) +{ + unsigned long flags; + int expected; + + if (ipath_debug & __IPATH_DBG) { + char msg[128]; + ipath_decode_err(dd, msg, sizeof msg, errs & + INFINIPATH_E_SDMAERRS); + ipath_dbg("errors %lx (%s)\n", (unsigned long)errs, msg); + } + if (ipath_debug & __IPATH_VERBDBG) { + unsigned long tl, hd, status, lengen; + tl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail); + hd = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead); + status = ipath_read_kreg64(dd + , dd->ipath_kregs->kr_senddmastatus); + lengen = ipath_read_kreg64(dd, + dd->ipath_kregs->kr_senddmalengen); + ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx " + "lengen 0x%lx\n", tl, hd, status, lengen); + } + + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); + expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + if (!expected) + ipath_cancel_sends(dd, 1); +} + +static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat) +{ + unsigned long flags; + int expected; + + if ((istat & INFINIPATH_I_SDMAINT) && + !test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) + ipath_sdma_intr(dd); + + if (istat & INFINIPATH_I_SDMADISABLED) { + expected = test_bit(IPATH_SDMA_ABORTING, + &dd->ipath_sdma_status); + ipath_dbg("%s SDmaDisabled intr\n", + expected ? "expected" : "unexpected"); + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + if (!expected) + ipath_cancel_sends(dd, 1); + if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) + tasklet_hi_schedule(&dd->ipath_sdma_abort_task); + } +} + +static int handle_hdrq_full(struct ipath_devdata *dd) +{ + int chkerrpkts = 0; + u32 hd, tl; + u32 i; + + ipath_stats.sps_hdrqfull++; + for (i = 0; i < dd->ipath_cfgports; i++) { + struct ipath_portdata *pd = dd->ipath_pd[i]; + + if (i == 0) { + /* + * For kernel receive queues, we just want to know + * if there are packets in the queue that we can + * process. + */ + if (pd->port_head != ipath_get_hdrqtail(pd)) + chkerrpkts |= 1 << i; + continue; + } + + /* Skip if user context is not open */ + if (!pd || !pd->port_cnt) + continue; + + /* Don't report the same point multiple times. */ + if (dd->ipath_flags & IPATH_NODMA_RTAIL) + tl = ipath_read_ureg32(dd, ur_rcvhdrtail, i); + else + tl = ipath_get_rcvhdrtail(pd); + if (tl == pd->port_lastrcvhdrqtail) + continue; + + hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i); + if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) { + pd->port_lastrcvhdrqtail = tl; + pd->port_hdrqfull++; + /* flush hdrqfull so that poll() sees it */ + wmb(); + wake_up_interruptible(&pd->port_wait); + } + } + + return chkerrpkts; +} + static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) { char msg[128]; u64 ignore_this_time = 0; - int i, iserr = 0; + u64 iserr = 0; int chkerrpkts = 0, noprint = 0; unsigned supp_msgs; int log_idx; - supp_msgs = handle_frequent_errors(dd, errs, msg, sizeof msg, &noprint); + /* + * don't report errors that are masked, either at init + * (not set in ipath_errormask), or temporarily (set in + * ipath_maskederrs) + */ + errs &= dd->ipath_errormask & ~dd->ipath_maskederrs; - /* don't report errors that are masked */ - errs &= ~dd->ipath_maskederrs; + supp_msgs = handle_frequent_errors(dd, errs, msg, (u32)sizeof msg, + &noprint); /* do these first, they are most important */ if (errs & INFINIPATH_E_HARDWARE) { @@ -577,6 +692,9 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) } } + if (errs & INFINIPATH_E_SDMAERRS) + handle_sdma_errors(dd, errs); + if (!noprint && (errs & ~dd->ipath_e_bitsextant)) ipath_dev_err(dd, "error interrupt with unknown errors " "%llx set\n", (unsigned long long) @@ -611,7 +729,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) dd->ipath_errormask &= ~dd->ipath_maskederrs; ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, dd->ipath_errormask); - s_iserr = ipath_decode_err(msg, sizeof msg, + s_iserr = ipath_decode_err(dd, msg, sizeof msg, dd->ipath_maskederrs); if (dd->ipath_maskederrs & @@ -661,26 +779,43 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) INFINIPATH_E_IBSTATUSCHANGED); } - /* likely due to cancel, so suppress */ + if (errs & INFINIPATH_E_SENDSPECIALTRIGGER) { + dd->ipath_spectriggerhit++; + ipath_dbg("%lu special trigger hits\n", + dd->ipath_spectriggerhit); + } + + /* likely due to cancel; so suppress message unless verbose */ if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) && dd->ipath_lastcancel > jiffies) { - ipath_dbg("Suppressed armlaunch/spktlen after error send cancel\n"); + /* armlaunch takes precedence; it often causes both. */ + ipath_cdbg(VERBOSE, + "Suppressed %s error (%llx) after sendbuf cancel\n", + (errs & INFINIPATH_E_SPIOARMLAUNCH) ? + "armlaunch" : "sendpktlen", (unsigned long long)errs); errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN); } if (!errs) return 0; - if (!noprint) + if (!noprint) { + ipath_err_t mask; /* - * the ones we mask off are handled specially below or above + * The ones we mask off are handled specially below + * or above. Also mask SDMADISABLED by default as it + * is too chatty. */ - ipath_decode_err(msg, sizeof msg, - errs & ~(INFINIPATH_E_IBSTATUSCHANGED | - INFINIPATH_E_RRCVEGRFULL | - INFINIPATH_E_RRCVHDRFULL | - INFINIPATH_E_HARDWARE)); - else + mask = INFINIPATH_E_IBSTATUSCHANGED | + INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | + INFINIPATH_E_HARDWARE | INFINIPATH_E_SDMADISABLED; + + /* if we're in debug, then don't mask SDMADISABLED msgs */ + if (ipath_debug & __IPATH_DBG) + mask &= ~INFINIPATH_E_SDMADISABLED; + + ipath_decode_err(dd, msg, sizeof msg, errs & ~mask); + } else /* so we don't need if (!noprint) at strlcat's below */ *msg = 0; @@ -705,39 +840,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) * fast_stats, no more than every 5 seconds, user ports get printed * on close */ - if (errs & INFINIPATH_E_RRCVHDRFULL) { - u32 hd, tl; - ipath_stats.sps_hdrqfull++; - for (i = 0; i < dd->ipath_cfgports; i++) { - struct ipath_portdata *pd = dd->ipath_pd[i]; - if (i == 0) { - hd = pd->port_head; - tl = ipath_get_hdrqtail(pd); - } else if (pd && pd->port_cnt && - pd->port_rcvhdrtail_kvaddr) { - /* - * don't report same point multiple times, - * except kernel - */ - tl = *(u64 *) pd->port_rcvhdrtail_kvaddr; - if (tl == pd->port_lastrcvhdrqtail) - continue; - hd = ipath_read_ureg32(dd, ur_rcvhdrhead, - i); - } else - continue; - if (hd == (tl + 1) || - (!hd && tl == dd->ipath_hdrqlast)) { - if (i == 0) - chkerrpkts = 1; - pd->port_lastrcvhdrqtail = tl; - pd->port_hdrqfull++; - /* flush hdrqfull so that poll() sees it */ - wmb(); - wake_up_interruptible(&pd->port_wait); - } - } - } + if (errs & INFINIPATH_E_RRCVHDRFULL) + chkerrpkts |= handle_hdrq_full(dd); if (errs & INFINIPATH_E_RRCVEGRFULL) { struct ipath_portdata *pd = dd->ipath_pd[0]; @@ -749,7 +853,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) */ ipath_stats.sps_etidfull++; if (pd->port_head != ipath_get_hdrqtail(pd)) - chkerrpkts = 1; + chkerrpkts |= 1; } /* @@ -788,9 +892,6 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) if (!noprint && *msg) { if (iserr) ipath_dev_err(dd, "%s error\n", msg); - else - dev_info(&dd->pcidev->dev, "%s packet problems\n", - msg); } if (dd->ipath_state_wanted & dd->ipath_flags) { ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, " @@ -1017,7 +1118,7 @@ static void handle_urcv(struct ipath_devdata *dd, u64 istat) irqreturn_t ipath_intr(int irq, void *data) { struct ipath_devdata *dd = data; - u32 istat, chk0rcv = 0; + u64 istat, chk0rcv = 0; ipath_err_t estat = 0; irqreturn_t ret; static unsigned unexpected = 0; @@ -1070,17 +1171,17 @@ irqreturn_t ipath_intr(int irq, void *data) if (unlikely(istat & ~dd->ipath_i_bitsextant)) ipath_dev_err(dd, - "interrupt with unknown interrupts %x set\n", - istat & (u32) ~ dd->ipath_i_bitsextant); - else - ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat); + "interrupt with unknown interrupts %Lx set\n", + istat & ~dd->ipath_i_bitsextant); + else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */ + ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n", istat); - if (unlikely(istat & INFINIPATH_I_ERROR)) { + if (istat & INFINIPATH_I_ERROR) { ipath_stats.sps_errints++; estat = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errorstatus); if (!estat) - dev_info(&dd->pcidev->dev, "error interrupt (%x), " + dev_info(&dd->pcidev->dev, "error interrupt (%Lx), " "but no error bits set!\n", istat); else if (estat == -1LL) /* @@ -1198,6 +1299,9 @@ irqreturn_t ipath_intr(int irq, void *data) (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift))) handle_urcv(dd, istat); + if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED)) + handle_sdma_intr(dd, istat); + if (istat & INFINIPATH_I_SPIOBUFAVAIL) { unsigned long flags; @@ -1208,7 +1312,10 @@ irqreturn_t ipath_intr(int irq, void *data) ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - handle_layer_pioavail(dd); + if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) + handle_layer_pioavail(dd); + else + ipath_dbg("unexpected BUFAVAIL intr\n"); } ret = IRQ_HANDLED; diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 550d46c1aef..a2f036c9c28 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -871,7 +871,8 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t); extern int ipath_diag_inuse; irqreturn_t ipath_intr(int irq, void *devid); -int ipath_decode_err(char *buf, size_t blen, ipath_err_t err); +int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen, + ipath_err_t err); #if __IPATH_INFO || __IPATH_DBG extern const char *ipath_ibcstatus_str[]; #endif @@ -1026,6 +1027,7 @@ void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val); /* send dma routines */ int setup_sdma(struct ipath_devdata *); void teardown_sdma(struct ipath_devdata *); +void ipath_restart_sdma(struct ipath_devdata *); void ipath_sdma_intr(struct ipath_devdata *); int ipath_sdma_verbs_send(struct ipath_devdata *, struct ipath_sge_state *, u32, struct ipath_verbs_txreq *); diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 812b42c500e..ded970bd13e 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -340,6 +340,7 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) qp->s_flags &= IPATH_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; qp->s_wqe = NULL; + qp->s_pkt_delay = 0; qp->s_psn = 0; qp->r_psn = 0; qp->r_msn = 0; @@ -563,8 +564,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_ACCESS_FLAGS) qp->qp_access_flags = attr->qp_access_flags; - if (attr_mask & IB_QP_AV) + if (attr_mask & IB_QP_AV) { qp->remote_ah_attr = attr->ah_attr; + qp->s_dmult = ipath_ib_rate_to_mult(attr->ah_attr.static_rate); + } if (attr_mask & IB_QP_PATH_MTU) qp->path_mtu = attr->path_mtu; @@ -850,6 +853,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, goto bail_qp; } qp->ip = NULL; + qp->s_tx = NULL; ipath_reset_qp(qp, init_attr->qp_type); break; @@ -955,12 +959,20 @@ int ipath_destroy_qp(struct ib_qp *ibqp) /* Stop the sending tasklet. */ tasklet_kill(&qp->s_task); + if (qp->s_tx) { + atomic_dec(&qp->refcount); + if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) + kfree(qp->s_tx->txreq.map_addr); + } + /* Make sure the QP isn't on the timeout list. */ spin_lock_irqsave(&dev->pending_lock, flags); if (!list_empty(&qp->timerwait)) list_del_init(&qp->timerwait); if (!list_empty(&qp->piowait)) list_del_init(&qp->piowait); + if (qp->s_tx) + list_add(&qp->s_tx->txreq.list, &dev->txreq_free); spin_unlock_irqrestore(&dev->pending_lock, flags); /* diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index a59bdbd0ed8..bcaa2914e34 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -483,14 +483,16 @@ done: static void want_buffer(struct ipath_devdata *dd) { - unsigned long flags; - - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) { + unsigned long flags; + + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + } } /** diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index 5918cafb880..1974df7a9f7 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -230,7 +230,6 @@ static void dump_sdma_state(struct ipath_devdata *dd) static void sdma_abort_task(unsigned long opaque) { struct ipath_devdata *dd = (struct ipath_devdata *) opaque; - int kick = 0; u64 status; unsigned long flags; @@ -308,30 +307,26 @@ static void sdma_abort_task(unsigned long opaque) /* done with sdma state for a bit */ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - /* restart sdma engine */ + /* + * Don't restart sdma here. Wait until link is up to ACTIVE. + * VL15 MADs used to bring the link up use PIO, and multiple + * link transitions otherwise cause the sdma engine to be + * stopped and started multiple times. + * The disable is done here, including the shadow, so the + * state is kept consistent. + * See ipath_restart_sdma() for the actual starting of sdma. + */ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE; ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - kick = 1; - ipath_dbg("sdma restarted from abort\n"); - - /* now clear status bits */ - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - __clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); - __clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status); - __clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); /* make sure I see next message */ dd->ipath_sdma_abort_jiffies = 0; - goto unlock; + goto done; } resched: @@ -353,10 +348,8 @@ resched_noprint: unlock: spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - - /* kick upper layers */ - if (kick) - ipath_ib_piobufavail(dd->verbs_dev); +done: + return; } /* @@ -481,10 +474,14 @@ int setup_sdma(struct ipath_devdata *dd) tasklet_init(&dd->ipath_sdma_abort_task, sdma_abort_task, (unsigned long) dd); - /* Turn on SDMA */ + /* + * No use to turn on SDMA here, as link is probably not ACTIVE + * Just mark it RUNNING and enable the interrupt, and let the + * ipath_restart_sdma() on link transition to ACTIVE actually + * enable it. + */ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE | - INFINIPATH_S_SDMAINTENABLE; + dd->ipath_sendctrl |= INFINIPATH_S_SDMAINTENABLE; ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); __set_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status); @@ -572,6 +569,56 @@ void teardown_sdma(struct ipath_devdata *dd) sdma_descq, sdma_descq_phys); } +/* + * [Re]start SDMA, if we use it, and it's not already OK. + * This is called on transition to link ACTIVE, either the first or + * subsequent times. + */ +void ipath_restart_sdma(struct ipath_devdata *dd) +{ + unsigned long flags; + int needed = 1; + + if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) + goto bail; + + /* + * First, make sure we should, which is to say, + * check that we are "RUNNING" (not in teardown) + * and not "SHUTDOWN" + */ + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + if (!test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status) + || test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) + needed = 0; + else { + __clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); + __clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status); + __clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); + } + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + if (!needed) { + ipath_dbg("invalid attempt to restart SDMA, status 0x%016llx\n", + dd->ipath_sdma_status); + goto bail; + } + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + /* + * First clear, just to be safe. Enable is only done + * in chip on 0->1 transition + */ + dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + +bail: + return; +} + static inline void make_sdma_desc(struct ipath_devdata *dd, u64 *sdmadesc, u64 addr, u64 dwlen, u64 dwoffset) { diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index adff2f10dc0..1e36bac8149 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c @@ -292,8 +292,8 @@ void ipath_get_faststats(unsigned long opaque) && time_after(jiffies, dd->ipath_unmasktime)) { char ebuf[256]; int iserr; - iserr = ipath_decode_err(ebuf, sizeof ebuf, - dd->ipath_maskederrs); + iserr = ipath_decode_err(dd, ebuf, sizeof ebuf, + dd->ipath_maskederrs); if (dd->ipath_maskederrs & ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS)) diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index de67eed08ed..4d4d58d8e9d 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -303,6 +303,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) qp->s_hdrwords = 7; qp->s_cur_size = wqe->length; qp->s_cur_sge = &qp->s_sge; + qp->s_dmult = ah_attr->static_rate; qp->s_wqe = wqe; qp->s_sge.sge = wqe->sg_list[0]; qp->s_sge.sg_list = wqe->sg_list + 1; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 2e6b6f6265b..75429aa92ce 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -242,6 +242,93 @@ static void ipath_flush_wqe(struct ipath_qp *qp, struct ib_send_wr *wr) ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); } +/* + * Count the number of DMA descriptors needed to send length bytes of data. + * Don't modify the ipath_sge_state to get the count. + * Return zero if any of the segments is not aligned. + */ +static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length) +{ + struct ipath_sge *sg_list = ss->sg_list; + struct ipath_sge sge = ss->sge; + u8 num_sge = ss->num_sge; + u32 ndesc = 1; /* count the header */ + + while (length) { + u32 len = sge.length; + + if (len > length) + len = length; + if (len > sge.sge_length) + len = sge.sge_length; + BUG_ON(len == 0); + if (((long) sge.vaddr & (sizeof(u32) - 1)) || + (len != length && (len & (sizeof(u32) - 1)))) { + ndesc = 0; + break; + } + ndesc++; + sge.vaddr += len; + sge.length -= len; + sge.sge_length -= len; + if (sge.sge_length == 0) { + if (--num_sge) + sge = *sg_list++; + } else if (sge.length == 0 && sge.mr != NULL) { + if (++sge.n >= IPATH_SEGSZ) { + if (++sge.m >= sge.mr->mapsz) + break; + sge.n = 0; + } + sge.vaddr = + sge.mr->map[sge.m]->segs[sge.n].vaddr; + sge.length = + sge.mr->map[sge.m]->segs[sge.n].length; + } + length -= len; + } + return ndesc; +} + +/* + * Copy from the SGEs to the data buffer. + */ +static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss, + u32 length) +{ + struct ipath_sge *sge = &ss->sge; + + while (length) { + u32 len = sge->length; + + if (len > length) + len = length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + memcpy(data, sge->vaddr, len); + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr != NULL) { + if (++sge->n >= IPATH_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + data += len; + length -= len; + } +} + /** * ipath_post_one_send - post one RC, UC, or UD send work request * @qp: the QP to post on @@ -866,13 +953,231 @@ static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, __raw_writel(last, piobuf); } -static int ipath_verbs_send_pio(struct ipath_qp *qp, u32 *hdr, u32 hdrwords, +/* + * Convert IB rate to delay multiplier. + */ +unsigned ipath_ib_rate_to_mult(enum ib_rate rate) +{ + switch (rate) { + case IB_RATE_2_5_GBPS: return 8; + case IB_RATE_5_GBPS: return 4; + case IB_RATE_10_GBPS: return 2; + case IB_RATE_20_GBPS: return 1; + default: return 0; + } +} + +/* + * Convert delay multiplier to IB rate + */ +static enum ib_rate ipath_mult_to_ib_rate(unsigned mult) +{ + switch (mult) { + case 8: return IB_RATE_2_5_GBPS; + case 4: return IB_RATE_5_GBPS; + case 2: return IB_RATE_10_GBPS; + case 1: return IB_RATE_20_GBPS; + default: return IB_RATE_PORT_CURRENT; + } +} + +static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev) +{ + struct ipath_verbs_txreq *tx = NULL; + unsigned long flags; + + spin_lock_irqsave(&dev->pending_lock, flags); + if (!list_empty(&dev->txreq_free)) { + struct list_head *l = dev->txreq_free.next; + + list_del(l); + tx = list_entry(l, struct ipath_verbs_txreq, txreq.list); + } + spin_unlock_irqrestore(&dev->pending_lock, flags); + return tx; +} + +static inline void put_txreq(struct ipath_ibdev *dev, + struct ipath_verbs_txreq *tx) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->pending_lock, flags); + list_add(&tx->txreq.list, &dev->txreq_free); + spin_unlock_irqrestore(&dev->pending_lock, flags); +} + +static void sdma_complete(void *cookie, int status) +{ + struct ipath_verbs_txreq *tx = cookie; + struct ipath_qp *qp = tx->qp; + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + + /* Generate a completion queue entry if needed */ + if (qp->ibqp.qp_type != IB_QPT_RC && tx->wqe) { + enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? + IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; + + ipath_send_complete(qp, tx->wqe, ibs); + } + + if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) + kfree(tx->txreq.map_addr); + put_txreq(dev, tx); + + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); +} + +/* + * Compute the number of clock cycles of delay before sending the next packet. + * The multipliers reflect the number of clocks for the fastest rate so + * one tick at 4xDDR is 8 ticks at 1xSDR. + * If the destination port will take longer to receive a packet than + * the outgoing link can send it, we need to delay sending the next packet + * by the difference in time it takes the receiver to receive and the sender + * to send this packet. + * Note that this delay is always correct for UC and RC but not always + * optimal for UD. For UD, the destination HCA can be different for each + * packet, in which case, we could send packets to a different destination + * while "waiting" for the delay. The overhead for doing this without + * HW support is more than just paying the cost of delaying some packets + * unnecessarily. + */ +static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult) +{ + return (rcv_mult > snd_mult) ? + (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0; +} + +static int ipath_verbs_send_dma(struct ipath_qp *qp, + struct ipath_ib_header *hdr, u32 hdrwords, + struct ipath_sge_state *ss, u32 len, + u32 plen, u32 dwords) +{ + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + struct ipath_devdata *dd = dev->dd; + struct ipath_verbs_txreq *tx; + u32 *piobuf; + u32 control; + u32 ndesc; + int ret; + + tx = qp->s_tx; + if (tx) { + qp->s_tx = NULL; + /* resend previously constructed packet */ + ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx); + if (ret) + qp->s_tx = tx; + goto bail; + } + + tx = get_txreq(dev); + if (!tx) { + ret = -EBUSY; + goto bail; + } + + /* + * Get the saved delay count we computed for the previous packet + * and save the delay count for this packet to be used next time + * we get here. + */ + control = qp->s_pkt_delay; + qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult); + + tx->qp = qp; + atomic_inc(&qp->refcount); + tx->wqe = qp->s_wqe; + tx->txreq.callback = sdma_complete; + tx->txreq.callback_cookie = tx; + tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST | + IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC; + if (plen + 1 >= IPATH_SMALLBUF_DWORDS) + tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF; + + /* VL15 packets bypass credit check */ + if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) { + control |= 1ULL << 31; + tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15; + } + + if (len) { + /* + * Don't try to DMA if it takes more descriptors than + * the queue holds. + */ + ndesc = ipath_count_sge(ss, len); + if (ndesc >= dd->ipath_sdma_descq_cnt) + ndesc = 0; + } else + ndesc = 1; + if (ndesc) { + tx->hdr.pbc[0] = cpu_to_le32(plen); + tx->hdr.pbc[1] = cpu_to_le32(control); + memcpy(&tx->hdr.hdr, hdr, hdrwords << 2); + tx->txreq.sg_count = ndesc; + tx->map_len = (hdrwords + 2) << 2; + tx->txreq.map_addr = &tx->hdr; + ret = ipath_sdma_verbs_send(dd, ss, dwords, tx); + if (ret) { + /* save ss and length in dwords */ + tx->ss = ss; + tx->len = dwords; + qp->s_tx = tx; + } + goto bail; + } + + /* Allocate a buffer and copy the header and payload to it. */ + tx->map_len = (plen + 1) << 2; + piobuf = kmalloc(tx->map_len, GFP_ATOMIC); + if (unlikely(piobuf == NULL)) { + ret = -EBUSY; + goto err_tx; + } + tx->txreq.map_addr = piobuf; + tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF; + tx->txreq.sg_count = 1; + + *piobuf++ = (__force u32) cpu_to_le32(plen); + *piobuf++ = (__force u32) cpu_to_le32(control); + memcpy(piobuf, hdr, hdrwords << 2); + ipath_copy_from_sge(piobuf + hdrwords, ss, len); + + ret = ipath_sdma_verbs_send(dd, NULL, 0, tx); + /* + * If we couldn't queue the DMA request, save the info + * and try again later rather than destroying the + * buffer and undoing the side effects of the copy. + */ + if (ret) { + tx->ss = NULL; + tx->len = 0; + qp->s_tx = tx; + } + dev->n_unaligned++; + goto bail; + +err_tx: + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + put_txreq(dev, tx); +bail: + return ret; +} + +static int ipath_verbs_send_pio(struct ipath_qp *qp, + struct ipath_ib_header *ibhdr, u32 hdrwords, struct ipath_sge_state *ss, u32 len, u32 plen, u32 dwords) { struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd; + u32 *hdr = (u32 *) ibhdr; u32 __iomem *piobuf; unsigned flush_wc; + u32 control; int ret; piobuf = ipath_getpiobuf(dd, plen, NULL); @@ -882,11 +1187,23 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp, u32 *hdr, u32 hdrwords, } /* - * Write len to control qword, no flags. + * Get the saved delay count we computed for the previous packet + * and save the delay count for this packet to be used next time + * we get here. + */ + control = qp->s_pkt_delay; + qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult); + + /* VL15 packets bypass credit check */ + if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15) + control |= 1ULL << 31; + + /* + * Write the length to the control qword plus any needed flags. * We have to flush after the PBC for correctness on some cpus * or WC buffer can be written out of order. */ - writeq(plen, piobuf); + writeq(((u64) control << 32) | plen, piobuf); piobuf += 2; flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC; @@ -961,15 +1278,25 @@ int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr, */ plen = hdrwords + dwords + 1; - /* Drop non-VL15 packets if we are not in the active state */ - if (!(dd->ipath_flags & IPATH_LINKACTIVE) && - qp->ibqp.qp_type != IB_QPT_SMI) { + /* + * VL15 packets (IB_QPT_SMI) will always use PIO, so we + * can defer SDMA restart until link goes ACTIVE without + * worrying about just how we got there. + */ + if (qp->ibqp.qp_type == IB_QPT_SMI) + ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, + plen, dwords); + /* All non-VL15 packets are dropped if link is not ACTIVE */ + else if (!(dd->ipath_flags & IPATH_LINKACTIVE)) { if (qp->s_wqe) ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); ret = 0; - } else - ret = ipath_verbs_send_pio(qp, (u32 *) hdr, hdrwords, - ss, len, plen, dwords); + } else if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len, + plen, dwords); + else + ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, + plen, dwords); return ret; } @@ -1038,6 +1365,12 @@ int ipath_get_counters(struct ipath_devdata *dd, ipath_snap_cntr(dd, crp->cr_errlpcrccnt) + ipath_snap_cntr(dd, crp->cr_badformatcnt) + dd->ipath_rxfc_unsupvl_errs; + if (crp->cr_rxotherlocalphyerrcnt) + cntrs->port_rcv_errors += + ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt); + if (crp->cr_rxvlerrcnt) + cntrs->port_rcv_errors += + ipath_snap_cntr(dd, crp->cr_rxvlerrcnt); cntrs->port_rcv_remphys_errors = ipath_snap_cntr(dd, crp->cr_rcvebpcnt); cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt); @@ -1046,9 +1379,16 @@ int ipath_get_counters(struct ipath_devdata *dd, cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt); cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt); cntrs->local_link_integrity_errors = - (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ? - dd->ipath_lli_errs : dd->ipath_lli_errors; - cntrs->excessive_buffer_overrun_errors = dd->ipath_overrun_thresh_errs; + crp->cr_locallinkintegrityerrcnt ? + ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) : + ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ? + dd->ipath_lli_errs : dd->ipath_lli_errors); + cntrs->excessive_buffer_overrun_errors = + crp->cr_excessbufferovflcnt ? + ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) : + dd->ipath_overrun_thresh_errs; + cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ? + ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0; ret = 0; @@ -1396,6 +1736,7 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd, /* ib_create_ah() will initialize ah->ibah. */ ah->attr = *ah_attr; + ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate); ret = &ah->ibah; @@ -1429,6 +1770,7 @@ static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) struct ipath_ah *ah = to_iah(ibah); *ah_attr = ah->attr; + ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate); return 0; } @@ -1578,6 +1920,8 @@ int ipath_register_ib_device(struct ipath_devdata *dd) struct ipath_verbs_counters cntrs; struct ipath_ibdev *idev; struct ib_device *dev; + struct ipath_verbs_txreq *tx; + unsigned i; int ret; idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev); @@ -1588,6 +1932,17 @@ int ipath_register_ib_device(struct ipath_devdata *dd) dev = &idev->ibdev; + if (dd->ipath_sdma_descq_cnt) { + tx = kmalloc(dd->ipath_sdma_descq_cnt * sizeof *tx, + GFP_KERNEL); + if (tx == NULL) { + ret = -ENOMEM; + goto err_tx; + } + } else + tx = NULL; + idev->txreq_bufs = tx; + /* Only need to initialize non-zero fields. */ spin_lock_init(&idev->n_pds_lock); spin_lock_init(&idev->n_ahs_lock); @@ -1628,6 +1983,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd) INIT_LIST_HEAD(&idev->pending[2]); INIT_LIST_HEAD(&idev->piowait); INIT_LIST_HEAD(&idev->rnrwait); + INIT_LIST_HEAD(&idev->txreq_free); idev->pending_index = 0; idev->port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP; @@ -1659,6 +2015,9 @@ int ipath_register_ib_device(struct ipath_devdata *dd) cntrs.excessive_buffer_overrun_errors; idev->z_vl15_dropped = cntrs.vl15_dropped; + for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++) + list_add(&tx->txreq.list, &idev->txreq_free); + /* * The system image GUID is supposed to be the same for all * IB HCAs in a single system but since there can be other @@ -1708,6 +2067,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd) dev->phys_port_cnt = 1; dev->num_comp_vectors = 1; dev->dma_device = &dd->pcidev->dev; + dev->class_dev.dev = dev->dma_device; dev->query_device = ipath_query_device; dev->modify_device = ipath_modify_device; dev->query_port = ipath_query_port; @@ -1772,6 +2132,8 @@ err_reg: err_lk: kfree(idev->qp_table.table); err_qp: + kfree(idev->txreq_bufs); +err_tx: ib_dealloc_device(dev); ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret); idev = NULL; @@ -1806,6 +2168,7 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev) ipath_free_all_qps(&dev->qp_table); kfree(dev->qp_table.table); kfree(dev->lk_table.table); + kfree(dev->txreq_bufs); ib_dealloc_device(ibdev); } @@ -1853,13 +2216,15 @@ static ssize_t show_stats(struct class_device *cdev, char *buf) "RC stalls %d\n" "piobuf wait %d\n" "no piobuf %d\n" + "unaligned %d\n" "PKT drops %d\n" "WQE errs %d\n", dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, dev->n_other_naks, dev->n_timeouts, dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait, - dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs); + dev->n_no_piobuf, dev->n_unaligned, + dev->n_pkt_drops, dev->n_wqe_errs); for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { const struct ipath_opcode_stats *si = &dev->opstats[i]; -- cgit v1.2.3