diff options
-rw-r--r-- | drivers/usb/host/xhci-hcd.c | 52 | ||||
-rw-r--r-- | drivers/usb/host/xhci-mem.c | 15 | ||||
-rw-r--r-- | drivers/usb/host/xhci-ring.c | 170 | ||||
-rw-r--r-- | drivers/usb/host/xhci.h | 22 |
4 files changed, 243 insertions, 16 deletions
diff --git a/drivers/usb/host/xhci-hcd.c b/drivers/usb/host/xhci-hcd.c index 5839453d342..0d5a8564ed1 100644 --- a/drivers/usb/host/xhci-hcd.c +++ b/drivers/usb/host/xhci-hcd.c @@ -246,8 +246,14 @@ static void xhci_work(struct xhci_hcd *xhci) /* Flush posted writes */ xhci_readl(xhci, &xhci->ir_set->irq_pending); - /* FIXME this should be a delayed service routine that clears the EHB */ - xhci_handle_event(xhci); + if (xhci->xhc_state & XHCI_STATE_DYING) + xhci_dbg(xhci, "xHCI dying, ignoring interrupt. " + "Shouldn't IRQs be disabled?\n"); + else + /* FIXME this should be a delayed service routine + * that clears the EHB. + */ + xhci_handle_event(xhci); /* Clear the event handler busy flag (RW1C); the event ring should be empty. */ temp_64 = xhci_read_64(xhci, &xhci->ir_set->erst_dequeue); @@ -320,7 +326,7 @@ void xhci_event_ring_work(unsigned long arg) spin_lock_irqsave(&xhci->lock, flags); temp = xhci_readl(xhci, &xhci->op_regs->status); xhci_dbg(xhci, "op reg status = 0x%x\n", temp); - if (temp == 0xffffffff) { + if (temp == 0xffffffff || (xhci->xhc_state & XHCI_STATE_DYING)) { xhci_dbg(xhci, "HW died, polling stopped.\n"); spin_unlock_irqrestore(&xhci->lock, flags); return; @@ -710,16 +716,22 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) * atomic context to this function, which may allocate memory. */ spin_lock_irqsave(&xhci->lock, flags); + if (xhci->xhc_state & XHCI_STATE_DYING) + goto dying; ret = xhci_queue_ctrl_tx(xhci, GFP_ATOMIC, urb, slot_id, ep_index); spin_unlock_irqrestore(&xhci->lock, flags); } else if (usb_endpoint_xfer_bulk(&urb->ep->desc)) { spin_lock_irqsave(&xhci->lock, flags); + if (xhci->xhc_state & XHCI_STATE_DYING) + goto dying; ret = xhci_queue_bulk_tx(xhci, GFP_ATOMIC, urb, slot_id, ep_index); spin_unlock_irqrestore(&xhci->lock, flags); } else if (usb_endpoint_xfer_int(&urb->ep->desc)) { spin_lock_irqsave(&xhci->lock, flags); + if (xhci->xhc_state & XHCI_STATE_DYING) + goto dying; ret = xhci_queue_intr_tx(xhci, GFP_ATOMIC, urb, slot_id, ep_index); spin_unlock_irqrestore(&xhci->lock, flags); @@ -728,6 +740,12 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) } exit: return ret; +dying: + xhci_dbg(xhci, "Ep 0x%x: URB %p submitted for " + "non-responsive xHCI host.\n", + urb->ep->desc.bEndpointAddress, urb); + spin_unlock_irqrestore(&xhci->lock, flags); + return -ESHUTDOWN; } /* @@ -789,6 +807,17 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) kfree(td); return ret; } + if (xhci->xhc_state & XHCI_STATE_DYING) { + xhci_dbg(xhci, "Ep 0x%x: URB %p to be canceled on " + "non-responsive xHCI host.\n", + urb->ep->desc.bEndpointAddress, urb); + /* Let the stop endpoint command watchdog timer (which set this + * state) finish cleaning up the endpoint TD lists. We must + * have caught it in the middle of dropping a lock and giving + * back an URB. + */ + goto done; + } xhci_dbg(xhci, "Cancel URB %p\n", urb); xhci_dbg(xhci, "Event ring:\n"); @@ -806,6 +835,10 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) */ if (!(ep->ep_state & EP_HALT_PENDING)) { ep->ep_state |= EP_HALT_PENDING; + ep->stop_cmds_pending++; + ep->stop_cmd_timer.expires = jiffies + + XHCI_STOP_EP_CMD_TIMEOUT * HZ; + add_timer(&ep->stop_cmd_timer); xhci_queue_stop_endpoint(xhci, urb->dev->slot_id, ep_index); xhci_ring_cmd_db(xhci); } @@ -1410,16 +1443,27 @@ void xhci_endpoint_reset(struct usb_hcd *hcd, void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct xhci_virt_device *virt_dev; unsigned long flags; u32 state; + int i; if (udev->slot_id == 0) return; + virt_dev = xhci->devs[udev->slot_id]; + if (!virt_dev) + return; + + /* Stop any wayward timer functions (which may grab the lock) */ + for (i = 0; i < 31; ++i) { + virt_dev->eps[i].ep_state &= ~EP_HALT_PENDING; + del_timer_sync(&virt_dev->eps[i].stop_cmd_timer); + } spin_lock_irqsave(&xhci->lock, flags); /* Don't disable the slot if the host controller is dead. */ state = xhci_readl(xhci, &xhci->op_regs->status); - if (state == 0xffffffff) { + if (state == 0xffffffff || (xhci->xhc_state & XHCI_STATE_DYING)) { xhci_free_virt_device(xhci, udev->slot_id); spin_unlock_irqrestore(&xhci->lock, flags); return; diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index b8fd270a8b0..fd05247b7bb 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -248,6 +248,15 @@ struct xhci_ep_ctx *xhci_get_ep_ctx(struct xhci_hcd *xhci, (ctx->bytes + (ep_index * CTX_SIZE(xhci->hcc_params))); } +static void xhci_init_endpoint_timer(struct xhci_hcd *xhci, + struct xhci_virt_ep *ep) +{ + init_timer(&ep->stop_cmd_timer); + ep->stop_cmd_timer.data = (unsigned long) ep; + ep->stop_cmd_timer.function = xhci_stop_endpoint_command_watchdog; + ep->xhci = xhci; +} + /* All the xhci_tds in the ring's TD list should be freed at this point */ void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id) { @@ -309,9 +318,11 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, xhci_dbg(xhci, "Slot %d input ctx = 0x%llx (dma)\n", slot_id, (unsigned long long)dev->in_ctx->dma); - /* Initialize the cancellation list for each endpoint */ - for (i = 0; i < 31; i++) + /* Initialize the cancellation list and watchdog timers for each ep */ + for (i = 0; i < 31; i++) { + xhci_init_endpoint_timer(xhci, &dev->eps[i]); INIT_LIST_HEAD(&dev->eps[i].cancelled_td_list); + } /* Allocate endpoint 0 ring */ dev->eps[0].ring = xhci_ring_alloc(xhci, 1, true, flags); diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 184e8b6f30b..9541e88df68 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -475,6 +475,35 @@ void xhci_queue_new_dequeue_state(struct xhci_hcd *xhci, ep->ep_state |= SET_DEQ_PENDING; } +static inline void xhci_stop_watchdog_timer_in_irq(struct xhci_hcd *xhci, + struct xhci_virt_ep *ep) +{ + ep->ep_state &= ~EP_HALT_PENDING; + /* Can't del_timer_sync in interrupt, so we attempt to cancel. If the + * timer is running on another CPU, we don't decrement stop_cmds_pending + * (since we didn't successfully stop the watchdog timer). + */ + if (del_timer(&ep->stop_cmd_timer)) + ep->stop_cmds_pending--; +} + +/* Must be called with xhci->lock held in interrupt context */ +static void xhci_giveback_urb_in_irq(struct xhci_hcd *xhci, + struct xhci_td *cur_td, int status, char *adjective) +{ + struct usb_hcd *hcd = xhci_to_hcd(xhci); + + cur_td->urb->hcpriv = NULL; + usb_hcd_unlink_urb_from_ep(hcd, cur_td->urb); + xhci_dbg(xhci, "Giveback %s URB %p\n", adjective, cur_td->urb); + + spin_unlock(&xhci->lock); + usb_hcd_giveback_urb(hcd, cur_td->urb, status); + kfree(cur_td); + spin_lock(&xhci->lock); + xhci_dbg(xhci, "%s URB given back\n", adjective); +} + /* * When we get a command completion for a Stop Endpoint Command, we need to * unlink any cancelled TDs from the ring. There are two ways to do that: @@ -508,7 +537,7 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci, ep_ring = ep->ring; if (list_empty(&ep->cancelled_td_list)) { - ep->ep_state &= ~EP_HALT_PENDING; + xhci_stop_watchdog_timer_in_irq(xhci, ep); ring_ep_doorbell(xhci, slot_id, ep_index); return; } @@ -540,7 +569,7 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci, list_del(&cur_td->td_list); } last_unlinked_td = cur_td; - ep->ep_state &= ~EP_HALT_PENDING; + xhci_stop_watchdog_timer_in_irq(xhci, ep); /* If necessary, queue a Set Transfer Ring Dequeue Pointer command */ if (deq_state.new_deq_ptr && deq_state.new_deq_seg) { @@ -568,23 +597,136 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci, hcd_stat_update(xhci->tp_stat, cur_td->urb->actual_length, ktime_sub(stop_time, cur_td->start_time)); #endif - cur_td->urb->hcpriv = NULL; - usb_hcd_unlink_urb_from_ep(xhci_to_hcd(xhci), cur_td->urb); - - xhci_dbg(xhci, "Giveback cancelled URB %p\n", cur_td->urb); - spin_unlock(&xhci->lock); /* Doesn't matter what we pass for status, since the core will * just overwrite it (because the URB has been unlinked). */ - usb_hcd_giveback_urb(xhci_to_hcd(xhci), cur_td->urb, 0); - kfree(cur_td); + xhci_giveback_urb_in_irq(xhci, cur_td, 0, "cancelled"); - spin_lock(&xhci->lock); + /* Stop processing the cancelled list if the watchdog timer is + * running. + */ + if (xhci->xhc_state & XHCI_STATE_DYING) + return; } while (cur_td != last_unlinked_td); /* Return to the event handler with xhci->lock re-acquired */ } +/* Watchdog timer function for when a stop endpoint command fails to complete. + * In this case, we assume the host controller is broken or dying or dead. The + * host may still be completing some other events, so we have to be careful to + * let the event ring handler and the URB dequeueing/enqueueing functions know + * through xhci->state. + * + * The timer may also fire if the host takes a very long time to respond to the + * command, and the stop endpoint command completion handler cannot delete the + * timer before the timer function is called. Another endpoint cancellation may + * sneak in before the timer function can grab the lock, and that may queue + * another stop endpoint command and add the timer back. So we cannot use a + * simple flag to say whether there is a pending stop endpoint command for a + * particular endpoint. + * + * Instead we use a combination of that flag and a counter for the number of + * pending stop endpoint commands. If the timer is the tail end of the last + * stop endpoint command, and the endpoint's command is still pending, we assume + * the host is dying. + */ +void xhci_stop_endpoint_command_watchdog(unsigned long arg) +{ + struct xhci_hcd *xhci; + struct xhci_virt_ep *ep; + struct xhci_virt_ep *temp_ep; + struct xhci_ring *ring; + struct xhci_td *cur_td; + int ret, i, j; + + ep = (struct xhci_virt_ep *) arg; + xhci = ep->xhci; + + spin_lock(&xhci->lock); + + ep->stop_cmds_pending--; + if (xhci->xhc_state & XHCI_STATE_DYING) { + xhci_dbg(xhci, "Stop EP timer ran, but another timer marked " + "xHCI as DYING, exiting.\n"); + spin_unlock(&xhci->lock); + return; + } + if (!(ep->stop_cmds_pending == 0 && (ep->ep_state & EP_HALT_PENDING))) { + xhci_dbg(xhci, "Stop EP timer ran, but no command pending, " + "exiting.\n"); + spin_unlock(&xhci->lock); + return; + } + + xhci_warn(xhci, "xHCI host not responding to stop endpoint command.\n"); + xhci_warn(xhci, "Assuming host is dying, halting host.\n"); + /* Oops, HC is dead or dying or at least not responding to the stop + * endpoint command. + */ + xhci->xhc_state |= XHCI_STATE_DYING; + /* Disable interrupts from the host controller and start halting it */ + xhci_quiesce(xhci); + spin_unlock(&xhci->lock); + + ret = xhci_halt(xhci); + + spin_lock(&xhci->lock); + if (ret < 0) { + /* This is bad; the host is not responding to commands and it's + * not allowing itself to be halted. At least interrupts are + * disabled, so we can set HC_STATE_HALT and notify the + * USB core. But if we call usb_hc_died(), it will attempt to + * disconnect all device drivers under this host. Those + * disconnect() methods will wait for all URBs to be unlinked, + * so we must complete them. + */ + xhci_warn(xhci, "Non-responsive xHCI host is not halting.\n"); + xhci_warn(xhci, "Completing active URBs anyway.\n"); + /* We could turn all TDs on the rings to no-ops. This won't + * help if the host has cached part of the ring, and is slow if + * we want to preserve the cycle bit. Skip it and hope the host + * doesn't touch the memory. + */ + } + for (i = 0; i < MAX_HC_SLOTS; i++) { + if (!xhci->devs[i]) + continue; + for (j = 0; j < 31; j++) { + temp_ep = &xhci->devs[i]->eps[j]; + ring = temp_ep->ring; + if (!ring) + continue; + xhci_dbg(xhci, "Killing URBs for slot ID %u, " + "ep index %u\n", i, j); + while (!list_empty(&ring->td_list)) { + cur_td = list_first_entry(&ring->td_list, + struct xhci_td, + td_list); + list_del(&cur_td->td_list); + if (!list_empty(&cur_td->cancelled_td_list)) + list_del(&cur_td->cancelled_td_list); + xhci_giveback_urb_in_irq(xhci, cur_td, + -ESHUTDOWN, "killed"); + } + while (!list_empty(&temp_ep->cancelled_td_list)) { + cur_td = list_first_entry( + &temp_ep->cancelled_td_list, + struct xhci_td, + cancelled_td_list); + list_del(&cur_td->cancelled_td_list); + xhci_giveback_urb_in_irq(xhci, cur_td, + -ESHUTDOWN, "killed"); + } + } + } + spin_unlock(&xhci->lock); + xhci_to_hcd(xhci)->state = HC_STATE_HALT; + xhci_dbg(xhci, "Calling usb_hc_died()\n"); + usb_hc_died(xhci_to_hcd(xhci)); + xhci_dbg(xhci, "xHCI host controller is dead.\n"); +} + /* * When we get a completion for a Set Transfer Ring Dequeue Pointer command, * we need to clear the set deq pending flag in the endpoint ring state, so that @@ -1333,6 +1475,14 @@ void xhci_handle_event(struct xhci_hcd *xhci) default: xhci->error_bitmask |= 1 << 3; } + /* Any of the above functions may drop and re-acquire the lock, so check + * to make sure a watchdog timer didn't mark the host as non-responsive. + */ + if (xhci->xhc_state & XHCI_STATE_DYING) { + xhci_dbg(xhci, "xHCI host dying, returning from " + "event handler.\n"); + return; + } if (update_ptrs) { /* Update SW and HC event ring dequeue pointer */ diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index af3c5638526..c92f84154fb 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -659,6 +659,10 @@ struct xhci_virt_ep { /* The TRB that was last reported in a stopped endpoint ring */ union xhci_trb *stopped_trb; struct xhci_td *stopped_td; + /* Watchdog timer for stop endpoint command to cancel URBs */ + struct timer_list stop_cmd_timer; + int stop_cmds_pending; + struct xhci_hcd *xhci; }; struct xhci_virt_device { @@ -1022,6 +1026,8 @@ struct xhci_scratchpad { #define ERST_ENTRIES 1 /* Poll every 60 seconds */ #define POLL_TIMEOUT 60 +/* Stop endpoint command timeout (secs) for URB cancellation watchdog timer */ +#define XHCI_STOP_EP_CMD_TIMEOUT 5 /* XXX: Make these module parameters */ @@ -1083,6 +1089,21 @@ struct xhci_hcd { struct timer_list event_ring_timer; int zombie; #endif + /* Host controller watchdog timer structures */ + unsigned int xhc_state; +/* Host controller is dying - not responding to commands. "I'm not dead yet!" + * + * xHC interrupts have been disabled and a watchdog timer will (or has already) + * halt the xHCI host, and complete all URBs with an -ESHUTDOWN code. Any code + * that sees this status (other than the timer that set it) should stop touching + * hardware immediately. Interrupt handlers should return immediately when + * they see this status (any time they drop and re-acquire xhci->lock). + * xhci_urb_dequeue() should call usb_hcd_check_unlink_urb() and return without + * putting the TD on the canceled list, etc. + * + * There are no reports of xHCI host controllers that display this issue. + */ +#define XHCI_STATE_DYING (1 << 0) /* Statistics */ int noops_submitted; int noops_handled; @@ -1279,6 +1300,7 @@ void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci, void xhci_queue_config_ep_quirk(struct xhci_hcd *xhci, unsigned int slot_id, unsigned int ep_index, struct xhci_dequeue_state *deq_state); +void xhci_stop_endpoint_command_watchdog(unsigned long arg); /* xHCI roothub code */ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, |