aboutsummaryrefslogtreecommitdiff
path: root/drivers/ata/libata-eh.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2007-02-26 14:58:03 -0800
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-02-26 14:58:03 -0800
commit6f366c1c751454df3d1c0f25f15ee0164821112a (patch)
treea609adb9ead12ec0df5dcadaa11133f92a442466 /drivers/ata/libata-eh.c
parent221dee285ee38099b82437531bcae9fa9cb64cc4 (diff)
parentcb48cab7f363014e0a5dc21f7b4892c15d626d41 (diff)
Merge branch 'upstream-linus' of master.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev
* 'upstream-linus' of master.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev: (51 commits) [libata] bump versions [libata] Trim trailing whitespace. [libata] sata_mv: Fix 50xx irq mask [libata] sata_mv: don't touch reserved bits in EDMA config register libata: Use new id_to_dma_mode function to tidy reporting in more drivers (minimally tested) pata_pcmcia: Fix oops in 2.6.21-rc1 Add id_to_dma_mode function for printing DMA modes sata_promise: simplify port setup sata_promise: fix 20619 new EH merge error [libata] ACPI: remove needless ->qc_issue hook existence test sata_vsc: refactor vsc_sata_interrupt and hook up error handling sata_sil: ignore and clear spurious IRQs while executing commands by polling sata_mv: fix pci_enable_msi() error handling pata_amd: fix an obvious bug in cable detection [libata] ata_piix: remove duplicate PCI IDs sata_nv: complain on spurious completion notifiers libata: test major version in ata_id_is_sata() sata_nv: kill old private BMDMA helper functions libata: fix remaining ap->id ahci: consider SDB FIS containing spurious NCQ completions HSM violation (regenerated) ...
Diffstat (limited to 'drivers/ata/libata-eh.c')
-rw-r--r--drivers/ata/libata-eh.c220
1 files changed, 144 insertions, 76 deletions
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 52c85af7fe9..cad0d6db6df 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -44,6 +44,12 @@
#include "libata.h"
+enum {
+ ATA_EH_SPDN_NCQ_OFF = (1 << 0),
+ ATA_EH_SPDN_SPEED_DOWN = (1 << 1),
+ ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2),
+};
+
static void __ata_port_freeze(struct ata_port *ap);
static void ata_eh_finish(struct ata_port *ap);
static void ata_eh_handle_port_suspend(struct ata_port *ap);
@@ -65,12 +71,9 @@ static void ata_ering_record(struct ata_ering *ering, int is_io,
ent->timestamp = get_jiffies_64();
}
-static struct ata_ering_entry * ata_ering_top(struct ata_ering *ering)
+static void ata_ering_clear(struct ata_ering *ering)
{
- struct ata_ering_entry *ent = &ering->ring[ering->cursor];
- if (!ent->err_mask)
- return NULL;
- return ent;
+ memset(ering, 0, sizeof(*ering));
}
static int ata_ering_map(struct ata_ering *ering,
@@ -585,7 +588,7 @@ static void __ata_port_freeze(struct ata_port *ap)
ap->pflags |= ATA_PFLAG_FROZEN;
- DPRINTK("ata%u port frozen\n", ap->id);
+ DPRINTK("ata%u port frozen\n", ap->print_id);
}
/**
@@ -658,7 +661,7 @@ void ata_eh_thaw_port(struct ata_port *ap)
spin_unlock_irqrestore(ap->lock, flags);
- DPRINTK("ata%u port thawed\n", ap->id);
+ DPRINTK("ata%u port thawed\n", ap->print_id);
}
static void ata_eh_scsidone(struct scsi_cmnd *scmd)
@@ -1159,87 +1162,99 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
return action;
}
-static int ata_eh_categorize_ering_entry(struct ata_ering_entry *ent)
+static int ata_eh_categorize_error(int is_io, unsigned int err_mask)
{
- if (ent->err_mask & (AC_ERR_ATA_BUS | AC_ERR_TIMEOUT))
+ if (err_mask & AC_ERR_ATA_BUS)
return 1;
- if (ent->is_io) {
- if (ent->err_mask & AC_ERR_HSM)
- return 1;
- if ((ent->err_mask &
- (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
+ if (err_mask & AC_ERR_TIMEOUT)
+ return 2;
+
+ if (is_io) {
+ if (err_mask & AC_ERR_HSM)
return 2;
+ if ((err_mask &
+ (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
+ return 3;
}
return 0;
}
-struct speed_down_needed_arg {
+struct speed_down_verdict_arg {
u64 since;
- int nr_errors[3];
+ int nr_errors[4];
};
-static int speed_down_needed_cb(struct ata_ering_entry *ent, void *void_arg)
+static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
{
- struct speed_down_needed_arg *arg = void_arg;
+ struct speed_down_verdict_arg *arg = void_arg;
+ int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask);
if (ent->timestamp < arg->since)
return -1;
- arg->nr_errors[ata_eh_categorize_ering_entry(ent)]++;
+ arg->nr_errors[cat]++;
return 0;
}
/**
- * ata_eh_speed_down_needed - Determine wheter speed down is necessary
+ * ata_eh_speed_down_verdict - Determine speed down verdict
* @dev: Device of interest
*
* This function examines error ring of @dev and determines
- * whether speed down is necessary. Speed down is necessary if
- * there have been more than 3 of Cat-1 errors or 10 of Cat-2
- * errors during last 15 minutes.
+ * whether NCQ needs to be turned off, transfer speed should be
+ * stepped down, or falling back to PIO is necessary.
*
- * Cat-1 errors are ATA_BUS, TIMEOUT for any command and HSM
- * violation for known supported commands.
+ * Cat-1 is ATA_BUS error for any command.
*
- * Cat-2 errors are unclassified DEV error for known supported
+ * Cat-2 is TIMEOUT for any command or HSM violation for known
+ * supported commands.
+ *
+ * Cat-3 is is unclassified DEV error for known supported
* command.
*
+ * NCQ needs to be turned off if there have been more than 3
+ * Cat-2 + Cat-3 errors during last 10 minutes.
+ *
+ * Speed down is necessary if there have been more than 3 Cat-1 +
+ * Cat-2 errors or 10 Cat-3 errors during last 10 minutes.
+ *
+ * Falling back to PIO mode is necessary if there have been more
+ * than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes.
+ *
* LOCKING:
* Inherited from caller.
*
* RETURNS:
- * 1 if speed down is necessary, 0 otherwise
+ * OR of ATA_EH_SPDN_* flags.
*/
-static int ata_eh_speed_down_needed(struct ata_device *dev)
+static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
{
- const u64 interval = 15LLU * 60 * HZ;
- static const int err_limits[3] = { -1, 3, 10 };
- struct speed_down_needed_arg arg;
- struct ata_ering_entry *ent;
- int err_cat;
- u64 j64;
+ const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
+ u64 j64 = get_jiffies_64();
+ struct speed_down_verdict_arg arg;
+ unsigned int verdict = 0;
- ent = ata_ering_top(&dev->ering);
- if (!ent)
- return 0;
+ /* scan past 10 mins of error history */
+ memset(&arg, 0, sizeof(arg));
+ arg.since = j64 - min(j64, j10mins);
+ ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
- err_cat = ata_eh_categorize_ering_entry(ent);
- if (err_cat == 0)
- return 0;
+ if (arg.nr_errors[2] + arg.nr_errors[3] > 3)
+ verdict |= ATA_EH_SPDN_NCQ_OFF;
+ if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10)
+ verdict |= ATA_EH_SPDN_SPEED_DOWN;
+ /* scan past 3 mins of error history */
memset(&arg, 0, sizeof(arg));
+ arg.since = j64 - min(j64, j5mins);
+ ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
- j64 = get_jiffies_64();
- if (j64 >= interval)
- arg.since = j64 - interval;
- else
- arg.since = 0;
-
- ata_ering_map(&dev->ering, speed_down_needed_cb, &arg);
+ if (arg.nr_errors[1] + arg.nr_errors[2] + arg.nr_errors[3] > 10)
+ verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;
- return arg.nr_errors[err_cat] > err_limits[err_cat];
+ return verdict;
}
/**
@@ -1257,31 +1272,80 @@ static int ata_eh_speed_down_needed(struct ata_device *dev)
* Kernel thread context (may sleep).
*
* RETURNS:
- * 0 on success, -errno otherwise
+ * Determined recovery action.
*/
-static int ata_eh_speed_down(struct ata_device *dev, int is_io,
- unsigned int err_mask)
+static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io,
+ unsigned int err_mask)
{
- if (!err_mask)
+ unsigned int verdict;
+ unsigned int action = 0;
+
+ /* don't bother if Cat-0 error */
+ if (ata_eh_categorize_error(is_io, err_mask) == 0)
return 0;
/* record error and determine whether speed down is necessary */
ata_ering_record(&dev->ering, is_io, err_mask);
+ verdict = ata_eh_speed_down_verdict(dev);
- if (!ata_eh_speed_down_needed(dev))
- return 0;
+ /* turn off NCQ? */
+ if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
+ (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
+ ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
+ dev->flags |= ATA_DFLAG_NCQ_OFF;
+ ata_dev_printk(dev, KERN_WARNING,
+ "NCQ disabled due to excessive errors\n");
+ goto done;
+ }
- /* speed down SATA link speed if possible */
- if (sata_down_spd_limit(dev->ap) == 0)
- return ATA_EH_HARDRESET;
+ /* speed down? */
+ if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
+ /* speed down SATA link speed if possible */
+ if (sata_down_spd_limit(dev->ap) == 0) {
+ action |= ATA_EH_HARDRESET;
+ goto done;
+ }
- /* lower transfer mode */
- if (ata_down_xfermask_limit(dev, 0) == 0)
- return ATA_EH_SOFTRESET;
+ /* lower transfer mode */
+ if (dev->spdn_cnt < 2) {
+ static const int dma_dnxfer_sel[] =
+ { ATA_DNXFER_DMA, ATA_DNXFER_40C };
+ static const int pio_dnxfer_sel[] =
+ { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
+ int sel;
+
+ if (dev->xfer_shift != ATA_SHIFT_PIO)
+ sel = dma_dnxfer_sel[dev->spdn_cnt];
+ else
+ sel = pio_dnxfer_sel[dev->spdn_cnt];
+
+ dev->spdn_cnt++;
+
+ if (ata_down_xfermask_limit(dev, sel) == 0) {
+ action |= ATA_EH_SOFTRESET;
+ goto done;
+ }
+ }
+ }
+
+ /* Fall back to PIO? Slowing down to PIO is meaningless for
+ * SATA. Consider it only for PATA.
+ */
+ if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
+ (dev->ap->cbl != ATA_CBL_SATA) &&
+ (dev->xfer_shift != ATA_SHIFT_PIO)) {
+ if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
+ dev->spdn_cnt = 0;
+ action |= ATA_EH_SOFTRESET;
+ goto done;
+ }
+ }
- ata_dev_printk(dev, KERN_ERR,
- "speed down requested but no transfer mode left\n");
return 0;
+ done:
+ /* device has been slowed down, blow error history */
+ ata_ering_clear(&dev->ering);
+ return action;
}
/**
@@ -1964,7 +2028,7 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
{
struct ata_eh_context *ehc = &ap->eh_context;
struct ata_device *dev;
- int down_xfermask, i, rc;
+ int i, rc;
DPRINTK("ENTER\n");
@@ -1993,7 +2057,6 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
}
retry:
- down_xfermask = 0;
rc = 0;
/* if UNLOADING, finish immediately */
@@ -2038,10 +2101,8 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
/* configure transfer mode if necessary */
if (ehc->i.flags & ATA_EHI_SETMODE) {
rc = ata_set_mode(ap, &dev);
- if (rc) {
- down_xfermask = 1;
+ if (rc)
goto dev_fail;
- }
ehc->i.flags &= ~ATA_EHI_SETMODE;
}
@@ -2053,20 +2114,27 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
goto out;
dev_fail:
+ ehc->tries[dev->devno]--;
+
switch (rc) {
- case -ENODEV:
- /* device missing, schedule probing */
- ehc->i.probe_mask |= (1 << dev->devno);
case -EINVAL:
+ /* eeek, something went very wrong, give up */
ehc->tries[dev->devno] = 0;
break;
+
+ case -ENODEV:
+ /* device missing or wrong IDENTIFY data, schedule probing */
+ ehc->i.probe_mask |= (1 << dev->devno);
+ /* give it just one more chance */
+ ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1);
case -EIO:
- sata_down_spd_limit(ap);
- default:
- ehc->tries[dev->devno]--;
- if (down_xfermask &&
- ata_down_xfermask_limit(dev, ehc->tries[dev->devno] == 1))
- ehc->tries[dev->devno] = 0;
+ if (ehc->tries[dev->devno] == 1) {
+ /* This is the last chance, better to slow
+ * down than lose it.
+ */
+ sata_down_spd_limit(ap);
+ ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
+ }
}
if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) {