From 29c976844d0bef07d97babc8db60fa6c46788133 Mon Sep 17 00:00:00 2001 From: "Salyzyn, Mark" Date: Tue, 12 Jun 2007 09:33:54 -0400 Subject: [SCSI] aacraid: add user initiated reset Add the ability for an application to issue a hardware reset to the adapter via sysfs. Typical uses include restarting the adapter after it has been flashed. Bumped revision number for the driver and added a feature to periodically check the adapter's health (check_interval), update the adapter's concept of time (update_interval) and block checking/resetting of the adapter (check_reset). Signed-off-by: Mark Salyzyn Signed-off-by: James Bottomley --- drivers/scsi/aacraid/aachba.c | 18 ++++ drivers/scsi/aacraid/aacraid.h | 25 ++++- drivers/scsi/aacraid/commsup.c | 210 +++++++++++++++++++++++++++++++++++++---- drivers/scsi/aacraid/linit.c | 46 ++++++++- drivers/scsi/aacraid/rx.c | 33 ++++--- 5 files changed, 294 insertions(+), 38 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index ef11c18d9ba..b3081b10d0a 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -169,6 +169,18 @@ int acbsize = -1; module_param(acbsize, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(acbsize, "Request a specific adapter control block (FIB) size. Valid values are 512, 2048, 4096 and 8192. Default is to use suggestion from Firmware."); +int update_interval = 30 * 60; +module_param(update_interval, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(update_interval, "Interval in seconds between time sync updates issued to adapter."); + +int check_interval = 24 * 60 * 60; +module_param(check_interval, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(check_interval, "Interval in seconds between adapter health checks."); + +int check_reset = 1; +module_param(check_reset, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(check_reset, "If adapter fails health check, reset the adapter."); + int expose_physicals = -1; module_param(expose_physicals, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(expose_physicals, "Expose physical components of the arrays. -1=protect 0=off, 1=on"); @@ -1197,6 +1209,12 @@ int aac_get_adapter_info(struct aac_dev* dev) (int)sizeof(dev->supplement_adapter_info.VpdInfo.Tsid), dev->supplement_adapter_info.VpdInfo.Tsid); } + if (!check_reset || + (dev->supplement_adapter_info.SupportedOptions2 & + le32_to_cpu(AAC_OPTION_IGNORE_RESET))) { + printk(KERN_INFO "%s%d: Reset Adapter Ignored\n", + dev->name, dev->id); + } } dev->nondasd_support = 0; diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index fdbedb17d03..8abe4f97b0a 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -12,8 +12,8 @@ *----------------------------------------------------------------------------*/ #ifndef AAC_DRIVER_BUILD -# define AAC_DRIVER_BUILD 2437 -# define AAC_DRIVER_BRANCH "-mh4" +# define AAC_DRIVER_BUILD 2447 +# define AAC_DRIVER_BRANCH "-ms" #endif #define MAXIMUM_NUM_CONTAINERS 32 @@ -860,10 +860,12 @@ struct aac_supplement_adapter_info __le32 FlashFirmwareBootBuild; u8 MfgPcbaSerialNo[12]; u8 MfgWWNName[8]; - __le32 MoreFeatureBits; + __le32 SupportedOptions2; __le32 ReservedGrowth[1]; }; #define AAC_FEATURE_FALCON 0x00000010 +#define AAC_OPTION_MU_RESET 0x00000001 +#define AAC_OPTION_IGNORE_RESET 0x00000002 #define AAC_SIS_VERSION_V3 3 #define AAC_SIS_SLOT_UNKNOWN 0xFF @@ -1260,6 +1262,19 @@ struct aac_synchronize_reply { u8 data[16]; }; +#define CT_PAUSE_IO 65 +#define CT_RELEASE_IO 66 +struct aac_pause { + __le32 command; /* VM_ContainerConfig */ + __le32 type; /* CT_PAUSE_IO */ + __le32 timeout; /* 10ms ticks */ + __le32 min; + __le32 noRescan; + __le32 parm3; + __le32 parm4; + __le32 count; /* sizeof(((struct aac_pause_reply *)NULL)->data) */ +}; + struct aac_srb { __le32 function; @@ -1816,6 +1831,7 @@ int aac_queue_get(struct aac_dev * dev, u32 * index, u32 qid, struct hw_fib * hw unsigned int aac_response_normal(struct aac_queue * q); unsigned int aac_command_normal(struct aac_queue * q); unsigned int aac_intr_normal(struct aac_dev * dev, u32 Index); +int aac_reset_adapter(struct aac_dev * dev, int forced); int aac_check_health(struct aac_dev * dev); int aac_command_thread(void *data); int aac_close_fib_context(struct aac_dev * dev, struct aac_fib_context *fibctx); @@ -1835,3 +1851,6 @@ extern int aif_timeout; extern int expose_physicals; extern int aac_reset_devices; extern int aac_commit; +extern int update_interval; +extern int check_interval; +extern int check_reset; diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 9aca57eda94..d510839c0bb 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1021,7 +1021,7 @@ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr) } -static int _aac_reset_adapter(struct aac_dev *aac) +static int _aac_reset_adapter(struct aac_dev *aac, int forced) { int index, quirks; int retval; @@ -1029,25 +1029,32 @@ static int _aac_reset_adapter(struct aac_dev *aac) struct scsi_device *dev; struct scsi_cmnd *command; struct scsi_cmnd *command_list; + int jafo = 0; /* * Assumptions: - * - host is locked. + * - host is locked, unless called by the aacraid thread. + * (a matter of convenience, due to legacy issues surrounding + * eh_host_adapter_reset). * - in_reset is asserted, so no new i/o is getting to the * card. - * - The card is dead. + * - The card is dead, or will be very shortly ;-/ so no new + * commands are completing in the interrupt service. */ host = aac->scsi_host_ptr; scsi_block_requests(host); aac_adapter_disable_int(aac); - spin_unlock_irq(host->host_lock); - kthread_stop(aac->thread); + if (aac->thread->pid != current->pid) { + spin_unlock_irq(host->host_lock); + kthread_stop(aac->thread); + jafo = 1; + } /* * If a positive health, means in a known DEAD PANIC * state and the adapter could be reset to `try again'. */ - retval = aac_adapter_restart(aac, aac_adapter_check_health(aac)); + retval = aac_adapter_restart(aac, forced ? 0 : aac_adapter_check_health(aac)); if (retval) goto out; @@ -1104,10 +1111,12 @@ static int _aac_reset_adapter(struct aac_dev *aac) if (aac_get_driver_ident(index)->quirks & AAC_QUIRK_31BIT) if ((retval = pci_set_dma_mask(aac->pdev, DMA_32BIT_MASK))) goto out; - aac->thread = kthread_run(aac_command_thread, aac, aac->name); - if (IS_ERR(aac->thread)) { - retval = PTR_ERR(aac->thread); - goto out; + if (jafo) { + aac->thread = kthread_run(aac_command_thread, aac, aac->name); + if (IS_ERR(aac->thread)) { + retval = PTR_ERR(aac->thread); + goto out; + } } (void)aac_get_adapter_info(aac); quirks = aac_get_driver_ident(index)->quirks; @@ -1150,7 +1159,98 @@ static int _aac_reset_adapter(struct aac_dev *aac) out: aac->in_reset = 0; scsi_unblock_requests(host); - spin_lock_irq(host->host_lock); + if (jafo) { + spin_lock_irq(host->host_lock); + } + return retval; +} + +int aac_reset_adapter(struct aac_dev * aac, int forced) +{ + unsigned long flagv = 0; + int retval; + struct Scsi_Host * host; + + if (spin_trylock_irqsave(&aac->fib_lock, flagv) == 0) + return -EBUSY; + + if (aac->in_reset) { + spin_unlock_irqrestore(&aac->fib_lock, flagv); + return -EBUSY; + } + aac->in_reset = 1; + spin_unlock_irqrestore(&aac->fib_lock, flagv); + + /* + * Wait for all commands to complete to this specific + * target (block maximum 60 seconds). Although not necessary, + * it does make us a good storage citizen. + */ + host = aac->scsi_host_ptr; + scsi_block_requests(host); + if (forced < 2) for (retval = 60; retval; --retval) { + struct scsi_device * dev; + struct scsi_cmnd * command; + int active = 0; + + __shost_for_each_device(dev, host) { + spin_lock_irqsave(&dev->list_lock, flagv); + list_for_each_entry(command, &dev->cmd_list, list) { + if (command->SCp.phase == AAC_OWNER_FIRMWARE) { + active++; + break; + } + } + spin_unlock_irqrestore(&dev->list_lock, flagv); + if (active) + break; + + } + /* + * We can exit If all the commands are complete + */ + if (active == 0) + break; + ssleep(1); + } + + /* Quiesce build, flush cache, write through mode */ + aac_send_shutdown(aac); + spin_lock_irqsave(host->host_lock, flagv); + retval = _aac_reset_adapter(aac, forced); + spin_unlock_irqrestore(host->host_lock, flagv); + + if (retval == -ENODEV) { + /* Unwind aac_send_shutdown() IOP_RESET unsupported/disabled */ + struct fib * fibctx = aac_fib_alloc(aac); + if (fibctx) { + struct aac_pause *cmd; + int status; + + aac_fib_init(fibctx); + + cmd = (struct aac_pause *) fib_data(fibctx); + + cmd->command = cpu_to_le32(VM_ContainerConfig); + cmd->type = cpu_to_le32(CT_PAUSE_IO); + cmd->timeout = cpu_to_le32(1); + cmd->min = cpu_to_le32(1); + cmd->noRescan = cpu_to_le32(1); + cmd->count = cpu_to_le32(0); + + status = aac_fib_send(ContainerCommand, + fibctx, + sizeof(struct aac_pause), + FsaNormal, + -2 /* Timeout silently */, 1, + NULL, NULL); + + if (status >= 0) + aac_fib_complete(fibctx); + aac_fib_free(fibctx); + } + } + return retval; } @@ -1270,10 +1370,15 @@ int aac_check_health(struct aac_dev * aac) printk(KERN_ERR "%s: Host adapter BLINK LED 0x%x\n", aac->name, BlinkLED); + if (!check_reset || (aac->supplement_adapter_info.SupportedOptions2 & + le32_to_cpu(AAC_OPTION_IGNORE_RESET))) + goto out; host = aac->scsi_host_ptr; - spin_lock_irqsave(host->host_lock, flagv); - BlinkLED = _aac_reset_adapter(aac); - spin_unlock_irqrestore(host->host_lock, flagv); + if (aac->thread->pid != current->pid) + spin_lock_irqsave(host->host_lock, flagv); + BlinkLED = _aac_reset_adapter(aac, 0); + if (aac->thread->pid != current->pid) + spin_unlock_irqrestore(host->host_lock, flagv); return BlinkLED; out: @@ -1300,6 +1405,9 @@ int aac_command_thread(void *data) struct aac_fib_context *fibctx; unsigned long flags; DECLARE_WAITQUEUE(wait, current); + unsigned long next_jiffies = jiffies + HZ; + unsigned long next_check_jiffies = next_jiffies; + long difference = HZ; /* * We can only have one thread per adapter for AIF's. @@ -1368,7 +1476,7 @@ int aac_command_thread(void *data) cpu_to_le32(AifCmdJobProgress))) { aac_handle_aif(dev, fib); } - + time_now = jiffies/HZ; /* @@ -1507,11 +1615,79 @@ int aac_command_thread(void *data) * There are no more AIF's */ spin_unlock_irqrestore(dev->queues->queue[HostNormCmdQueue].lock, flags); - schedule(); + + /* + * Background activity + */ + if ((time_before(next_check_jiffies,next_jiffies)) + && ((difference = next_check_jiffies - jiffies) <= 0)) { + next_check_jiffies = next_jiffies; + if (aac_check_health(dev) == 0) { + difference = ((long)(unsigned)check_interval) + * HZ; + next_check_jiffies = jiffies + difference; + } else if (!dev->queues) + break; + } + if (!time_before(next_check_jiffies,next_jiffies) + && ((difference = next_jiffies - jiffies) <= 0)) { + struct timeval now; + int ret; + + /* Don't even try to talk to adapter if its sick */ + ret = aac_check_health(dev); + if (!ret && !dev->queues) + break; + next_check_jiffies = jiffies + + ((long)(unsigned)check_interval) + * HZ; + do_gettimeofday(&now); + + /* Synchronize our watches */ + if (((1000000 - (1000000 / HZ)) > now.tv_usec) + && (now.tv_usec > (1000000 / HZ))) + difference = (((1000000 - now.tv_usec) * HZ) + + 500000) / 1000000; + else if (ret == 0) { + struct fib *fibptr; + + if ((fibptr = aac_fib_alloc(dev))) { + u32 * info; + + aac_fib_init(fibptr); + + info = (u32 *) fib_data(fibptr); + if (now.tv_usec > 500000) + ++now.tv_sec; + + *info = cpu_to_le32(now.tv_sec); + + (void)aac_fib_send(SendHostTime, + fibptr, + sizeof(*info), + FsaNormal, + 1, 1, + NULL, + NULL); + aac_fib_complete(fibptr); + aac_fib_free(fibptr); + } + difference = (long)(unsigned)update_interval*HZ; + } else { + /* retry shortly */ + difference = 10 * HZ; + } + next_jiffies = jiffies + difference; + if (time_before(next_check_jiffies,next_jiffies)) + difference = next_check_jiffies - jiffies; + } + if (difference <= 0) + difference = 1; + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(difference); if (kthread_should_stop()) break; - set_current_state(TASK_INTERRUPTIBLE); } if (dev->queues) remove_wait_queue(&dev->queues->queue[HostNormCmdQueue].cmdready, &wait); diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 6f92d077679..f8c2aaf72af 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -39,10 +39,8 @@ #include #include #include -#include #include #include -#include #include #include @@ -581,6 +579,14 @@ static int aac_eh_reset(struct scsi_cmnd* cmd) ssleep(1); } printk(KERN_ERR "%s: SCSI bus appears hung\n", AAC_DRIVERNAME); + /* + * This adapter needs a blind reset, only do so for Adapters that + * support a register, instead of a commanded, reset. + */ + if ((aac->supplement_adapter_info.SupportedOptions2 & + le32_to_cpu(AAC_OPTION_MU_RESET|AAC_OPTION_IGNORE_RESET)) == + le32_to_cpu(AAC_OPTION_MU_RESET)) + aac_reset_adapter(aac, 2); /* Bypass wait for command quiesce */ return SUCCESS; /* Cause an immediate retry of the command with a ten second delay after successful tur */ } @@ -788,6 +794,31 @@ static ssize_t aac_show_max_id(struct class_device *class_dev, char *buf) class_to_shost(class_dev)->max_id); } +static ssize_t aac_store_reset_adapter(struct class_device *class_dev, + const char *buf, size_t count) +{ + int retval = -EACCES; + + if (!capable(CAP_SYS_ADMIN)) + return retval; + retval = aac_reset_adapter((struct aac_dev*)class_to_shost(class_dev)->hostdata, buf[0] == '!'); + if (retval >= 0) + retval = count; + return retval; +} + +static ssize_t aac_show_reset_adapter(struct class_device *class_dev, + char *buf) +{ + struct aac_dev *dev = (struct aac_dev*)class_to_shost(class_dev)->hostdata; + int len, tmp; + + tmp = aac_adapter_check_health(dev); + if ((tmp == 0) && dev->in_reset) + tmp = -EBUSY; + len = snprintf(buf, PAGE_SIZE, "0x%x", tmp); + return len; +} static struct class_device_attribute aac_model = { .attr = { @@ -845,6 +876,14 @@ static struct class_device_attribute aac_max_id = { }, .show = aac_show_max_id, }; +static struct class_device_attribute aac_reset = { + .attr = { + .name = "reset_host", + .mode = S_IWUSR|S_IRUGO, + }, + .store = aac_store_reset_adapter, + .show = aac_show_reset_adapter, +}; static struct class_device_attribute *aac_attrs[] = { &aac_model, @@ -855,6 +894,7 @@ static struct class_device_attribute *aac_attrs[] = { &aac_serial_number, &aac_max_channel, &aac_max_id, + &aac_reset, NULL }; @@ -1118,7 +1158,7 @@ static int __init aac_init(void) { int error; - printk(KERN_INFO "Adaptec %s driver (%s)\n", + printk(KERN_INFO "Adaptec %s driver %s\n", AAC_DRIVERNAME, aac_driver_version); error = pci_register_driver(&aac_pci_driver); diff --git a/drivers/scsi/aacraid/rx.c b/drivers/scsi/aacraid/rx.c index ae978a373c5..ebc65b9fea9 100644 --- a/drivers/scsi/aacraid/rx.c +++ b/drivers/scsi/aacraid/rx.c @@ -464,21 +464,24 @@ static int aac_rx_restart_adapter(struct aac_dev *dev, int bled) { u32 var; - if (bled) - printk(KERN_ERR "%s%d: adapter kernel panic'd %x.\n", - dev->name, dev->id, bled); - else { - bled = aac_adapter_sync_cmd(dev, IOP_RESET_ALWAYS, - 0, 0, 0, 0, 0, 0, &var, NULL, NULL, NULL, NULL); - if (!bled && (var != 0x00000001)) - bled = -EINVAL; - } - if (bled && (bled != -ETIMEDOUT)) - bled = aac_adapter_sync_cmd(dev, IOP_RESET, - 0, 0, 0, 0, 0, 0, &var, NULL, NULL, NULL, NULL); + if (!(dev->supplement_adapter_info.SupportedOptions2 & + le32_to_cpu(AAC_OPTION_MU_RESET)) || (bled >= 0) || (bled == -2)) { + if (bled) + printk(KERN_ERR "%s%d: adapter kernel panic'd %x.\n", + dev->name, dev->id, bled); + else { + bled = aac_adapter_sync_cmd(dev, IOP_RESET_ALWAYS, + 0, 0, 0, 0, 0, 0, &var, NULL, NULL, NULL, NULL); + if (!bled && (var != 0x00000001)) + bled = -EINVAL; + } + if (bled && (bled != -ETIMEDOUT)) + bled = aac_adapter_sync_cmd(dev, IOP_RESET, + 0, 0, 0, 0, 0, 0, &var, NULL, NULL, NULL, NULL); - if (bled && (bled != -ETIMEDOUT)) - return -EINVAL; + if (bled && (bled != -ETIMEDOUT)) + return -EINVAL; + } if (bled || (var == 0x3803000F)) { /* USE_OTHER_METHOD */ rx_writel(dev, MUnit.reserved2, 3); msleep(5000); /* Delay 5 seconds */ @@ -596,7 +599,7 @@ int _aac_rx_init(struct aac_dev *dev) } msleep(1); } - if (restart) + if (restart && aac_commit) aac_commit = 1; /* * Fill in the common function dispatch table. -- cgit v1.2.3