aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrian King <brking@us.ibm.com>2006-06-27 11:10:43 -0500
committerJames Bottomley <jejb@mulgrave.il.steeleye.com>2006-06-28 12:39:56 -0400
commit309bd271211caa5a04a8137649cebd7691376351 (patch)
treed7939cbc549d21e0e8c08c83c9f7001d5d5f116d
parenta144c5ae0956fb262e6c82624c82b1110a451437 (diff)
[SCSI] scsi: Device scanning oops for offlined devices (resend)
If a device gets offlined as a result of the Inquiry sent during scanning, the following oops can occur. After the disk gets put into the SDEV_OFFLINE state, the error handler sends back the failed inquiry, which wakes the thread doing the scan. This starts a race between the scanning thread freeing the scsi device and the error handler calling scsi_run_host_queues to restart the host. Since the disk is in the SDEV_OFFLINE state, scsi_device_get will still work, which results in __scsi_iterate_devices getting a reference to the scsi disk when it shouldn't. The following execution thread causes the oops: CPU 0 (scan) CPU 1 (eh) --------------------------------------------------------- scsi_probe_and_add_lun .... scsi_eh_offline_sdevs scsi_eh_flush_done_q scsi_destroy_sdev scsi_device_dev_release scsi_restart_operations scsi_run_host_queues __scsi_iterate_devices get_device scsi_device_dev_release_usercontext scsi_run_queue <---OOPS---> The patch fixes this by changing the state of the sdev to SDEV_DEL before doing the final put_device, which should prevent the race from occurring. Original oops follows: Badness in kref_get at lib/kref.c:32 Call Trace: [C00000002F4476D0] [C00000000000EE20] .show_stack+0x68/0x1b0 (unreliable) [C00000002F447770] [C00000000037515C] .program_check_exception+0x1cc/0x5a8 [C00000002F447840] [C00000000000446C] program_check_common+0xec/0x100 Exception: 700 at .kref_get+0x10/0x28 LR = .kobject_get+0x20/0x3c [C00000002F447B30] [C00000002F447BC0] 0xc00000002f447bc0 (unreliable) [C00000002F447BB0] [C000000000254BDC] .get_device+0x20/0x3c [C00000002F447C30] [D000000000063188] .scsi_device_get+0x34/0xdc [scsi_mod] [C00000002F447CC0] [D0000000000633EC] .__scsi_iterate_devices+0x50/0xbc [scsi_mod] [C00000002F447D60] [D00000000006A910] .scsi_run_host_queues+0x34/0x5c [scsi_mod] [C00000002F447DF0] [D000000000069054] .scsi_error_handler+0xdb4/0xe44 [scsi_mod] [C00000002F447EE0] [C00000000007B4E0] .kthread+0x128/0x178 [C00000002F447F90] [C000000000025E84] .kernel_thread+0x4c/0x68 Unable to handle kernel paging request for <7>PCI: Enabling device: (0002:41:01.1), cmd 143 data at address 0x000001b8 Faulting instruction address: 0xd0000000000698e4 sym1: <1010-66> rev 0x1 at pci 0002:41:01.1 irq 216 sym1: No NVRAM, ID 7, Fast-80, LVD, parity checking sym1: SCSI BUS has been reset. scsi2 : sym-2.2.2 cpu 0x0: Vector: 300 (Data Access) at [c00000002f447a30] pc: d0000000000698e4: .scsi_run_queue+0x2c/0x218 [scsi_mod] lr: d00000000006a904: .scsi_run_host_queues+0x28/0x5c [scsi_mod] sp: c00000002f447cb0 msr: 9000000000009032 dar: 1b8 dsisr: 40000000 current = 0xc0000000045fecd0 paca = 0xc00000000048ee80 pid = 1123, comm = scsi_eh_1 enter ? for help [c00000002f447d60] d00000000006a904 .scsi_run_host_queues+0x28/0x5c [scsi_mod] [c00000002f447df0] d000000000069054 .scsi_error_handler+0xdb4/0xe44 [scsi_mod] [c00000002f447ee0] c00000000007b4e0 .kthread+0x128/0x178 [c00000002f447f90] c000000000025e84 .kernel_thread+0x4c/0x68 Signed-off-by: Brian King <brking@us.ibm.com> Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
-rw-r--r--drivers/scsi/scsi_lib.c3
-rw-r--r--drivers/scsi/scsi_scan.c1
2 files changed, 4 insertions, 0 deletions
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 68e0d7dbe6c..bf5191f6aaa 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2037,6 +2037,9 @@ scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
case SDEV_DEL:
switch (oldstate) {
+ case SDEV_CREATED:
+ case SDEV_RUNNING:
+ case SDEV_OFFLINE:
case SDEV_CANCEL:
break;
default:
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 0f7e6f94d66..4c31799e271 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -810,6 +810,7 @@ static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags)
static inline void scsi_destroy_sdev(struct scsi_device *sdev)
{
+ scsi_device_set_state(sdev, SDEV_DEL);
if (sdev->host->hostt->slave_destroy)
sdev->host->hostt->slave_destroy(sdev);
transport_destroy_device(&sdev->sdev_gendev);