From e0a33270ed0e8e00cbb882a33d21e1f92aac0ceb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 1 May 2006 12:15:45 -0700 Subject: [PATCH] md: Fixed refcounting/locking when attempting read error correction in raid10 We need to hold a reference to rdevs while reading and writing to attempt to correct read errors. This reference must be taken under an rcu lock. Signed-off-by: Neil Brown Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid10.c | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) (limited to 'drivers/md/raid10.c') diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ddc1dfc4d3d..1440935414e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1407,36 +1407,45 @@ static void raid10d(mddev_t *mddev) if (s > (PAGE_SIZE>>9)) s = PAGE_SIZE >> 9; + rcu_read_lock(); do { int d = r10_bio->devs[sl].devnum; - rdev = conf->mirrors[d].rdev; + rdev = rcu_dereference(conf->mirrors[d].rdev); if (rdev && - test_bit(In_sync, &rdev->flags) && - sync_page_io(rdev->bdev, - r10_bio->devs[sl].addr + - sect + rdev->data_offset, - s<<9, - conf->tmppage, READ)) - success = 1; - else { - sl++; - if (sl == conf->copies) - sl = 0; + test_bit(In_sync, &rdev->flags)) { + atomic_inc(&rdev->nr_pending); + rcu_read_unlock(); + success = sync_page_io(rdev->bdev, + r10_bio->devs[sl].addr + + sect + rdev->data_offset, + s<<9, + conf->tmppage, READ); + rdev_dec_pending(rdev, mddev); + rcu_read_lock(); + if (success) + break; } + sl++; + if (sl == conf->copies) + sl = 0; } while (!success && sl != r10_bio->read_slot); + rcu_read_unlock(); if (success) { int start = sl; /* write it back and re-read */ + rcu_read_lock(); while (sl != r10_bio->read_slot) { int d; if (sl==0) sl = conf->copies; sl--; d = r10_bio->devs[sl].devnum; - rdev = conf->mirrors[d].rdev; + rdev = rcu_dereference(conf->mirrors[d].rdev); if (rdev && test_bit(In_sync, &rdev->flags)) { + atomic_inc(&rdev->nr_pending); + rcu_read_unlock(); atomic_add(s, &rdev->corrected_errors); if (sync_page_io(rdev->bdev, r10_bio->devs[sl].addr + @@ -1444,6 +1453,8 @@ static void raid10d(mddev_t *mddev) s<<9, conf->tmppage, WRITE) == 0) /* Well, this device is dead */ md_error(mddev, rdev); + rdev_dec_pending(rdev, mddev); + rcu_read_lock(); } } sl = start; @@ -1453,17 +1464,22 @@ static void raid10d(mddev_t *mddev) sl = conf->copies; sl--; d = r10_bio->devs[sl].devnum; - rdev = conf->mirrors[d].rdev; + rdev = rcu_dereference(conf->mirrors[d].rdev); if (rdev && test_bit(In_sync, &rdev->flags)) { + atomic_inc(&rdev->nr_pending); + rcu_read_unlock(); if (sync_page_io(rdev->bdev, r10_bio->devs[sl].addr + sect + rdev->data_offset, s<<9, conf->tmppage, READ) == 0) /* Well, this device is dead */ md_error(mddev, rdev); + rdev_dec_pending(rdev, mddev); + rcu_read_lock(); } } + rcu_read_unlock(); } else { /* Cannot read from anywhere -- bye bye array */ md_error(mddev, conf->mirrors[r10_bio->devs[r10_bio->read_slot].devnum].rdev); -- cgit v1.2.3