From 8c2e870a625bd336b2e7a65a97c1836acef07322 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Sat, 28 Jun 2008 08:30:52 +1000 Subject: Ensure interrupted recovery completed properly (v1 metadata plus bitmap) If, while assembling an array, we find a device which is not fully in-sync with the array, it is important to set the "fullsync" flags. This is an exact analog to the setting of this flag in hot_add_disk methods. Currently, only v1.x metadata supports having devices in an array which are not fully in-sync (it keep track of how in sync they are). The 'fullsync' flag only makes a difference when a write-intent bitmap is being used. In this case it tells recovery to ignore the bitmap and recovery all blocks. This fix is already in place for raid1, but not raid5/6 or raid10. So without this fix, a raid1 ir raid4/5/6 array with version 1.x metadata and a write intent bitmaps, that is stopped in the middle of a recovery, will appear to complete the recovery instantly after it is reassembled, but the recovery will not be correct. If you might have an array like that, issueing echo repair > /sys/block/mdXX/md/sync_action will make sure recovery completes properly. Cc: Signed-off-by: Neil Brown --- drivers/md/raid5.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index c37e256b117..475fba4d371 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4305,7 +4305,9 @@ static int run(mddev_t *mddev) " disk %d\n", bdevname(rdev->bdev,b), raid_disk); working_disks++; - } + } else + /* Cannot rely on bitmap to complete recovery */ + conf->fullsync = 1; } /* -- cgit v1.2.3 From efe311431869b40d67911820a309f9a1a41306f3 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Sat, 28 Jun 2008 08:31:14 +1000 Subject: Don't acknowlege that stripe-expand is complete until it really is. We shouldn't acknowledge that a stripe has been expanded (When reshaping a raid5 by adding a device) until the moved data has actually been written out. However we are currently acknowledging (by calling md_done_sync) when the POST_XOR is complete and before the write. So track in s.locked whether there are pending writes, and don't call md_done_sync yet if there are. Note: we all set R5_LOCKED on devices which are are about to read from. This probably isn't technically necessary, but is usually done when writing a block, and justifies the use of s.locked here. This bug can lead to a crash if an array is stopped while an reshape is in progress. Cc: Signed-off-by: Neil Brown --- drivers/md/raid5.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 475fba4d371..54c8ee28fcc 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2898,6 +2898,8 @@ static void handle_stripe5(struct stripe_head *sh) for (i = conf->raid_disks; i--; ) { set_bit(R5_Wantwrite, &sh->dev[i].flags); + set_bit(R5_LOCKED, &dev->flags); + s.locked++; if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) sh->ops.count++; } @@ -2911,6 +2913,7 @@ static void handle_stripe5(struct stripe_head *sh) conf->raid_disks); s.locked += handle_write_operations5(sh, 1, 1); } else if (s.expanded && + s.locked == 0 && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { clear_bit(STRIPE_EXPAND_READY, &sh->state); atomic_dec(&conf->reshape_stripes); -- cgit v1.2.3