From 538452700d95480c16e7aa6b10ff77cd937d33f4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:05 +1100 Subject: md: fix bitmap-on-external-file bug. commit a2ed9615e3222645007fc19991aedf30eed3ecfd fixed a bug with 'internal' bitmaps, but in the process broke 'in a file' bitmaps. So they are broken in 2.6.28 This fixes it, and needs to go in 2.6.28-stable. Signed-off-by: NeilBrown Cc: stable@kernel.org --- drivers/md/bitmap.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index ab7c8e4a61f..666b7ba47ec 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -964,9 +964,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) */ page = bitmap->sb_page; offset = sizeof(bitmap_super_t); - read_sb_page(bitmap->mddev, bitmap->offset, - page, - index, count); + if (!file) + read_sb_page(bitmap->mddev, + bitmap->offset, + page, + index, count); } else if (file) { page = read_page(file, index, bitmap, count); offset = 0; -- cgit v1.2.3 From 0c3573f19d135d718264e38c46597295bd6154b7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:05 +1100 Subject: md: use sysfs_notify_dirent to notify changes to md/sync_action. There is no compelling need for this, but sysfs_notify_dirent is a nicer interface and the change is good for consistency. Signed-off-by: NeilBrown --- drivers/md/md.c | 20 +++++++++++++------- include/linux/raid/md_k.h | 1 + 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 1b1d32694f6..ee759b193e9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3046,7 +3046,7 @@ action_store(mddev_t *mddev, const char *page, size_t len) } set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); - sysfs_notify(&mddev->kobj, NULL, "sync_action"); + sysfs_notify_dirent(mddev->sysfs_action); return len; } @@ -3684,6 +3684,7 @@ static int do_md_run(mddev_t * mddev) printk(KERN_WARNING "md: cannot register extra attributes for %s\n", mdname(mddev)); + mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action"); } else if (mddev->ro == 2) /* auto-readonly not meaningful */ mddev->ro = 0; @@ -3754,7 +3755,8 @@ static int do_md_run(mddev_t * mddev) mddev->changed = 1; md_new_event(mddev); sysfs_notify_dirent(mddev->sysfs_state); - sysfs_notify(&mddev->kobj, NULL, "sync_action"); + if (mddev->sysfs_action) + sysfs_notify_dirent(mddev->sysfs_action); sysfs_notify(&mddev->kobj, NULL, "degraded"); kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); return 0; @@ -3854,9 +3856,12 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) mddev->queue->merge_bvec_fn = NULL; mddev->queue->unplug_fn = NULL; mddev->queue->backing_dev_info.congested_fn = NULL; - if (mddev->pers->sync_request) + if (mddev->pers->sync_request) { sysfs_remove_group(&mddev->kobj, &md_redundancy_group); - + if (mddev->sysfs_action) + sysfs_put(mddev->sysfs_action); + mddev->sysfs_action = NULL; + } module_put(mddev->pers->owner); mddev->pers = NULL; /* tell userspace to handle 'inactive' */ @@ -6155,7 +6160,7 @@ void md_check_recovery(mddev_t *mddev) mddev->recovery = 0; /* flag recovery needed just to double check */ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - sysfs_notify(&mddev->kobj, NULL, "sync_action"); + sysfs_notify_dirent(mddev->sysfs_action); md_new_event(mddev); goto unlock; } @@ -6216,7 +6221,7 @@ void md_check_recovery(mddev_t *mddev) mddev->recovery = 0; } else md_wakeup_thread(mddev->sync_thread); - sysfs_notify(&mddev->kobj, NULL, "sync_action"); + sysfs_notify_dirent(mddev->sysfs_action); md_new_event(mddev); } unlock: @@ -6224,7 +6229,8 @@ void md_check_recovery(mddev_t *mddev) clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); if (test_and_clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) - sysfs_notify(&mddev->kobj, NULL, "sync_action"); + if (mddev->sysfs_action) + sysfs_notify_dirent(mddev->sysfs_action); } mddev_unlock(mddev); } diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 8fc909ef678..663803eaf0d 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -244,6 +244,7 @@ struct mddev_s struct sysfs_dirent *sysfs_state; /* handle for 'array_state' * file in sysfs. */ + struct sysfs_dirent *sysfs_action; /* handle for 'sync_action' */ spinlock_t write_lock; wait_queue_head_t sb_wait; /* for waiting on superblock updates */ -- cgit v1.2.3 From 1b7fdf8ff7c0e3fba9c679def4e98d5701d2949e Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:06 +1100 Subject: md: raid0_make_request(): Replace chunksize_bits by chunksect_bits. As ffz(~(2 * x)) = ffz(~x) + 1, we have chunksect_bits = chunksize_bits + 1. Fixup all users accordingly. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 8ac6488ad0d..62a193c2a25 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -387,7 +387,7 @@ static int raid0_stop (mddev_t *mddev) static int raid0_make_request (struct request_queue *q, struct bio *bio) { mddev_t *mddev = q->queuedata; - unsigned int sect_in_chunk, chunksize_bits, chunk_size, chunk_sects; + unsigned int sect_in_chunk, chunksect_bits, chunk_size, chunk_sects; raid0_conf_t *conf = mddev_to_conf(mddev); struct strip_zone *zone; mdk_rdev_t *tmp_dev; @@ -409,7 +409,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) chunk_size = mddev->chunk_size >> 10; chunk_sects = mddev->chunk_size >> 9; - chunksize_bits = ffz(~chunk_size); + chunksect_bits = ffz(~chunk_sects); block = bio->bi_sector >> 1; @@ -446,15 +446,15 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) { - sector_t x = (block - zone->zone_offset) >> chunksize_bits; + sector_t x = (block - zone->zone_offset) >> (chunksect_bits - 1); sector_div(x, zone->nb_dev); chunk = x; - x = block >> chunksize_bits; + x = block >> (chunksect_bits - 1); tmp_dev = zone->dev[sector_div(x, zone->nb_dev)]; } - rsect = (((chunk << chunksize_bits) + zone->dev_offset)<<1) + rsect = (((chunk << (chunksect_bits - 1)) + zone->dev_offset)<<1) + sect_in_chunk; bio->bi_bdev = tmp_dev->bdev; -- cgit v1.2.3 From a471200595b24fb1907ad12107a6a66db02c63f2 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:06 +1100 Subject: md: raid0_make_request(): Remove local variable chunk_size. We might as well use chunk_sects instead. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 62a193c2a25..d8438494e9d 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -387,7 +387,7 @@ static int raid0_stop (mddev_t *mddev) static int raid0_make_request (struct request_queue *q, struct bio *bio) { mddev_t *mddev = q->queuedata; - unsigned int sect_in_chunk, chunksect_bits, chunk_size, chunk_sects; + unsigned int sect_in_chunk, chunksect_bits, chunk_sects; raid0_conf_t *conf = mddev_to_conf(mddev); struct strip_zone *zone; mdk_rdev_t *tmp_dev; @@ -407,7 +407,6 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) bio_sectors(bio)); part_stat_unlock(); - chunk_size = mddev->chunk_size >> 10; chunk_sects = mddev->chunk_size >> 9; chunksect_bits = ffz(~chunk_sects); block = bio->bi_sector >> 1; @@ -442,7 +441,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) while (block >= (zone->zone_offset + zone->size)) zone++; - sect_in_chunk = bio->bi_sector & ((chunk_size<<1) -1); + sect_in_chunk = bio->bi_sector & (chunk_sects - 1); { @@ -467,7 +466,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) bad_map: printk("raid0_make_request bug: can't convert block across chunks" - " or bigger than %dk %llu %d\n", chunk_size, + " or bigger than %dk %llu %d\n", chunk_sects / 2, (unsigned long long)bio->bi_sector, bio->bi_size >> 10); bio_io_error(bio); -- cgit v1.2.3 From e0f06868341700c5c1964a04f6c5b51d0a2d5bca Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:06 +1100 Subject: md: raid0_make_request(): Replace local variable block by sector. This change already simplifies the code a bit. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index d8438494e9d..fd65d8806c0 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -392,7 +392,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) struct strip_zone *zone; mdk_rdev_t *tmp_dev; sector_t chunk; - sector_t block, rsect; + sector_t sector, rsect; const int rw = bio_data_dir(bio); int cpu; @@ -409,8 +409,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) chunk_sects = mddev->chunk_size >> 9; chunksect_bits = ffz(~chunk_sects); - block = bio->bi_sector >> 1; - + sector = bio->bi_sector; if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) { struct bio_pair *bp; @@ -433,24 +432,24 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) { - sector_t x = block >> conf->preshift; + sector_t x = sector >> (conf->preshift + 1); sector_div(x, (u32)conf->hash_spacing); zone = conf->hash_table[x]; } - while (block >= (zone->zone_offset + zone->size)) + while (sector / 2 >= (zone->zone_offset + zone->size)) zone++; sect_in_chunk = bio->bi_sector & (chunk_sects - 1); { - sector_t x = (block - zone->zone_offset) >> (chunksect_bits - 1); + sector_t x = (sector - zone->zone_offset * 2) >> chunksect_bits; sector_div(x, zone->nb_dev); chunk = x; - x = block >> (chunksect_bits - 1); + x = sector >> chunksect_bits; tmp_dev = zone->dev[sector_div(x, zone->nb_dev)]; } rsect = (((chunk << (chunksect_bits - 1)) + zone->dev_offset)<<1) -- cgit v1.2.3 From 019c4e2f3e02aac4b44003913b54ca4b332e4371 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:06 +1100 Subject: md: raid0: Represent device offset in sectors. Rename zone->dev_offset to zone->dev_start to make sure all users have been converted. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 9 ++++----- include/linux/raid/raid0.h | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index fd65d8806c0..b860536dc89 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -167,7 +167,7 @@ static int create_strip_zones (mddev_t *mddev) zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks; printk("raid0: zone %d\n", i); - zone->dev_offset = current_offset; + zone->dev_start = current_offset * 2; smallest = NULL; c = 0; @@ -452,8 +452,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) x = sector >> chunksect_bits; tmp_dev = zone->dev[sector_div(x, zone->nb_dev)]; } - rsect = (((chunk << (chunksect_bits - 1)) + zone->dev_offset)<<1) - + sect_in_chunk; + rsect = (chunk << chunksect_bits) + zone->dev_start + sect_in_chunk; bio->bi_bdev = tmp_dev->bdev; bio->bi_sector = rsect + tmp_dev->data_offset; @@ -490,9 +489,9 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev) seq_printf(seq, "%s/", bdevname( conf->strip_zone[j].dev[k]->bdev,b)); - seq_printf(seq, "] zo=%d do=%d s=%d\n", + seq_printf(seq, "] zo=%d ds=%d s=%d\n", conf->strip_zone[j].zone_offset, - conf->strip_zone[j].dev_offset, + conf->strip_zone[j].dev_start, conf->strip_zone[j].size); } #endif diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h index 1b2dda035f8..61c3d29dc15 100644 --- a/include/linux/raid/raid0.h +++ b/include/linux/raid/raid0.h @@ -6,7 +6,7 @@ struct strip_zone { sector_t zone_offset; /* Zone offset in md_dev */ - sector_t dev_offset; /* Zone offset in real dev */ + sector_t dev_start; /* Zone offset in real dev (in sectors) */ sector_t size; /* Zone size */ int nb_dev; /* # of devices attached to the zone */ mdk_rdev_t **dev; /* Devices attached to the zone */ -- cgit v1.2.3 From 6199d3db0fc34f8ada895879d04a353a6ae632bc Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:07 +1100 Subject: md: raid0: Represent zone->zone_offset in sectors. For the same reason as in the previous patch, rename it from zone_offset to zone_start. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 12 ++++++------ include/linux/raid/raid0.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index b860536dc89..6e12a358f36 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -155,7 +155,7 @@ static int create_strip_zones (mddev_t *mddev) } zone->nb_dev = cnt; zone->size = smallest->size * cnt; - zone->zone_offset = 0; + zone->zone_start = 0; current_offset = smallest->size; curr_zone_offset = zone->size; @@ -194,7 +194,7 @@ static int create_strip_zones (mddev_t *mddev) printk("raid0: zone->nb_dev: %d, size: %llu\n", zone->nb_dev, (unsigned long long)zone->size); - zone->zone_offset = curr_zone_offset; + zone->zone_start = curr_zone_offset * 2; curr_zone_offset += zone->size; current_offset = smallest->size; @@ -437,14 +437,14 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) zone = conf->hash_table[x]; } - while (sector / 2 >= (zone->zone_offset + zone->size)) + while (sector / 2 >= (zone->zone_start / 2 + zone->size)) zone++; sect_in_chunk = bio->bi_sector & (chunk_sects - 1); { - sector_t x = (sector - zone->zone_offset * 2) >> chunksect_bits; + sector_t x = (sector - zone->zone_start) >> chunksect_bits; sector_div(x, zone->nb_dev); chunk = x; @@ -489,8 +489,8 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev) seq_printf(seq, "%s/", bdevname( conf->strip_zone[j].dev[k]->bdev,b)); - seq_printf(seq, "] zo=%d ds=%d s=%d\n", - conf->strip_zone[j].zone_offset, + seq_printf(seq, "] zs=%d ds=%d s=%d\n", + conf->strip_zone[j].zone_start, conf->strip_zone[j].dev_start, conf->strip_zone[j].size); } diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h index 61c3d29dc15..eaf4f6ac55f 100644 --- a/include/linux/raid/raid0.h +++ b/include/linux/raid/raid0.h @@ -5,7 +5,7 @@ struct strip_zone { - sector_t zone_offset; /* Zone offset in md_dev */ + sector_t zone_start; /* Zone offset in md_dev (in sectors) */ sector_t dev_start; /* Zone offset in real dev (in sectors) */ sector_t size; /* Zone size */ int nb_dev; /* # of devices attached to the zone */ -- cgit v1.2.3 From 6b8796cc3decb43c7c67a13a0699b6a21b754c78 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:07 +1100 Subject: md: raid0 create_strip_zones(): Make two local variables sector-based. current_offset and curr_zone_offset stored the corresponding offsets as 1K quantities. Rename them to current_start and curr_zone_start to match the naming of struct strip_zone and store the offsets as sector counts. Also, add KERN_INFO to the printk() affected by this change to make checkpatch happy. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 6e12a358f36..9074ea43d52 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -53,7 +53,7 @@ static int raid0_congested(void *data, int bits) static int create_strip_zones (mddev_t *mddev) { int i, c, j; - sector_t current_offset, curr_zone_offset; + sector_t current_start, curr_zone_start; sector_t min_spacing; raid0_conf_t *conf = mddev_to_conf(mddev); mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; @@ -157,8 +157,8 @@ static int create_strip_zones (mddev_t *mddev) zone->size = smallest->size * cnt; zone->zone_start = 0; - current_offset = smallest->size; - curr_zone_offset = zone->size; + current_start = smallest->size * 2; + curr_zone_start = zone->size * 2; /* now do the other zones */ for (i = 1; i < conf->nr_strip_zones; i++) @@ -167,7 +167,7 @@ static int create_strip_zones (mddev_t *mddev) zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks; printk("raid0: zone %d\n", i); - zone->dev_start = current_offset * 2; + zone->dev_start = current_start; smallest = NULL; c = 0; @@ -175,8 +175,7 @@ static int create_strip_zones (mddev_t *mddev) char b[BDEVNAME_SIZE]; rdev = conf->strip_zone[0].dev[j]; printk("raid0: checking %s ...", bdevname(rdev->bdev,b)); - if (rdev->size > current_offset) - { + if (rdev->size > current_start / 2) { printk(" contained as device %d\n", c); zone->dev[c] = rdev; c++; @@ -190,16 +189,16 @@ static int create_strip_zones (mddev_t *mddev) } zone->nb_dev = c; - zone->size = (smallest->size - current_offset) * c; + zone->size = (smallest->size - current_start / 2) * c; printk("raid0: zone->nb_dev: %d, size: %llu\n", zone->nb_dev, (unsigned long long)zone->size); - zone->zone_start = curr_zone_offset * 2; - curr_zone_offset += zone->size; + zone->zone_start = curr_zone_start; + curr_zone_start += zone->size * 2; - current_offset = smallest->size; - printk("raid0: current zone offset: %llu\n", - (unsigned long long)current_offset); + current_start = smallest->size * 2; + printk(KERN_INFO "raid0: current zone start: %llu\n", + (unsigned long long)current_start); } /* Now find appropriate hash spacing. @@ -210,8 +209,8 @@ static int create_strip_zones (mddev_t *mddev) * strip though as it's size has no bearing on the efficacy of the hash * table. */ - conf->hash_spacing = curr_zone_offset; - min_spacing = curr_zone_offset; + conf->hash_spacing = curr_zone_start / 2; + min_spacing = curr_zone_start / 2; sector_div(min_spacing, PAGE_SIZE/sizeof(struct strip_zone*)); for (i=0; i < conf->nr_strip_zones-1; i++) { sector_t sz = 0; -- cgit v1.2.3 From 0825b87a7dd9645c7e16489fec839a3cb5c40a08 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:07 +1100 Subject: md: raid0 create_strip_zones(): Add KERN_INFO/KERN_ERR to printk's. This patch consists only of these trivial changes. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 9074ea43d52..c759436b5d8 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -68,18 +68,18 @@ static int create_strip_zones (mddev_t *mddev) conf->nr_strip_zones = 0; rdev_for_each(rdev1, tmp1, mddev) { - printk("raid0: looking at %s\n", + printk(KERN_INFO "raid0: looking at %s\n", bdevname(rdev1->bdev,b)); c = 0; rdev_for_each(rdev2, tmp2, mddev) { - printk("raid0: comparing %s(%llu)", + printk(KERN_INFO "raid0: comparing %s(%llu)", bdevname(rdev1->bdev,b), (unsigned long long)rdev1->size); - printk(" with %s(%llu)\n", + printk(KERN_INFO " with %s(%llu)\n", bdevname(rdev2->bdev,b), (unsigned long long)rdev2->size); if (rdev2 == rdev1) { - printk("raid0: END\n"); + printk(KERN_INFO "raid0: END\n"); break; } if (rdev2->size == rdev1->size) @@ -88,19 +88,20 @@ static int create_strip_zones (mddev_t *mddev) * Not unique, don't count it as a new * group */ - printk("raid0: EQUAL\n"); + printk(KERN_INFO "raid0: EQUAL\n"); c = 1; break; } - printk("raid0: NOT EQUAL\n"); + printk(KERN_INFO "raid0: NOT EQUAL\n"); } if (!c) { - printk("raid0: ==> UNIQUE\n"); + printk(KERN_INFO "raid0: ==> UNIQUE\n"); conf->nr_strip_zones++; - printk("raid0: %d zones\n", conf->nr_strip_zones); + printk(KERN_INFO "raid0: %d zones\n", + conf->nr_strip_zones); } } - printk("raid0: FINAL %d zones\n", conf->nr_strip_zones); + printk(KERN_INFO "raid0: FINAL %d zones\n", conf->nr_strip_zones); conf->strip_zone = kzalloc(sizeof(struct strip_zone)* conf->nr_strip_zones, GFP_KERNEL); @@ -123,12 +124,13 @@ static int create_strip_zones (mddev_t *mddev) int j = rdev1->raid_disk; if (j < 0 || j >= mddev->raid_disks) { - printk("raid0: bad disk number %d - aborting!\n", j); + printk(KERN_ERR "raid0: bad disk number %d - " + "aborting!\n", j); goto abort; } if (zone->dev[j]) { - printk("raid0: multiple devices for %d - aborting!\n", - j); + printk(KERN_ERR "raid0: multiple devices for %d - " + "aborting!\n", j); goto abort; } zone->dev[j] = rdev1; @@ -149,8 +151,8 @@ static int create_strip_zones (mddev_t *mddev) cnt++; } if (cnt != mddev->raid_disks) { - printk("raid0: too few disks (%d of %d) - aborting!\n", - cnt, mddev->raid_disks); + printk(KERN_ERR "raid0: too few disks (%d of %d) - " + "aborting!\n", cnt, mddev->raid_disks); goto abort; } zone->nb_dev = cnt; @@ -166,7 +168,7 @@ static int create_strip_zones (mddev_t *mddev) zone = conf->strip_zone + i; zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks; - printk("raid0: zone %d\n", i); + printk(KERN_INFO "raid0: zone %d\n", i); zone->dev_start = current_start; smallest = NULL; c = 0; @@ -174,23 +176,25 @@ static int create_strip_zones (mddev_t *mddev) for (j=0; jstrip_zone[0].dev[j]; - printk("raid0: checking %s ...", bdevname(rdev->bdev,b)); + printk(KERN_INFO "raid0: checking %s ...", + bdevname(rdev->bdev, b)); if (rdev->size > current_start / 2) { - printk(" contained as device %d\n", c); + printk(KERN_INFO " contained as device %d\n", + c); zone->dev[c] = rdev; c++; if (!smallest || (rdev->size size)) { smallest = rdev; - printk(" (%llu) is smallest!.\n", + printk(KERN_INFO " (%llu) is smallest!.\n", (unsigned long long)rdev->size); } } else - printk(" nope.\n"); + printk(KERN_INFO " nope.\n"); } zone->nb_dev = c; zone->size = (smallest->size - current_start / 2) * c; - printk("raid0: zone->nb_dev: %d, size: %llu\n", + printk(KERN_INFO "raid0: zone->nb_dev: %d, size: %llu\n", zone->nb_dev, (unsigned long long)zone->size); zone->zone_start = curr_zone_start; @@ -226,7 +230,7 @@ static int create_strip_zones (mddev_t *mddev) mddev->queue->backing_dev_info.congested_fn = raid0_congested; mddev->queue->backing_dev_info.congested_data = mddev; - printk("raid0: done.\n"); + printk(KERN_INFO "raid0: done.\n"); return 0; abort: return 1; -- cgit v1.2.3 From 83838ed87898e0a8ff8dbf001e54e6c017f0a011 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:07 +1100 Subject: md: raid0: Represent the size of strip zones in sectors. This completes the block -> sector conversion of struct strip_zone. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 26 +++++++++++++------------- include/linux/raid/raid0.h | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index c759436b5d8..6e85e88bbae 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -156,11 +156,11 @@ static int create_strip_zones (mddev_t *mddev) goto abort; } zone->nb_dev = cnt; - zone->size = smallest->size * cnt; + zone->sectors = smallest->size * cnt * 2; zone->zone_start = 0; current_start = smallest->size * 2; - curr_zone_start = zone->size * 2; + curr_zone_start = zone->sectors; /* now do the other zones */ for (i = 1; i < conf->nr_strip_zones; i++) @@ -193,12 +193,12 @@ static int create_strip_zones (mddev_t *mddev) } zone->nb_dev = c; - zone->size = (smallest->size - current_start / 2) * c; - printk(KERN_INFO "raid0: zone->nb_dev: %d, size: %llu\n", - zone->nb_dev, (unsigned long long)zone->size); + zone->sectors = (smallest->size * 2 - current_start) * c; + printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n", + zone->nb_dev, (unsigned long long)zone->sectors); zone->zone_start = curr_zone_start; - curr_zone_start += zone->size * 2; + curr_zone_start += zone->sectors; current_start = smallest->size * 2; printk(KERN_INFO "raid0: current zone start: %llu\n", @@ -220,7 +220,7 @@ static int create_strip_zones (mddev_t *mddev) sector_t sz = 0; for (j=i; jnr_strip_zones-1 && sz < min_spacing ; j++) - sz += conf->strip_zone[j].size; + sz += conf->strip_zone[j].sectors / 2; if (sz >= min_spacing && sz < conf->hash_spacing) conf->hash_spacing = sz; } @@ -325,13 +325,13 @@ static int raid0_run (mddev_t *mddev) conf->hash_table = kmalloc (sizeof (struct strip_zone *)*nb_zone, GFP_KERNEL); if (!conf->hash_table) goto out_free_conf; - size = conf->strip_zone[cur].size; + size = conf->strip_zone[cur].sectors / 2; conf->hash_table[0] = conf->strip_zone + cur; for (i=1; i< nb_zone; i++) { while (size <= conf->hash_spacing) { cur++; - size += conf->strip_zone[cur].size; + size += conf->strip_zone[cur].sectors / 2; } size -= conf->hash_spacing; conf->hash_table[i] = conf->strip_zone + cur; @@ -439,10 +439,10 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) sector_div(x, (u32)conf->hash_spacing); zone = conf->hash_table[x]; } - - while (sector / 2 >= (zone->zone_start / 2 + zone->size)) + + while (sector >= zone->zone_start + zone->sectors) zone++; - + sect_in_chunk = bio->bi_sector & (chunk_sects - 1); @@ -495,7 +495,7 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev) seq_printf(seq, "] zs=%d ds=%d s=%d\n", conf->strip_zone[j].zone_start, conf->strip_zone[j].dev_start, - conf->strip_zone[j].size); + conf->strip_zone[j].sectors); } #endif seq_printf(seq, " %dk chunks", mddev->chunk_size/1024); diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h index eaf4f6ac55f..c12521d027e 100644 --- a/include/linux/raid/raid0.h +++ b/include/linux/raid/raid0.h @@ -7,7 +7,7 @@ struct strip_zone { sector_t zone_start; /* Zone offset in md_dev (in sectors) */ sector_t dev_start; /* Zone offset in real dev (in sectors) */ - sector_t size; /* Zone size */ + sector_t sectors; /* Zone size in sectors */ int nb_dev; /* # of devices attached to the zone */ mdk_rdev_t **dev; /* Devices attached to the zone */ }; -- cgit v1.2.3 From ccacc7d2cf03114a24ab903f710118e9e5d43273 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 9 Jan 2009 08:31:08 +1100 Subject: md: raid0: make hash_spacing and preshift sector-based. This patch renames the hash_spacing and preshift members of struct raid0_private_data to spacing and sector_shift respectively and changes the semantics as follows: We always have spacing = 2 * hash_spacing. In case sizeof(sector_t) > sizeof(u32) we also have sector_shift = preshift + 1 while sector_shift = preshift = 0 otherwise. Note that the values of nb_zone and zone are unaffected by these changes because in the sector_div() preceeding the assignement of these two variables both arguments double. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 58 +++++++++++++++++++++++----------------------- include/linux/raid/raid0.h | 4 ++-- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 6e85e88bbae..90f5b24f6e6 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -213,16 +213,16 @@ static int create_strip_zones (mddev_t *mddev) * strip though as it's size has no bearing on the efficacy of the hash * table. */ - conf->hash_spacing = curr_zone_start / 2; - min_spacing = curr_zone_start / 2; + conf->spacing = curr_zone_start; + min_spacing = curr_zone_start; sector_div(min_spacing, PAGE_SIZE/sizeof(struct strip_zone*)); for (i=0; i < conf->nr_strip_zones-1; i++) { - sector_t sz = 0; - for (j=i; jnr_strip_zones-1 && - sz < min_spacing ; j++) - sz += conf->strip_zone[j].sectors / 2; - if (sz >= min_spacing && sz < conf->hash_spacing) - conf->hash_spacing = sz; + sector_t s = 0; + for (j = i; j < conf->nr_strip_zones - 1 && + s < min_spacing; j++) + s += conf->strip_zone[j].sectors; + if (s >= min_spacing && s < conf->spacing) + conf->spacing = s; } mddev->queue->unplug_fn = raid0_unplug; @@ -265,7 +265,7 @@ static int raid0_mergeable_bvec(struct request_queue *q, static int raid0_run (mddev_t *mddev) { unsigned cur=0, i=0, nb_zone; - s64 size; + s64 sectors; raid0_conf_t *conf; mdk_rdev_t *rdev; struct list_head *tmp; @@ -297,51 +297,51 @@ static int raid0_run (mddev_t *mddev) rdev_for_each(rdev, tmp, mddev) mddev->array_sectors += rdev->size * 2; - printk("raid0 : md_size is %llu blocks.\n", - (unsigned long long)mddev->array_sectors / 2); - printk("raid0 : conf->hash_spacing is %llu blocks.\n", - (unsigned long long)conf->hash_spacing); + printk(KERN_INFO "raid0 : md_size is %llu sectors.\n", + (unsigned long long)mddev->array_sectors); + printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n", + (unsigned long long)conf->spacing); { - sector_t s = mddev->array_sectors / 2; - sector_t space = conf->hash_spacing; + sector_t s = mddev->array_sectors; + sector_t space = conf->spacing; int round; - conf->preshift = 0; + conf->sector_shift = 0; if (sizeof(sector_t) > sizeof(u32)) { /*shift down space and s so that sector_div will work */ while (space > (sector_t) (~(u32)0)) { s >>= 1; space >>= 1; s += 1; /* force round-up */ - conf->preshift++; + conf->sector_shift++; } } round = sector_div(s, (u32)space) ? 1 : 0; nb_zone = s + round; } - printk("raid0 : nb_zone is %d.\n", nb_zone); + printk(KERN_INFO "raid0 : nb_zone is %d.\n", nb_zone); - printk("raid0 : Allocating %Zd bytes for hash.\n", + printk(KERN_INFO "raid0 : Allocating %zu bytes for hash.\n", nb_zone*sizeof(struct strip_zone*)); conf->hash_table = kmalloc (sizeof (struct strip_zone *)*nb_zone, GFP_KERNEL); if (!conf->hash_table) goto out_free_conf; - size = conf->strip_zone[cur].sectors / 2; + sectors = conf->strip_zone[cur].sectors; conf->hash_table[0] = conf->strip_zone + cur; for (i=1; i< nb_zone; i++) { - while (size <= conf->hash_spacing) { + while (sectors <= conf->spacing) { cur++; - size += conf->strip_zone[cur].sectors / 2; + sectors += conf->strip_zone[cur].sectors; } - size -= conf->hash_spacing; + sectors -= conf->spacing; conf->hash_table[i] = conf->strip_zone + cur; } - if (conf->preshift) { - conf->hash_spacing >>= conf->preshift; - /* round hash_spacing up so when we divide by it, we + if (conf->sector_shift) { + conf->spacing >>= conf->sector_shift; + /* round spacing up so when we divide by it, we * err on the side of too-low, which is safest */ - conf->hash_spacing++; + conf->spacing++; } /* calculate the max read-ahead size. @@ -435,8 +435,8 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) { - sector_t x = sector >> (conf->preshift + 1); - sector_div(x, (u32)conf->hash_spacing); + sector_t x = sector >> conf->sector_shift; + sector_div(x, (u32)conf->spacing); zone = conf->hash_table[x]; } diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h index c12521d027e..fd42aa87c39 100644 --- a/include/linux/raid/raid0.h +++ b/include/linux/raid/raid0.h @@ -19,8 +19,8 @@ struct raid0_private_data mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ int nr_strip_zones; - sector_t hash_spacing; - int preshift; /* shift this before divide by hash_spacing */ + sector_t spacing; + int sector_shift; /* shift this before divide by spacing */ }; typedef struct raid0_private_data raid0_conf_t; -- cgit v1.2.3 From 159ec1fc060ab22b157a62364045f5e98749c4d3 Mon Sep 17 00:00:00 2001 From: Cheng Renquan Date: Fri, 9 Jan 2009 08:31:08 +1100 Subject: md: use list_for_each_entry macro directly The rdev_for_each macro defined in is identical to list_for_each_entry_safe, from , it should be defined to use list_for_each_entry_safe, instead of reinventing the wheel. But some calls to each_entry_safe don't really need a safe version, just a direct list_for_each_entry is enough, this could save a temp variable (tmp) in every function that used rdev_for_each. In this patch, most rdev_for_each loops are replaced by list_for_each_entry, totally save many tmp vars; and only in the other situations that will call list_del to delete an entry, the safe version is used. Signed-off-by: Cheng Renquan Signed-off-by: NeilBrown --- drivers/md/bitmap.c | 3 +- drivers/md/faulty.c | 3 +- drivers/md/linear.c | 3 +- drivers/md/md.c | 107 +++++++++++++++++++--------------------------- drivers/md/multipath.c | 3 +- drivers/md/raid0.c | 10 ++--- drivers/md/raid1.c | 3 +- drivers/md/raid10.c | 3 +- drivers/md/raid5.c | 8 ++-- include/linux/raid/md_k.h | 11 ++--- 10 files changed, 60 insertions(+), 94 deletions(-) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 666b7ba47ec..71994376339 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -215,7 +215,6 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, /* choose a good rdev and read the page from there */ mdk_rdev_t *rdev; - struct list_head *tmp; sector_t target; if (!page) @@ -223,7 +222,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, if (!page) return ERR_PTR(-ENOMEM); - rdev_for_each(rdev, tmp, mddev) { + list_for_each_entry(rdev, &mddev->disks, same_set) { if (! test_bit(In_sync, &rdev->flags) || test_bit(Faulty, &rdev->flags)) continue; diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index f26c1f9a475..86d9adf90e7 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -283,7 +283,6 @@ static int reconfig(mddev_t *mddev, int layout, int chunk_size) static int run(mddev_t *mddev) { mdk_rdev_t *rdev; - struct list_head *tmp; int i; conf_t *conf = kmalloc(sizeof(*conf), GFP_KERNEL); @@ -296,7 +295,7 @@ static int run(mddev_t *mddev) } conf->nfaults = 0; - rdev_for_each(rdev, tmp, mddev) + list_for_each_entry(rdev, &mddev->disks, same_set) conf->rdev = rdev; mddev->array_sectors = mddev->size * 2; diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 3b90c5c924e..1e3aea9eecf 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -105,7 +105,6 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) int i, nb_zone, cnt; sector_t min_sectors; sector_t curr_sector; - struct list_head *tmp; conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t), GFP_KERNEL); @@ -115,7 +114,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) cnt = 0; conf->array_sectors = 0; - rdev_for_each(rdev, tmp, mddev) { + list_for_each_entry(rdev, &mddev->disks, same_set) { int j = rdev->raid_disk; dev_info_t *disk = conf->disks + j; diff --git a/drivers/md/md.c b/drivers/md/md.c index ee759b193e9..58be665bb46 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -307,25 +307,23 @@ static inline void mddev_unlock(mddev_t * mddev) static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) { - mdk_rdev_t * rdev; - struct list_head *tmp; + mdk_rdev_t *rdev; - rdev_for_each(rdev, tmp, mddev) { + list_for_each_entry(rdev, &mddev->disks, same_set) if (rdev->desc_nr == nr) return rdev; - } + return NULL; } static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev) { - struct list_head *tmp; mdk_rdev_t *rdev; - rdev_for_each(rdev, tmp, mddev) { + list_for_each_entry(rdev, &mddev->disks, same_set) if (rdev->bdev->bd_dev == dev) return rdev; - } + return NULL; } @@ -861,7 +859,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) { mdp_super_t *sb; - struct list_head *tmp; mdk_rdev_t *rdev2; int next_spare = mddev->raid_disks; @@ -933,7 +930,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->state |= (1<disks[0].state = (1<disks, same_set) { mdp_disk_t *d; int desc_nr; if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) @@ -1259,7 +1256,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) { struct mdp_superblock_1 *sb; - struct list_head *tmp; mdk_rdev_t *rdev2; int max_dev, i; /* make rdev->sb match mddev and rdev data. */ @@ -1307,7 +1303,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) } max_dev = 0; - rdev_for_each(rdev2, tmp, mddev) + list_for_each_entry(rdev2, &mddev->disks, same_set) if (rdev2->desc_nr+1 > max_dev) max_dev = rdev2->desc_nr+1; @@ -1316,7 +1312,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) for (i=0; idev_roles[i] = cpu_to_le16(0xfffe); - rdev_for_each(rdev2, tmp, mddev) { + list_for_each_entry(rdev2, &mddev->disks, same_set) { i = rdev2->desc_nr; if (test_bit(Faulty, &rdev2->flags)) sb->dev_roles[i] = cpu_to_le16(0xfffe); @@ -1571,8 +1567,7 @@ static void kick_rdev_from_array(mdk_rdev_t * rdev) static void export_array(mddev_t *mddev) { - struct list_head *tmp; - mdk_rdev_t *rdev; + mdk_rdev_t *rdev, *tmp; rdev_for_each(rdev, tmp, mddev) { if (!rdev->mddev) { @@ -1643,7 +1638,7 @@ static void print_rdev(mdk_rdev_t *rdev) static void md_print_devices(void) { - struct list_head *tmp, *tmp2; + struct list_head *tmp; mdk_rdev_t *rdev; mddev_t *mddev; char b[BDEVNAME_SIZE]; @@ -1658,11 +1653,11 @@ static void md_print_devices(void) bitmap_print_sb(mddev->bitmap); else printk("%s: ", mdname(mddev)); - rdev_for_each(rdev, tmp2, mddev) + list_for_each_entry(rdev, &mddev->disks, same_set) printk("<%s>", bdevname(rdev->bdev,b)); printk("\n"); - rdev_for_each(rdev, tmp2, mddev) + list_for_each_entry(rdev, &mddev->disks, same_set) print_rdev(rdev); } printk("md: **********************************\n"); @@ -1679,9 +1674,8 @@ static void sync_sbs(mddev_t * mddev, int nospares) * with the rest of the array) */ mdk_rdev_t *rdev; - struct list_head *tmp; - rdev_for_each(rdev, tmp, mddev) { + list_for_each_entry(rdev, &mddev->disks, same_set) { if (rdev->sb_events == mddev->events || (nospares && rdev->raid_disk < 0 && @@ -1699,7 +1693,6 @@ static void sync_sbs(mddev_t * mddev, int nospares) static void md_update_sb(mddev_t * mddev, int force_change) { - struct list_head *tmp; mdk_rdev_t *rdev; int sync_req; int nospares = 0; @@ -1790,7 +1783,7 @@ repeat: mdname(mddev),mddev->in_sync); bitmap_update_sb(mddev->bitmap); - rdev_for_each(rdev, tmp, mddev) { + list_for_each_entry(rdev, &mddev->disks, same_set) { char b[BDEVNAME_SIZE]; dprintk(KERN_INFO "md: "); if (rdev->sb_loaded != 1) @@ -1999,7 +1992,6 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) md_wakeup_thread(rdev->mddev->thread); } else if (rdev->mddev->pers) { mdk_rdev_t *rdev2; - struct list_head *tmp; /* Activating a spare .. or possibly reactivating * if we every get bitmaps working here. */ @@ -2010,7 +2002,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) if (rdev->mddev->pers->hot_add_disk == NULL) return -EINVAL; - rdev_for_each(rdev2, tmp, rdev->mddev) + list_for_each_entry(rdev2, &rdev->mddev->disks, same_set) if (rdev2->raid_disk == slot) return -EEXIST; @@ -2125,14 +2117,14 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) */ mddev_t *mddev; int overlap = 0; - struct list_head *tmp, *tmp2; + struct list_head *tmp; mddev_unlock(my_mddev); for_each_mddev(mddev, tmp) { mdk_rdev_t *rdev2; mddev_lock(mddev); - rdev_for_each(rdev2, tmp2, mddev) + list_for_each_entry(rdev2, &mddev->disks, same_set) if (test_bit(AllReserved, &rdev2->flags) || (rdev->bdev == rdev2->bdev && rdev != rdev2 && @@ -2328,8 +2320,7 @@ abort_free: static void analyze_sbs(mddev_t * mddev) { int i; - struct list_head *tmp; - mdk_rdev_t *rdev, *freshest; + mdk_rdev_t *rdev, *freshest, *tmp; char b[BDEVNAME_SIZE]; freshest = NULL; @@ -3501,7 +3492,6 @@ static int do_md_run(mddev_t * mddev) { int err; int chunk_size; - struct list_head *tmp; mdk_rdev_t *rdev; struct gendisk *disk; struct mdk_personality *pers; @@ -3540,7 +3530,7 @@ static int do_md_run(mddev_t * mddev) } /* devices must have minimum size of one chunk */ - rdev_for_each(rdev, tmp, mddev) { + list_for_each_entry(rdev, &mddev->disks, same_set) { if (test_bit(Faulty, &rdev->flags)) continue; if (rdev->size < chunk_size / 1024) { @@ -3565,7 +3555,7 @@ static int do_md_run(mddev_t * mddev) * the only valid external interface is through the md * device. */ - rdev_for_each(rdev, tmp, mddev) { + list_for_each_entry(rdev, &mddev->disks, same_set) { if (test_bit(Faulty, &rdev->flags)) continue; sync_blockdev(rdev->bdev); @@ -3630,10 +3620,10 @@ static int do_md_run(mddev_t * mddev) */ char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; mdk_rdev_t *rdev2; - struct list_head *tmp2; int warned = 0; - rdev_for_each(rdev, tmp, mddev) { - rdev_for_each(rdev2, tmp2, mddev) { + + list_for_each_entry(rdev, &mddev->disks, same_set) + list_for_each_entry(rdev2, &mddev->disks, same_set) { if (rdev < rdev2 && rdev->bdev->bd_contains == rdev2->bdev->bd_contains) { @@ -3647,7 +3637,7 @@ static int do_md_run(mddev_t * mddev) warned = 1; } } - } + if (warned) printk(KERN_WARNING "True protection against single-disk" @@ -3695,7 +3685,7 @@ static int do_md_run(mddev_t * mddev) mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ mddev->in_sync = 1; - rdev_for_each(rdev, tmp, mddev) + list_for_each_entry(rdev, &mddev->disks, same_set) if (rdev->raid_disk >= 0) { char nm[20]; sprintf(nm, "rd%d", rdev->raid_disk); @@ -3726,9 +3716,8 @@ static int do_md_run(mddev_t * mddev) * it will remove the drives and not do the right thing */ if (mddev->degraded && !mddev->sync_thread) { - struct list_head *rtmp; int spares = 0; - rdev_for_each(rdev, rtmp, mddev) + list_for_each_entry(rdev, &mddev->disks, same_set) if (rdev->raid_disk >= 0 && !test_bit(In_sync, &rdev->flags) && !test_bit(Faulty, &rdev->flags)) @@ -3888,7 +3877,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) */ if (mode == 0) { mdk_rdev_t *rdev; - struct list_head *tmp; printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); @@ -3900,7 +3888,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) } mddev->bitmap_offset = 0; - rdev_for_each(rdev, tmp, mddev) + list_for_each_entry(rdev, &mddev->disks, same_set) if (rdev->raid_disk >= 0) { char nm[20]; sprintf(nm, "rd%d", rdev->raid_disk); @@ -3961,7 +3949,6 @@ out: static void autorun_array(mddev_t *mddev) { mdk_rdev_t *rdev; - struct list_head *tmp; int err; if (list_empty(&mddev->disks)) @@ -3969,7 +3956,7 @@ static void autorun_array(mddev_t *mddev) printk(KERN_INFO "md: running: "); - rdev_for_each(rdev, tmp, mddev) { + list_for_each_entry(rdev, &mddev->disks, same_set) { char b[BDEVNAME_SIZE]; printk("<%s>", bdevname(rdev->bdev,b)); } @@ -3996,8 +3983,7 @@ static void autorun_array(mddev_t *mddev) */ static void autorun_devices(int part) { - struct list_head *tmp; - mdk_rdev_t *rdev0, *rdev; + mdk_rdev_t *rdev0, *rdev, *tmp; mddev_t *mddev; char b[BDEVNAME_SIZE]; @@ -4012,7 +3998,7 @@ static void autorun_devices(int part) printk(KERN_INFO "md: considering %s ...\n", bdevname(rdev0->bdev,b)); INIT_LIST_HEAD(&candidates); - rdev_for_each_list(rdev, tmp, pending_raid_disks) + rdev_for_each_list(rdev, tmp, &pending_raid_disks) if (super_90_load(rdev, rdev0, 0) >= 0) { printk(KERN_INFO "md: adding %s ...\n", bdevname(rdev->bdev,b)); @@ -4058,7 +4044,7 @@ static void autorun_devices(int part) } else { printk(KERN_INFO "md: created %s\n", mdname(mddev)); mddev->persistent = 1; - rdev_for_each_list(rdev, tmp, candidates) { + rdev_for_each_list(rdev, tmp, &candidates) { list_del_init(&rdev->same_set); if (bind_rdev_to_array(rdev, mddev)) export_rdev(rdev); @@ -4069,7 +4055,7 @@ static void autorun_devices(int part) /* on success, candidates will be empty, on error * it won't... */ - rdev_for_each_list(rdev, tmp, candidates) { + rdev_for_each_list(rdev, tmp, &candidates) { list_del_init(&rdev->same_set); export_rdev(rdev); } @@ -4098,10 +4084,9 @@ static int get_array_info(mddev_t * mddev, void __user * arg) mdu_array_info_t info; int nr,working,active,failed,spare; mdk_rdev_t *rdev; - struct list_head *tmp; nr=working=active=failed=spare=0; - rdev_for_each(rdev, tmp, mddev) { + list_for_each_entry(rdev, &mddev->disks, same_set) { nr++; if (test_bit(Faulty, &rdev->flags)) failed++; @@ -4619,9 +4604,8 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) static int update_size(mddev_t *mddev, sector_t num_sectors) { - mdk_rdev_t * rdev; + mdk_rdev_t *rdev; int rv; - struct list_head *tmp; int fit = (num_sectors == 0); if (mddev->pers->resize == NULL) @@ -4643,7 +4627,7 @@ static int update_size(mddev_t *mddev, sector_t num_sectors) * grow, and re-add. */ return -EBUSY; - rdev_for_each(rdev, tmp, mddev) { + list_for_each_entry(rdev, &mddev->disks, same_set) { sector_t avail; avail = rdev->size * 2; @@ -5192,11 +5176,10 @@ static void status_unused(struct seq_file *seq) { int i = 0; mdk_rdev_t *rdev; - struct list_head *tmp; seq_printf(seq, "unused devices: "); - rdev_for_each_list(rdev, tmp, pending_raid_disks) { + list_for_each_entry(rdev, &pending_raid_disks, same_set) { char b[BDEVNAME_SIZE]; i++; seq_printf(seq, "%s ", @@ -5355,7 +5338,6 @@ static int md_seq_show(struct seq_file *seq, void *v) { mddev_t *mddev = v; sector_t size; - struct list_head *tmp2; mdk_rdev_t *rdev; struct mdstat_info *mi = seq->private; struct bitmap *bitmap; @@ -5392,7 +5374,7 @@ static int md_seq_show(struct seq_file *seq, void *v) } size = 0; - rdev_for_each(rdev, tmp2, mddev) { + list_for_each_entry(rdev, &mddev->disks, same_set) { char b[BDEVNAME_SIZE]; seq_printf(seq, " %s[%d]", bdevname(rdev->bdev,b), rdev->desc_nr); @@ -5699,7 +5681,6 @@ void md_do_sync(mddev_t *mddev) struct list_head *tmp; sector_t last_check; int skipped = 0; - struct list_head *rtmp; mdk_rdev_t *rdev; char *desc; @@ -5804,7 +5785,7 @@ void md_do_sync(mddev_t *mddev) /* recovery follows the physical size of devices */ max_sectors = mddev->size << 1; j = MaxSector; - rdev_for_each(rdev, rtmp, mddev) + list_for_each_entry(rdev, &mddev->disks, same_set) if (rdev->raid_disk >= 0 && !test_bit(Faulty, &rdev->flags) && !test_bit(In_sync, &rdev->flags) && @@ -5954,7 +5935,7 @@ void md_do_sync(mddev_t *mddev) } else { if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) mddev->curr_resync = MaxSector; - rdev_for_each(rdev, rtmp, mddev) + list_for_each_entry(rdev, &mddev->disks, same_set) if (rdev->raid_disk >= 0 && !test_bit(Faulty, &rdev->flags) && !test_bit(In_sync, &rdev->flags) && @@ -5990,10 +5971,9 @@ EXPORT_SYMBOL_GPL(md_do_sync); static int remove_and_add_spares(mddev_t *mddev) { mdk_rdev_t *rdev; - struct list_head *rtmp; int spares = 0; - rdev_for_each(rdev, rtmp, mddev) + list_for_each_entry(rdev, &mddev->disks, same_set) if (rdev->raid_disk >= 0 && !test_bit(Blocked, &rdev->flags) && (test_bit(Faulty, &rdev->flags) || @@ -6009,7 +5989,7 @@ static int remove_and_add_spares(mddev_t *mddev) } if (mddev->degraded && ! mddev->ro) { - rdev_for_each(rdev, rtmp, mddev) { + list_for_each_entry(rdev, &mddev->disks, same_set) { if (rdev->raid_disk >= 0 && !test_bit(In_sync, &rdev->flags) && !test_bit(Blocked, &rdev->flags)) @@ -6061,7 +6041,6 @@ static int remove_and_add_spares(mddev_t *mddev) void md_check_recovery(mddev_t *mddev) { mdk_rdev_t *rdev; - struct list_head *rtmp; if (mddev->bitmap) @@ -6125,7 +6104,7 @@ void md_check_recovery(mddev_t *mddev) if (mddev->flags) md_update_sb(mddev, 0); - rdev_for_each(rdev, rtmp, mddev) + list_for_each_entry(rdev, &mddev->disks, same_set) if (test_and_clear_bit(StateChanged, &rdev->flags)) sysfs_notify_dirent(rdev->sysfs_state); @@ -6154,7 +6133,7 @@ void md_check_recovery(mddev_t *mddev) * information must be scrapped */ if (!mddev->degraded) - rdev_for_each(rdev, rtmp, mddev) + list_for_each_entry(rdev, &mddev->disks, same_set) rdev->saved_raid_disk = -1; mddev->recovery = 0; diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index d4ac47d1127..f6d08f24167 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -408,7 +408,6 @@ static int multipath_run (mddev_t *mddev) int disk_idx; struct multipath_info *disk; mdk_rdev_t *rdev; - struct list_head *tmp; if (mddev->level != LEVEL_MULTIPATH) { printk("multipath: %s: raid level not set to multipath IO (%d)\n", @@ -441,7 +440,7 @@ static int multipath_run (mddev_t *mddev) } conf->working_disks = 0; - rdev_for_each(rdev, tmp, mddev) { + list_for_each_entry(rdev, &mddev->disks, same_set) { disk_idx = rdev->raid_disk; if (disk_idx < 0 || disk_idx >= mddev->raid_disks) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 90f5b24f6e6..c605ba80558 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -57,7 +57,6 @@ static int create_strip_zones (mddev_t *mddev) sector_t min_spacing; raid0_conf_t *conf = mddev_to_conf(mddev); mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; - struct list_head *tmp1, *tmp2; struct strip_zone *zone; int cnt; char b[BDEVNAME_SIZE]; @@ -67,11 +66,11 @@ static int create_strip_zones (mddev_t *mddev) */ conf->nr_strip_zones = 0; - rdev_for_each(rdev1, tmp1, mddev) { + list_for_each_entry(rdev1, &mddev->disks, same_set) { printk(KERN_INFO "raid0: looking at %s\n", bdevname(rdev1->bdev,b)); c = 0; - rdev_for_each(rdev2, tmp2, mddev) { + list_for_each_entry(rdev2, &mddev->disks, same_set) { printk(KERN_INFO "raid0: comparing %s(%llu)", bdevname(rdev1->bdev,b), (unsigned long long)rdev1->size); @@ -120,7 +119,7 @@ static int create_strip_zones (mddev_t *mddev) cnt = 0; smallest = NULL; zone->dev = conf->devlist; - rdev_for_each(rdev1, tmp1, mddev) { + list_for_each_entry(rdev1, &mddev->disks, same_set) { int j = rdev1->raid_disk; if (j < 0 || j >= mddev->raid_disks) { @@ -268,7 +267,6 @@ static int raid0_run (mddev_t *mddev) s64 sectors; raid0_conf_t *conf; mdk_rdev_t *rdev; - struct list_head *tmp; if (mddev->chunk_size == 0) { printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); @@ -294,7 +292,7 @@ static int raid0_run (mddev_t *mddev) /* calculate array device size */ mddev->array_sectors = 0; - rdev_for_each(rdev, tmp, mddev) + list_for_each_entry(rdev, &mddev->disks, same_set) mddev->array_sectors += rdev->size * 2; printk(KERN_INFO "raid0 : md_size is %llu sectors.\n", diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 9c788e2489b..c165b1eed8b 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1919,7 +1919,6 @@ static int run(mddev_t *mddev) int i, j, disk_idx; mirror_info_t *disk; mdk_rdev_t *rdev; - struct list_head *tmp; if (mddev->level != 1) { printk("raid1: %s: raid level not set to mirroring (%d)\n", @@ -1964,7 +1963,7 @@ static int run(mddev_t *mddev) spin_lock_init(&conf->device_lock); mddev->queue->queue_lock = &conf->device_lock; - rdev_for_each(rdev, tmp, mddev) { + list_for_each_entry(rdev, &mddev->disks, same_set) { disk_idx = rdev->raid_disk; if (disk_idx >= mddev->raid_disks || disk_idx < 0) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 970a96ef9b1..6736d6dff98 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2025,7 +2025,6 @@ static int run(mddev_t *mddev) int i, disk_idx; mirror_info_t *disk; mdk_rdev_t *rdev; - struct list_head *tmp; int nc, fc, fo; sector_t stride, size; @@ -2108,7 +2107,7 @@ static int run(mddev_t *mddev) spin_lock_init(&conf->device_lock); mddev->queue->queue_lock = &conf->device_lock; - rdev_for_each(rdev, tmp, mddev) { + list_for_each_entry(rdev, &mddev->disks, same_set) { disk_idx = rdev->raid_disk; if (disk_idx >= mddev->raid_disks || disk_idx < 0) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index a36a7435edf..a5ba080d303 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3998,7 +3998,6 @@ static int run(mddev_t *mddev) int raid_disk, memory; mdk_rdev_t *rdev; struct disk_info *disk; - struct list_head *tmp; int working_disks = 0; if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) { @@ -4108,7 +4107,7 @@ static int run(mddev_t *mddev) pr_debug("raid5: run(%s) called.\n", mdname(mddev)); - rdev_for_each(rdev, tmp, mddev) { + list_for_each_entry(rdev, &mddev->disks, same_set) { raid_disk = rdev->raid_disk; if (raid_disk >= conf->raid_disks || raid_disk < 0) @@ -4533,7 +4532,6 @@ static int raid5_start_reshape(mddev_t *mddev) { raid5_conf_t *conf = mddev_to_conf(mddev); mdk_rdev_t *rdev; - struct list_head *rtmp; int spares = 0; int added_devices = 0; unsigned long flags; @@ -4541,7 +4539,7 @@ static int raid5_start_reshape(mddev_t *mddev) if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) return -EBUSY; - rdev_for_each(rdev, rtmp, mddev) + list_for_each_entry(rdev, &mddev->disks, same_set) if (rdev->raid_disk < 0 && !test_bit(Faulty, &rdev->flags)) spares++; @@ -4563,7 +4561,7 @@ static int raid5_start_reshape(mddev_t *mddev) /* Add some new drives, as many as will fit. * We know there are enough to make the newly sized array work. */ - rdev_for_each(rdev, rtmp, mddev) + list_for_each_entry(rdev, &mddev->disks, same_set) if (rdev->raid_disk < 0 && !test_bit(Faulty, &rdev->flags)) { if (raid5_add_disk(mddev, rdev) == 0) { diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 663803eaf0d..8f9a54c1fb0 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -335,17 +335,14 @@ static inline char * mdname (mddev_t * mddev) * iterates through some rdev ringlist. It's safe to remove the * current 'rdev'. Dont touch 'tmp' though. */ -#define rdev_for_each_list(rdev, tmp, list) \ - \ - for ((tmp) = (list).next; \ - (rdev) = (list_entry((tmp), mdk_rdev_t, same_set)), \ - (tmp) = (tmp)->next, (tmp)->prev != &(list) \ - ; ) +#define rdev_for_each_list(rdev, tmp, head) \ + list_for_each_entry_safe(rdev, tmp, head, same_set) + /* * iterates through the 'same array disks' ringlist */ #define rdev_for_each(rdev, tmp, mddev) \ - rdev_for_each_list(rdev, tmp, (mddev)->disks) + list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set) #define rdev_for_each_rcu(rdev, mddev) \ list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set) -- cgit v1.2.3 From cd2ac9321c26dc7a76455cd2a4df89123fa2b73e Mon Sep 17 00:00:00 2001 From: Cheng Renquan Date: Fri, 9 Jan 2009 08:31:08 +1100 Subject: md: need another print_sb for mdp_superblock_1 md_print_devices is called in two code path: MD_BUG(...), and md_ioctl with PRINT_RAID_DEBUG. it will dump out all in use md devices information; However, it wrongly processed two types of superblock in one: The header file has defined two types of superblock, struct mdp_superblock_s (typedefed with mdp_super_t) according to md with metadata 0.90, and struct mdp_superblock_1 according to md with metadata 1.0 and later, These two types of superblock are very different, The md_print_devices code processed them both in mdp_super_t, that would lead to wrong informaton dump like: [ 6742.345877] [ 6742.345887] md: ********************************** [ 6742.345890] md: * * [ 6742.345892] md: ********************************** [ 6742.345896] md1: [ 6742.345907] md: rdev ram7, SZ:00065472 F:0 S:1 DN:3 [ 6742.345909] md: rdev superblock: [ 6742.345914] md: SB: (V:0.90.0) ID:<42ef13c7.598c059a.5f9f1645.801e9ee6> CT:4919856d [ 6742.345918] md: L5 S00065472 ND:4 RD:4 md1 LO:2 CS:65536 [ 6742.345922] md: UT:4919856d ST:1 AD:4 WD:4 FD:0 SD:0 CSUM:b7992907 E:00000001 [ 6742.345924] D 0: DISK [ 6742.345930] D 1: DISK [ 6742.345933] D 2: DISK [ 6742.345937] D 3: DISK [ 6742.345942] md: THIS: DISK ... [ 6742.346058] md0: [ 6742.346067] md: rdev ram3, SZ:00065472 F:0 S:1 DN:3 [ 6742.346070] md: rdev superblock: [ 6742.346073] md: SB: (V:1.0.0) ID:<369aad81.00000000.00000000.00000000> CT:9a322a9c [ 6742.346077] md: L-1507699579 S976570180 ND:48 RD:0 md0 LO:65536 CS:196610 [ 6742.346081] md: UT:00000018 ST:0 AD:131048 WD:0 FD:8 SD:0 CSUM:00000000 E:00000000 [ 6742.346084] D 0: DISK [ 6742.346089] D 1: DISK [ 6742.346092] D 2: DISK [ 6742.346096] D 3: DISK [ 6742.346102] md: THIS: DISK ... [ 6742.346219] md: ********************************** [ 6742.346221] Here md1 is metadata 0.90.0, and md0 is metadata 1.2 After some more code to distinguish these two types of superblock, in this patch, it will generate dump information like: [ 7906.755790] [ 7906.755799] md: ********************************** [ 7906.755802] md: * * [ 7906.755804] md: ********************************** [ 7906.755808] md1: [ 7906.755819] md: rdev ram7, SZ:00065472 F:0 S:1 DN:3 [ 7906.755821] md: rdev superblock (MJ:0): [ 7906.755826] md: SB: (V:0.90.0) ID:<3fca7a0d.a612bfed.5f9f1645.801e9ee6> CT:491989f3 [ 7906.755830] md: L5 S00065472 ND:4 RD:4 md1 LO:2 CS:65536 [ 7906.755834] md: UT:491989f3 ST:1 AD:4 WD:4 FD:0 SD:0 CSUM:00fb52ad E:00000001 [ 7906.755836] D 0: DISK [ 7906.755842] D 1: DISK [ 7906.755845] D 2: DISK [ 7906.755849] D 3: DISK [ 7906.755855] md: THIS: DISK ... [ 7906.755972] md0: [ 7906.755981] md: rdev ram3, SZ:00065472 F:0 S:1 DN:3 [ 7906.755984] md: rdev superblock (MJ:1): [ 7906.755989] md: SB: (V:1) (F:0) Array-ID:<5fbcf158:55aa:5fbe:9a79:1e939880dcbd> [ 7906.755990] md: Name: "DG5:0" CT:1226410480 [ 7906.755998] md: L5 SZ130944 RD:4 LO:2 CS:128 DO:24 DS:131048 SO:8 RO:0 [ 7906.755999] md: Dev:00000003 UUID: 9194d744:87f7:a448:85f2:7497b84ce30a [ 7906.756001] md: (F:0) UT:1226410480 Events:0 ResyncOffset:-1 CSUM:0dbcd829 [ 7906.756003] md: (MaxDev:384) ... [ 7906.756113] md: ********************************** [ 7906.756116] this md0 (metadata 1.2) information dumping is exactly according to struct mdp_superblock_1. Signed-off-by: Cheng Renquan Cc: Neil Brown Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: NeilBrown --- drivers/md/md.c | 66 ++++++++++++++++++++++++++++++++++++++++++----- include/linux/raid/md_p.h | 2 ++ 2 files changed, 62 insertions(+), 6 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 58be665bb46..1f770c16d43 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1588,7 +1588,7 @@ static void print_desc(mdp_disk_t *desc) desc->major,desc->minor,desc->raid_disk,desc->state); } -static void print_sb(mdp_super_t *sb) +static void print_sb_90(mdp_super_t *sb) { int i; @@ -1619,10 +1619,57 @@ static void print_sb(mdp_super_t *sb) } printk(KERN_INFO "md: THIS: "); print_desc(&sb->this_disk); - } -static void print_rdev(mdk_rdev_t *rdev) +static void print_sb_1(struct mdp_superblock_1 *sb) +{ + __u8 *uuid; + + uuid = sb->set_uuid; + printk(KERN_INFO "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x" + ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n" + KERN_INFO "md: Name: \"%s\" CT:%llu\n", + le32_to_cpu(sb->major_version), + le32_to_cpu(sb->feature_map), + uuid[0], uuid[1], uuid[2], uuid[3], + uuid[4], uuid[5], uuid[6], uuid[7], + uuid[8], uuid[9], uuid[10], uuid[11], + uuid[12], uuid[13], uuid[14], uuid[15], + sb->set_name, + (unsigned long long)le64_to_cpu(sb->ctime) + & MD_SUPERBLOCK_1_TIME_SEC_MASK); + + uuid = sb->device_uuid; + printk(KERN_INFO "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu" + " RO:%llu\n" + KERN_INFO "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x" + ":%02x%02x%02x%02x%02x%02x\n" + KERN_INFO "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n" + KERN_INFO "md: (MaxDev:%u) \n", + le32_to_cpu(sb->level), + (unsigned long long)le64_to_cpu(sb->size), + le32_to_cpu(sb->raid_disks), + le32_to_cpu(sb->layout), + le32_to_cpu(sb->chunksize), + (unsigned long long)le64_to_cpu(sb->data_offset), + (unsigned long long)le64_to_cpu(sb->data_size), + (unsigned long long)le64_to_cpu(sb->super_offset), + (unsigned long long)le64_to_cpu(sb->recovery_offset), + le32_to_cpu(sb->dev_number), + uuid[0], uuid[1], uuid[2], uuid[3], + uuid[4], uuid[5], uuid[6], uuid[7], + uuid[8], uuid[9], uuid[10], uuid[11], + uuid[12], uuid[13], uuid[14], uuid[15], + sb->devflags, + (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK, + (unsigned long long)le64_to_cpu(sb->events), + (unsigned long long)le64_to_cpu(sb->resync_offset), + le32_to_cpu(sb->sb_csum), + le32_to_cpu(sb->max_dev) + ); +} + +static void print_rdev(mdk_rdev_t *rdev, int major_version) { char b[BDEVNAME_SIZE]; printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n", @@ -1630,8 +1677,15 @@ static void print_rdev(mdk_rdev_t *rdev) test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags), rdev->desc_nr); if (rdev->sb_loaded) { - printk(KERN_INFO "md: rdev superblock:\n"); - print_sb((mdp_super_t*)page_address(rdev->sb_page)); + printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version); + switch (major_version) { + case 0: + print_sb_90((mdp_super_t*)page_address(rdev->sb_page)); + break; + case 1: + print_sb_1((struct mdp_superblock_1 *)page_address(rdev->sb_page)); + break; + } } else printk(KERN_INFO "md: no rdev superblock!\n"); } @@ -1658,7 +1712,7 @@ static void md_print_devices(void) printk("\n"); list_for_each_entry(rdev, &mddev->disks, same_set) - print_rdev(rdev); + print_rdev(rdev, mddev->major_version); } printk("md: **********************************\n"); printk("\n"); diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index 8b4de4a41ff..9491026afe6 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h @@ -194,6 +194,8 @@ static inline __u64 md_event(mdp_super_t *sb) { return (ev<<32)| sb->events_lo; } +#define MD_SUPERBLOCK_1_TIME_SEC_MASK ((1ULL<<40) - 1) + /* * The version-1 superblock : * All numeric fields are little-endian. -- cgit v1.2.3 From 8b76539823d71576927e3eb08b395eb6620f628d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:08 +1100 Subject: md: move allocation of ->queue from mddev_find to md_probe It is more balanced to just do simple initialisation in mddev_find, which allocates and links a new md device, and leave all the more sophisticated allocation to md_probe (which calls mddev_find). md_probe already allocated the gendisk. It should allocate the queue too. Signed-off-by: NeilBrown --- drivers/md/md.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 1f770c16d43..da838cc32cc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -221,7 +221,9 @@ static void mddev_put(mddev_t *mddev) if (!mddev->raid_disks && list_empty(&mddev->disks)) { list_del(&mddev->all_mddevs); spin_unlock(&all_mddevs_lock); - blk_cleanup_queue(mddev->queue); + if (mddev->queue) + blk_cleanup_queue(mddev->queue); + mddev->queue = NULL; if (mddev->sysfs_state) sysfs_put(mddev->sysfs_state); mddev->sysfs_state = NULL; @@ -275,16 +277,6 @@ static mddev_t * mddev_find(dev_t unit) new->resync_max = MaxSector; new->level = LEVEL_NONE; - new->queue = blk_alloc_queue(GFP_KERNEL); - if (!new->queue) { - kfree(new); - return NULL; - } - /* Can be unlocked because the queue is new: no concurrency */ - queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue); - - blk_queue_make_request(new->queue, md_fail_request); - goto retry; } @@ -3493,9 +3485,23 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) mddev_put(mddev); return NULL; } + + mddev->queue = blk_alloc_queue(GFP_KERNEL); + if (!mddev->queue) { + mutex_unlock(&disks_mutex); + mddev_put(mddev); + return NULL; + } + /* Can be unlocked because the queue is new: no concurrency */ + queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue); + + blk_queue_make_request(mddev->queue, md_fail_request); + disk = alloc_disk(1 << shift); if (!disk) { mutex_unlock(&disks_mutex); + blk_cleanup_queue(mddev->queue); + mddev->queue = NULL; mddev_put(mddev); return NULL; } -- cgit v1.2.3 From a21d15042d8cd736caf82c2bac564f3f93f3d017 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:09 +1100 Subject: md: centralise all freeing of an 'mddev' in 'md_free' md_free is the .release handler for the md kobj_type. So it makes sense to release all the objects referenced by the mddev in there, rather than just prior to calling kobject_put for what we think is the last time. Signed-off-by: NeilBrown --- drivers/md/md.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index da838cc32cc..970a8c42ba9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -221,12 +221,6 @@ static void mddev_put(mddev_t *mddev) if (!mddev->raid_disks && list_empty(&mddev->disks)) { list_del(&mddev->all_mddevs); spin_unlock(&all_mddevs_lock); - if (mddev->queue) - blk_cleanup_queue(mddev->queue); - mddev->queue = NULL; - if (mddev->sysfs_state) - sysfs_put(mddev->sysfs_state); - mddev->sysfs_state = NULL; kobject_put(&mddev->kobj); } else spin_unlock(&all_mddevs_lock); @@ -3451,6 +3445,17 @@ md_attr_store(struct kobject *kobj, struct attribute *attr, static void md_free(struct kobject *ko) { mddev_t *mddev = container_of(ko, mddev_t, kobj); + + if (mddev->sysfs_state) + sysfs_put(mddev->sysfs_state); + + if (mddev->gendisk) { + del_gendisk(mddev->gendisk); + put_disk(mddev->gendisk); + } + if (mddev->queue) + blk_cleanup_queue(mddev->queue); + kfree(mddev); } @@ -6435,9 +6440,6 @@ static __exit void md_exit(void) if (!disk) continue; export_array(mddev); - del_gendisk(disk); - put_disk(disk); - mddev->gendisk = NULL; mddev_put(mddev); } } -- cgit v1.2.3 From d3374825ce57ba2214d375023979f6197ccc1385 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:10 +1100 Subject: md: make devices disappear when they are no longer needed. Currently md devices, once created, never disappear until the module is unloaded. This is essentially because the gendisk holds a reference to the mddev, and the mddev holds a reference to the gendisk, this a circular reference. If we drop the reference from mddev to gendisk, then we need to ensure that the mddev is destroyed when the gendisk is destroyed. However it is not possible to hook into the gendisk destruction process to enable this. So we drop the reference from the gendisk to the mddev and destroy the gendisk when the mddev gets destroyed. However this has a complication. Between the call __blkdev_get->get_gendisk->kobj_lookup->md_probe and the call __blkdev_get->md_open there is no obvious way to hold a reference on the mddev any more, so unless something is done, it will disappear and gendisk will be destroyed prematurely. Also, once we decide to destroy the mddev, there will be an unlockable moment before the gendisk is unlinked (blk_unregister_region) during which a new reference to the gendisk can be created. We need to ensure that this reference can not be used. i.e. the ->open must fail. So: 1/ in md_probe we set a flag in the mddev (hold_active) which indicates that the array should be treated as active, even though there are no references, and no appearance of activity. This is cleared by md_release when the device is closed if it is no longer needed. This ensures that the gendisk will survive between md_probe and md_open. 2/ In md_open we check if the mddev we expect to open matches the gendisk that we did open. If there is a mismatch we return -ERESTARTSYS and modify __blkdev_get to retry from the top in that case. In the -ERESTARTSYS sys case we make sure to wait until the old gendisk (that we succeeded in opening) is really gone so we loop at most once. Some udev configurations will always open an md device when it first appears. If we allow an md device that was just created by an open to disappear on an immediate close, then this can race with such udev configurations and result in an infinite loop the device being opened and closed, then re-open due to the 'ADD' even from the first open, and then close and so on. So we make sure an md device, once created by an open, remains active at least until some md 'ioctl' has been made on it. This means that all normal usage of md devices will allow them to disappear promptly when not needed, but the worst that an incorrect usage will do it cause an inactive md device to be left in existence (it can easily be removed). As an array can be stopped by writing to a sysfs attribute echo clear > /sys/block/mdXXX/md/array_state we need to use scheduled work for deleting the gendisk and other kobjects. This allows us to wait for any pending gendisk deletion to complete by simply calling flush_scheduled_work(). Signed-off-by: NeilBrown --- drivers/md/md.c | 61 +++++++++++++++++++++++++++++++++++++---------- fs/block_dev.c | 14 +++++++++++ include/linux/raid/md_k.h | 4 ++++ 3 files changed, 67 insertions(+), 12 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 970a8c42ba9..38697283aaf 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -214,16 +214,33 @@ static inline mddev_t *mddev_get(mddev_t *mddev) return mddev; } +static void mddev_delayed_delete(struct work_struct *ws) +{ + mddev_t *mddev = container_of(ws, mddev_t, del_work); + kobject_del(&mddev->kobj); + kobject_put(&mddev->kobj); +} + static void mddev_put(mddev_t *mddev) { if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) return; - if (!mddev->raid_disks && list_empty(&mddev->disks)) { + if (!mddev->raid_disks && list_empty(&mddev->disks) && + !mddev->hold_active) { list_del(&mddev->all_mddevs); - spin_unlock(&all_mddevs_lock); - kobject_put(&mddev->kobj); - } else - spin_unlock(&all_mddevs_lock); + if (mddev->gendisk) { + /* we did a probe so need to clean up. + * Call schedule_work inside the spinlock + * so that flush_scheduled_work() after + * mddev_find will succeed in waiting for the + * work to be done. + */ + INIT_WORK(&mddev->del_work, mddev_delayed_delete); + schedule_work(&mddev->del_work); + } else + kfree(mddev); + } + spin_unlock(&all_mddevs_lock); } static mddev_t * mddev_find(dev_t unit) @@ -242,6 +259,7 @@ static mddev_t * mddev_find(dev_t unit) if (new) { list_add(&new->all_mddevs, &all_mddevs); + mddev->hold_active = UNTIL_IOCTL; spin_unlock(&all_mddevs_lock); return new; } @@ -3435,6 +3453,8 @@ md_attr_store(struct kobject *kobj, struct attribute *attr, if (!capable(CAP_SYS_ADMIN)) return -EACCES; rv = mddev_lock(mddev); + if (mddev->hold_active == UNTIL_IOCTL) + mddev->hold_active = 0; if (!rv) { rv = entry->store(mddev, page, length); mddev_unlock(mddev); @@ -3484,6 +3504,11 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) if (!mddev) return NULL; + /* wait for any previous instance if this device + * to be completed removed (mddev_delayed_delete). + */ + flush_scheduled_work(); + mutex_lock(&disks_mutex); if (mddev->gendisk) { mutex_unlock(&disks_mutex); @@ -3520,7 +3545,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) disk->private_data = mddev; disk->queue = mddev->queue; /* Allow extended partitions. This makes the - * 'mdp' device redundant, but we can really + * 'mdp' device redundant, but we can't really * remove it now. */ disk->flags |= GENHD_FL_EXT_DEVT; @@ -3536,6 +3561,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) kobject_uevent(&mddev->kobj, KOBJ_ADD); mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state"); } + mddev_put(mddev); return NULL; } @@ -5054,6 +5080,9 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, done_unlock: abort_unlock: + if (mddev->hold_active == UNTIL_IOCTL && + err != -EINVAL) + mddev->hold_active = 0; mddev_unlock(mddev); return err; @@ -5070,14 +5099,25 @@ static int md_open(struct block_device *bdev, fmode_t mode) * Succeed if we can lock the mddev, which confirms that * it isn't being stopped right now. */ - mddev_t *mddev = bdev->bd_disk->private_data; + mddev_t *mddev = mddev_find(bdev->bd_dev); int err; + if (mddev->gendisk != bdev->bd_disk) { + /* we are racing with mddev_put which is discarding this + * bd_disk. + */ + mddev_put(mddev); + /* Wait until bdev->bd_disk is definitely gone */ + flush_scheduled_work(); + /* Then retry the open from the top */ + return -ERESTARTSYS; + } + BUG_ON(mddev != bdev->bd_disk->private_data); + if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) goto out; err = 0; - mddev_get(mddev); atomic_inc(&mddev->openers); mddev_unlock(mddev); @@ -6436,11 +6476,8 @@ static __exit void md_exit(void) unregister_sysctl_table(raid_table_header); remove_proc_entry("mdstat", NULL); for_each_mddev(mddev, tmp) { - struct gendisk *disk = mddev->gendisk; - if (!disk) - continue; export_array(mddev); - mddev_put(mddev); + mddev->hold_active = 0; } } diff --git a/fs/block_dev.c b/fs/block_dev.c index b957717e25a..8ebbfdf708c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1005,6 +1005,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) } lock_kernel(); + restart: ret = -ENXIO; disk = get_gendisk(bdev->bd_dev, &partno); @@ -1025,6 +1026,19 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (disk->fops->open) { ret = disk->fops->open(bdev, mode); + if (ret == -ERESTARTSYS) { + /* Lost a race with 'disk' being + * deleted, try again. + * See md.c + */ + disk_put_part(bdev->bd_part); + bdev->bd_part = NULL; + module_put(disk->fops->owner); + put_disk(disk); + bdev->bd_disk = NULL; + mutex_unlock(&bdev->bd_mutex); + goto restart; + } if (ret) goto out_clear; } diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 8f9a54c1fb0..e3d17c7f954 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -137,6 +137,8 @@ struct mddev_s struct gendisk *gendisk; struct kobject kobj; + int hold_active; +#define UNTIL_IOCTL 1 /* Superblock information */ int major_version, @@ -246,6 +248,8 @@ struct mddev_s */ struct sysfs_dirent *sysfs_action; /* handle for 'sync_action' */ + struct work_struct del_work; /* used for delayed sysfs removal */ + spinlock_t write_lock; wait_queue_head_t sb_wait; /* for waiting on superblock updates */ atomic_t pending_writes; /* number of active superblock writes */ -- cgit v1.2.3 From efeb53c0e57213e843b7ef3cc6ebcdea7d6186ac Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:10 +1100 Subject: md: Allow md devices to be created by name. Using sequential numbers to identify md devices is somewhat artificial. Using names can be a lot more user-friendly. Also, creating md devices by opening the device special file is a bit awkward. So this patch provides a new option for creating and naming devices. Writing a name such as "md_home" to /sys/modules/md_mod/parameters/new_array will cause an array with that name to be created. It will appear in /sys/block/ /proc/partitions and /proc/mdstat as 'md_home'. It will have an arbitrary minor number allocated. md devices that a created by an open are destroyed on the last close when the device is inactive. For named md devices, they will not be destroyed until the array is explicitly stopped, either with the STOP_ARRAY ioctl or by writing 'clear' to /sys/block/md_XXXX/md/array_state. The name of the array must start 'md_' to avoid conflict with other devices. Signed-off-by: NeilBrown --- drivers/md/md.c | 119 +++++++++++++++++++++++++++++++++++++++------- include/linux/raid/md_k.h | 1 + 2 files changed, 102 insertions(+), 18 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 38697283aaf..f5cbb9d2371 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -249,17 +249,51 @@ static mddev_t * mddev_find(dev_t unit) retry: spin_lock(&all_mddevs_lock); - list_for_each_entry(mddev, &all_mddevs, all_mddevs) - if (mddev->unit == unit) { - mddev_get(mddev); + + if (unit) { + list_for_each_entry(mddev, &all_mddevs, all_mddevs) + if (mddev->unit == unit) { + mddev_get(mddev); + spin_unlock(&all_mddevs_lock); + kfree(new); + return mddev; + } + + if (new) { + list_add(&new->all_mddevs, &all_mddevs); spin_unlock(&all_mddevs_lock); - kfree(new); - return mddev; + new->hold_active = UNTIL_IOCTL; + return new; } - - if (new) { + } else if (new) { + /* find an unused unit number */ + static int next_minor = 512; + int start = next_minor; + int is_free = 0; + int dev = 0; + while (!is_free) { + dev = MKDEV(MD_MAJOR, next_minor); + next_minor++; + if (next_minor > MINORMASK) + next_minor = 0; + if (next_minor == start) { + /* Oh dear, all in use. */ + spin_unlock(&all_mddevs_lock); + kfree(new); + return NULL; + } + + is_free = 1; + list_for_each_entry(mddev, &all_mddevs, all_mddevs) + if (mddev->unit == dev) { + is_free = 0; + break; + } + } + new->unit = dev; + new->md_minor = MINOR(dev); + new->hold_active = UNTIL_STOP; list_add(&new->all_mddevs, &all_mddevs); - mddev->hold_active = UNTIL_IOCTL; spin_unlock(&all_mddevs_lock); return new; } @@ -3491,18 +3525,22 @@ static struct kobj_type md_ktype = { int mdp_major = 0; -static struct kobject *md_probe(dev_t dev, int *part, void *data) +static int md_alloc(dev_t dev, char *name) { static DEFINE_MUTEX(disks_mutex); mddev_t *mddev = mddev_find(dev); struct gendisk *disk; - int partitioned = (MAJOR(dev) != MD_MAJOR); - int shift = partitioned ? MdpMinorShift : 0; - int unit = MINOR(dev) >> shift; + int partitioned; + int shift; + int unit; int error; if (!mddev) - return NULL; + return -ENODEV; + + partitioned = (MAJOR(mddev->unit) != MD_MAJOR); + shift = partitioned ? MdpMinorShift : 0; + unit = MINOR(mddev->unit) >> shift; /* wait for any previous instance if this device * to be completed removed (mddev_delayed_delete). @@ -3513,14 +3551,29 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) if (mddev->gendisk) { mutex_unlock(&disks_mutex); mddev_put(mddev); - return NULL; + return -EEXIST; + } + + if (name) { + /* Need to ensure that 'name' is not a duplicate. + */ + mddev_t *mddev2; + spin_lock(&all_mddevs_lock); + + list_for_each_entry(mddev2, &all_mddevs, all_mddevs) + if (mddev2->gendisk && + strcmp(mddev2->gendisk->disk_name, name) == 0) { + spin_unlock(&all_mddevs_lock); + return -EEXIST; + } + spin_unlock(&all_mddevs_lock); } mddev->queue = blk_alloc_queue(GFP_KERNEL); if (!mddev->queue) { mutex_unlock(&disks_mutex); mddev_put(mddev); - return NULL; + return -ENOMEM; } /* Can be unlocked because the queue is new: no concurrency */ queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue); @@ -3533,11 +3586,13 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) blk_cleanup_queue(mddev->queue); mddev->queue = NULL; mddev_put(mddev); - return NULL; + return -ENOMEM; } - disk->major = MAJOR(dev); + disk->major = MAJOR(mddev->unit); disk->first_minor = unit << shift; - if (partitioned) + if (name) + strcpy(disk->disk_name, name); + else if (partitioned) sprintf(disk->disk_name, "md_d%d", unit); else sprintf(disk->disk_name, "md%d", unit); @@ -3562,9 +3617,34 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state"); } mddev_put(mddev); + return 0; +} + +static struct kobject *md_probe(dev_t dev, int *part, void *data) +{ + md_alloc(dev, NULL); return NULL; } +static int add_named_array(const char *val, struct kernel_param *kp) +{ + /* val must be "md_*" where * is not all digits. + * We allocate an array with a large free minor number, and + * set the name to val. val must not already be an active name. + */ + int len = strlen(val); + char buf[DISK_NAME_LEN]; + + while (len && val[len-1] == '\n') + len--; + if (len >= DISK_NAME_LEN) + return -E2BIG; + strlcpy(buf, val, len+1); + if (strncmp(buf, "md_", 3) != 0) + return -EINVAL; + return md_alloc(0, buf); +} + static void md_safemode_timeout(unsigned long data) { mddev_t *mddev = (mddev_t *) data; @@ -4025,6 +4105,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) mddev->barriers_work = 0; mddev->safemode = 0; kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); + if (mddev->hold_active == UNTIL_STOP) + mddev->hold_active = 0; } else if (mddev->pers) printk(KERN_INFO "md: %s switched to read-only mode.\n", @@ -6502,6 +6584,7 @@ static int set_ro(const char *val, struct kernel_param *kp) module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR); +module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR); EXPORT_SYMBOL(register_md_personality); EXPORT_SYMBOL(unregister_md_personality); diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index e3d17c7f954..dac4217194b 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -139,6 +139,7 @@ struct mddev_s struct kobject kobj; int hold_active; #define UNTIL_IOCTL 1 +#define UNTIL_STOP 2 /* Superblock information */ int major_version, -- cgit v1.2.3 From 4044ba58dd15cb01797c4fd034f39ef4a75f7cc3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Jan 2009 08:31:11 +1100 Subject: md: don't retry recovery of raid1 that fails due to error on source drive. If a raid1 has only one working drive and it has a sector which gives an error on read, then an attempt to recover onto a spare will fail, but as the single remaining drive is not removed from the array, the recovery will be immediately re-attempted, resulting in an infinite recovery loop. So detect this situation and don't retry recovery once an error on the lone remaining drive is detected. Allow recovery to be retried once every time a spare is added in case the problem wasn't actually a media error. Signed-off-by: NeilBrown --- drivers/md/md.c | 5 ++++- drivers/md/raid1.c | 8 ++++++-- include/linux/raid/md_k.h | 3 +++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index f5cbb9d2371..41e2509bf89 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1500,6 +1500,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) list_add_rcu(&rdev->same_set, &mddev->disks); bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); + + /* May as well allow recovery to be retried once */ + mddev->recovery_disabled = 0; return 0; fail: @@ -6175,7 +6178,7 @@ static int remove_and_add_spares(mddev_t *mddev) } } - if (mddev->degraded && ! mddev->ro) { + if (mddev->degraded && ! mddev->ro && !mddev->recovery_disabled) { list_for_each_entry(rdev, &mddev->disks, same_set) { if (rdev->raid_disk >= 0 && !test_bit(In_sync, &rdev->flags) && diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index c165b1eed8b..7b4f5f7155d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1016,12 +1016,16 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) * else mark the drive as failed */ if (test_bit(In_sync, &rdev->flags) - && (conf->raid_disks - mddev->degraded) == 1) + && (conf->raid_disks - mddev->degraded) == 1) { /* * Don't fail the drive, act as though we were just a - * normal single drive + * normal single drive. + * However don't try a recovery from this drive as + * it is very likely to fail. */ + mddev->recovery_disabled = 1; return; + } if (test_and_clear_bit(In_sync, &rdev->flags)) { unsigned long flags; spin_lock_irqsave(&conf->device_lock, flags); diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index dac4217194b..9743e4dbc91 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -218,6 +218,9 @@ struct mddev_s #define MD_RECOVERY_FROZEN 9 unsigned long recovery; + int recovery_disabled; /* if we detect that recovery + * will always fail, set this + * so we don't loop trying */ int in_sync; /* know to not need resync */ struct mutex reconfig_mutex; -- cgit v1.2.3