From 1312f40e11c57edb5c3250f1b782cef8e3efea82 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Mar 2006 11:02:03 -0500 Subject: [PATCH] regularize blk_cleanup_queue() use Signed-off-by: Al Viro --- drivers/md/md.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index d05e3125d29..5ed2228745c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -213,8 +213,11 @@ static void mddev_put(mddev_t *mddev) return; if (!mddev->raid_disks && list_empty(&mddev->disks)) { list_del(&mddev->all_mddevs); - blk_put_queue(mddev->queue); + /* that blocks */ + blk_cleanup_queue(mddev->queue); + /* that also blocks */ kobject_unregister(&mddev->kobj); + /* result blows... */ } spin_unlock(&all_mddevs_lock); } -- cgit v1.2.3 From 5463c7904c952aa6b6804dd902c72a5332fa5221 Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Mon, 27 Mar 2006 01:17:58 -0800 Subject: [PATCH] dm/md dependency tree in sysfs: md to use bd_claim_by_disk Use bd_claim_by_disk. Following symlinks are created if md0 is built from sda and sdb /sys/block/md0/slaves/sda --> /sys/block/sda /sys/block/md0/slaves/sdb --> /sys/block/sdb /sys/block/sda/holders/md0 --> /sys/block/md0 /sys/block/sdb/holders/md0 --> /sys/block/md0 Signed-off-by: Jun'ichi Nomura Cc: Alasdair G Kergon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 5ed2228745c..bde3e968225 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1301,6 +1301,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) else ko = &rdev->bdev->bd_disk->kobj; sysfs_create_link(&rdev->kobj, ko, "block"); + bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk); return 0; } @@ -1311,6 +1312,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) MD_BUG(); return; } + bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk); list_del_init(&rdev->same_set); printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); rdev->mddev = NULL; -- cgit v1.2.3 From 89e5c8b5b85d6d46e8a28cdfa076313ae691d35c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:02 -0800 Subject: [PATCH] md: Make sure QUEUE_FLAG_CLUSTER is set properly for md. This flag should be set for a virtual device iff it is set for all underlying devices. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index bde3e968225..3254ff1a5cc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -266,6 +266,7 @@ static mddev_t * mddev_find(dev_t unit) kfree(new); return NULL; } + set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags); blk_queue_make_request(new->queue, md_fail_request); -- cgit v1.2.3 From c5a10f62c5c496c49db749af103b991873b7e2dc Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:03 -0800 Subject: [PATCH] md: Add '4' to the list of levels for which bitmaps are supported I really should make this a function of the personality.... Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 3254ff1a5cc..875bced88e3 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -765,7 +765,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) if (sb->state & (1<bitmap_file == NULL) { - if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6 + if (mddev->level != 1 && mddev->level != 4 + && mddev->level != 5 && mddev->level != 6 && mddev->level != 10) { /* FIXME use a better test */ printk(KERN_WARNING "md: bitmaps not supported for this level.\n"); -- cgit v1.2.3 From 1be7892fffb45f6017494a88ff68fe84c6de26b4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:03 -0800 Subject: [PATCH] md: Fix the 'failed' count for version-0 superblocks We are counting failed devices twice, once of the device that is failed, and once for the hole that has been left in the array. Remove the former so 'failed' matches 'missing'. Storing these counts in the superblock is a bit silly anyway.... Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 875bced88e3..686314f070a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -895,10 +895,9 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) d->raid_disk = rdev2->raid_disk; else d->raid_disk = rdev2->desc_nr; /* compatibility */ - if (test_bit(Faulty, &rdev2->flags)) { + if (test_bit(Faulty, &rdev2->flags)) d->state = (1<flags)) { + else if (test_bit(In_sync, &rdev2->flags)) { d->state = (1<state |= (1< Date: Mon, 27 Mar 2006 01:18:04 -0800 Subject: [PATCH] md: Update status_resync to handle LARGE devices status_resync - used by /proc/mdstat to report the status of a resync, assumes that device sizes will always fit into an 'unsigned long' This is no longer the case... Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 686314f070a..a3ecaf8ed30 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4044,7 +4044,10 @@ static void status_unused(struct seq_file *seq) static void status_resync(struct seq_file *seq, mddev_t * mddev) { - unsigned long max_blocks, resync, res, dt, db, rt; + sector_t max_blocks, resync, res; + unsigned long dt, db, rt; + int scale; + unsigned int per_milli; resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2; @@ -4060,9 +4063,22 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) MD_BUG(); return; } - res = (resync/1024)*1000/(max_blocks/1024 + 1); + /* Pick 'scale' such that (resync>>scale)*1000 will fit + * in a sector_t, and (max_blocks>>scale) will fit in a + * u32, as those are the requirements for sector_div. + * Thus 'scale' must be at least 10 + */ + scale = 10; + if (sizeof(sector_t) > sizeof(unsigned long)) { + while ( max_blocks/2 > (1ULL<<(scale+32))) + scale++; + } + res = (resync>>scale)*1000; + sector_div(res, (u32)((max_blocks>>scale)+1)); + + per_milli = res; { - int i, x = res/50, y = 20-x; + int i, x = per_milli/50, y = 20-x; seq_printf(seq, "["); for (i = 0; i < x; i++) seq_printf(seq, "="); @@ -4071,10 +4087,12 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) seq_printf(seq, "."); seq_printf(seq, "] "); } - seq_printf(seq, " %s =%3lu.%lu%% (%lu/%lu)", + seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)", (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ? "resync" : "recovery"), - res/10, res % 10, resync, max_blocks); + per_milli/10, per_milli % 10, + (unsigned long long) resync, + (unsigned long long) max_blocks); /* * We do not want to overflow, so the order of operands and @@ -4088,7 +4106,7 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) dt = ((jiffies - mddev->resync_mark) / HZ); if (!dt) dt++; db = resync - (mddev->resync_mark_cnt/2); - rt = (dt * ((max_blocks-resync) / (db/100+1)))/100; + rt = (dt * ((unsigned long)(max_blocks-resync) / (db/100+1)))/100; seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6); -- cgit v1.2.3 From ad01c9e3752f4ba4f3d99c89b7370fa4983a25b5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:07 -0800 Subject: [PATCH] md: Allow stripes to be expanded in preparation for expanding an array Before a RAID-5 can be expanded, we need to be able to expand the stripe-cache data structure. This requires allocating new stripes in a new kmem_cache. If this succeeds, we copy cache pages over and release the old stripes and kmem_cache. We then allocate new pages. If that fails, we leave the stripe cache at it's new size. It isn't worth the effort to shrink it back again. Unfortuanately this means we need two kmem_cache names as we, for a short period of time, we have two kmem_caches. So they are raid5/%s and raid5/%s-alt Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index a3ecaf8ed30..c7b7656f9aa 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2775,7 +2775,6 @@ static void autorun_array(mddev_t *mddev) */ static void autorun_devices(int part) { - struct list_head candidates; struct list_head *tmp; mdk_rdev_t *rdev0, *rdev; mddev_t *mddev; @@ -2784,6 +2783,7 @@ static void autorun_devices(int part) printk(KERN_INFO "md: autorun ...\n"); while (!list_empty(&pending_raid_disks)) { dev_t dev; + LIST_HEAD(candidates); rdev0 = list_entry(pending_raid_disks.next, mdk_rdev_t, same_set); -- cgit v1.2.3 From ccfcc3c10b2a5cb8fd3c918199a4ff904fc6fb3e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:09 -0800 Subject: [PATCH] md: Core of raid5 resize process This patch provides the core of the resize/expand process. sync_request notices if a 'reshape' is happening and acts accordingly. It allocated new stripe_heads for the next chunk-wide-stripe in the target geometry, marking them STRIPE_EXPANDING. Then it finds which stripe heads in the old geometry can provide data needed by these and marks them STRIPE_EXPAND_SOURCE. This causes stripe_handle to read all blocks on those stripes. Once all blocks on a STRIPE_EXPAND_SOURCE stripe_head are read, any that are needed are copied into the corresponding STRIPE_EXPANDING stripe_head. Once a STRIPE_EXPANDING stripe_head is full, it is marks STRIPE_EXPAND_READY and then is written out and released. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index c7b7656f9aa..8e65986bc63 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2165,7 +2165,9 @@ action_show(mddev_t *mddev, char *page) char *type = "idle"; if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) { - if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { + if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) + type = "reshape"; + else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) type = "resync"; else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) @@ -4088,8 +4090,10 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) seq_printf(seq, "] "); } seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)", + (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)? + "reshape" : (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ? - "resync" : "recovery"), + "resync" : "recovery")), per_milli/10, per_milli % 10, (unsigned long long) resync, (unsigned long long) max_blocks); @@ -4543,7 +4547,9 @@ static void md_do_sync(mddev_t *mddev) */ max_sectors = mddev->resync_max_sectors; mddev->resync_mismatches = 0; - } else + } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) + max_sectors = mddev->size << 1; + else /* recovery follows the physical size of devices */ max_sectors = mddev->size << 1; @@ -4679,6 +4685,8 @@ static void md_do_sync(mddev_t *mddev) mddev->pers->sync_request(mddev, max_sectors, &skipped, 1); if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && + test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && + !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && mddev->curr_resync > 2 && mddev->curr_resync >= mddev->recovery_cp) { if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { -- cgit v1.2.3 From 292695531ae4019bb15deedc121b218d1908b648 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:10 -0800 Subject: [PATCH] md: Final stages of raid5 expand code This patch adds raid5_reshape and end_reshape which will start and finish the reshape processes. raid5_reshape is only enabled in CONFIG_MD_RAID5_RESHAPE is set, to discourage accidental use. Read the 'help' for the CONFIG_MD_RAID5_RESHAPE entry. and Make sure that you have backups, just in case. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 8e65986bc63..d169bc96467 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -158,11 +158,12 @@ static int start_readonly; */ static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters); static atomic_t md_event_count; -static void md_new_event(mddev_t *mddev) +void md_new_event(mddev_t *mddev) { atomic_inc(&md_event_count); wake_up(&md_event_waiters); } +EXPORT_SYMBOL_GPL(md_new_event); /* * Enables to iterate over all existing md arrays @@ -4467,7 +4468,7 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait); #define SYNC_MARKS 10 #define SYNC_MARK_STEP (3*HZ) -static void md_do_sync(mddev_t *mddev) +void md_do_sync(mddev_t *mddev) { mddev_t *mddev2; unsigned int currspeed = 0, @@ -4704,6 +4705,7 @@ static void md_do_sync(mddev_t *mddev) set_bit(MD_RECOVERY_DONE, &mddev->recovery); md_wakeup_thread(mddev->thread); } +EXPORT_SYMBOL_GPL(md_do_sync); /* -- cgit v1.2.3 From f67055780caac6a99f43834795c43acf99eba6a6 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:11 -0800 Subject: [PATCH] md: Checkpoint and allow restart of raid5 reshape We allow the superblock to record an 'old' and a 'new' geometry, and a position where any conversion is up to. The geometry allows for changing chunksize, layout and level as well as number of devices. When using verion-0.90 superblock, we convert the version to 0.91 while the conversion is happening so that an old kernel will refuse the assemble the array. For version-1, we use a feature bit for the same effect. When starting an array we check for an incomplete reshape and restart the reshape process if needed. If the reshape stopped at an awkward time (like when updating the first stripe) we refuse to assemble the array, and let user-space worry about it. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 3 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index d169bc96467..b9dfdfccdb7 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -662,7 +662,8 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version } if (sb->major_version != 0 || - sb->minor_version != 90) { + sb->minor_version < 90 || + sb->minor_version > 91) { printk(KERN_WARNING "Bad version number %d.%d on %s\n", sb->major_version, sb->minor_version, b); @@ -747,6 +748,20 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->bitmap_offset = 0; mddev->default_bitmap_offset = MD_SB_BYTES >> 9; + if (mddev->minor_version >= 91) { + mddev->reshape_position = sb->reshape_position; + mddev->delta_disks = sb->delta_disks; + mddev->new_level = sb->new_level; + mddev->new_layout = sb->new_layout; + mddev->new_chunk = sb->new_chunk; + } else { + mddev->reshape_position = MaxSector; + mddev->delta_disks = 0; + mddev->new_level = mddev->level; + mddev->new_layout = mddev->layout; + mddev->new_chunk = mddev->chunk_size; + } + if (sb->state & (1<recovery_cp = MaxSector; else { @@ -841,7 +856,6 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->md_magic = MD_SB_MAGIC; sb->major_version = mddev->major_version; - sb->minor_version = mddev->minor_version; sb->patch_version = mddev->patch_version; sb->gvalid_words = 0; /* ignored */ memcpy(&sb->set_uuid0, mddev->uuid+0, 4); @@ -860,6 +874,17 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->events_hi = (mddev->events>>32); sb->events_lo = (u32)mddev->events; + if (mddev->reshape_position == MaxSector) + sb->minor_version = 90; + else { + sb->minor_version = 91; + sb->reshape_position = mddev->reshape_position; + sb->new_level = mddev->new_level; + sb->delta_disks = mddev->delta_disks; + sb->new_layout = mddev->new_layout; + sb->new_chunk = mddev->new_chunk; + } + mddev->minor_version = sb->minor_version; if (mddev->in_sync) { sb->recovery_cp = mddev->recovery_cp; @@ -1104,6 +1129,20 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) } mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); } + if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) { + mddev->reshape_position = le64_to_cpu(sb->reshape_position); + mddev->delta_disks = le32_to_cpu(sb->delta_disks); + mddev->new_level = le32_to_cpu(sb->new_level); + mddev->new_layout = le32_to_cpu(sb->new_layout); + mddev->new_chunk = le32_to_cpu(sb->new_chunk)<<9; + } else { + mddev->reshape_position = MaxSector; + mddev->delta_disks = 0; + mddev->new_level = mddev->level; + mddev->new_layout = mddev->layout; + mddev->new_chunk = mddev->chunk_size; + } + } else if (mddev->pers == NULL) { /* Insist of good event counter while assembling */ __u64 ev1 = le64_to_cpu(sb->events); @@ -1175,6 +1214,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); } + if (mddev->reshape_position != MaxSector) { + sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE); + sb->reshape_position = cpu_to_le64(mddev->reshape_position); + sb->new_layout = cpu_to_le32(mddev->new_layout); + sb->delta_disks = cpu_to_le32(mddev->delta_disks); + sb->new_level = cpu_to_le32(mddev->new_level); + sb->new_chunk = cpu_to_le32(mddev->new_chunk>>9); + } max_dev = 0; ITERATE_RDEV(mddev,rdev2,tmp) @@ -1497,7 +1544,7 @@ static void sync_sbs(mddev_t * mddev) } } -static void md_update_sb(mddev_t * mddev) +void md_update_sb(mddev_t * mddev) { int err; struct list_head *tmp; @@ -1574,6 +1621,7 @@ repeat: wake_up(&mddev->sb_wait); } +EXPORT_SYMBOL_GPL(md_update_sb); /* words written to sysfs files may, or my not, be \n terminated. * We want to accept with case. For this we use cmd_match. @@ -2545,6 +2593,14 @@ static int do_md_run(mddev_t * mddev) mddev->level = pers->level; strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); + if (mddev->reshape_position != MaxSector && + pers->reshape == NULL) { + /* This personality cannot handle reshaping... */ + mddev->pers = NULL; + module_put(pers->owner); + return -EINVAL; + } + mddev->recovery = 0; mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ mddev->barriers_work = 1; @@ -3433,11 +3489,18 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) mddev->default_bitmap_offset = MD_SB_BYTES >> 9; mddev->bitmap_offset = 0; + mddev->reshape_position = MaxSector; + /* * Generate a 128 bit UUID */ get_random_bytes(mddev->uuid, 16); + mddev->new_level = mddev->level; + mddev->new_chunk = mddev->chunk_size; + mddev->new_layout = mddev->layout; + mddev->delta_disks = 0; + return 0; } -- cgit v1.2.3 From 63c70c4f3a30e77e6f445bd16eff7934a031ebd3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:13 -0800 Subject: [PATCH] md: Split reshape handler in check_reshape and start_reshape check_reshape checks validity and does things that can be done instantly - like adding devices to raid1. start_reshape initiates a restriping process to convert the whole array. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index b9dfdfccdb7..1a637676a93 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2594,7 +2594,7 @@ static int do_md_run(mddev_t * mddev) strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); if (mddev->reshape_position != MaxSector && - pers->reshape == NULL) { + pers->start_reshape == NULL) { /* This personality cannot handle reshaping... */ mddev->pers = NULL; module_put(pers->owner); @@ -3556,14 +3556,16 @@ static int update_raid_disks(mddev_t *mddev, int raid_disks) { int rv; /* change the number of raid disks */ - if (mddev->pers->reshape == NULL) + if (mddev->pers->check_reshape == NULL) return -EINVAL; if (raid_disks <= 0 || raid_disks >= mddev->max_disks) return -EINVAL; - if (mddev->sync_thread) + if (mddev->sync_thread || mddev->reshape_position != MaxSector) return -EBUSY; - rv = mddev->pers->reshape(mddev, raid_disks); + mddev->delta_disks = raid_disks - mddev->raid_disks; + + rv = mddev->pers->check_reshape(mddev); return rv; } -- cgit v1.2.3 From 16484bf59634e25d1299761e5ed8bacf22bc6368 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:13 -0800 Subject: [PATCH] md: Make 'reshape' a possible sync_action action This allows reshape to be triggerred via sysfs (which is the only way to start it happening). Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 1a637676a93..a79dd33d343 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2247,7 +2247,14 @@ action_store(mddev_t *mddev, const char *page, size_t len) return -EBUSY; else if (cmd_match(page, "resync") || cmd_match(page, "recover")) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - else { + else if (cmd_match(page, "reshape")) { + int err; + if (mddev->pers->start_reshape == NULL) + return -EINVAL; + err = mddev->pers->start_reshape(mddev); + if (err) + return err; + } else { if (cmd_match(page, "check")) set_bit(MD_RECOVERY_CHECK, &mddev->recovery); else if (cmd_match(page, "repair")) -- cgit v1.2.3 From e464eafdb4400c6d6576ba3840d8bd40340f8a96 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:14 -0800 Subject: [PATCH] md: Support suspending of IO to regions of an md array This allows user-space to access data safely. This is needed for raid5 reshape as user-space needs to take a backup of the first few stripes before allowing reshape to commence. It will also be useful in cluster-aware raid1 configurations so that all cluster members can leave a section of the array untouched while a resync/recovery happens. A 'start' and 'end' of the suspended range are written to 2 sysfs attributes. Note that only one range can be suspended at a time. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index a79dd33d343..92fd0104fa0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2365,6 +2365,63 @@ sync_completed_show(mddev_t *mddev, char *page) static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); +static ssize_t +suspend_lo_show(mddev_t *mddev, char *page) +{ + return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo); +} + +static ssize_t +suspend_lo_store(mddev_t *mddev, const char *buf, size_t len) +{ + char *e; + unsigned long long new = simple_strtoull(buf, &e, 10); + + if (mddev->pers->quiesce == NULL) + return -EINVAL; + if (buf == e || (*e && *e != '\n')) + return -EINVAL; + if (new >= mddev->suspend_hi || + (new > mddev->suspend_lo && new < mddev->suspend_hi)) { + mddev->suspend_lo = new; + mddev->pers->quiesce(mddev, 2); + return len; + } else + return -EINVAL; +} +static struct md_sysfs_entry md_suspend_lo = +__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store); + + +static ssize_t +suspend_hi_show(mddev_t *mddev, char *page) +{ + return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi); +} + +static ssize_t +suspend_hi_store(mddev_t *mddev, const char *buf, size_t len) +{ + char *e; + unsigned long long new = simple_strtoull(buf, &e, 10); + + if (mddev->pers->quiesce == NULL) + return -EINVAL; + if (buf == e || (*e && *e != '\n')) + return -EINVAL; + if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) || + (new > mddev->suspend_lo && new > mddev->suspend_hi)) { + mddev->suspend_hi = new; + mddev->pers->quiesce(mddev, 1); + mddev->pers->quiesce(mddev, 0); + return len; + } else + return -EINVAL; +} +static struct md_sysfs_entry md_suspend_hi = +__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store); + + static struct attribute *md_default_attrs[] = { &md_level.attr, &md_raid_disks.attr, @@ -2382,6 +2439,8 @@ static struct attribute *md_redundancy_attrs[] = { &md_sync_max.attr, &md_sync_speed.attr, &md_sync_completed.attr, + &md_suspend_lo.attr, + &md_suspend_hi.attr, NULL, }; static struct attribute_group md_redundancy_group = { -- cgit v1.2.3 From 8ddeeae51f2f197b4fafcba117ee8191b49d843e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:18 -0800 Subject: [PATCH] md: Fix md grow/size code to correctly find the maximum available space An md array can be asked to change the amount of each device that it is using, and in particular can be asked to use the maximum available space. This currently only works if the first device is not larger than the rest. As 'size' gets changed and so 'fit' becomes wrong. So check if a 'fit' is required early and don't corrupt it. Signed-off-by: Doug Ledford Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 92fd0104fa0..147efcb1e8c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3575,6 +3575,7 @@ static int update_size(mddev_t *mddev, unsigned long size) mdk_rdev_t * rdev; int rv; struct list_head *tmp; + int fit = (size == 0); if (mddev->pers->resize == NULL) return -EINVAL; @@ -3592,7 +3593,6 @@ static int update_size(mddev_t *mddev, unsigned long size) return -EBUSY; ITERATE_RDEV(mddev,rdev,tmp) { sector_t avail; - int fit = (size == 0); if (rdev->sb_offset > rdev->data_offset) avail = (rdev->sb_offset*2) - rdev->data_offset; else -- cgit v1.2.3 From 48c9c27b8bcd2a328a06151e2d5c1170db0b701b Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 27 Mar 2006 01:18:20 -0800 Subject: [PATCH] sem2mutex: drivers/md Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 147efcb1e8c..c9c9c096ad8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -43,6 +43,7 @@ #include /* for invalidate_bdev */ #include #include +#include #include @@ -2500,7 +2501,7 @@ int mdp_major = 0; static struct kobject *md_probe(dev_t dev, int *part, void *data) { - static DECLARE_MUTEX(disks_sem); + static DEFINE_MUTEX(disks_mutex); mddev_t *mddev = mddev_find(dev); struct gendisk *disk; int partitioned = (MAJOR(dev) != MD_MAJOR); @@ -2510,15 +2511,15 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) if (!mddev) return NULL; - down(&disks_sem); + mutex_lock(&disks_mutex); if (mddev->gendisk) { - up(&disks_sem); + mutex_unlock(&disks_mutex); mddev_put(mddev); return NULL; } disk = alloc_disk(1 << shift); if (!disk) { - up(&disks_sem); + mutex_unlock(&disks_mutex); mddev_put(mddev); return NULL; } @@ -2536,7 +2537,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) disk->queue = mddev->queue; add_disk(disk); mddev->gendisk = disk; - up(&disks_sem); + mutex_unlock(&disks_mutex); mddev->kobj.parent = &disk->kobj; mddev->kobj.k_name = NULL; snprintf(mddev->kobj.name, KOBJ_NAME_LEN, "%s", "md"); -- cgit v1.2.3 From df5b89b323b922f56650b4b4d7c41899b937cf19 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:20 -0800 Subject: [PATCH] md: Convert reconfig_sem to reconfig_mutex ... being careful that mutex_trylock is inverted wrt down_trylock Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index c9c9c096ad8..039e071c100 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -255,7 +255,7 @@ static mddev_t * mddev_find(dev_t unit) else new->md_minor = MINOR(unit) >> MdpMinorShift; - init_MUTEX(&new->reconfig_sem); + mutex_init(&new->reconfig_mutex); INIT_LIST_HEAD(&new->disks); INIT_LIST_HEAD(&new->all_mddevs); init_timer(&new->safemode_timer); @@ -277,22 +277,22 @@ static mddev_t * mddev_find(dev_t unit) static inline int mddev_lock(mddev_t * mddev) { - return down_interruptible(&mddev->reconfig_sem); + return mutex_lock_interruptible(&mddev->reconfig_mutex); } static inline void mddev_lock_uninterruptible(mddev_t * mddev) { - down(&mddev->reconfig_sem); + mutex_lock(&mddev->reconfig_mutex); } static inline int mddev_trylock(mddev_t * mddev) { - return down_trylock(&mddev->reconfig_sem); + return mutex_trylock(&mddev->reconfig_mutex); } static inline void mddev_unlock(mddev_t * mddev) { - up(&mddev->reconfig_sem); + mutex_unlock(&mddev->reconfig_mutex); md_wakeup_thread(mddev->thread); } @@ -4893,7 +4893,7 @@ void md_check_recovery(mddev_t *mddev) )) return; - if (mddev_trylock(mddev)==0) { + if (mddev_trylock(mddev)) { int spares =0; spin_lock_irq(&mddev->write_lock); @@ -5029,7 +5029,7 @@ static int md_notify_reboot(struct notifier_block *this, printk(KERN_INFO "md: stopping all md devices.\n"); ITERATE_MDDEV(mddev,tmp) - if (mddev_trylock(mddev)==0) + if (mddev_trylock(mddev)) do_md_stop (mddev, 1); /* * certain more exotic SCSI devices are known to be -- cgit v1.2.3 From 926ce2d8a7d446c720faec9d8c5105eeb04bcf7a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 31 Mar 2006 02:32:02 -0800 Subject: [PATCH] md: Remove some code that can sleep from under a spinlock And remove the comments that were put in inplace of a fix too.... Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 039e071c100..1ed5152db45 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -215,13 +215,11 @@ static void mddev_put(mddev_t *mddev) return; if (!mddev->raid_disks && list_empty(&mddev->disks)) { list_del(&mddev->all_mddevs); - /* that blocks */ + spin_unlock(&all_mddevs_lock); blk_cleanup_queue(mddev->queue); - /* that also blocks */ kobject_unregister(&mddev->kobj); - /* result blows... */ - } - spin_unlock(&all_mddevs_lock); + } else + spin_unlock(&all_mddevs_lock); } static mddev_t * mddev_find(dev_t unit) -- cgit v1.2.3