diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 194 |
1 files changed, 130 insertions, 64 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 05febfd9f07..509171ca7fa 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1296,27 +1296,17 @@ static struct super_type super_types[] = { .sync_super = super_1_sync, }, }; - -static mdk_rdev_t * match_dev_unit(mddev_t *mddev, mdk_rdev_t *dev) -{ - struct list_head *tmp; - mdk_rdev_t *rdev; - - ITERATE_RDEV(mddev,rdev,tmp) - if (rdev->bdev->bd_contains == dev->bdev->bd_contains) - return rdev; - - return NULL; -} static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) { - struct list_head *tmp; - mdk_rdev_t *rdev; + struct list_head *tmp, *tmp2; + mdk_rdev_t *rdev, *rdev2; ITERATE_RDEV(mddev1,rdev,tmp) - if (match_dev_unit(mddev2, rdev)) - return 1; + ITERATE_RDEV(mddev2, rdev2, tmp2) + if (rdev->bdev->bd_contains == + rdev2->bdev->bd_contains) + return 1; return 0; } @@ -1325,10 +1315,10 @@ static LIST_HEAD(pending_raid_disks); static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) { - mdk_rdev_t *same_pdev; - char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; + char b[BDEVNAME_SIZE]; struct kobject *ko; char *s; + int err; if (rdev->mddev) { MD_BUG(); @@ -1342,14 +1332,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) else mddev->size = rdev->size; } - same_pdev = match_dev_unit(mddev, rdev); - if (same_pdev) - printk(KERN_WARNING - "%s: WARNING: %s appears to be on the same physical" - " disk as %s. True\n protection against single-disk" - " failure might be compromised.\n", - mdname(mddev), bdevname(rdev->bdev,b), - bdevname(same_pdev->bdev,b2)); /* Verify rdev->desc_nr is unique. * If it is -1, assign a free number, else @@ -1371,20 +1353,35 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) while ( (s=strchr(rdev->kobj.k_name, '/')) != NULL) *s = '!'; - list_add(&rdev->same_set, &mddev->disks); rdev->mddev = mddev; printk(KERN_INFO "md: bind<%s>\n", b); rdev->kobj.parent = &mddev->kobj; - kobject_add(&rdev->kobj); + if ((err = kobject_add(&rdev->kobj))) + goto fail; if (rdev->bdev->bd_part) ko = &rdev->bdev->bd_part->kobj; else ko = &rdev->bdev->bd_disk->kobj; - sysfs_create_link(&rdev->kobj, ko, "block"); + if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { + kobject_del(&rdev->kobj); + goto fail; + } + list_add(&rdev->same_set, &mddev->disks); bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk); return 0; + + fail: + printk(KERN_WARNING "md: failed to register dev-%s for %s\n", + b, mdname(mddev)); + return err; +} + +static void delayed_delete(struct work_struct *ws) +{ + mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work); + kobject_del(&rdev->kobj); } static void unbind_rdev_from_array(mdk_rdev_t * rdev) @@ -1399,7 +1396,12 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); rdev->mddev = NULL; sysfs_remove_link(&rdev->kobj, "block"); - kobject_del(&rdev->kobj); + + /* We need to delay this, otherwise we can deadlock when + * writing to 'remove' to "dev/state" + */ + INIT_WORK(&rdev->del_work, delayed_delete); + schedule_work(&rdev->del_work); } /* @@ -2985,7 +2987,9 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) mddev->kobj.k_name = NULL; snprintf(mddev->kobj.name, KOBJ_NAME_LEN, "%s", "md"); mddev->kobj.ktype = &md_ktype; - kobject_register(&mddev->kobj); + if (kobject_register(&mddev->kobj)) + printk(KERN_WARNING "md: cannot register %s/md - name in use\n", + disk->disk_name); return NULL; } @@ -3109,6 +3113,36 @@ static int do_md_run(mddev_t * mddev) return -EINVAL; } + if (pers->sync_request) { + /* Warn if this is a potentially silly + * configuration. + */ + char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; + mdk_rdev_t *rdev2; + struct list_head *tmp2; + int warned = 0; + ITERATE_RDEV(mddev, rdev, tmp) { + ITERATE_RDEV(mddev, rdev2, tmp2) { + if (rdev < rdev2 && + rdev->bdev->bd_contains == + rdev2->bdev->bd_contains) { + printk(KERN_WARNING + "%s: WARNING: %s appears to be" + " on the same physical disk as" + " %s.\n", + mdname(mddev), + bdevname(rdev->bdev,b), + bdevname(rdev2->bdev,b2)); + warned = 1; + } + } + } + if (warned) + printk(KERN_WARNING + "True protection against single-disk" + " failure might be compromised.\n"); + } + mddev->recovery = 0; mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ mddev->barriers_work = 1; @@ -3133,9 +3167,12 @@ static int do_md_run(mddev_t * mddev) bitmap_destroy(mddev); return err; } - if (mddev->pers->sync_request) - sysfs_create_group(&mddev->kobj, &md_redundancy_group); - else if (mddev->ro == 2) /* auto-readonly not meaningful */ + if (mddev->pers->sync_request) { + if (sysfs_create_group(&mddev->kobj, &md_redundancy_group)) + printk(KERN_WARNING + "md: cannot register extra attributes for %s\n", + mdname(mddev)); + } else if (mddev->ro == 2) /* auto-readonly not meaningful */ mddev->ro = 0; atomic_set(&mddev->writes_pending,0); @@ -3149,7 +3186,9 @@ static int do_md_run(mddev_t * mddev) if (rdev->raid_disk >= 0) { char nm[20]; sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); + if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) + printk("md: cannot register %s for %s\n", + nm, mdname(mddev)); } set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); @@ -3311,6 +3350,10 @@ static int do_md_stop(mddev_t * mddev, int mode) set_disk_ro(disk, 0); blk_queue_make_request(mddev->queue, md_fail_request); mddev->pers->stop(mddev); + mddev->queue->merge_bvec_fn = NULL; + mddev->queue->unplug_fn = NULL; + mddev->queue->issue_flush_fn = NULL; + mddev->queue->backing_dev_info.congested_fn = NULL; if (mddev->pers->sync_request) sysfs_remove_group(&mddev->kobj, &md_redundancy_group); @@ -3357,6 +3400,9 @@ static int do_md_stop(mddev_t * mddev, int mode) sysfs_remove_link(&mddev->kobj, nm); } + /* make sure all delayed_delete calls have finished */ + flush_scheduled_work(); + export_array(mddev); mddev->array_size = 0; @@ -5343,6 +5389,48 @@ void md_do_sync(mddev_t *mddev) EXPORT_SYMBOL_GPL(md_do_sync); +static int remove_and_add_spares(mddev_t *mddev) +{ + mdk_rdev_t *rdev; + struct list_head *rtmp; + int spares = 0; + + ITERATE_RDEV(mddev,rdev,rtmp) + if (rdev->raid_disk >= 0 && + (test_bit(Faulty, &rdev->flags) || + ! test_bit(In_sync, &rdev->flags)) && + atomic_read(&rdev->nr_pending)==0) { + if (mddev->pers->hot_remove_disk( + mddev, rdev->raid_disk)==0) { + char nm[20]; + sprintf(nm,"rd%d", rdev->raid_disk); + sysfs_remove_link(&mddev->kobj, nm); + rdev->raid_disk = -1; + } + } + + if (mddev->degraded) { + ITERATE_RDEV(mddev,rdev,rtmp) + if (rdev->raid_disk < 0 + && !test_bit(Faulty, &rdev->flags)) { + rdev->recovery_offset = 0; + if (mddev->pers->hot_add_disk(mddev,rdev)) { + char nm[20]; + sprintf(nm, "rd%d", rdev->raid_disk); + if (sysfs_create_link(&mddev->kobj, + &rdev->kobj, nm)) + printk(KERN_WARNING + "md: cannot register " + "%s for %s\n", + nm, mdname(mddev)); + spares++; + md_new_event(mddev); + } else + break; + } + } + return spares; +} /* * This routine is regularly called by all per-raid-array threads to * deal with generic issues like resync and super-block update. @@ -5397,7 +5485,7 @@ void md_check_recovery(mddev_t *mddev) return; if (mddev_trylock(mddev)) { - int spares =0; + int spares = 0; spin_lock_irq(&mddev->write_lock); if (mddev->safemode && !atomic_read(&mddev->writes_pending) && @@ -5460,35 +5548,13 @@ void md_check_recovery(mddev_t *mddev) * Spare are also removed and re-added, to allow * the personality to fail the re-add. */ - ITERATE_RDEV(mddev,rdev,rtmp) - if (rdev->raid_disk >= 0 && - (test_bit(Faulty, &rdev->flags) || ! test_bit(In_sync, &rdev->flags)) && - atomic_read(&rdev->nr_pending)==0) { - if (mddev->pers->hot_remove_disk(mddev, rdev->raid_disk)==0) { - char nm[20]; - sprintf(nm,"rd%d", rdev->raid_disk); - sysfs_remove_link(&mddev->kobj, nm); - rdev->raid_disk = -1; - } - } - - if (mddev->degraded) { - ITERATE_RDEV(mddev,rdev,rtmp) - if (rdev->raid_disk < 0 - && !test_bit(Faulty, &rdev->flags)) { - rdev->recovery_offset = 0; - if (mddev->pers->hot_add_disk(mddev,rdev)) { - char nm[20]; - sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); - spares++; - md_new_event(mddev); - } else - break; - } - } - if (spares) { + if (mddev->reshape_position != MaxSector) { + if (mddev->pers->check_reshape(mddev) != 0) + /* Cannot proceed */ + goto unlock; + set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); + } else if ((spares = remove_and_add_spares(mddev))) { clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); } else if (mddev->recovery_cp < MaxSector) { |