aboutsummaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/md.c140
-rw-r--r--drivers/md/raid10.c38
-rw-r--r--drivers/md/raid5.c161
-rw-r--r--drivers/md/raid6mmx.c16
-rw-r--r--drivers/md/raid6sse1.c17
-rw-r--r--drivers/md/raid6sse2.c22
-rw-r--r--drivers/md/raid6x86.h218
7 files changed, 264 insertions, 348 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 05febfd9f07..6c06e825cff 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1296,27 +1296,17 @@ static struct super_type super_types[] = {
.sync_super = super_1_sync,
},
};
-
-static mdk_rdev_t * match_dev_unit(mddev_t *mddev, mdk_rdev_t *dev)
-{
- struct list_head *tmp;
- mdk_rdev_t *rdev;
-
- ITERATE_RDEV(mddev,rdev,tmp)
- if (rdev->bdev->bd_contains == dev->bdev->bd_contains)
- return rdev;
-
- return NULL;
-}
static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
{
- struct list_head *tmp;
- mdk_rdev_t *rdev;
+ struct list_head *tmp, *tmp2;
+ mdk_rdev_t *rdev, *rdev2;
ITERATE_RDEV(mddev1,rdev,tmp)
- if (match_dev_unit(mddev2, rdev))
- return 1;
+ ITERATE_RDEV(mddev2, rdev2, tmp2)
+ if (rdev->bdev->bd_contains ==
+ rdev2->bdev->bd_contains)
+ return 1;
return 0;
}
@@ -1325,8 +1315,7 @@ static LIST_HEAD(pending_raid_disks);
static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
{
- mdk_rdev_t *same_pdev;
- char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
+ char b[BDEVNAME_SIZE];
struct kobject *ko;
char *s;
@@ -1342,14 +1331,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
else
mddev->size = rdev->size;
}
- same_pdev = match_dev_unit(mddev, rdev);
- if (same_pdev)
- printk(KERN_WARNING
- "%s: WARNING: %s appears to be on the same physical"
- " disk as %s. True\n protection against single-disk"
- " failure might be compromised.\n",
- mdname(mddev), bdevname(rdev->bdev,b),
- bdevname(same_pdev->bdev,b2));
/* Verify rdev->desc_nr is unique.
* If it is -1, assign a free number, else
@@ -3109,6 +3090,36 @@ static int do_md_run(mddev_t * mddev)
return -EINVAL;
}
+ if (pers->sync_request) {
+ /* Warn if this is a potentially silly
+ * configuration.
+ */
+ char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
+ mdk_rdev_t *rdev2;
+ struct list_head *tmp2;
+ int warned = 0;
+ ITERATE_RDEV(mddev, rdev, tmp) {
+ ITERATE_RDEV(mddev, rdev2, tmp2) {
+ if (rdev < rdev2 &&
+ rdev->bdev->bd_contains ==
+ rdev2->bdev->bd_contains) {
+ printk(KERN_WARNING
+ "%s: WARNING: %s appears to be"
+ " on the same physical disk as"
+ " %s.\n",
+ mdname(mddev),
+ bdevname(rdev->bdev,b),
+ bdevname(rdev2->bdev,b2));
+ warned = 1;
+ }
+ }
+ }
+ if (warned)
+ printk(KERN_WARNING
+ "True protection against single-disk"
+ " failure might be compromised.\n");
+ }
+
mddev->recovery = 0;
mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
mddev->barriers_work = 1;
@@ -3311,6 +3322,9 @@ static int do_md_stop(mddev_t * mddev, int mode)
set_disk_ro(disk, 0);
blk_queue_make_request(mddev->queue, md_fail_request);
mddev->pers->stop(mddev);
+ mddev->queue->merge_bvec_fn = NULL;
+ mddev->queue->unplug_fn = NULL;
+ mddev->queue->issue_flush_fn = NULL;
if (mddev->pers->sync_request)
sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
@@ -5343,6 +5357,44 @@ void md_do_sync(mddev_t *mddev)
EXPORT_SYMBOL_GPL(md_do_sync);
+static int remove_and_add_spares(mddev_t *mddev)
+{
+ mdk_rdev_t *rdev;
+ struct list_head *rtmp;
+ int spares = 0;
+
+ ITERATE_RDEV(mddev,rdev,rtmp)
+ if (rdev->raid_disk >= 0 &&
+ (test_bit(Faulty, &rdev->flags) ||
+ ! test_bit(In_sync, &rdev->flags)) &&
+ atomic_read(&rdev->nr_pending)==0) {
+ if (mddev->pers->hot_remove_disk(
+ mddev, rdev->raid_disk)==0) {
+ char nm[20];
+ sprintf(nm,"rd%d", rdev->raid_disk);
+ sysfs_remove_link(&mddev->kobj, nm);
+ rdev->raid_disk = -1;
+ }
+ }
+
+ if (mddev->degraded) {
+ ITERATE_RDEV(mddev,rdev,rtmp)
+ if (rdev->raid_disk < 0
+ && !test_bit(Faulty, &rdev->flags)) {
+ rdev->recovery_offset = 0;
+ if (mddev->pers->hot_add_disk(mddev,rdev)) {
+ char nm[20];
+ sprintf(nm, "rd%d", rdev->raid_disk);
+ sysfs_create_link(&mddev->kobj,
+ &rdev->kobj, nm);
+ spares++;
+ md_new_event(mddev);
+ } else
+ break;
+ }
+ }
+ return spares;
+}
/*
* This routine is regularly called by all per-raid-array threads to
* deal with generic issues like resync and super-block update.
@@ -5397,7 +5449,7 @@ void md_check_recovery(mddev_t *mddev)
return;
if (mddev_trylock(mddev)) {
- int spares =0;
+ int spares = 0;
spin_lock_irq(&mddev->write_lock);
if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
@@ -5460,35 +5512,13 @@ void md_check_recovery(mddev_t *mddev)
* Spare are also removed and re-added, to allow
* the personality to fail the re-add.
*/
- ITERATE_RDEV(mddev,rdev,rtmp)
- if (rdev->raid_disk >= 0 &&
- (test_bit(Faulty, &rdev->flags) || ! test_bit(In_sync, &rdev->flags)) &&
- atomic_read(&rdev->nr_pending)==0) {
- if (mddev->pers->hot_remove_disk(mddev, rdev->raid_disk)==0) {
- char nm[20];
- sprintf(nm,"rd%d", rdev->raid_disk);
- sysfs_remove_link(&mddev->kobj, nm);
- rdev->raid_disk = -1;
- }
- }
-
- if (mddev->degraded) {
- ITERATE_RDEV(mddev,rdev,rtmp)
- if (rdev->raid_disk < 0
- && !test_bit(Faulty, &rdev->flags)) {
- rdev->recovery_offset = 0;
- if (mddev->pers->hot_add_disk(mddev,rdev)) {
- char nm[20];
- sprintf(nm, "rd%d", rdev->raid_disk);
- sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
- spares++;
- md_new_event(mddev);
- } else
- break;
- }
- }
- if (spares) {
+ if (mddev->reshape_position != MaxSector) {
+ if (mddev->pers->check_reshape(mddev) != 0)
+ /* Cannot proceed */
+ goto unlock;
+ set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
+ } else if ((spares = remove_and_add_spares(mddev))) {
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
} else if (mddev->recovery_cp < MaxSector) {
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index a9401c017e3..82249a69014 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -429,7 +429,7 @@ static sector_t raid10_find_virt(conf_t *conf, sector_t sector, int dev)
if (dev < 0)
dev += conf->raid_disks;
} else {
- while (sector > conf->stride) {
+ while (sector >= conf->stride) {
sector -= conf->stride;
if (dev < conf->near_copies)
dev += conf->raid_disks - conf->near_copies;
@@ -1801,6 +1801,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
for (k=0; k<conf->copies; k++)
if (r10_bio->devs[k].devnum == i)
break;
+ BUG_ON(k == conf->copies);
bio = r10_bio->devs[1].bio;
bio->bi_next = biolist;
biolist = bio;
@@ -2021,19 +2022,30 @@ static int run(mddev_t *mddev)
if (!conf->tmppage)
goto out_free_conf;
+ conf->mddev = mddev;
+ conf->raid_disks = mddev->raid_disks;
conf->near_copies = nc;
conf->far_copies = fc;
conf->copies = nc*fc;
conf->far_offset = fo;
conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1;
conf->chunk_shift = ffz(~mddev->chunk_size) - 9;
+ size = mddev->size >> (conf->chunk_shift-1);
+ sector_div(size, fc);
+ size = size * conf->raid_disks;
+ sector_div(size, nc);
+ /* 'size' is now the number of chunks in the array */
+ /* calculate "used chunks per device" in 'stride' */
+ stride = size * conf->copies;
+ sector_div(stride, conf->raid_disks);
+ mddev->size = stride << (conf->chunk_shift-1);
+
if (fo)
- conf->stride = 1 << conf->chunk_shift;
- else {
- stride = mddev->size >> (conf->chunk_shift-1);
+ stride = 1;
+ else
sector_div(stride, fc);
- conf->stride = stride << conf->chunk_shift;
- }
+ conf->stride = stride << conf->chunk_shift;
+
conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc,
r10bio_pool_free, conf);
if (!conf->r10bio_pool) {
@@ -2063,8 +2075,6 @@ static int run(mddev_t *mddev)
disk->head_position = 0;
}
- conf->raid_disks = mddev->raid_disks;
- conf->mddev = mddev;
spin_lock_init(&conf->device_lock);
INIT_LIST_HEAD(&conf->retry_list);
@@ -2106,16 +2116,8 @@ static int run(mddev_t *mddev)
/*
* Ok, everything is just fine now
*/
- if (conf->far_offset) {
- size = mddev->size >> (conf->chunk_shift-1);
- size *= conf->raid_disks;
- size <<= conf->chunk_shift;
- sector_div(size, conf->far_copies);
- } else
- size = conf->stride * conf->raid_disks;
- sector_div(size, conf->near_copies);
- mddev->array_size = size/2;
- mddev->resync_max_sectors = size;
+ mddev->array_size = size << (conf->chunk_shift-1);
+ mddev->resync_max_sectors = size << conf->chunk_shift;
mddev->queue->unplug_fn = raid10_unplug;
mddev->queue->issue_flush_fn = raid10_issue_flush;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 11c3d7bfa79..54a1ad5eef4 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1050,7 +1050,7 @@ static void compute_parity5(struct stripe_head *sh, int method)
static void compute_parity6(struct stripe_head *sh, int method)
{
raid6_conf_t *conf = sh->raid_conf;
- int i, pd_idx = sh->pd_idx, qd_idx, d0_idx, disks = conf->raid_disks, count;
+ int i, pd_idx = sh->pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
struct bio *chosen;
/**** FIX THIS: This could be very bad if disks is close to 256 ****/
void *ptrs[disks];
@@ -1131,8 +1131,7 @@ static void compute_parity6(struct stripe_head *sh, int method)
/* Compute one missing block */
static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
{
- raid6_conf_t *conf = sh->raid_conf;
- int i, count, disks = conf->raid_disks;
+ int i, count, disks = sh->disks;
void *ptr[MAX_XOR_BLOCKS], *p;
int pd_idx = sh->pd_idx;
int qd_idx = raid6_next_disk(pd_idx, disks);
@@ -1170,8 +1169,7 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
/* Compute two missing blocks */
static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
{
- raid6_conf_t *conf = sh->raid_conf;
- int i, count, disks = conf->raid_disks;
+ int i, count, disks = sh->disks;
int pd_idx = sh->pd_idx;
int qd_idx = raid6_next_disk(pd_idx, disks);
int d0_idx = raid6_next_disk(qd_idx, disks);
@@ -1887,11 +1885,11 @@ static void handle_stripe5(struct stripe_head *sh)
static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
{
raid6_conf_t *conf = sh->raid_conf;
- int disks = conf->raid_disks;
+ int disks = sh->disks;
struct bio *return_bi= NULL;
struct bio *bi;
int i;
- int syncing;
+ int syncing, expanding, expanded;
int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
int non_overwrite = 0;
int failed_num[2] = {0, 0};
@@ -1909,6 +1907,8 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
clear_bit(STRIPE_DELAYED, &sh->state);
syncing = test_bit(STRIPE_SYNCING, &sh->state);
+ expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+ expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
/* Now to look around and see what can be done */
rcu_read_lock();
@@ -2114,13 +2114,15 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
* parity, or to satisfy requests
* or to load a block that is being partially written.
*/
- if (to_read || non_overwrite || (to_write && failed) || (syncing && (uptodate < disks))) {
+ if (to_read || non_overwrite || (to_write && failed) ||
+ (syncing && (uptodate < disks)) || expanding) {
for (i=disks; i--;) {
dev = &sh->dev[i];
if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
(dev->toread ||
(dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
syncing ||
+ expanding ||
(failed >= 1 && (sh->dev[failed_num[0]].toread || to_write)) ||
(failed >= 2 && (sh->dev[failed_num[1]].toread || to_write))
)
@@ -2355,6 +2357,79 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
}
}
}
+
+ if (expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+ /* Need to write out all blocks after computing P&Q */
+ sh->disks = conf->raid_disks;
+ sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
+ conf->raid_disks);
+ compute_parity6(sh, RECONSTRUCT_WRITE);
+ for (i = conf->raid_disks ; i-- ; ) {
+ set_bit(R5_LOCKED, &sh->dev[i].flags);
+ locked++;
+ set_bit(R5_Wantwrite, &sh->dev[i].flags);
+ }
+ clear_bit(STRIPE_EXPANDING, &sh->state);
+ } else if (expanded) {
+ clear_bit(STRIPE_EXPAND_READY, &sh->state);
+ atomic_dec(&conf->reshape_stripes);
+ wake_up(&conf->wait_for_overlap);
+ md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
+ }
+
+ if (expanding && locked == 0) {
+ /* We have read all the blocks in this stripe and now we need to
+ * copy some of them into a target stripe for expand.
+ */
+ clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+ for (i = 0; i < sh->disks ; i++)
+ if (i != pd_idx && i != qd_idx) {
+ int dd_idx2, pd_idx2, j;
+ struct stripe_head *sh2;
+
+ sector_t bn = compute_blocknr(sh, i);
+ sector_t s = raid5_compute_sector(
+ bn, conf->raid_disks,
+ conf->raid_disks - conf->max_degraded,
+ &dd_idx2, &pd_idx2, conf);
+ sh2 = get_active_stripe(conf, s,
+ conf->raid_disks,
+ pd_idx2, 1);
+ if (sh2 == NULL)
+ /* so for only the early blocks of
+ * this stripe have been requests.
+ * When later blocks get requests, we
+ * will try again
+ */
+ continue;
+ if (!test_bit(STRIPE_EXPANDING, &sh2->state) ||
+ test_bit(R5_Expanded,
+ &sh2->dev[dd_idx2].flags)) {
+ /* must have already done this block */
+ release_stripe(sh2);
+ continue;
+ }
+ memcpy(page_address(sh2->dev[dd_idx2].page),
+ page_address(sh->dev[i].page),
+ STRIPE_SIZE);
+ set_bit(R5_Expanded, &sh2->dev[dd_idx2].flags);
+ set_bit(R5_UPTODATE, &sh2->dev[dd_idx2].flags);
+ for (j = 0 ; j < conf->raid_disks ; j++)
+ if (j != sh2->pd_idx &&
+ j != raid6_next_disk(sh2->pd_idx,
+ sh2->disks) &&
+ !test_bit(R5_Expanded,
+ &sh2->dev[j].flags))
+ break;
+ if (j == conf->raid_disks) {
+ set_bit(STRIPE_EXPAND_READY,
+ &sh2->state);
+ set_bit(STRIPE_HANDLE, &sh2->state);
+ }
+ release_stripe(sh2);
+ }
+ }
+
spin_unlock(&sh->lock);
while ((bi=return_bi)) {
@@ -2395,7 +2470,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
rcu_read_unlock();
if (rdev) {
- if (syncing)
+ if (syncing || expanding || expanded)
md_sync_acct(rdev->bdev, STRIPE_SECTORS);
bi->bi_bdev = rdev->bdev;
@@ -2915,8 +2990,9 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
struct stripe_head *sh;
int pd_idx;
sector_t first_sector, last_sector;
- int raid_disks;
- int data_disks;
+ int raid_disks = conf->previous_raid_disks;
+ int data_disks = raid_disks - conf->max_degraded;
+ int new_data_disks = conf->raid_disks - conf->max_degraded;
int i;
int dd_idx;
sector_t writepos, safepos, gap;
@@ -2925,7 +3001,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
conf->expand_progress != 0) {
/* restarting in the middle, skip the initial sectors */
sector_nr = conf->expand_progress;
- sector_div(sector_nr, conf->raid_disks-1);
+ sector_div(sector_nr, new_data_disks);
*skipped = 1;
return sector_nr;
}
@@ -2939,14 +3015,14 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
* to after where expand_lo old_maps to
*/
writepos = conf->expand_progress +
- conf->chunk_size/512*(conf->raid_disks-1);
- sector_div(writepos, conf->raid_disks-1);
+ conf->chunk_size/512*(new_data_disks);
+ sector_div(writepos, new_data_disks);
safepos = conf->expand_lo;
- sector_div(safepos, conf->previous_raid_disks-1);
+ sector_div(safepos, data_disks);
gap = conf->expand_progress - conf->expand_lo;
if (writepos >= safepos ||
- gap > (conf->raid_disks-1)*3000*2 /*3Meg*/) {
+ gap > (new_data_disks)*3000*2 /*3Meg*/) {
/* Cannot proceed until we've updated the superblock... */
wait_event(conf->wait_for_overlap,
atomic_read(&conf->reshape_stripes)==0);
@@ -2976,6 +3052,9 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
sector_t s;
if (j == sh->pd_idx)
continue;
+ if (conf->level == 6 &&
+ j == raid6_next_disk(sh->pd_idx, sh->disks))
+ continue;
s = compute_blocknr(sh, j);
if (s < (mddev->array_size<<1)) {
skipped = 1;
@@ -2992,28 +3071,27 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
release_stripe(sh);
}
spin_lock_irq(&conf->device_lock);
- conf->expand_progress = (sector_nr + i)*(conf->raid_disks-1);
+ conf->expand_progress = (sector_nr + i) * new_data_disks;
spin_unlock_irq(&conf->device_lock);
/* Ok, those stripe are ready. We can start scheduling
* reads on the source stripes.
* The source stripes are determined by mapping the first and last
* block on the destination stripes.
*/
- raid_disks = conf->previous_raid_disks;
- data_disks = raid_disks - 1;
first_sector =
- raid5_compute_sector(sector_nr*(conf->raid_disks-1),
+ raid5_compute_sector(sector_nr*(new_data_disks),
raid_disks, data_disks,
&dd_idx, &pd_idx, conf);
last_sector =
raid5_compute_sector((sector_nr+conf->chunk_size/512)
- *(conf->raid_disks-1) -1,
+ *(new_data_disks) -1,
raid_disks, data_disks,
&dd_idx, &pd_idx, conf);
if (last_sector >= (mddev->size<<1))
last_sector = (mddev->size<<1)-1;
while (first_sector <= last_sector) {
- pd_idx = stripe_to_pdidx(first_sector, conf, conf->previous_raid_disks);
+ pd_idx = stripe_to_pdidx(first_sector, conf,
+ conf->previous_raid_disks);
sh = get_active_stripe(conf, first_sector,
conf->previous_raid_disks, pd_idx, 0);
set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
@@ -3348,35 +3426,44 @@ static int run(mddev_t *mddev)
*/
sector_t here_new, here_old;
int old_disks;
+ int max_degraded = (mddev->level == 5 ? 1 : 2);
if (mddev->new_level != mddev->level ||
mddev->new_layout != mddev->layout ||
mddev->new_chunk != mddev->chunk_size) {
- printk(KERN_ERR "raid5: %s: unsupported reshape required - aborting.\n",
+ printk(KERN_ERR "raid5: %s: unsupported reshape "
+ "required - aborting.\n",
mdname(mddev));
return -EINVAL;
}
if (mddev->delta_disks <= 0) {
- printk(KERN_ERR "raid5: %s: unsupported reshape (reduce disks) required - aborting.\n",
+ printk(KERN_ERR "raid5: %s: unsupported reshape "
+ "(reduce disks) required - aborting.\n",
mdname(mddev));
return -EINVAL;
}
old_disks = mddev->raid_disks - mddev->delta_disks;
/* reshape_position must be on a new-stripe boundary, and one
- * further up in new geometry must map after here in old geometry.
+ * further up in new geometry must map after here in old
+ * geometry.
*/
here_new = mddev->reshape_position;
- if (sector_div(here_new, (mddev->chunk_size>>9)*(mddev->raid_disks-1))) {
- printk(KERN_ERR "raid5: reshape_position not on a stripe boundary\n");
+ if (sector_div(here_new, (mddev->chunk_size>>9)*
+ (mddev->raid_disks - max_degraded))) {
+ printk(KERN_ERR "raid5: reshape_position not "
+ "on a stripe boundary\n");
return -EINVAL;
}
/* here_new is the stripe we will write to */
here_old = mddev->reshape_position;
- sector_div(here_old, (mddev->chunk_size>>9)*(old_disks-1));
- /* here_old is the first stripe that we might need to read from */
+ sector_div(here_old, (mddev->chunk_size>>9)*
+ (old_disks-max_degraded));
+ /* here_old is the first stripe that we might need to read
+ * from */
if (here_new >= here_old) {
/* Reading from the same stripe as writing to - bad */
- printk(KERN_ERR "raid5: reshape_position too early for auto-recovery - aborting.\n");
+ printk(KERN_ERR "raid5: reshape_position too early for "
+ "auto-recovery - aborting.\n");
return -EINVAL;
}
printk(KERN_INFO "raid5: reshape will continue\n");
@@ -3814,6 +3901,8 @@ static int raid5_check_reshape(mddev_t *mddev)
if (err)
return err;
+ if (mddev->degraded > conf->max_degraded)
+ return -EINVAL;
/* looks like we might be able to manage this */
return 0;
}
@@ -3827,8 +3916,7 @@ static int raid5_start_reshape(mddev_t *mddev)
int added_devices = 0;
unsigned long flags;
- if (mddev->degraded ||
- test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
return -EBUSY;
ITERATE_RDEV(mddev, rdev, rtmp)
@@ -3836,7 +3924,7 @@ static int raid5_start_reshape(mddev_t *mddev)
!test_bit(Faulty, &rdev->flags))
spares++;
- if (spares < mddev->delta_disks-1)
+ if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded)
/* Not enough devices even to make a degraded array
* of that size
*/
@@ -3899,7 +3987,8 @@ static void end_reshape(raid5_conf_t *conf)
struct block_device *bdev;
if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
- conf->mddev->array_size = conf->mddev->size * (conf->raid_disks-1);
+ conf->mddev->array_size = conf->mddev->size *
+ (conf->raid_disks - conf->max_degraded);
set_capacity(conf->mddev->gendisk, conf->mddev->array_size << 1);
conf->mddev->changed = 1;
@@ -3972,6 +4061,10 @@ static struct mdk_personality raid6_personality =
.spare_active = raid5_spare_active,
.sync_request = sync_request,
.resize = raid5_resize,
+#ifdef CONFIG_MD_RAID5_RESHAPE
+ .check_reshape = raid5_check_reshape,
+ .start_reshape = raid5_start_reshape,
+#endif
.quiesce = raid5_quiesce,
};
static struct mdk_personality raid5_personality =
diff --git a/drivers/md/raid6mmx.c b/drivers/md/raid6mmx.c
index 359157aaf9e..6181a5a3365 100644
--- a/drivers/md/raid6mmx.c
+++ b/drivers/md/raid6mmx.c
@@ -30,14 +30,8 @@ const struct raid6_mmx_constants {
static int raid6_have_mmx(void)
{
-#ifdef __KERNEL__
/* Not really "boot_cpu" but "all_cpus" */
return boot_cpu_has(X86_FEATURE_MMX);
-#else
- /* User space test code */
- u32 features = cpuid_features();
- return ( (features & (1<<23)) == (1<<23) );
-#endif
}
/*
@@ -48,13 +42,12 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
int d, z, z0;
- raid6_mmx_save_t sa;
z0 = disks - 3; /* Highest data disk */
p = dptr[z0+1]; /* XOR parity */
q = dptr[z0+2]; /* RS syndrome */
- raid6_before_mmx(&sa);
+ kernel_fpu_begin();
asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
asm volatile("pxor %mm5,%mm5"); /* Zero temp */
@@ -78,7 +71,7 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
asm volatile("pxor %mm4,%mm4");
}
- raid6_after_mmx(&sa);
+ kernel_fpu_end();
}
const struct raid6_calls raid6_mmxx1 = {
@@ -96,13 +89,12 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
int d, z, z0;
- raid6_mmx_save_t sa;
z0 = disks - 3; /* Highest data disk */
p = dptr[z0+1]; /* XOR parity */
q = dptr[z0+2]; /* RS syndrome */
- raid6_before_mmx(&sa);
+ kernel_fpu_begin();
asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
asm volatile("pxor %mm5,%mm5"); /* Zero temp */
@@ -137,7 +129,7 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
asm volatile("movq %%mm6,%0" : "=m" (q[d+8]));
}
- raid6_after_mmx(&sa);
+ kernel_fpu_end();
}
const struct raid6_calls raid6_mmxx2 = {
diff --git a/drivers/md/raid6sse1.c b/drivers/md/raid6sse1.c
index f7e7859f71a..f0a1ba8f40b 100644
--- a/drivers/md/raid6sse1.c
+++ b/drivers/md/raid6sse1.c
@@ -33,16 +33,10 @@ extern const struct raid6_mmx_constants {
static int raid6_have_sse1_or_mmxext(void)
{
-#ifdef __KERNEL__
/* Not really boot_cpu but "all_cpus" */
return boot_cpu_has(X86_FEATURE_MMX) &&
(boot_cpu_has(X86_FEATURE_XMM) ||
boot_cpu_has(X86_FEATURE_MMXEXT));
-#else
- /* User space test code - this incorrectly breaks on some Athlons */
- u32 features = cpuid_features();
- return ( (features & (5<<23)) == (5<<23) );
-#endif
}
/*
@@ -53,14 +47,12 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
int d, z, z0;
- raid6_mmx_save_t sa;
z0 = disks - 3; /* Highest data disk */
p = dptr[z0+1]; /* XOR parity */
q = dptr[z0+2]; /* RS syndrome */
- /* This is really MMX code, not SSE */
- raid6_before_mmx(&sa);
+ kernel_fpu_begin();
asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
asm volatile("pxor %mm5,%mm5"); /* Zero temp */
@@ -94,8 +86,8 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
}
- raid6_after_mmx(&sa);
asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
}
const struct raid6_calls raid6_sse1x1 = {
@@ -113,13 +105,12 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
int d, z, z0;
- raid6_mmx_save_t sa;
z0 = disks - 3; /* Highest data disk */
p = dptr[z0+1]; /* XOR parity */
q = dptr[z0+2]; /* RS syndrome */
- raid6_before_mmx(&sa);
+ kernel_fpu_begin();
asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
asm volatile("pxor %mm5,%mm5"); /* Zero temp */
@@ -157,8 +148,8 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
}
- raid6_after_mmx(&sa);
asm volatile("sfence" : :: "memory");
+ kernel_fpu_end();
}
const struct raid6_calls raid6_sse1x2 = {
diff --git a/drivers/md/raid6sse2.c b/drivers/md/raid6sse2.c
index b3aa7fe0877..0f019762a7c 100644
--- a/drivers/md/raid6sse2.c
+++ b/drivers/md/raid6sse2.c
@@ -30,17 +30,11 @@ static const struct raid6_sse_constants {
static int raid6_have_sse2(void)
{
-#ifdef __KERNEL__
/* Not really boot_cpu but "all_cpus" */
return boot_cpu_has(X86_FEATURE_MMX) &&
boot_cpu_has(X86_FEATURE_FXSR) &&
boot_cpu_has(X86_FEATURE_XMM) &&
boot_cpu_has(X86_FEATURE_XMM2);
-#else
- /* User space test code */
- u32 features = cpuid_features();
- return ( (features & (15<<23)) == (15<<23) );
-#endif
}
/*
@@ -51,13 +45,12 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
int d, z, z0;
- raid6_sse_save_t sa;
z0 = disks - 3; /* Highest data disk */
p = dptr[z0+1]; /* XOR parity */
q = dptr[z0+2]; /* RS syndrome */
- raid6_before_sse2(&sa);
+ kernel_fpu_begin();
asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
@@ -93,8 +86,8 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
asm volatile("pxor %xmm4,%xmm4");
}
- raid6_after_sse2(&sa);
asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
}
const struct raid6_calls raid6_sse2x1 = {
@@ -112,13 +105,12 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
int d, z, z0;
- raid6_sse_save_t sa;
z0 = disks - 3; /* Highest data disk */
p = dptr[z0+1]; /* XOR parity */
q = dptr[z0+2]; /* RS syndrome */
- raid6_before_sse2(&sa);
+ kernel_fpu_begin();
asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
@@ -156,8 +148,8 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
}
- raid6_after_sse2(&sa);
asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
}
const struct raid6_calls raid6_sse2x2 = {
@@ -179,13 +171,12 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
int d, z, z0;
- raid6_sse16_save_t sa;
z0 = disks - 3; /* Highest data disk */
p = dptr[z0+1]; /* XOR parity */
q = dptr[z0+2]; /* RS syndrome */
- raid6_before_sse16(&sa);
+ kernel_fpu_begin();
asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
asm volatile("pxor %xmm2,%xmm2"); /* P[0] */
@@ -256,8 +247,9 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
asm volatile("pxor %xmm14,%xmm14");
}
+
asm volatile("sfence" : : : "memory");
- raid6_after_sse16(&sa);
+ kernel_fpu_end();
}
const struct raid6_calls raid6_sse2x4 = {
diff --git a/drivers/md/raid6x86.h b/drivers/md/raid6x86.h
index 4cf20534fe4..9111950414f 100644
--- a/drivers/md/raid6x86.h
+++ b/drivers/md/raid6x86.h
@@ -21,224 +21,40 @@
#if defined(__i386__) || defined(__x86_64__)
-#ifdef __x86_64__
-
-typedef struct {
- unsigned int fsave[27];
- unsigned long cr0;
-} raid6_mmx_save_t __attribute__((aligned(16)));
-
-/* N.B.: For SSE we only save %xmm0-%xmm7 even for x86-64, since
- the code doesn't know about the additional x86-64 registers */
-typedef struct {
- unsigned int sarea[8*4+2];
- unsigned long cr0;
-} raid6_sse_save_t __attribute__((aligned(16)));
-
-/* This is for x86-64-specific code which uses all 16 XMM registers */
-typedef struct {
- unsigned int sarea[16*4+2];
- unsigned long cr0;
-} raid6_sse16_save_t __attribute__((aligned(16)));
-
-/* On x86-64 the stack *SHOULD* be 16-byte aligned, but currently this
- is buggy in the kernel and it's only 8-byte aligned in places, so
- we need to do this anyway. Sigh. */
-#define SAREA(x) ((unsigned int *)((((unsigned long)&(x)->sarea)+15) & ~15))
-
-#else /* __i386__ */
-
-typedef struct {
- unsigned int fsave[27];
- unsigned long cr0;
-} raid6_mmx_save_t;
-
-/* On i386, the stack is only 8-byte aligned, but SSE requires 16-byte
- alignment. The +3 is so we have the slack space to manually align
- a properly-sized area correctly. */
-typedef struct {
- unsigned int sarea[8*4+3];
- unsigned long cr0;
-} raid6_sse_save_t;
-
-/* Find the 16-byte aligned save area */
-#define SAREA(x) ((unsigned int *)((((unsigned long)&(x)->sarea)+15) & ~15))
-
-#endif
-
#ifdef __KERNEL__ /* Real code */
-/* Note: %cr0 is 32 bits on i386 and 64 bits on x86-64 */
-
-static inline unsigned long raid6_get_fpu(void)
-{
- unsigned long cr0;
-
- preempt_disable();
- asm volatile("mov %%cr0,%0 ; clts" : "=r" (cr0));
- return cr0;
-}
-
-static inline void raid6_put_fpu(unsigned long cr0)
-{
- asm volatile("mov %0,%%cr0" : : "r" (cr0));
- preempt_enable();
-}
+#include <asm/i387.h>
#else /* Dummy code for user space testing */
-static inline unsigned long raid6_get_fpu(void)
-{
- return 0xf00ba6;
-}
-
-static inline void raid6_put_fpu(unsigned long cr0)
-{
- (void)cr0;
-}
-
-#endif
-
-static inline void raid6_before_mmx(raid6_mmx_save_t *s)
-{
- s->cr0 = raid6_get_fpu();
- asm volatile("fsave %0 ; fwait" : "=m" (s->fsave[0]));
-}
-
-static inline void raid6_after_mmx(raid6_mmx_save_t *s)
-{
- asm volatile("frstor %0" : : "m" (s->fsave[0]));
- raid6_put_fpu(s->cr0);
-}
-
-static inline void raid6_before_sse(raid6_sse_save_t *s)
-{
- unsigned int *rsa = SAREA(s);
-
- s->cr0 = raid6_get_fpu();
-
- asm volatile("movaps %%xmm0,%0" : "=m" (rsa[0]));
- asm volatile("movaps %%xmm1,%0" : "=m" (rsa[4]));
- asm volatile("movaps %%xmm2,%0" : "=m" (rsa[8]));
- asm volatile("movaps %%xmm3,%0" : "=m" (rsa[12]));
- asm volatile("movaps %%xmm4,%0" : "=m" (rsa[16]));
- asm volatile("movaps %%xmm5,%0" : "=m" (rsa[20]));
- asm volatile("movaps %%xmm6,%0" : "=m" (rsa[24]));
- asm volatile("movaps %%xmm7,%0" : "=m" (rsa[28]));
-}
-
-static inline void raid6_after_sse(raid6_sse_save_t *s)
-{
- unsigned int *rsa = SAREA(s);
-
- asm volatile("movaps %0,%%xmm0" : : "m" (rsa[0]));
- asm volatile("movaps %0,%%xmm1" : : "m" (rsa[4]));
- asm volatile("movaps %0,%%xmm2" : : "m" (rsa[8]));
- asm volatile("movaps %0,%%xmm3" : : "m" (rsa[12]));
- asm volatile("movaps %0,%%xmm4" : : "m" (rsa[16]));
- asm volatile("movaps %0,%%xmm5" : : "m" (rsa[20]));
- asm volatile("movaps %0,%%xmm6" : : "m" (rsa[24]));
- asm volatile("movaps %0,%%xmm7" : : "m" (rsa[28]));
-
- raid6_put_fpu(s->cr0);
-}
-
-static inline void raid6_before_sse2(raid6_sse_save_t *s)
+static inline void kernel_fpu_begin(void)
{
- unsigned int *rsa = SAREA(s);
-
- s->cr0 = raid6_get_fpu();
-
- asm volatile("movdqa %%xmm0,%0" : "=m" (rsa[0]));
- asm volatile("movdqa %%xmm1,%0" : "=m" (rsa[4]));
- asm volatile("movdqa %%xmm2,%0" : "=m" (rsa[8]));
- asm volatile("movdqa %%xmm3,%0" : "=m" (rsa[12]));
- asm volatile("movdqa %%xmm4,%0" : "=m" (rsa[16]));
- asm volatile("movdqa %%xmm5,%0" : "=m" (rsa[20]));
- asm volatile("movdqa %%xmm6,%0" : "=m" (rsa[24]));
- asm volatile("movdqa %%xmm7,%0" : "=m" (rsa[28]));
}
-static inline void raid6_after_sse2(raid6_sse_save_t *s)
+static inline void kernel_fpu_end(void)
{
- unsigned int *rsa = SAREA(s);
-
- asm volatile("movdqa %0,%%xmm0" : : "m" (rsa[0]));
- asm volatile("movdqa %0,%%xmm1" : : "m" (rsa[4]));
- asm volatile("movdqa %0,%%xmm2" : : "m" (rsa[8]));
- asm volatile("movdqa %0,%%xmm3" : : "m" (rsa[12]));
- asm volatile("movdqa %0,%%xmm4" : : "m" (rsa[16]));
- asm volatile("movdqa %0,%%xmm5" : : "m" (rsa[20]));
- asm volatile("movdqa %0,%%xmm6" : : "m" (rsa[24]));
- asm volatile("movdqa %0,%%xmm7" : : "m" (rsa[28]));
-
- raid6_put_fpu(s->cr0);
}
-#ifdef __x86_64__
-
-static inline void raid6_before_sse16(raid6_sse16_save_t *s)
-{
- unsigned int *rsa = SAREA(s);
-
- s->cr0 = raid6_get_fpu();
+#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions
+ * (fast save and restore) */
+#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */
+#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
- asm volatile("movdqa %%xmm0,%0" : "=m" (rsa[0]));
- asm volatile("movdqa %%xmm1,%0" : "=m" (rsa[4]));
- asm volatile("movdqa %%xmm2,%0" : "=m" (rsa[8]));
- asm volatile("movdqa %%xmm3,%0" : "=m" (rsa[12]));
- asm volatile("movdqa %%xmm4,%0" : "=m" (rsa[16]));
- asm volatile("movdqa %%xmm5,%0" : "=m" (rsa[20]));
- asm volatile("movdqa %%xmm6,%0" : "=m" (rsa[24]));
- asm volatile("movdqa %%xmm7,%0" : "=m" (rsa[28]));
- asm volatile("movdqa %%xmm8,%0" : "=m" (rsa[32]));
- asm volatile("movdqa %%xmm9,%0" : "=m" (rsa[36]));
- asm volatile("movdqa %%xmm10,%0" : "=m" (rsa[40]));
- asm volatile("movdqa %%xmm11,%0" : "=m" (rsa[44]));
- asm volatile("movdqa %%xmm12,%0" : "=m" (rsa[48]));
- asm volatile("movdqa %%xmm13,%0" : "=m" (rsa[52]));
- asm volatile("movdqa %%xmm14,%0" : "=m" (rsa[56]));
- asm volatile("movdqa %%xmm15,%0" : "=m" (rsa[60]));
-}
-
-static inline void raid6_after_sse16(raid6_sse16_save_t *s)
+/* Should work well enough on modern CPUs for testing */
+static inline int boot_cpu_has(int flag)
{
- unsigned int *rsa = SAREA(s);
+ u32 eax = (flag >> 5) ? 0x80000001 : 1;
+ u32 edx;
- asm volatile("movdqa %0,%%xmm0" : : "m" (rsa[0]));
- asm volatile("movdqa %0,%%xmm1" : : "m" (rsa[4]));
- asm volatile("movdqa %0,%%xmm2" : : "m" (rsa[8]));
- asm volatile("movdqa %0,%%xmm3" : : "m" (rsa[12]));
- asm volatile("movdqa %0,%%xmm4" : : "m" (rsa[16]));
- asm volatile("movdqa %0,%%xmm5" : : "m" (rsa[20]));
- asm volatile("movdqa %0,%%xmm6" : : "m" (rsa[24]));
- asm volatile("movdqa %0,%%xmm7" : : "m" (rsa[28]));
- asm volatile("movdqa %0,%%xmm8" : : "m" (rsa[32]));
- asm volatile("movdqa %0,%%xmm9" : : "m" (rsa[36]));
- asm volatile("movdqa %0,%%xmm10" : : "m" (rsa[40]));
- asm volatile("movdqa %0,%%xmm11" : : "m" (rsa[44]));
- asm volatile("movdqa %0,%%xmm12" : : "m" (rsa[48]));
- asm volatile("movdqa %0,%%xmm13" : : "m" (rsa[52]));
- asm volatile("movdqa %0,%%xmm14" : : "m" (rsa[56]));
- asm volatile("movdqa %0,%%xmm15" : : "m" (rsa[60]));
+ asm volatile("cpuid"
+ : "+a" (eax), "=d" (edx)
+ : : "ecx", "ebx");
- raid6_put_fpu(s->cr0);
+ return (edx >> (flag & 31)) & 1;
}
-#endif /* __x86_64__ */
-
-/* User space test hack */
-#ifndef __KERNEL__
-static inline int cpuid_features(void)
-{
- u32 eax = 1;
- u32 ebx, ecx, edx;
-
- asm volatile("cpuid" :
- "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx));
-
- return edx;
-}
#endif /* ndef __KERNEL__ */
#endif