aboutsummaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/raid1.c156
1 files changed, 128 insertions, 28 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a8bc93d6ff6..7fbb6088328 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -106,15 +106,30 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
}
/*
* Allocate RESYNC_PAGES data pages and attach them to
- * the first bio;
+ * the first bio.
+ * If this is a user-requested check/repair, allocate
+ * RESYNC_PAGES for each bio.
*/
- bio = r1_bio->bios[0];
- for (i = 0; i < RESYNC_PAGES; i++) {
- page = alloc_page(gfp_flags);
- if (unlikely(!page))
- goto out_free_pages;
-
- bio->bi_io_vec[i].bv_page = page;
+ if (test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery))
+ j = pi->raid_disks;
+ else
+ j = 1;
+ while(j--) {
+ bio = r1_bio->bios[j];
+ for (i = 0; i < RESYNC_PAGES; i++) {
+ page = alloc_page(gfp_flags);
+ if (unlikely(!page))
+ goto out_free_pages;
+
+ bio->bi_io_vec[i].bv_page = page;
+ }
+ }
+ /* If not user-requests, copy the page pointers to all bios */
+ if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) {
+ for (i=0; i<RESYNC_PAGES ; i++)
+ for (j=1; j<pi->raid_disks; j++)
+ r1_bio->bios[j]->bi_io_vec[i].bv_page =
+ r1_bio->bios[0]->bi_io_vec[i].bv_page;
}
r1_bio->master_bio = NULL;
@@ -122,8 +137,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
return r1_bio;
out_free_pages:
- for ( ; i > 0 ; i--)
- __free_page(bio->bi_io_vec[i-1].bv_page);
+ for (i=0; i < RESYNC_PAGES ; i++)
+ for (j=0 ; j < pi->raid_disks; j++)
+ __free_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
+ j = -1;
out_free_bio:
while ( ++j < pi->raid_disks )
bio_put(r1_bio->bios[j]);
@@ -134,14 +151,16 @@ out_free_bio:
static void r1buf_pool_free(void *__r1_bio, void *data)
{
struct pool_info *pi = data;
- int i;
+ int i,j;
r1bio_t *r1bio = __r1_bio;
- struct bio *bio = r1bio->bios[0];
- for (i = 0; i < RESYNC_PAGES; i++) {
- __free_page(bio->bi_io_vec[i].bv_page);
- bio->bi_io_vec[i].bv_page = NULL;
- }
+ for (i = 0; i < RESYNC_PAGES; i++)
+ for (j = pi->raid_disks; j-- ;) {
+ if (j == 0 ||
+ r1bio->bios[j]->bi_io_vec[i].bv_page !=
+ r1bio->bios[0]->bi_io_vec[i].bv_page)
+ __free_page(r1bio->bios[j]->bi_io_vec[i].bv_page);
+ }
for (i=0 ; i < pi->raid_disks; i++)
bio_put(r1bio->bios[i]);
@@ -1077,13 +1096,16 @@ abort:
static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
{
r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
+ int i;
if (bio->bi_size)
return 1;
- if (r1_bio->bios[r1_bio->read_disk] != bio)
- BUG();
- update_head_pos(r1_bio->read_disk, r1_bio);
+ for (i=r1_bio->mddev->raid_disks; i--; )
+ if (r1_bio->bios[i] == bio)
+ break;
+ BUG_ON(i < 0);
+ update_head_pos(i, r1_bio);
/*
* we have read a block, now it needs to be re-written,
* or re-read if the read failed.
@@ -1091,7 +1113,9 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
*/
if (test_bit(BIO_UPTODATE, &bio->bi_flags))
set_bit(R1BIO_Uptodate, &r1_bio->state);
- reschedule_retry(r1_bio);
+
+ if (atomic_dec_and_test(&r1_bio->remaining))
+ reschedule_retry(r1_bio);
return 0;
}
@@ -1134,9 +1158,65 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
bio = r1_bio->bios[r1_bio->read_disk];
- /*
- * schedule writes
- */
+ if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+ /* We have read all readable devices. If we haven't
+ * got the block, then there is no hope left.
+ * If we have, then we want to do a comparison
+ * and skip the write if everything is the same.
+ * If any blocks failed to read, then we need to
+ * attempt an over-write
+ */
+ int primary;
+ if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
+ for (i=0; i<mddev->raid_disks; i++)
+ if (r1_bio->bios[i]->bi_end_io == end_sync_read)
+ md_error(mddev, conf->mirrors[i].rdev);
+
+ md_done_sync(mddev, r1_bio->sectors, 1);
+ put_buf(r1_bio);
+ return;
+ }
+ for (primary=0; primary<mddev->raid_disks; primary++)
+ if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
+ test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
+ r1_bio->bios[primary]->bi_end_io = NULL;
+ break;
+ }
+ r1_bio->read_disk = primary;
+ for (i=0; i<mddev->raid_disks; i++)
+ if (r1_bio->bios[i]->bi_end_io == end_sync_read &&
+ test_bit(BIO_UPTODATE, &r1_bio->bios[i]->bi_flags)) {
+ int j;
+ int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
+ struct bio *pbio = r1_bio->bios[primary];
+ struct bio *sbio = r1_bio->bios[i];
+ for (j = vcnt; j-- ; )
+ if (memcmp(page_address(pbio->bi_io_vec[j].bv_page),
+ page_address(sbio->bi_io_vec[j].bv_page),
+ PAGE_SIZE))
+ break;
+ if (j >= 0)
+ mddev->resync_mismatches += r1_bio->sectors;
+ if (j < 0 || test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+ sbio->bi_end_io = NULL;
+ else {
+ /* fixup the bio for reuse */
+ sbio->bi_vcnt = vcnt;
+ sbio->bi_size = r1_bio->sectors << 9;
+ sbio->bi_idx = 0;
+ sbio->bi_phys_segments = 0;
+ sbio->bi_hw_segments = 0;
+ sbio->bi_hw_front_size = 0;
+ sbio->bi_hw_back_size = 0;
+ sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
+ sbio->bi_flags |= 1 << BIO_UPTODATE;
+ sbio->bi_next = NULL;
+ sbio->bi_sector = r1_bio->sector +
+ conf->mirrors[i].rdev->data_offset;
+ sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
+ }
+ }
+ }
if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
/* ouch - failed to read all of that.
* Try some synchronous reads of other devices to get
@@ -1216,6 +1296,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
idx ++;
}
}
+
+ /*
+ * schedule writes
+ */
atomic_set(&r1_bio->remaining, 1);
for (i = 0; i < disks ; i++) {
wbio = r1_bio->bios[i];
@@ -1618,10 +1702,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
for (i=0 ; i < conf->raid_disks; i++) {
bio = r1_bio->bios[i];
if (bio->bi_end_io) {
- page = r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page;
+ page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
if (bio_add_page(bio, page, len, 0) == 0) {
/* stop here */
- r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page = page;
+ bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
while (i > 0) {
i--;
bio = r1_bio->bios[i];
@@ -1641,12 +1725,28 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
sync_blocks -= (len>>9);
} while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES);
bio_full:
- bio = r1_bio->bios[r1_bio->read_disk];
r1_bio->sectors = nr_sectors;
- md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev, nr_sectors);
+ /* For a user-requested sync, we read all readable devices and do a
+ * compare
+ */
+ if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+ atomic_set(&r1_bio->remaining, read_targets);
+ for (i=0; i<conf->raid_disks; i++) {
+ bio = r1_bio->bios[i];
+ if (bio->bi_end_io == end_sync_read) {
+ md_sync_acct(conf->mirrors[i].rdev->bdev, nr_sectors);
+ generic_make_request(bio);
+ }
+ }
+ } else {
+ atomic_set(&r1_bio->remaining, 1);
+ bio = r1_bio->bios[r1_bio->read_disk];
+ md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev,
+ nr_sectors);
+ generic_make_request(bio);
- generic_make_request(bio);
+ }
return nr_sectors;
}