aboutsummaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bitmap.c53
-rw-r--r--drivers/md/dm-exception-store.c2
-rw-r--r--drivers/md/dm-log.c3
-rw-r--r--drivers/md/dm-snap-persistent.c2
-rw-r--r--drivers/md/dm-table.c38
-rw-r--r--drivers/md/dm.c8
-rw-r--r--drivers/md/linear.c2
-rw-r--r--drivers/md/md.c132
-rw-r--r--drivers/md/md.h21
-rw-r--r--drivers/md/multipath.c4
-rw-r--r--drivers/md/raid0.c2
-rw-r--r--drivers/md/raid1.c4
-rw-r--r--drivers/md/raid10.c20
-rw-r--r--drivers/md/raid5.c45
14 files changed, 192 insertions, 144 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index f8a9f7ab2cb..3319c2fec28 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -232,7 +232,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset,
target = rdev->sb_start + offset + index * (PAGE_SIZE/512);
if (sync_page_io(rdev->bdev, target,
- roundup(size, bdev_hardsect_size(rdev->bdev)),
+ roundup(size, bdev_logical_block_size(rdev->bdev)),
page, READ)) {
page->index = index;
attach_page_buffers(page, NULL); /* so that free_buffer will
@@ -287,7 +287,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
int size = PAGE_SIZE;
if (page->index == bitmap->file_pages-1)
size = roundup(bitmap->last_page_size,
- bdev_hardsect_size(rdev->bdev));
+ bdev_logical_block_size(rdev->bdev));
/* Just make sure we aren't corrupting data or
* metadata
*/
@@ -986,6 +986,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
oldindex = index;
oldpage = page;
+ bitmap->filemap[bitmap->file_pages++] = page;
+ bitmap->last_page_size = count;
+
if (outofdate) {
/*
* if bitmap is out of date, dirty the
@@ -998,15 +1001,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
write_page(bitmap, page, 1);
ret = -EIO;
- if (bitmap->flags & BITMAP_WRITE_ERROR) {
- /* release, page not in filemap yet */
- put_page(page);
+ if (bitmap->flags & BITMAP_WRITE_ERROR)
goto err;
- }
}
-
- bitmap->filemap[bitmap->file_pages++] = page;
- bitmap->last_page_size = count;
}
paddr = kmap_atomic(page, KM_USER0);
if (bitmap->flags & BITMAP_HOSTENDIAN)
@@ -1016,9 +1013,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
kunmap_atomic(paddr, KM_USER0);
if (b) {
/* if the disk bit is set, set the memory bit */
- bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap),
- ((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start)
- );
+ int needed = ((sector_t)(i+1) << (CHUNK_BLOCK_SHIFT(bitmap))
+ >= start);
+ bitmap_set_memory_bits(bitmap,
+ (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap),
+ needed);
bit_cnt++;
set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
}
@@ -1098,14 +1097,12 @@ void bitmap_daemon_work(struct bitmap *bitmap)
}
bitmap->allclean = 1;
+ spin_lock_irqsave(&bitmap->lock, flags);
for (j = 0; j < bitmap->chunks; j++) {
bitmap_counter_t *bmc;
- spin_lock_irqsave(&bitmap->lock, flags);
- if (!bitmap->filemap) {
+ if (!bitmap->filemap)
/* error or shutdown */
- spin_unlock_irqrestore(&bitmap->lock, flags);
break;
- }
page = filemap_get_page(bitmap, j);
@@ -1122,6 +1119,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
write_page(bitmap, page, 0);
bitmap->allclean = 0;
}
+ spin_lock_irqsave(&bitmap->lock, flags);
+ j |= (PAGE_BITS - 1);
continue;
}
@@ -1154,8 +1153,9 @@ void bitmap_daemon_work(struct bitmap *bitmap)
spin_lock_irqsave(&bitmap->lock, flags);
clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
}
- bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
- &blocks, 0);
+ bmc = bitmap_get_counter(bitmap,
+ (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
+ &blocks, 0);
if (bmc) {
/*
if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc);
@@ -1169,7 +1169,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
} else if (*bmc == 1) {
/* we can clear the bit */
*bmc = 0;
- bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
+ bitmap_count_page(bitmap,
+ (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
-1);
/* clear the bit */
@@ -1180,9 +1181,10 @@ void bitmap_daemon_work(struct bitmap *bitmap)
ext2_clear_bit(file_page_offset(j), paddr);
kunmap_atomic(paddr, KM_USER0);
}
- }
- spin_unlock_irqrestore(&bitmap->lock, flags);
+ } else
+ j |= PAGE_COUNTER_MASK;
}
+ spin_unlock_irqrestore(&bitmap->lock, flags);
/* now sync the final page */
if (lastpage != NULL) {
@@ -1479,6 +1481,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
s += blocks;
}
bitmap->last_end_sync = jiffies;
+ sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed");
}
static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
@@ -1513,7 +1516,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
unsigned long chunk;
for (chunk = s; chunk <= e; chunk++) {
- sector_t sec = chunk << CHUNK_BLOCK_SHIFT(bitmap);
+ sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap);
bitmap_set_memory_bits(bitmap, sec, 1);
bitmap_file_set_bit(bitmap, sec);
}
@@ -1589,7 +1592,7 @@ void bitmap_destroy(mddev_t *mddev)
int bitmap_create(mddev_t *mddev)
{
struct bitmap *bitmap;
- unsigned long blocks = mddev->resync_max_sectors;
+ sector_t blocks = mddev->resync_max_sectors;
unsigned long chunks;
unsigned long pages;
struct file *file = mddev->bitmap_file;
@@ -1631,8 +1634,8 @@ int bitmap_create(mddev_t *mddev)
bitmap->chunkshift = ffz(~bitmap->chunksize);
/* now that chunksize and chunkshift are set, we can use these macros */
- chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) /
- CHUNK_BLOCK_RATIO(bitmap);
+ chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >>
+ CHUNK_BLOCK_SHIFT(bitmap);
pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO;
BUG_ON(!pages);
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index a2e26c24214..75d8081a904 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -178,7 +178,7 @@ static int set_chunk_size(struct dm_exception_store *store,
}
/* Validate the chunk size against the device block size */
- if (chunk_size_ulong % (bdev_hardsect_size(store->cow->bdev) >> 9)) {
+ if (chunk_size_ulong % (bdev_logical_block_size(store->cow->bdev) >> 9)) {
*error = "Chunk size is not a multiple of device blocksize";
return -EINVAL;
}
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index be233bc4d91..6fa8ccf91c7 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -413,7 +413,8 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
* Buffer holds both header and bitset.
*/
buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) +
- bitset_size, ti->limits.hardsect_size);
+ bitset_size,
+ ti->limits.logical_block_size);
if (buf_size > dev->bdev->bd_inode->i_size) {
DMWARN("log device %s too small: need %llu bytes",
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index e75c6dd76a9..2662a41337e 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -282,7 +282,7 @@ static int read_header(struct pstore *ps, int *new_snapshot)
*/
if (!ps->store->chunk_size) {
ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
- bdev_hardsect_size(ps->store->cow->bdev) >> 9);
+ bdev_logical_block_size(ps->store->cow->bdev) >> 9);
ps->store->chunk_mask = ps->store->chunk_size - 1;
ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1;
chunk_size_supplied = 0;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 429b50b975d..e9a73bb242b 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -108,7 +108,8 @@ static void combine_restrictions_low(struct io_restrictions *lhs,
lhs->max_hw_segments =
min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments);
- lhs->hardsect_size = max(lhs->hardsect_size, rhs->hardsect_size);
+ lhs->logical_block_size = max(lhs->logical_block_size,
+ rhs->logical_block_size);
lhs->max_segment_size =
min_not_zero(lhs->max_segment_size, rhs->max_segment_size);
@@ -509,7 +510,7 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
* combine_restrictions_low()
*/
rs->max_sectors =
- min_not_zero(rs->max_sectors, q->max_sectors);
+ min_not_zero(rs->max_sectors, queue_max_sectors(q));
/*
* Check if merge fn is supported.
@@ -524,24 +525,25 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
rs->max_phys_segments =
min_not_zero(rs->max_phys_segments,
- q->max_phys_segments);
+ queue_max_phys_segments(q));
rs->max_hw_segments =
- min_not_zero(rs->max_hw_segments, q->max_hw_segments);
+ min_not_zero(rs->max_hw_segments, queue_max_hw_segments(q));
- rs->hardsect_size = max(rs->hardsect_size, q->hardsect_size);
+ rs->logical_block_size = max(rs->logical_block_size,
+ queue_logical_block_size(q));
rs->max_segment_size =
- min_not_zero(rs->max_segment_size, q->max_segment_size);
+ min_not_zero(rs->max_segment_size, queue_max_segment_size(q));
rs->max_hw_sectors =
- min_not_zero(rs->max_hw_sectors, q->max_hw_sectors);
+ min_not_zero(rs->max_hw_sectors, queue_max_hw_sectors(q));
rs->seg_boundary_mask =
min_not_zero(rs->seg_boundary_mask,
- q->seg_boundary_mask);
+ queue_segment_boundary(q));
- rs->bounce_pfn = min_not_zero(rs->bounce_pfn, q->bounce_pfn);
+ rs->bounce_pfn = min_not_zero(rs->bounce_pfn, queue_bounce_pfn(q));
rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
}
@@ -683,8 +685,8 @@ static void check_for_valid_limits(struct io_restrictions *rs)
rs->max_phys_segments = MAX_PHYS_SEGMENTS;
if (!rs->max_hw_segments)
rs->max_hw_segments = MAX_HW_SEGMENTS;
- if (!rs->hardsect_size)
- rs->hardsect_size = 1 << SECTOR_SHIFT;
+ if (!rs->logical_block_size)
+ rs->logical_block_size = 1 << SECTOR_SHIFT;
if (!rs->max_segment_size)
rs->max_segment_size = MAX_SEGMENT_SIZE;
if (!rs->seg_boundary_mask)
@@ -912,13 +914,13 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
* restrictions.
*/
blk_queue_max_sectors(q, t->limits.max_sectors);
- q->max_phys_segments = t->limits.max_phys_segments;
- q->max_hw_segments = t->limits.max_hw_segments;
- q->hardsect_size = t->limits.hardsect_size;
- q->max_segment_size = t->limits.max_segment_size;
- q->max_hw_sectors = t->limits.max_hw_sectors;
- q->seg_boundary_mask = t->limits.seg_boundary_mask;
- q->bounce_pfn = t->limits.bounce_pfn;
+ blk_queue_max_phys_segments(q, t->limits.max_phys_segments);
+ blk_queue_max_hw_segments(q, t->limits.max_hw_segments);
+ blk_queue_logical_block_size(q, t->limits.logical_block_size);
+ blk_queue_max_segment_size(q, t->limits.max_segment_size);
+ blk_queue_max_hw_sectors(q, t->limits.max_hw_sectors);
+ blk_queue_segment_boundary(q, t->limits.seg_boundary_mask);
+ blk_queue_bounce_limit(q, t->limits.bounce_pfn);
if (t->limits.no_cluster)
queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 424f7b048c3..3fd8b1e6548 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -20,7 +20,8 @@
#include <linux/idr.h>
#include <linux/hdreg.h>
#include <linux/blktrace_api.h>
-#include <trace/block.h>
+
+#include <trace/events/block.h>
#define DM_MSG_PREFIX "core"
@@ -53,8 +54,6 @@ struct dm_target_io {
union map_info info;
};
-DEFINE_TRACE(block_bio_complete);
-
/*
* For request-based dm.
* One of these is allocated per request.
@@ -656,8 +655,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
/* the bio has been remapped so dispatch it */
trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
- tio->io->bio->bi_bdev->bd_dev,
- clone->bi_sector, sector);
+ tio->io->bio->bi_bdev->bd_dev, sector);
generic_make_request(clone);
} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 7a36e38393a..64f1f3e046e 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -146,7 +146,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
* a one page request is never in violation.
*/
if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
- mddev->queue->max_sectors > (PAGE_SIZE>>9))
+ queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
disk->num_sectors = rdev->sectors;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index ed5727c089a..20f6ac33834 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1202,7 +1202,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
- bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;
+ bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
if (rdev->sb_size & bmask)
rdev->sb_size = (rdev->sb_size | bmask) + 1;
@@ -1375,6 +1375,9 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
sb->raid_disks = cpu_to_le32(mddev->raid_disks);
sb->size = cpu_to_le64(mddev->dev_sectors);
+ sb->chunksize = cpu_to_le32(mddev->chunk_size >> 9);
+ sb->level = cpu_to_le32(mddev->level);
+ sb->layout = cpu_to_le32(mddev->layout);
if (mddev->bitmap && mddev->bitmap_file == NULL) {
sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
@@ -2017,6 +2020,8 @@ repeat:
clear_bit(MD_CHANGE_PENDING, &mddev->flags);
spin_unlock_irq(&mddev->write_lock);
wake_up(&mddev->sb_wait);
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+ sysfs_notify(&mddev->kobj, NULL, "sync_completed");
}
@@ -2086,6 +2091,7 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
* -writemostly - clears write_mostly
* blocked - sets the Blocked flag
* -blocked - clears the Blocked flag
+ * insync - sets Insync providing device isn't active
*/
int err = -EINVAL;
if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
@@ -2118,6 +2124,9 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
md_wakeup_thread(rdev->mddev->thread);
err = 0;
+ } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
+ set_bit(In_sync, &rdev->flags);
+ err = 0;
}
if (!err && rdev->sysfs_state)
sysfs_notify_dirent(rdev->sysfs_state);
@@ -2190,7 +2199,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
} else if (rdev->mddev->pers) {
mdk_rdev_t *rdev2;
/* Activating a spare .. or possibly reactivating
- * if we every get bitmaps working here.
+ * if we ever get bitmaps working here.
*/
if (rdev->raid_disk != -1)
@@ -3060,11 +3069,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
} else
err = -EBUSY;
spin_unlock_irq(&mddev->write_lock);
- } else {
- mddev->ro = 0;
- mddev->recovery_cp = MaxSector;
- err = do_md_run(mddev);
- }
+ } else
+ err = -EINVAL;
break;
case active:
if (mddev->pers) {
@@ -3300,7 +3306,9 @@ static ssize_t
action_show(mddev_t *mddev, char *page)
{
char *type = "idle";
- if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+ if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
+ type = "frozen";
+ else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
(!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
type = "reshape";
@@ -3323,7 +3331,12 @@ action_store(mddev_t *mddev, const char *page, size_t len)
if (!mddev->pers || !mddev->pers->sync_request)
return -EINVAL;
- if (cmd_match(page, "idle")) {
+ if (cmd_match(page, "frozen"))
+ set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ else
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+
+ if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_unregister_thread(mddev->sync_thread);
@@ -3482,12 +3495,15 @@ sync_completed_show(mddev_t *mddev, char *page)
{
unsigned long max_sectors, resync;
+ if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+ return sprintf(page, "none\n");
+
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
max_sectors = mddev->resync_max_sectors;
else
max_sectors = mddev->dev_sectors;
- resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active));
+ resync = mddev->curr_resync_completed;
return sprintf(page, "%lu / %lu\n", resync, max_sectors);
}
@@ -3674,7 +3690,7 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len)
if (strict_blocks_to_sectors(buf, &sectors) < 0)
return -EINVAL;
if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
- return -EINVAL;
+ return -E2BIG;
mddev->external_size = 1;
}
@@ -4288,6 +4304,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
{
int err = 0;
struct gendisk *disk = mddev->gendisk;
+ mdk_rdev_t *rdev;
if (atomic_read(&mddev->openers) > is_open) {
printk("md: %s still in use.\n",mdname(mddev));
@@ -4330,6 +4347,13 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
/* tell userspace to handle 'inactive' */
sysfs_notify_dirent(mddev->sysfs_state);
+ list_for_each_entry(rdev, &mddev->disks, same_set)
+ if (rdev->raid_disk >= 0) {
+ char nm[20];
+ sprintf(nm, "rd%d", rdev->raid_disk);
+ sysfs_remove_link(&mddev->kobj, nm);
+ }
+
set_capacity(disk, 0);
mddev->changed = 1;
@@ -4350,7 +4374,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
* Free resources if final stop
*/
if (mode == 0) {
- mdk_rdev_t *rdev;
printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
@@ -4362,13 +4385,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
}
mddev->bitmap_offset = 0;
- list_for_each_entry(rdev, &mddev->disks, same_set)
- if (rdev->raid_disk >= 0) {
- char nm[20];
- sprintf(nm, "rd%d", rdev->raid_disk);
- sysfs_remove_link(&mddev->kobj, nm);
- }
-
/* make sure all md_delayed_delete calls have finished */
flush_scheduled_work();
@@ -5551,7 +5567,7 @@ static struct block_device_operations md_fops =
.owner = THIS_MODULE,
.open = md_open,
.release = md_release,
- .locked_ioctl = md_ioctl,
+ .ioctl = md_ioctl,
.getgeo = md_getgeo,
.media_changed = md_media_changed,
.revalidate_disk= md_revalidate,
@@ -5696,37 +5712,38 @@ static void status_unused(struct seq_file *seq)
static void status_resync(struct seq_file *seq, mddev_t * mddev)
{
- sector_t max_blocks, resync, res;
- unsigned long dt, db, rt;
+ sector_t max_sectors, resync, res;
+ unsigned long dt, db;
+ sector_t rt;
int scale;
unsigned int per_milli;
- resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2;
+ resync = mddev->curr_resync - atomic_read(&mddev->recovery_active);
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
- max_blocks = mddev->resync_max_sectors >> 1;
+ max_sectors = mddev->resync_max_sectors;
else
- max_blocks = mddev->dev_sectors / 2;
+ max_sectors = mddev->dev_sectors;
/*
* Should not happen.
*/
- if (!max_blocks) {
+ if (!max_sectors) {
MD_BUG();
return;
}
/* Pick 'scale' such that (resync>>scale)*1000 will fit
- * in a sector_t, and (max_blocks>>scale) will fit in a
+ * in a sector_t, and (max_sectors>>scale) will fit in a
* u32, as those are the requirements for sector_div.
* Thus 'scale' must be at least 10
*/
scale = 10;
if (sizeof(sector_t) > sizeof(unsigned long)) {
- while ( max_blocks/2 > (1ULL<<(scale+32)))
+ while ( max_sectors/2 > (1ULL<<(scale+32)))
scale++;
}
res = (resync>>scale)*1000;
- sector_div(res, (u32)((max_blocks>>scale)+1));
+ sector_div(res, (u32)((max_sectors>>scale)+1));
per_milli = res;
{
@@ -5747,25 +5764,35 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
"resync" : "recovery"))),
per_milli/10, per_milli % 10,
- (unsigned long long) resync,
- (unsigned long long) max_blocks);
+ (unsigned long long) resync/2,
+ (unsigned long long) max_sectors/2);
/*
- * We do not want to overflow, so the order of operands and
- * the * 100 / 100 trick are important. We do a +1 to be
- * safe against division by zero. We only estimate anyway.
- *
* dt: time from mark until now
* db: blocks written from mark until now
* rt: remaining time
+ *
+ * rt is a sector_t, so could be 32bit or 64bit.
+ * So we divide before multiply in case it is 32bit and close
+ * to the limit.
+ * We scale the divisor (db) by 32 to avoid losing precision
+ * near the end of resync when the number of remaining sectors
+ * is close to 'db'.
+ * We then divide rt by 32 after multiplying by db to compensate.
+ * The '+1' avoids division by zero if db is very small.
*/
dt = ((jiffies - mddev->resync_mark) / HZ);
if (!dt) dt++;
db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
- mddev->resync_mark_cnt;
- rt = (dt * ((unsigned long)(max_blocks-resync) / (db/2/100+1)))/100;
- seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6);
+ rt = max_sectors - resync; /* number of remaining sectors */
+ sector_div(rt, db/32+1);
+ rt *= dt;
+ rt >>= 5;
+
+ seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
+ ((unsigned long)rt % 60)/6);
seq_printf(seq, " speed=%ldK/sec", db/2/dt);
}
@@ -5956,7 +5983,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations md_seq_ops = {
+static const struct seq_operations md_seq_ops = {
.start = md_seq_start,
.next = md_seq_next,
.stop = md_seq_stop,
@@ -6334,18 +6361,14 @@ void md_do_sync(mddev_t *mddev)
sector_t sectors;
skipped = 0;
- if (j >= mddev->resync_max) {
- sysfs_notify(&mddev->kobj, NULL, "sync_completed");
- wait_event(mddev->recovery_wait,
- mddev->resync_max > j
- || kthread_should_stop());
- }
- if (kthread_should_stop())
- goto interrupted;
- if (mddev->curr_resync > mddev->curr_resync_completed &&
- (mddev->curr_resync - mddev->curr_resync_completed)
- > (max_sectors >> 4)) {
+ if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+ ((mddev->curr_resync > mddev->curr_resync_completed &&
+ (mddev->curr_resync - mddev->curr_resync_completed)
+ > (max_sectors >> 4)) ||
+ (j - mddev->curr_resync_completed)*2
+ >= mddev->resync_max - mddev->curr_resync_completed
+ )) {
/* time to update curr_resync_completed */
blk_unplug(mddev->queue);
wait_event(mddev->recovery_wait,
@@ -6353,7 +6376,17 @@ void md_do_sync(mddev_t *mddev)
mddev->curr_resync_completed =
mddev->curr_resync;
set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ sysfs_notify(&mddev->kobj, NULL, "sync_completed");
}
+
+ if (j >= mddev->resync_max)
+ wait_event(mddev->recovery_wait,
+ mddev->resync_max > j
+ || kthread_should_stop());
+
+ if (kthread_should_stop())
+ goto interrupted;
+
sectors = mddev->pers->sync_request(mddev, j, &skipped,
currspeed < speed_min(mddev));
if (sectors == 0) {
@@ -6461,6 +6494,7 @@ void md_do_sync(mddev_t *mddev)
skip:
mddev->curr_resync = 0;
+ mddev->curr_resync_completed = 0;
mddev->resync_min = 0;
mddev->resync_max = MaxSector;
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
diff --git a/drivers/md/md.h b/drivers/md/md.h
index e9b7f54c24d..8227ab909d4 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -12,10 +12,17 @@
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#ifndef _MD_K_H
-#define _MD_K_H
-
-#ifdef CONFIG_BLOCK
+#ifndef _MD_MD_H
+#define _MD_MD_H
+
+#include <linux/blkdev.h>
+#include <linux/kobject.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
#define MaxSector (~(sector_t)0)
@@ -408,10 +415,6 @@ static inline void safe_put_page(struct page *p)
if (p) put_page(p);
}
-#endif /* CONFIG_BLOCK */
-#endif
-
-
extern int register_md_personality(struct mdk_personality *p);
extern int unregister_md_personality(struct mdk_personality *p);
extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev),
@@ -434,3 +437,5 @@ extern void md_new_event(mddev_t *mddev);
extern int md_allow_write(mddev_t *mddev);
extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors);
+
+#endif /* _MD_MD_H */
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 41ced0cbe82..4ee31aa13c4 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -303,7 +303,7 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
* merge_bvec_fn will be involved in multipath.)
*/
if (q->merge_bvec_fn &&
- mddev->queue->max_sectors > (PAGE_SIZE>>9))
+ queue_max_sectors(q) > (PAGE_SIZE>>9))
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
conf->working_disks++;
@@ -467,7 +467,7 @@ static int multipath_run (mddev_t *mddev)
* violating it, not that we ever expect a device with
* a merge_bvec_fn to be involved in multipath */
if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
- mddev->queue->max_sectors > (PAGE_SIZE>>9))
+ queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
if (!test_bit(Faulty, &rdev->flags))
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index c08d7559be5..925507e7d67 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -144,7 +144,7 @@ static int create_strip_zones (mddev_t *mddev)
*/
if (rdev1->bdev->bd_disk->queue->merge_bvec_fn &&
- mddev->queue->max_sectors > (PAGE_SIZE>>9))
+ queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
if (!smallest || (rdev1->sectors < smallest->sectors))
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 36df9109cde..e23758b4a34 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1130,7 +1130,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
* a one page request is never in violation.
*/
if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
- mddev->queue->max_sectors > (PAGE_SIZE>>9))
+ queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
p->head_position = 0;
@@ -1996,7 +1996,7 @@ static int run(mddev_t *mddev)
* a one page request is never in violation.
*/
if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
- mddev->queue->max_sectors > (PAGE_SIZE>>9))
+ queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
disk->head_position = 0;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 81a54f17417..750550c1166 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1158,8 +1158,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
* a one page request is never in violation.
*/
if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
- mddev->queue->max_sectors > (PAGE_SIZE>>9))
- mddev->queue->max_sectors = (PAGE_SIZE>>9);
+ queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
+ blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
p->head_position = 0;
rdev->raid_disk = mirror;
@@ -1809,17 +1809,17 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
r10_bio->sector = sect;
raid10_find_phys(conf, r10_bio);
- /* Need to check if this section will still be
+
+ /* Need to check if the array will still be
* degraded
*/
- for (j=0; j<conf->copies;j++) {
- int d = r10_bio->devs[j].devnum;
- if (conf->mirrors[d].rdev == NULL ||
- test_bit(Faulty, &conf->mirrors[d].rdev->flags)) {
+ for (j=0; j<conf->raid_disks; j++)
+ if (conf->mirrors[j].rdev == NULL ||
+ test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
still_degraded = 1;
break;
}
- }
+
must_sync = bitmap_start_sync(mddev->bitmap, sect,
&sync_blocks, still_degraded);
@@ -2145,8 +2145,8 @@ static int run(mddev_t *mddev)
* a one page request is never in violation.
*/
if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
- mddev->queue->max_sectors > (PAGE_SIZE>>9))
- mddev->queue->max_sectors = (PAGE_SIZE>>9);
+ queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
+ blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
disk->head_position = 0;
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3bbc6d64704..bef87669823 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -362,7 +362,7 @@ static void raid5_unplug_device(struct request_queue *q);
static struct stripe_head *
get_active_stripe(raid5_conf_t *conf, sector_t sector,
- int previous, int noblock)
+ int previous, int noblock, int noquiesce)
{
struct stripe_head *sh;
@@ -372,7 +372,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
do {
wait_event_lock_irq(conf->wait_for_stripe,
- conf->quiesce == 0,
+ conf->quiesce == 0 || noquiesce,
conf->device_lock, /* nothing */);
sh = __find_stripe(conf, sector, conf->generation - previous);
if (!sh) {
@@ -2671,7 +2671,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
sector_t bn = compute_blocknr(sh, i, 1);
sector_t s = raid5_compute_sector(conf, bn, 0,
&dd_idx, NULL);
- sh2 = get_active_stripe(conf, s, 0, 1);
+ sh2 = get_active_stripe(conf, s, 0, 1, 1);
if (sh2 == NULL)
/* so far only the early blocks of this stripe
* have been requested. When later blocks
@@ -2944,7 +2944,7 @@ static bool handle_stripe5(struct stripe_head *sh)
/* Finish reconstruct operations initiated by the expansion process */
if (sh->reconstruct_state == reconstruct_state_result) {
struct stripe_head *sh2
- = get_active_stripe(conf, sh->sector, 1, 1);
+ = get_active_stripe(conf, sh->sector, 1, 1, 1);
if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
/* sh cannot be written until sh2 has been read.
* so arrange for sh to be delayed a little
@@ -3189,7 +3189,7 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
struct stripe_head *sh2
- = get_active_stripe(conf, sh->sector, 1, 1);
+ = get_active_stripe(conf, sh->sector, 1, 1, 1);
if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
/* sh cannot be written until sh2 has been read.
* so arrange for sh to be delayed a little
@@ -3288,7 +3288,7 @@ static void unplug_slaves(mddev_t *mddev)
int i;
rcu_read_lock();
- for (i=0; i<mddev->raid_disks; i++) {
+ for (i = 0; i < conf->raid_disks; i++) {
mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
@@ -3463,10 +3463,10 @@ static int bio_fits_rdev(struct bio *bi)
{
struct request_queue *q = bdev_get_queue(bi->bi_bdev);
- if ((bi->bi_size>>9) > q->max_sectors)
+ if ((bi->bi_size>>9) > queue_max_sectors(q))
return 0;
blk_recount_segments(q, bi);
- if (bi->bi_phys_segments > q->max_phys_segments)
+ if (bi->bi_phys_segments > queue_max_phys_segments(q))
return 0;
if (q->merge_bvec_fn)
@@ -3675,7 +3675,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
(unsigned long long)logical_sector);
sh = get_active_stripe(conf, new_sector, previous,
- (bi->bi_rw&RWA_MASK));
+ (bi->bi_rw&RWA_MASK), 0);
if (sh) {
if (unlikely(previous)) {
/* expansion might have moved on while waiting for a
@@ -3811,13 +3811,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
safepos = conf->reshape_safe;
sector_div(safepos, data_disks);
if (mddev->delta_disks < 0) {
- writepos -= reshape_sectors;
+ writepos -= min_t(sector_t, reshape_sectors, writepos);
readpos += reshape_sectors;
safepos += reshape_sectors;
} else {
writepos += reshape_sectors;
- readpos -= reshape_sectors;
- safepos -= reshape_sectors;
+ readpos -= min_t(sector_t, reshape_sectors, readpos);
+ safepos -= min_t(sector_t, reshape_sectors, safepos);
}
/* 'writepos' is the most advanced device address we might write.
@@ -3845,6 +3845,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
wait_event(conf->wait_for_overlap,
atomic_read(&conf->reshape_stripes)==0);
mddev->reshape_position = conf->reshape_progress;
+ mddev->curr_resync_completed = mddev->curr_resync;
conf->reshape_checkpoint = jiffies;
set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread);
@@ -3854,6 +3855,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
conf->reshape_safe = mddev->reshape_position;
spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_for_overlap);
+ sysfs_notify(&mddev->kobj, NULL, "sync_completed");
}
if (mddev->delta_disks < 0) {
@@ -3871,7 +3873,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
int j;
int skipped = 0;
- sh = get_active_stripe(conf, stripe_addr+i, 0, 0);
+ sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
set_bit(STRIPE_EXPANDING, &sh->state);
atomic_inc(&conf->reshape_stripes);
/* If any of this stripe is beyond the end of the old
@@ -3914,13 +3916,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
raid5_compute_sector(conf, stripe_addr*(new_data_disks),
1, &dd_idx, NULL);
last_sector =
- raid5_compute_sector(conf, ((stripe_addr+conf->chunk_size/512)
+ raid5_compute_sector(conf, ((stripe_addr+reshape_sectors)
*(new_data_disks) - 1),
1, &dd_idx, NULL);
if (last_sector >= mddev->dev_sectors)
last_sector = mddev->dev_sectors - 1;
while (first_sector <= last_sector) {
- sh = get_active_stripe(conf, first_sector, 1, 0);
+ sh = get_active_stripe(conf, first_sector, 1, 0, 1);
set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
release_stripe(sh);
@@ -3938,11 +3940,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
* then we need to write out the superblock.
*/
sector_nr += reshape_sectors;
- if (sector_nr >= mddev->resync_max) {
+ if ((sector_nr - mddev->curr_resync_completed) * 2
+ >= mddev->resync_max - mddev->curr_resync_completed) {
/* Cannot proceed until we've updated the superblock... */
wait_event(conf->wait_for_overlap,
atomic_read(&conf->reshape_stripes) == 0);
mddev->reshape_position = conf->reshape_progress;
+ mddev->curr_resync_completed = mddev->curr_resync;
conf->reshape_checkpoint = jiffies;
set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread);
@@ -3953,6 +3957,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
conf->reshape_safe = mddev->reshape_position;
spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_for_overlap);
+ sysfs_notify(&mddev->kobj, NULL, "sync_completed");
}
return reshape_sectors;
}
@@ -4017,9 +4022,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
bitmap_cond_end_sync(mddev->bitmap, sector_nr);
- sh = get_active_stripe(conf, sector_nr, 0, 1);
+ sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
if (sh == NULL) {
- sh = get_active_stripe(conf, sector_nr, 0, 0);
+ sh = get_active_stripe(conf, sector_nr, 0, 0, 0);
/* make sure we don't swamp the stripe cache if someone else
* is trying to get access
*/
@@ -4029,7 +4034,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
* We don't need to check the 'failed' flag as when that gets set,
* recovery aborts.
*/
- for (i=0; i<mddev->raid_disks; i++)
+ for (i = 0; i < conf->raid_disks; i++)
if (conf->disks[i].rdev == NULL)
still_degraded = 1;
@@ -4081,7 +4086,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
/* already done this stripe */
continue;
- sh = get_active_stripe(conf, sector, 0, 1);
+ sh = get_active_stripe(conf, sector, 0, 1, 0);
if (!sh) {
/* failed to get a stripe - must wait */