diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bitmap.c | 53 | ||||
-rw-r--r-- | drivers/md/dm-exception-store.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-log.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-snap-persistent.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 38 | ||||
-rw-r--r-- | drivers/md/dm.c | 8 | ||||
-rw-r--r-- | drivers/md/linear.c | 2 | ||||
-rw-r--r-- | drivers/md/md.c | 132 | ||||
-rw-r--r-- | drivers/md/md.h | 21 | ||||
-rw-r--r-- | drivers/md/multipath.c | 4 | ||||
-rw-r--r-- | drivers/md/raid0.c | 2 | ||||
-rw-r--r-- | drivers/md/raid1.c | 4 | ||||
-rw-r--r-- | drivers/md/raid10.c | 20 | ||||
-rw-r--r-- | drivers/md/raid5.c | 45 |
14 files changed, 192 insertions, 144 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index f8a9f7ab2cb..3319c2fec28 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -232,7 +232,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, target = rdev->sb_start + offset + index * (PAGE_SIZE/512); if (sync_page_io(rdev->bdev, target, - roundup(size, bdev_hardsect_size(rdev->bdev)), + roundup(size, bdev_logical_block_size(rdev->bdev)), page, READ)) { page->index = index; attach_page_buffers(page, NULL); /* so that free_buffer will @@ -287,7 +287,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) int size = PAGE_SIZE; if (page->index == bitmap->file_pages-1) size = roundup(bitmap->last_page_size, - bdev_hardsect_size(rdev->bdev)); + bdev_logical_block_size(rdev->bdev)); /* Just make sure we aren't corrupting data or * metadata */ @@ -986,6 +986,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) oldindex = index; oldpage = page; + bitmap->filemap[bitmap->file_pages++] = page; + bitmap->last_page_size = count; + if (outofdate) { /* * if bitmap is out of date, dirty the @@ -998,15 +1001,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) write_page(bitmap, page, 1); ret = -EIO; - if (bitmap->flags & BITMAP_WRITE_ERROR) { - /* release, page not in filemap yet */ - put_page(page); + if (bitmap->flags & BITMAP_WRITE_ERROR) goto err; - } } - - bitmap->filemap[bitmap->file_pages++] = page; - bitmap->last_page_size = count; } paddr = kmap_atomic(page, KM_USER0); if (bitmap->flags & BITMAP_HOSTENDIAN) @@ -1016,9 +1013,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) kunmap_atomic(paddr, KM_USER0); if (b) { /* if the disk bit is set, set the memory bit */ - bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap), - ((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start) - ); + int needed = ((sector_t)(i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) + >= start); + bitmap_set_memory_bits(bitmap, + (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap), + needed); bit_cnt++; set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); } @@ -1098,14 +1097,12 @@ void bitmap_daemon_work(struct bitmap *bitmap) } bitmap->allclean = 1; + spin_lock_irqsave(&bitmap->lock, flags); for (j = 0; j < bitmap->chunks; j++) { bitmap_counter_t *bmc; - spin_lock_irqsave(&bitmap->lock, flags); - if (!bitmap->filemap) { + if (!bitmap->filemap) /* error or shutdown */ - spin_unlock_irqrestore(&bitmap->lock, flags); break; - } page = filemap_get_page(bitmap, j); @@ -1122,6 +1119,8 @@ void bitmap_daemon_work(struct bitmap *bitmap) write_page(bitmap, page, 0); bitmap->allclean = 0; } + spin_lock_irqsave(&bitmap->lock, flags); + j |= (PAGE_BITS - 1); continue; } @@ -1154,8 +1153,9 @@ void bitmap_daemon_work(struct bitmap *bitmap) spin_lock_irqsave(&bitmap->lock, flags); clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); } - bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap), - &blocks, 0); + bmc = bitmap_get_counter(bitmap, + (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), + &blocks, 0); if (bmc) { /* if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc); @@ -1169,7 +1169,8 @@ void bitmap_daemon_work(struct bitmap *bitmap) } else if (*bmc == 1) { /* we can clear the bit */ *bmc = 0; - bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap), + bitmap_count_page(bitmap, + (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), -1); /* clear the bit */ @@ -1180,9 +1181,10 @@ void bitmap_daemon_work(struct bitmap *bitmap) ext2_clear_bit(file_page_offset(j), paddr); kunmap_atomic(paddr, KM_USER0); } - } - spin_unlock_irqrestore(&bitmap->lock, flags); + } else + j |= PAGE_COUNTER_MASK; } + spin_unlock_irqrestore(&bitmap->lock, flags); /* now sync the final page */ if (lastpage != NULL) { @@ -1479,6 +1481,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) s += blocks; } bitmap->last_end_sync = jiffies; + sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed"); } static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) @@ -1513,7 +1516,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) unsigned long chunk; for (chunk = s; chunk <= e; chunk++) { - sector_t sec = chunk << CHUNK_BLOCK_SHIFT(bitmap); + sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap); bitmap_set_memory_bits(bitmap, sec, 1); bitmap_file_set_bit(bitmap, sec); } @@ -1589,7 +1592,7 @@ void bitmap_destroy(mddev_t *mddev) int bitmap_create(mddev_t *mddev) { struct bitmap *bitmap; - unsigned long blocks = mddev->resync_max_sectors; + sector_t blocks = mddev->resync_max_sectors; unsigned long chunks; unsigned long pages; struct file *file = mddev->bitmap_file; @@ -1631,8 +1634,8 @@ int bitmap_create(mddev_t *mddev) bitmap->chunkshift = ffz(~bitmap->chunksize); /* now that chunksize and chunkshift are set, we can use these macros */ - chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) / - CHUNK_BLOCK_RATIO(bitmap); + chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >> + CHUNK_BLOCK_SHIFT(bitmap); pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; BUG_ON(!pages); diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index a2e26c24214..75d8081a904 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -178,7 +178,7 @@ static int set_chunk_size(struct dm_exception_store *store, } /* Validate the chunk size against the device block size */ - if (chunk_size_ulong % (bdev_hardsect_size(store->cow->bdev) >> 9)) { + if (chunk_size_ulong % (bdev_logical_block_size(store->cow->bdev) >> 9)) { *error = "Chunk size is not a multiple of device blocksize"; return -EINVAL; } diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index be233bc4d91..6fa8ccf91c7 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -413,7 +413,8 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, * Buffer holds both header and bitset. */ buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) + - bitset_size, ti->limits.hardsect_size); + bitset_size, + ti->limits.logical_block_size); if (buf_size > dev->bdev->bd_inode->i_size) { DMWARN("log device %s too small: need %llu bytes", diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index e75c6dd76a9..2662a41337e 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -282,7 +282,7 @@ static int read_header(struct pstore *ps, int *new_snapshot) */ if (!ps->store->chunk_size) { ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, - bdev_hardsect_size(ps->store->cow->bdev) >> 9); + bdev_logical_block_size(ps->store->cow->bdev) >> 9); ps->store->chunk_mask = ps->store->chunk_size - 1; ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1; chunk_size_supplied = 0; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 429b50b975d..e9a73bb242b 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -108,7 +108,8 @@ static void combine_restrictions_low(struct io_restrictions *lhs, lhs->max_hw_segments = min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments); - lhs->hardsect_size = max(lhs->hardsect_size, rhs->hardsect_size); + lhs->logical_block_size = max(lhs->logical_block_size, + rhs->logical_block_size); lhs->max_segment_size = min_not_zero(lhs->max_segment_size, rhs->max_segment_size); @@ -509,7 +510,7 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) * combine_restrictions_low() */ rs->max_sectors = - min_not_zero(rs->max_sectors, q->max_sectors); + min_not_zero(rs->max_sectors, queue_max_sectors(q)); /* * Check if merge fn is supported. @@ -524,24 +525,25 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) rs->max_phys_segments = min_not_zero(rs->max_phys_segments, - q->max_phys_segments); + queue_max_phys_segments(q)); rs->max_hw_segments = - min_not_zero(rs->max_hw_segments, q->max_hw_segments); + min_not_zero(rs->max_hw_segments, queue_max_hw_segments(q)); - rs->hardsect_size = max(rs->hardsect_size, q->hardsect_size); + rs->logical_block_size = max(rs->logical_block_size, + queue_logical_block_size(q)); rs->max_segment_size = - min_not_zero(rs->max_segment_size, q->max_segment_size); + min_not_zero(rs->max_segment_size, queue_max_segment_size(q)); rs->max_hw_sectors = - min_not_zero(rs->max_hw_sectors, q->max_hw_sectors); + min_not_zero(rs->max_hw_sectors, queue_max_hw_sectors(q)); rs->seg_boundary_mask = min_not_zero(rs->seg_boundary_mask, - q->seg_boundary_mask); + queue_segment_boundary(q)); - rs->bounce_pfn = min_not_zero(rs->bounce_pfn, q->bounce_pfn); + rs->bounce_pfn = min_not_zero(rs->bounce_pfn, queue_bounce_pfn(q)); rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); } @@ -683,8 +685,8 @@ static void check_for_valid_limits(struct io_restrictions *rs) rs->max_phys_segments = MAX_PHYS_SEGMENTS; if (!rs->max_hw_segments) rs->max_hw_segments = MAX_HW_SEGMENTS; - if (!rs->hardsect_size) - rs->hardsect_size = 1 << SECTOR_SHIFT; + if (!rs->logical_block_size) + rs->logical_block_size = 1 << SECTOR_SHIFT; if (!rs->max_segment_size) rs->max_segment_size = MAX_SEGMENT_SIZE; if (!rs->seg_boundary_mask) @@ -912,13 +914,13 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q) * restrictions. */ blk_queue_max_sectors(q, t->limits.max_sectors); - q->max_phys_segments = t->limits.max_phys_segments; - q->max_hw_segments = t->limits.max_hw_segments; - q->hardsect_size = t->limits.hardsect_size; - q->max_segment_size = t->limits.max_segment_size; - q->max_hw_sectors = t->limits.max_hw_sectors; - q->seg_boundary_mask = t->limits.seg_boundary_mask; - q->bounce_pfn = t->limits.bounce_pfn; + blk_queue_max_phys_segments(q, t->limits.max_phys_segments); + blk_queue_max_hw_segments(q, t->limits.max_hw_segments); + blk_queue_logical_block_size(q, t->limits.logical_block_size); + blk_queue_max_segment_size(q, t->limits.max_segment_size); + blk_queue_max_hw_sectors(q, t->limits.max_hw_sectors); + blk_queue_segment_boundary(q, t->limits.seg_boundary_mask); + blk_queue_bounce_limit(q, t->limits.bounce_pfn); if (t->limits.no_cluster) queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 424f7b048c3..3fd8b1e6548 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -20,7 +20,8 @@ #include <linux/idr.h> #include <linux/hdreg.h> #include <linux/blktrace_api.h> -#include <trace/block.h> + +#include <trace/events/block.h> #define DM_MSG_PREFIX "core" @@ -53,8 +54,6 @@ struct dm_target_io { union map_info info; }; -DEFINE_TRACE(block_bio_complete); - /* * For request-based dm. * One of these is allocated per request. @@ -656,8 +655,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, /* the bio has been remapped so dispatch it */ trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, - tio->io->bio->bi_bdev->bd_dev, - clone->bi_sector, sector); + tio->io->bio->bi_bdev->bd_dev, sector); generic_make_request(clone); } else if (r < 0 || r == DM_MAPIO_REQUEUE) { diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 7a36e38393a..64f1f3e046e 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -146,7 +146,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) * a one page request is never in violation. */ if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); disk->num_sectors = rdev->sectors; diff --git a/drivers/md/md.c b/drivers/md/md.c index ed5727c089a..20f6ac33834 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1202,7 +1202,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read)); rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256; - bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1; + bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1; if (rdev->sb_size & bmask) rdev->sb_size = (rdev->sb_size | bmask) + 1; @@ -1375,6 +1375,9 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->raid_disks = cpu_to_le32(mddev->raid_disks); sb->size = cpu_to_le64(mddev->dev_sectors); + sb->chunksize = cpu_to_le32(mddev->chunk_size >> 9); + sb->level = cpu_to_le32(mddev->level); + sb->layout = cpu_to_le32(mddev->layout); if (mddev->bitmap && mddev->bitmap_file == NULL) { sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); @@ -2017,6 +2020,8 @@ repeat: clear_bit(MD_CHANGE_PENDING, &mddev->flags); spin_unlock_irq(&mddev->write_lock); wake_up(&mddev->sb_wait); + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) + sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } @@ -2086,6 +2091,7 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) * -writemostly - clears write_mostly * blocked - sets the Blocked flag * -blocked - clears the Blocked flag + * insync - sets Insync providing device isn't active */ int err = -EINVAL; if (cmd_match(buf, "faulty") && rdev->mddev->pers) { @@ -2118,6 +2124,9 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) md_wakeup_thread(rdev->mddev->thread); err = 0; + } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) { + set_bit(In_sync, &rdev->flags); + err = 0; } if (!err && rdev->sysfs_state) sysfs_notify_dirent(rdev->sysfs_state); @@ -2190,7 +2199,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) } else if (rdev->mddev->pers) { mdk_rdev_t *rdev2; /* Activating a spare .. or possibly reactivating - * if we every get bitmaps working here. + * if we ever get bitmaps working here. */ if (rdev->raid_disk != -1) @@ -3060,11 +3069,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) } else err = -EBUSY; spin_unlock_irq(&mddev->write_lock); - } else { - mddev->ro = 0; - mddev->recovery_cp = MaxSector; - err = do_md_run(mddev); - } + } else + err = -EINVAL; break; case active: if (mddev->pers) { @@ -3300,7 +3306,9 @@ static ssize_t action_show(mddev_t *mddev, char *page) { char *type = "idle"; - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || + if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) + type = "frozen"; + else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) { if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) type = "reshape"; @@ -3323,7 +3331,12 @@ action_store(mddev_t *mddev, const char *page, size_t len) if (!mddev->pers || !mddev->pers->sync_request) return -EINVAL; - if (cmd_match(page, "idle")) { + if (cmd_match(page, "frozen")) + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + else + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + + if (cmd_match(page, "idle") || cmd_match(page, "frozen")) { if (mddev->sync_thread) { set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_unregister_thread(mddev->sync_thread); @@ -3482,12 +3495,15 @@ sync_completed_show(mddev_t *mddev, char *page) { unsigned long max_sectors, resync; + if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) + return sprintf(page, "none\n"); + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) max_sectors = mddev->resync_max_sectors; else max_sectors = mddev->dev_sectors; - resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active)); + resync = mddev->curr_resync_completed; return sprintf(page, "%lu / %lu\n", resync, max_sectors); } @@ -3674,7 +3690,7 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len) if (strict_blocks_to_sectors(buf, §ors) < 0) return -EINVAL; if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors) - return -EINVAL; + return -E2BIG; mddev->external_size = 1; } @@ -4288,6 +4304,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) { int err = 0; struct gendisk *disk = mddev->gendisk; + mdk_rdev_t *rdev; if (atomic_read(&mddev->openers) > is_open) { printk("md: %s still in use.\n",mdname(mddev)); @@ -4330,6 +4347,13 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) /* tell userspace to handle 'inactive' */ sysfs_notify_dirent(mddev->sysfs_state); + list_for_each_entry(rdev, &mddev->disks, same_set) + if (rdev->raid_disk >= 0) { + char nm[20]; + sprintf(nm, "rd%d", rdev->raid_disk); + sysfs_remove_link(&mddev->kobj, nm); + } + set_capacity(disk, 0); mddev->changed = 1; @@ -4350,7 +4374,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) * Free resources if final stop */ if (mode == 0) { - mdk_rdev_t *rdev; printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); @@ -4362,13 +4385,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) } mddev->bitmap_offset = 0; - list_for_each_entry(rdev, &mddev->disks, same_set) - if (rdev->raid_disk >= 0) { - char nm[20]; - sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_remove_link(&mddev->kobj, nm); - } - /* make sure all md_delayed_delete calls have finished */ flush_scheduled_work(); @@ -5551,7 +5567,7 @@ static struct block_device_operations md_fops = .owner = THIS_MODULE, .open = md_open, .release = md_release, - .locked_ioctl = md_ioctl, + .ioctl = md_ioctl, .getgeo = md_getgeo, .media_changed = md_media_changed, .revalidate_disk= md_revalidate, @@ -5696,37 +5712,38 @@ static void status_unused(struct seq_file *seq) static void status_resync(struct seq_file *seq, mddev_t * mddev) { - sector_t max_blocks, resync, res; - unsigned long dt, db, rt; + sector_t max_sectors, resync, res; + unsigned long dt, db; + sector_t rt; int scale; unsigned int per_milli; - resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2; + resync = mddev->curr_resync - atomic_read(&mddev->recovery_active); if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) - max_blocks = mddev->resync_max_sectors >> 1; + max_sectors = mddev->resync_max_sectors; else - max_blocks = mddev->dev_sectors / 2; + max_sectors = mddev->dev_sectors; /* * Should not happen. */ - if (!max_blocks) { + if (!max_sectors) { MD_BUG(); return; } /* Pick 'scale' such that (resync>>scale)*1000 will fit - * in a sector_t, and (max_blocks>>scale) will fit in a + * in a sector_t, and (max_sectors>>scale) will fit in a * u32, as those are the requirements for sector_div. * Thus 'scale' must be at least 10 */ scale = 10; if (sizeof(sector_t) > sizeof(unsigned long)) { - while ( max_blocks/2 > (1ULL<<(scale+32))) + while ( max_sectors/2 > (1ULL<<(scale+32))) scale++; } res = (resync>>scale)*1000; - sector_div(res, (u32)((max_blocks>>scale)+1)); + sector_div(res, (u32)((max_sectors>>scale)+1)); per_milli = res; { @@ -5747,25 +5764,35 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ? "resync" : "recovery"))), per_milli/10, per_milli % 10, - (unsigned long long) resync, - (unsigned long long) max_blocks); + (unsigned long long) resync/2, + (unsigned long long) max_sectors/2); /* - * We do not want to overflow, so the order of operands and - * the * 100 / 100 trick are important. We do a +1 to be - * safe against division by zero. We only estimate anyway. - * * dt: time from mark until now * db: blocks written from mark until now * rt: remaining time + * + * rt is a sector_t, so could be 32bit or 64bit. + * So we divide before multiply in case it is 32bit and close + * to the limit. + * We scale the divisor (db) by 32 to avoid loosing precision + * near the end of resync when the number of remaining sectors + * is close to 'db'. + * We then divide rt by 32 after multiplying by db to compensate. + * The '+1' avoids division by zero if db is very small. */ dt = ((jiffies - mddev->resync_mark) / HZ); if (!dt) dt++; db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active)) - mddev->resync_mark_cnt; - rt = (dt * ((unsigned long)(max_blocks-resync) / (db/2/100+1)))/100; - seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6); + rt = max_sectors - resync; /* number of remaining sectors */ + sector_div(rt, db/32+1); + rt *= dt; + rt >>= 5; + + seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60, + ((unsigned long)rt % 60)/6); seq_printf(seq, " speed=%ldK/sec", db/2/dt); } @@ -5956,7 +5983,7 @@ static int md_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations md_seq_ops = { +static const struct seq_operations md_seq_ops = { .start = md_seq_start, .next = md_seq_next, .stop = md_seq_stop, @@ -6334,18 +6361,14 @@ void md_do_sync(mddev_t *mddev) sector_t sectors; skipped = 0; - if (j >= mddev->resync_max) { - sysfs_notify(&mddev->kobj, NULL, "sync_completed"); - wait_event(mddev->recovery_wait, - mddev->resync_max > j - || kthread_should_stop()); - } - if (kthread_should_stop()) - goto interrupted; - if (mddev->curr_resync > mddev->curr_resync_completed && - (mddev->curr_resync - mddev->curr_resync_completed) - > (max_sectors >> 4)) { + if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && + ((mddev->curr_resync > mddev->curr_resync_completed && + (mddev->curr_resync - mddev->curr_resync_completed) + > (max_sectors >> 4)) || + (j - mddev->curr_resync_completed)*2 + >= mddev->resync_max - mddev->curr_resync_completed + )) { /* time to update curr_resync_completed */ blk_unplug(mddev->queue); wait_event(mddev->recovery_wait, @@ -6353,7 +6376,17 @@ void md_do_sync(mddev_t *mddev) mddev->curr_resync_completed = mddev->curr_resync; set_bit(MD_CHANGE_CLEAN, &mddev->flags); + sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } + + if (j >= mddev->resync_max) + wait_event(mddev->recovery_wait, + mddev->resync_max > j + || kthread_should_stop()); + + if (kthread_should_stop()) + goto interrupted; + sectors = mddev->pers->sync_request(mddev, j, &skipped, currspeed < speed_min(mddev)); if (sectors == 0) { @@ -6461,6 +6494,7 @@ void md_do_sync(mddev_t *mddev) skip: mddev->curr_resync = 0; + mddev->curr_resync_completed = 0; mddev->resync_min = 0; mddev->resync_max = MaxSector; sysfs_notify(&mddev->kobj, NULL, "sync_completed"); diff --git a/drivers/md/md.h b/drivers/md/md.h index e9b7f54c24d..8227ab909d4 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -12,10 +12,17 @@ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#ifndef _MD_K_H -#define _MD_K_H - -#ifdef CONFIG_BLOCK +#ifndef _MD_MD_H +#define _MD_MD_H + +#include <linux/blkdev.h> +#include <linux/kobject.h> +#include <linux/list.h> +#include <linux/mm.h> +#include <linux/mutex.h> +#include <linux/timer.h> +#include <linux/wait.h> +#include <linux/workqueue.h> #define MaxSector (~(sector_t)0) @@ -408,10 +415,6 @@ static inline void safe_put_page(struct page *p) if (p) put_page(p); } -#endif /* CONFIG_BLOCK */ -#endif - - extern int register_md_personality(struct mdk_personality *p); extern int unregister_md_personality(struct mdk_personality *p); extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev), @@ -434,3 +437,5 @@ extern void md_new_event(mddev_t *mddev); extern int md_allow_write(mddev_t *mddev); extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); + +#endif /* _MD_MD_H */ diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 41ced0cbe82..4ee31aa13c4 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -303,7 +303,7 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) * merge_bvec_fn will be involved in multipath.) */ if (q->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) + queue_max_sectors(q) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); conf->working_disks++; @@ -467,7 +467,7 @@ static int multipath_run (mddev_t *mddev) * violating it, not that we ever expect a device with * a merge_bvec_fn to be involved in multipath */ if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); if (!test_bit(Faulty, &rdev->flags)) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index c08d7559be5..925507e7d67 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -144,7 +144,7 @@ static int create_strip_zones (mddev_t *mddev) */ if (rdev1->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); if (!smallest || (rdev1->sectors < smallest->sectors)) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 36df9109cde..e23758b4a34 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1130,7 +1130,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) * a one page request is never in violation. */ if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); p->head_position = 0; @@ -1996,7 +1996,7 @@ static int run(mddev_t *mddev) * a one page request is never in violation. */ if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); disk->head_position = 0; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 81a54f17417..750550c1166 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1158,8 +1158,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) * a one page request is never in violation. */ if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) - mddev->queue->max_sectors = (PAGE_SIZE>>9); + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) + blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); p->head_position = 0; rdev->raid_disk = mirror; @@ -1809,17 +1809,17 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i r10_bio->sector = sect; raid10_find_phys(conf, r10_bio); - /* Need to check if this section will still be + + /* Need to check if the array will still be * degraded */ - for (j=0; j<conf->copies;j++) { - int d = r10_bio->devs[j].devnum; - if (conf->mirrors[d].rdev == NULL || - test_bit(Faulty, &conf->mirrors[d].rdev->flags)) { + for (j=0; j<conf->raid_disks; j++) + if (conf->mirrors[j].rdev == NULL || + test_bit(Faulty, &conf->mirrors[j].rdev->flags)) { still_degraded = 1; break; } - } + must_sync = bitmap_start_sync(mddev->bitmap, sect, &sync_blocks, still_degraded); @@ -2145,8 +2145,8 @@ static int run(mddev_t *mddev) * a one page request is never in violation. */ if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) - mddev->queue->max_sectors = (PAGE_SIZE>>9); + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) + blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); disk->head_position = 0; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 3bbc6d64704..bef87669823 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -362,7 +362,7 @@ static void raid5_unplug_device(struct request_queue *q); static struct stripe_head * get_active_stripe(raid5_conf_t *conf, sector_t sector, - int previous, int noblock) + int previous, int noblock, int noquiesce) { struct stripe_head *sh; @@ -372,7 +372,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector, do { wait_event_lock_irq(conf->wait_for_stripe, - conf->quiesce == 0, + conf->quiesce == 0 || noquiesce, conf->device_lock, /* nothing */); sh = __find_stripe(conf, sector, conf->generation - previous); if (!sh) { @@ -2671,7 +2671,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, sector_t bn = compute_blocknr(sh, i, 1); sector_t s = raid5_compute_sector(conf, bn, 0, &dd_idx, NULL); - sh2 = get_active_stripe(conf, s, 0, 1); + sh2 = get_active_stripe(conf, s, 0, 1, 1); if (sh2 == NULL) /* so far only the early blocks of this stripe * have been requested. When later blocks @@ -2944,7 +2944,7 @@ static bool handle_stripe5(struct stripe_head *sh) /* Finish reconstruct operations initiated by the expansion process */ if (sh->reconstruct_state == reconstruct_state_result) { struct stripe_head *sh2 - = get_active_stripe(conf, sh->sector, 1, 1); + = get_active_stripe(conf, sh->sector, 1, 1, 1); if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { /* sh cannot be written until sh2 has been read. * so arrange for sh to be delayed a little @@ -3189,7 +3189,7 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { struct stripe_head *sh2 - = get_active_stripe(conf, sh->sector, 1, 1); + = get_active_stripe(conf, sh->sector, 1, 1, 1); if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { /* sh cannot be written until sh2 has been read. * so arrange for sh to be delayed a little @@ -3288,7 +3288,7 @@ static void unplug_slaves(mddev_t *mddev) int i; rcu_read_lock(); - for (i=0; i<mddev->raid_disks; i++) { + for (i = 0; i < conf->raid_disks; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev); if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) { struct request_queue *r_queue = bdev_get_queue(rdev->bdev); @@ -3463,10 +3463,10 @@ static int bio_fits_rdev(struct bio *bi) { struct request_queue *q = bdev_get_queue(bi->bi_bdev); - if ((bi->bi_size>>9) > q->max_sectors) + if ((bi->bi_size>>9) > queue_max_sectors(q)) return 0; blk_recount_segments(q, bi); - if (bi->bi_phys_segments > q->max_phys_segments) + if (bi->bi_phys_segments > queue_max_phys_segments(q)) return 0; if (q->merge_bvec_fn) @@ -3675,7 +3675,7 @@ static int make_request(struct request_queue *q, struct bio * bi) (unsigned long long)logical_sector); sh = get_active_stripe(conf, new_sector, previous, - (bi->bi_rw&RWA_MASK)); + (bi->bi_rw&RWA_MASK), 0); if (sh) { if (unlikely(previous)) { /* expansion might have moved on while waiting for a @@ -3811,13 +3811,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped safepos = conf->reshape_safe; sector_div(safepos, data_disks); if (mddev->delta_disks < 0) { - writepos -= reshape_sectors; + writepos -= min_t(sector_t, reshape_sectors, writepos); readpos += reshape_sectors; safepos += reshape_sectors; } else { writepos += reshape_sectors; - readpos -= reshape_sectors; - safepos -= reshape_sectors; + readpos -= min_t(sector_t, reshape_sectors, readpos); + safepos -= min_t(sector_t, reshape_sectors, safepos); } /* 'writepos' is the most advanced device address we might write. @@ -3845,6 +3845,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped wait_event(conf->wait_for_overlap, atomic_read(&conf->reshape_stripes)==0); mddev->reshape_position = conf->reshape_progress; + mddev->curr_resync_completed = mddev->curr_resync; conf->reshape_checkpoint = jiffies; set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); @@ -3854,6 +3855,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped conf->reshape_safe = mddev->reshape_position; spin_unlock_irq(&conf->device_lock); wake_up(&conf->wait_for_overlap); + sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } if (mddev->delta_disks < 0) { @@ -3871,7 +3873,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) { int j; int skipped = 0; - sh = get_active_stripe(conf, stripe_addr+i, 0, 0); + sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1); set_bit(STRIPE_EXPANDING, &sh->state); atomic_inc(&conf->reshape_stripes); /* If any of this stripe is beyond the end of the old @@ -3914,13 +3916,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped raid5_compute_sector(conf, stripe_addr*(new_data_disks), 1, &dd_idx, NULL); last_sector = - raid5_compute_sector(conf, ((stripe_addr+conf->chunk_size/512) + raid5_compute_sector(conf, ((stripe_addr+reshape_sectors) *(new_data_disks) - 1), 1, &dd_idx, NULL); if (last_sector >= mddev->dev_sectors) last_sector = mddev->dev_sectors - 1; while (first_sector <= last_sector) { - sh = get_active_stripe(conf, first_sector, 1, 0); + sh = get_active_stripe(conf, first_sector, 1, 0, 1); set_bit(STRIPE_EXPAND_SOURCE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state); release_stripe(sh); @@ -3938,11 +3940,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped * then we need to write out the superblock. */ sector_nr += reshape_sectors; - if (sector_nr >= mddev->resync_max) { + if ((sector_nr - mddev->curr_resync_completed) * 2 + >= mddev->resync_max - mddev->curr_resync_completed) { /* Cannot proceed until we've updated the superblock... */ wait_event(conf->wait_for_overlap, atomic_read(&conf->reshape_stripes) == 0); mddev->reshape_position = conf->reshape_progress; + mddev->curr_resync_completed = mddev->curr_resync; conf->reshape_checkpoint = jiffies; set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); @@ -3953,6 +3957,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped conf->reshape_safe = mddev->reshape_position; spin_unlock_irq(&conf->device_lock); wake_up(&conf->wait_for_overlap); + sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } return reshape_sectors; } @@ -4017,9 +4022,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski bitmap_cond_end_sync(mddev->bitmap, sector_nr); - sh = get_active_stripe(conf, sector_nr, 0, 1); + sh = get_active_stripe(conf, sector_nr, 0, 1, 0); if (sh == NULL) { - sh = get_active_stripe(conf, sector_nr, 0, 0); + sh = get_active_stripe(conf, sector_nr, 0, 0, 0); /* make sure we don't swamp the stripe cache if someone else * is trying to get access */ @@ -4029,7 +4034,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski * We don't need to check the 'failed' flag as when that gets set, * recovery aborts. */ - for (i=0; i<mddev->raid_disks; i++) + for (i = 0; i < conf->raid_disks; i++) if (conf->disks[i].rdev == NULL) still_degraded = 1; @@ -4081,7 +4086,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) /* already done this stripe */ continue; - sh = get_active_stripe(conf, sector, 0, 1); + sh = get_active_stripe(conf, sector, 0, 1, 0); if (!sh) { /* failed to get a stripe - must wait */ |