From 5037108acd4dc40c210321cc83b0bf8352eda95a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 1 Oct 2008 14:39:17 +0100 Subject: dm: always allow one page in dm_merge_bvec Some callers assume they can always add at least one page to an empty bio, so dm_merge_bvec should not return 0 in this case: we'll reject the I/O later after the bio is submitted. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index bca448e1187..469cec54f37 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -837,10 +837,10 @@ static int dm_merge_bvec(struct request_queue *q, struct dm_table *map = dm_get_table(md); struct dm_target *ti; sector_t max_sectors; - int max_size; + int max_size = 0; if (unlikely(!map)) - return 0; + goto out; ti = dm_table_find_target(map, bvm->bi_sector); @@ -861,14 +861,15 @@ static int dm_merge_bvec(struct request_queue *q, if (max_size && ti->type->merge) max_size = ti->type->merge(ti, bvm, biovec, max_size); + dm_table_put(map); + +out: /* * Always allow an entire first page */ if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT)) max_size = biovec->bv_len; - dm_table_put(map); - return max_size; } -- cgit v1.2.3 From b01cd5ac43b00c49759c126c21e7d22c7e80b245 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 1 Oct 2008 14:39:24 +0100 Subject: dm: cope with access beyond end of device in dm_merge_bvec If for any reason dm_merge_bvec() is given an offset beyond the end of the device, avoid an oops and always allow one page to be added to an empty bio. We'll reject the I/O later after the bio is submitted. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 469cec54f37..ace998ce59f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -843,6 +843,8 @@ static int dm_merge_bvec(struct request_queue *q, goto out; ti = dm_table_find_target(map, bvm->bi_sector); + if (!dm_target_is_valid(ti)) + goto out_table; /* * Find maximum amount of I/O that won't need splitting @@ -861,6 +863,7 @@ static int dm_merge_bvec(struct request_queue *q, if (max_size && ti->type->merge) max_size = ti->type->merge(ti, bvm, biovec, max_size); +out_table: dm_table_put(map); out: -- cgit v1.2.3 From f331c0296f2a9fee0d396a70598b954062603015 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:01:48 +0200 Subject: block: don't depend on consecutive minor space * Implement disk_devt() and part_devt() and use them to directly access devt instead of computing it from ->major and ->first_minor. Note that all references to ->major and ->first_minor outside of block layer is used to determine devt of the disk (the part0) and as ->major and ->first_minor will continue to represent devt for the disk, converting these users aren't strictly necessary. However, convert them for consistency. * Implement disk_max_parts() to avoid directly deferencing genhd->minors. * Update bdget_disk() such that it doesn't assume consecutive minor space. * Move devt computation from register_disk() to add_disk() and make it the only one (all other usages use the initially determined value). These changes clean up the code and will help disk->part dereference fix and extended block device numbers. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/md/dm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ace998ce59f..a78caad2999 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1146,7 +1146,7 @@ static void unlock_fs(struct mapped_device *md); static void free_dev(struct mapped_device *md) { - int minor = md->disk->first_minor; + int minor = MINOR(disk_devt(md->disk)); if (md->suspended_bdev) { unlock_fs(md); @@ -1267,7 +1267,7 @@ static struct mapped_device *dm_find_md(dev_t dev) md = idr_find(&_minor_idr, minor); if (md && (md == MINOR_ALLOCED || - (dm_disk(md)->first_minor != minor) || + (MINOR(disk_devt(dm_disk(md))) != minor) || test_bit(DMF_FREEING, &md->flags))) { md = NULL; goto out; @@ -1318,7 +1318,8 @@ void dm_put(struct mapped_device *md) if (atomic_dec_and_lock(&md->holders, &_minor_lock)) { map = dm_get_table(md); - idr_replace(&_minor_idr, MINOR_ALLOCED, dm_disk(md)->first_minor); + idr_replace(&_minor_idr, MINOR_ALLOCED, + MINOR(disk_devt(dm_disk(md)))); set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); if (!dm_suspended(md)) { -- cgit v1.2.3 From c9959059161ddd7bf4670cf47367033d6b2f79c4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:21 +0900 Subject: block: fix diskstats access There are two variants of stat functions - ones prefixed with double underbars which don't care about preemption and ones without which disable preemption before manipulating per-cpu counters. It's unclear whether the underbarred ones assume that preemtion is disabled on entry as some callers don't do that. This patch unifies diskstats access by implementing disk_stat_lock() and disk_stat_unlock() which take care of both RCU (for partition access) and preemption (for per-cpu counter access). diskstats access should always be enclosed between the two functions. As such, there's no need for the versions which disables preemption. They're removed and double underbars ones are renamed to drop the underbars. As an extra argument is added, there's no danger of using the old version unconverted. disk_stat_lock() uses get_cpu() and returns the cpu index and all diskstat functions which access per-cpu counters now has @cpu argument to help RT. This change adds RCU or preemption operations at some places but also collapses several preemption ops into one at others. Overall, the performance difference should be negligible as all involved ops are very lightweight per-cpu ones. Signed-off-by: Tejun Heo Cc: Peter Zijlstra Signed-off-by: Jens Axboe --- drivers/md/dm.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a78caad2999..653624792ea 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -377,12 +377,13 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio) static void start_io_acct(struct dm_io *io) { struct mapped_device *md = io->md; + int cpu; io->start_time = jiffies; - preempt_disable(); - disk_round_stats(dm_disk(md)); - preempt_enable(); + cpu = disk_stat_lock(); + disk_round_stats(cpu, dm_disk(md)); + disk_stat_unlock(); dm_disk(md)->in_flight = atomic_inc_return(&md->pending); } @@ -391,15 +392,15 @@ static int end_io_acct(struct dm_io *io) struct mapped_device *md = io->md; struct bio *bio = io->bio; unsigned long duration = jiffies - io->start_time; - int pending; + int pending, cpu; int rw = bio_data_dir(bio); - preempt_disable(); - disk_round_stats(dm_disk(md)); - preempt_enable(); - dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); + cpu = disk_stat_lock(); + disk_round_stats(cpu, dm_disk(md)); + disk_stat_add(cpu, dm_disk(md), ticks[rw], duration); + disk_stat_unlock(); - disk_stat_add(dm_disk(md), ticks[rw], duration); + dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); return !pending; } @@ -885,6 +886,7 @@ static int dm_request(struct request_queue *q, struct bio *bio) int r = -EIO; int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; + int cpu; /* * There is no use in forwarding any barrier request since we can't @@ -897,8 +899,10 @@ static int dm_request(struct request_queue *q, struct bio *bio) down_read(&md->io_lock); - disk_stat_inc(dm_disk(md), ios[rw]); - disk_stat_add(dm_disk(md), sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, dm_disk(md), ios[rw]); + disk_stat_add(cpu, dm_disk(md), sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); /* * If we're suspended we have to queue -- cgit v1.2.3 From ed9e1982347b36573cd622ee5f4e2a7ccd79b3fd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:05 +0900 Subject: block: implement and use {disk|part}_to_dev() Implement {disk|part}_to_dev() and use them to access generic device instead of directly dereferencing {disk|part}->dev. To make sure no user is left behind, rename generic devices fields to __dev. This is in preparation of unifying partition 0 handling with other partitions. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/md/dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 653624792ea..637806695bb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1186,7 +1186,7 @@ static void event_callback(void *context) list_splice_init(&md->uevent_list, &uevents); spin_unlock_irqrestore(&md->uevent_lock, flags); - dm_send_uevents(&uevents, &md->disk->dev.kobj); + dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); atomic_inc(&md->event_nr); wake_up(&md->eventq); @@ -1643,7 +1643,7 @@ out: *---------------------------------------------------------------*/ void dm_kobject_uevent(struct mapped_device *md) { - kobject_uevent(&md->disk->dev.kobj, KOBJ_CHANGE); + kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE); } uint32_t dm_next_uevent_seq(struct mapped_device *md) -- cgit v1.2.3 From 074a7aca7afa6f230104e8e65eba3420263714a5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:14 +0900 Subject: block: move stats from disk to part0 Move stats related fields - stamp, in_flight, dkstats - from disk to part0 and unify stat handling such that... * part_stat_*() now updates part0 together if the specified partition is not part0. ie. part_stat_*() are now essentially all_stat_*(). * {disk|all}_stat_*() are gone. * part_round_stats() is updated similary. It handles part0 stats automatically and disk_round_stats() is killed. * part_{inc|dec}_in_fligh() is implemented which automatically updates part0 stats for parts other than part0. * disk_map_sector_rcu() is updated to return part0 if no part matches. Combined with the above changes, this makes NULL special case handling in callers unnecessary. * Separate stats show code paths for disk are collapsed into part stats show code paths. * Rename disk_stat_lock/unlock() to part_stat_lock/unlock() While at it, reposition stat handling macros a bit and add missing parentheses around macro parameters. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/md/dm.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 637806695bb..327de03a5bd 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -381,10 +381,10 @@ static void start_io_acct(struct dm_io *io) io->start_time = jiffies; - cpu = disk_stat_lock(); - disk_round_stats(cpu, dm_disk(md)); - disk_stat_unlock(); - dm_disk(md)->in_flight = atomic_inc_return(&md->pending); + cpu = part_stat_lock(); + part_round_stats(cpu, &dm_disk(md)->part0); + part_stat_unlock(); + dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending); } static int end_io_acct(struct dm_io *io) @@ -395,12 +395,13 @@ static int end_io_acct(struct dm_io *io) int pending, cpu; int rw = bio_data_dir(bio); - cpu = disk_stat_lock(); - disk_round_stats(cpu, dm_disk(md)); - disk_stat_add(cpu, dm_disk(md), ticks[rw], duration); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_round_stats(cpu, &dm_disk(md)->part0); + part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); + part_stat_unlock(); - dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); + dm_disk(md)->part0.in_flight = pending = + atomic_dec_return(&md->pending); return !pending; } @@ -899,10 +900,10 @@ static int dm_request(struct request_queue *q, struct bio *bio) down_read(&md->io_lock); - cpu = disk_stat_lock(); - disk_stat_inc(cpu, dm_disk(md), ios[rw]); - disk_stat_add(cpu, dm_disk(md), sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]); + part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); + part_stat_unlock(); /* * If we're suspended we have to queue -- cgit v1.2.3