From cf771cb5a7b716f3f9e532fd42a1e3a0a75adec5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:01:09 +0200 Subject: block: make variable and argument names more consistent In hd_struct, @partno is used to denote partition number and a number of other places use @part to denote hd_struct. Functions use @part and @index instead. This causes confusion and makes it difficult to use consistent variable names for hd_struct. Always use @partno if a variable represents partition number. Also, print out functions use @f or @part for seq_file argument. Use @seqf uniformly instead. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/block_dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index aff54219e04..de0776cd721 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -930,7 +930,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) struct module *owner = NULL; struct gendisk *disk; int ret; - int part; + int partno; int perm = 0; if (file->f_mode & FMODE_READ) @@ -949,7 +949,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) ret = -ENXIO; file->f_mapping = bdev->bd_inode->i_mapping; lock_kernel(); - disk = get_gendisk(bdev->bd_dev, &part); + disk = get_gendisk(bdev->bd_dev, &partno); if (!disk) { unlock_kernel(); bdput(bdev); @@ -961,7 +961,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) if (!bdev->bd_openers) { bdev->bd_disk = disk; bdev->bd_contains = bdev; - if (!part) { + if (!partno) { struct backing_dev_info *bdi; if (disk->fops->open) { ret = disk->fops->open(bdev->bd_inode, file); @@ -989,7 +989,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) if (ret) goto out_first; bdev->bd_contains = whole; - p = disk->part[part - 1]; + p = disk->part[partno - 1]; bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { -- cgit v1.2.3 From f331c0296f2a9fee0d396a70598b954062603015 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:01:48 +0200 Subject: block: don't depend on consecutive minor space * Implement disk_devt() and part_devt() and use them to directly access devt instead of computing it from ->major and ->first_minor. Note that all references to ->major and ->first_minor outside of block layer is used to determine devt of the disk (the part0) and as ->major and ->first_minor will continue to represent devt for the disk, converting these users aren't strictly necessary. However, convert them for consistency. * Implement disk_max_parts() to avoid directly deferencing genhd->minors. * Update bdget_disk() such that it doesn't assume consecutive minor space. * Move devt computation from register_disk() to add_disk() and make it the only one (all other usages use the initially determined value). These changes clean up the code and will help disk->part dereference fix and extended block device numbers. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/block_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index de0776cd721..72e0a2887cb 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -892,7 +892,7 @@ int check_disk_change(struct block_device *bdev) if (bdops->revalidate_disk) bdops->revalidate_disk(bdev->bd_disk); - if (bdev->bd_disk->minors > 1) + if (disk_max_parts(bdev->bd_disk)) bdev->bd_invalidated = 1; return 1; } -- cgit v1.2.3 From e71bf0d0ee89e51b92776391c5634938236977d5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:03:02 +0200 Subject: block: fix disk->part[] dereferencing race disk->part[] is protected by its matching bdev's lock. However, non-critical accesses like collecting stats and printing out sysfs and proc information used to be performed without any locking. As partitions can come and go dynamically, partitions can go away underneath those non-critical accesses. As some of those accesses are writes, this theoretically can lead to silent corruption. This patch fixes the race by using RCU for the partition array and dev reference counter to hold partitions. * Rename disk->part[] to disk->__part[] to make sure no one outside genhd layer proper accesses it directly. * Use RCU for disk->__part[] dereferencing. * Implement disk_{get|put}_part() which can be used to get and put partitions from gendisk respectively. * Iterators are implemented to help iterate through all partitions safely. * Functions which require RCU readlock are marked with _rcu suffix. * Use disk_put_part() in __blkdev_put() instead of directly putting the contained kobject. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/block_dev.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 72e0a2887cb..2f2873b9a04 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -929,6 +929,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) { struct module *owner = NULL; struct gendisk *disk; + struct hd_struct *part = NULL; int ret; int partno; int perm = 0; @@ -978,7 +979,6 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) if (bdev->bd_invalidated) rescan_partitions(disk, bdev); } else { - struct hd_struct *p; struct block_device *whole; whole = bdget_disk(disk, 0); ret = -ENOMEM; @@ -989,16 +989,16 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) if (ret) goto out_first; bdev->bd_contains = whole; - p = disk->part[partno - 1]; + part = disk_get_part(disk, partno); bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; - if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { + if (!(disk->flags & GENHD_FL_UP) || + !part || !part->nr_sects) { ret = -ENXIO; goto out_first; } - kobject_get(&p->dev.kobj); - bdev->bd_part = p; - bd_set_size(bdev, (loff_t) p->nr_sects << 9); + bdev->bd_part = part; + bd_set_size(bdev, (loff_t)part->nr_sects << 9); } } else { put_disk(disk); @@ -1027,6 +1027,7 @@ out_first: __blkdev_put(bdev->bd_contains, 1); bdev->bd_contains = NULL; put_disk(disk); + disk_put_part(part); module_put(owner); out: mutex_unlock(&bdev->bd_mutex); @@ -1119,7 +1120,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part) module_put(owner); if (bdev->bd_contains != bdev) { - kobject_put(&bdev->bd_part->dev.kobj); + disk_put_part(bdev->bd_part); bdev->bd_part = NULL; } bdev->bd_disk = NULL; -- cgit v1.2.3 From ed9e1982347b36573cd622ee5f4e2a7ccd79b3fd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:05 +0900 Subject: block: implement and use {disk|part}_to_dev() Implement {disk|part}_to_dev() and use them to access generic device instead of directly dereferencing {disk|part}->dev. To make sure no user is left behind, rename generic devices fields to __dev. This is in preparation of unifying partition 0 handling with other partitions. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/block_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 2f2873b9a04..a02df22f37c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -543,9 +543,9 @@ EXPORT_SYMBOL(bd_release); static struct kobject *bdev_get_kobj(struct block_device *bdev) { if (bdev->bd_contains != bdev) - return kobject_get(&bdev->bd_part->dev.kobj); + return kobject_get(&part_to_dev(bdev->bd_part)->kobj); else - return kobject_get(&bdev->bd_disk->dev.kobj); + return kobject_get(&disk_to_dev(bdev->bd_disk)->kobj); } static struct kobject *bdev_get_holder(struct block_device *bdev) -- cgit v1.2.3 From b5d0b9df0ba5d9a044f3a21e7544f53d90bd1465 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:06:42 +0200 Subject: block: introduce partition 0 genhd and partition code handled disk and partitions separately. All information about the whole disk was in struct genhd and partitions in struct hd_struct. However, the whole disk (part0) and other partitions have a lot in common and the data structures end up having good number of common fields and thus separate code paths doing the same thing. Also, the partition array was indexed by partno - 1 which gets pretty confusing at times. This patch introduces partition 0 and makes the partition array indexed by partno. Following patches will unify the handling of disk and parts piece-by-piece. This patch also implements disk_partitionable() which tests whether a disk is partitionable. With coming dynamic partition array change, the most common usage of disk_max_parts() will be testing whether a disk is partitionable and the number of max partitions will become much less important. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/block_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index a02df22f37c..c982a910797 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -892,7 +892,7 @@ int check_disk_change(struct block_device *bdev) if (bdops->revalidate_disk) bdops->revalidate_disk(bdev->bd_disk); - if (disk_max_parts(bdev->bd_disk)) + if (disk_partitionable(bdev->bd_disk)) bdev->bd_invalidated = 1; return 1; } -- cgit v1.2.3 From 4c46501d1659475dc6c89554af6ce7fe6ecf615c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:11 +0900 Subject: block: move holder_dir from disk to part0 Move disk->holder_dir to part0->holder_dir. Kill now mostly superflous bdev_get_holder(). While at it, kill superflous kobject_get/put() around holder_dir, slave_dir and cmd_filter creation and collapse disk_sysfs_add_subdirs() into register_disk(). These serve no purpose but obfuscating the code. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/block_dev.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index c982a910797..57d57264285 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -548,14 +548,6 @@ static struct kobject *bdev_get_kobj(struct block_device *bdev) return kobject_get(&disk_to_dev(bdev->bd_disk)->kobj); } -static struct kobject *bdev_get_holder(struct block_device *bdev) -{ - if (bdev->bd_contains != bdev) - return kobject_get(bdev->bd_part->holder_dir); - else - return kobject_get(bdev->bd_disk->holder_dir); -} - static int add_symlink(struct kobject *from, struct kobject *to) { if (!from || !to) @@ -608,7 +600,7 @@ static int bd_holder_grab_dirs(struct block_device *bdev, if (!bo->sdev) goto fail_put_hdev; - bo->hdir = bdev_get_holder(bdev); + bo->hdir = kobject_get(bdev->bd_part->holder_dir); if (!bo->hdir) goto fail_put_sdev; -- cgit v1.2.3 From 0762b8bde9729f10f8e6249809660ff2ec3ad735 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:12 +0900 Subject: block: always set bdev->bd_part Till now, bdev->bd_part is set only if the bdev was for parts other than part0. This patch makes bdev->bd_part always set so that code paths don't have to differenciate common handling. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/block_dev.c | 67 +++++++++++++++++++++++++++------------------------------- 1 file changed, 31 insertions(+), 36 deletions(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 57d57264285..c3fa19bd64d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -540,14 +540,6 @@ EXPORT_SYMBOL(bd_release); * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 */ -static struct kobject *bdev_get_kobj(struct block_device *bdev) -{ - if (bdev->bd_contains != bdev) - return kobject_get(&part_to_dev(bdev->bd_part)->kobj); - else - return kobject_get(&disk_to_dev(bdev->bd_disk)->kobj); -} - static int add_symlink(struct kobject *from, struct kobject *to) { if (!from || !to) @@ -596,7 +588,7 @@ static int bd_holder_grab_dirs(struct block_device *bdev, if (!bo->hdev) goto fail_put_sdir; - bo->sdev = bdev_get_kobj(bdev); + bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj); if (!bo->sdev) goto fail_put_hdev; @@ -919,7 +911,6 @@ static int __blkdev_put(struct block_device *bdev, int for_part); static int do_open(struct block_device *bdev, struct file *file, int for_part) { - struct module *owner = NULL; struct gendisk *disk; struct hd_struct *part = NULL; int ret; @@ -941,25 +932,27 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) ret = -ENXIO; file->f_mapping = bdev->bd_inode->i_mapping; + lock_kernel(); + disk = get_gendisk(bdev->bd_dev, &partno); - if (!disk) { - unlock_kernel(); - bdput(bdev); - return ret; - } - owner = disk->fops->owner; + if (!disk) + goto out_unlock_kernel; + part = disk_get_part(disk, partno); + if (!part) + goto out_unlock_kernel; mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { bdev->bd_disk = disk; + bdev->bd_part = part; bdev->bd_contains = bdev; if (!partno) { struct backing_dev_info *bdi; if (disk->fops->open) { ret = disk->fops->open(bdev->bd_inode, file); if (ret) - goto out_first; + goto out_clear; } if (!bdev->bd_openers) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); @@ -975,31 +968,32 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) whole = bdget_disk(disk, 0); ret = -ENOMEM; if (!whole) - goto out_first; + goto out_clear; BUG_ON(for_part); ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); if (ret) - goto out_first; + goto out_clear; bdev->bd_contains = whole; - part = disk_get_part(disk, partno); bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; if (!(disk->flags & GENHD_FL_UP) || !part || !part->nr_sects) { ret = -ENXIO; - goto out_first; + goto out_clear; } - bdev->bd_part = part; bd_set_size(bdev, (loff_t)part->nr_sects << 9); } } else { + disk_put_part(part); put_disk(disk); - module_put(owner); + module_put(disk->fops->owner); + part = NULL; + disk = NULL; if (bdev->bd_contains == bdev) { if (bdev->bd_disk->fops->open) { ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); if (ret) - goto out; + goto out_unlock_bdev; } if (bdev->bd_invalidated) rescan_partitions(bdev->bd_disk, bdev); @@ -1012,20 +1006,24 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) unlock_kernel(); return 0; -out_first: + out_clear: bdev->bd_disk = NULL; + bdev->bd_part = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) __blkdev_put(bdev->bd_contains, 1); bdev->bd_contains = NULL; - put_disk(disk); - disk_put_part(part); - module_put(owner); -out: + out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); + out_unlock_kernel: unlock_kernel(); - if (ret) - bdput(bdev); + + disk_put_part(part); + if (disk) + module_put(disk->fops->owner); + put_disk(disk); + bdput(bdev); + return ret; } @@ -1110,11 +1108,8 @@ static int __blkdev_put(struct block_device *bdev, int for_part) put_disk(disk); module_put(owner); - - if (bdev->bd_contains != bdev) { - disk_put_part(bdev->bd_part); - bdev->bd_part = NULL; - } + disk_put_part(bdev->bd_part); + bdev->bd_part = NULL; bdev->bd_disk = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) -- cgit v1.2.3 From 0c002c2f74e10baa9021d3ecc50585c6eafea568 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Thu, 4 Sep 2008 14:27:20 -0600 Subject: Wrapper for lower-level revalidate_disk routines. This is a wrapper for the lower-level revalidate_disk call-backs such as sd_revalidate_disk(). It allows us to perform pre and post operations when calling them. We will use this wrapper in a later patch to adjust block device sizes after an online resize (a _post_ operation). Signed-off-by: Andrew Patterson Signed-off-by: Jens Axboe --- fs/block_dev.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index c3fa19bd64d..4eeb69a8873 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -852,6 +852,27 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode) EXPORT_SYMBOL(open_by_devnum); +/** + * revalidate_disk - wrapper for lower-level driver's revalidate_disk + * call-back + * + * @disk: struct gendisk to be revalidated + * + * This routine is a wrapper for lower-level driver's revalidate_disk + * call-backs. It is used to do common pre and post operations needed + * for all revalidate_disk operations. + */ +int revalidate_disk(struct gendisk *disk) +{ + int ret = 0; + + if (disk->fops->revalidate_disk) + ret = disk->fops->revalidate_disk(disk); + + return ret; +} +EXPORT_SYMBOL(revalidate_disk); + /* * This routine checks whether a removable media has been changed, * and invalidates all buffer-cache-entries in that case. This -- cgit v1.2.3 From c3279d1454cdfed02a557d789d8a6d08ab4cbe70 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Thu, 4 Sep 2008 14:27:25 -0600 Subject: Adjust block device size after an online resize of a disk. The revalidate_disk routine now checks if a disk has been resized by comparing the gendisk capacity to the bdev inode size. If they are different (usually because the disk has been resized underneath the kernel) the bdev inode size is adjusted to match the capacity. Signed-off-by: Andrew Patterson Signed-off-by: Jens Axboe --- fs/block_dev.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 4eeb69a8873..b721955d382 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -852,6 +852,34 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode) EXPORT_SYMBOL(open_by_devnum); +/** + * check_disk_size_change - checks for disk size change and adjusts + * bdev size. + * + * @disk: struct gendisk to check + * @bdev: struct bdev to adjust. + * + * This routine checks to see if the bdev size does not match the disk size + * and adjusts it if it differs. + */ +void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) +{ + loff_t disk_size, bdev_size; + + disk_size = (loff_t)get_capacity(disk) << 9; + bdev_size = i_size_read(bdev->bd_inode); + if (disk_size != bdev_size) { + char name[BDEVNAME_SIZE]; + + disk_name(disk, 0, name); + printk(KERN_INFO + "%s: detected capacity change from %lld to %lld\n", + name, bdev_size, disk_size); + i_size_write(bdev->bd_inode, disk_size); + } +} +EXPORT_SYMBOL(check_disk_size_change); + /** * revalidate_disk - wrapper for lower-level driver's revalidate_disk * call-back @@ -864,11 +892,20 @@ EXPORT_SYMBOL(open_by_devnum); */ int revalidate_disk(struct gendisk *disk) { + struct block_device *bdev; int ret = 0; if (disk->fops->revalidate_disk) ret = disk->fops->revalidate_disk(disk); + bdev = bdget_disk(disk, 0); + if (!bdev) + return ret; + + mutex_lock(&bdev->bd_mutex); + check_disk_size_change(disk, bdev); + mutex_unlock(&bdev->bd_mutex); + bdput(bdev); return ret; } EXPORT_SYMBOL(revalidate_disk); -- cgit v1.2.3 From 56ade44b46780fa291fa68b824f1dafdcb11b0ca Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Thu, 4 Sep 2008 14:27:40 -0600 Subject: Added flush_disk to factor out common buffer cache flushing code. We need to be able to flush the buffer cache for for more than just when a disk is changed, so we factor out common cache flush code in check_disk_change() to an internal flush_disk() routine. This routine will then be used for both disk changes and disk resizes (in a later patch). Include the disk name in the text indicating that there are busy inodes on the device and increase the KERN severity of the message. Signed-off-by: Andrew Patterson Signed-off-by: Jens Axboe --- fs/block_dev.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index b721955d382..33650fc537c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -852,6 +852,32 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode) EXPORT_SYMBOL(open_by_devnum); +/** + * flush_disk - invalidates all buffer-cache entries on a disk + * + * @bdev: struct block device to be flushed + * + * Invalidates all buffer-cache entries on a disk. It should be called + * when a disk has been changed -- either by a media change or online + * resize. + */ +static void flush_disk(struct block_device *bdev) +{ + if (__invalidate_device(bdev)) { + char name[BDEVNAME_SIZE] = ""; + + if (bdev->bd_disk) + disk_name(bdev->bd_disk, 0, name); + printk(KERN_WARNING "VFS: busy inodes on changed media or " + "resized disk %s\n", name); + } + + if (!bdev->bd_disk) + return; + if (disk_partitionable(bdev->bd_disk)) + bdev->bd_invalidated = 1; +} + /** * check_disk_size_change - checks for disk size change and adjusts * bdev size. @@ -929,13 +955,9 @@ int check_disk_change(struct block_device *bdev) if (!bdops->media_changed(bdev->bd_disk)) return 0; - if (__invalidate_device(bdev)) - printk("VFS: busy inodes on changed media.\n"); - + flush_disk(bdev); if (bdops->revalidate_disk) bdops->revalidate_disk(bdev->bd_disk); - if (disk_partitionable(bdev->bd_disk)) - bdev->bd_invalidated = 1; return 1; } -- cgit v1.2.3 From 608aeef17a91747d6303de4df5e2c2e6899a95e8 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Thu, 4 Sep 2008 14:27:45 -0600 Subject: Call flush_disk() after detecting an online resize. We call flush_disk() to make sure the buffer cache for the disk is flushed after a disk resize. There are two resize cases, growing and shrinking. Given that users can shrink/then grow a disk before revalidate_disk() is called, we treat the grow case identically to shrinking. We need to flush the buffer cache after an online shrink because, as James Bottomley puts it, The two use cases for shrinking I can see are 1. planned: the fs is already shrunk to within the new boundaries and all data is relocated, so invalidate is fine (any dirty buffers that might exist in the shrunk region are there only because they were relocated but not yet written to their original location). 2. unplanned: In this case, the fs is probably toast, so whether we invalidate or not isn't going to make a whole lot of difference; it's still going to try to read or write from sectors beyond the new size and get I/O errors. Immediately invalidating shrunk disks will cause errors for outstanding I/Os for reads/write beyond the new end of the disk to be generated earlier then if we waited for the normal buffer cache operation. It also removes a potential security hole where we might keep old data around from beyond the end of the shrunk disk if the disk was not invalidated. Signed-off-by: Andrew Patterson Signed-off-by: Jens Axboe --- fs/block_dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 33650fc537c..57e2786dd2a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -902,6 +902,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) "%s: detected capacity change from %lld to %lld\n", name, bdev_size, disk_size); i_size_write(bdev->bd_inode, disk_size); + flush_disk(bdev); } } EXPORT_SYMBOL(check_disk_size_change); -- cgit v1.2.3 From 57d1b5366f46fe434e565b710baf683daff78dd8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 9 Oct 2008 10:42:38 +0200 Subject: block_dev: fix kernel-doc in new functions Fix kernel-doc in new functions: Error(mmotm-2008-1002-1617//fs/block_dev.c:895): duplicate section name 'Description' Error(mmotm-2008-1002-1617//fs/block_dev.c:924): duplicate section name 'Description' Warning(mmotm-2008-1002-1617//fs/block_dev.c:1282): No description found for parameter 'pathname' Signed-off-by: Randy Dunlap cc: Andrew Patterson Signed-off-by: Jens Axboe --- fs/block_dev.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 57e2786dd2a..d84f0469a01 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -879,9 +879,7 @@ static void flush_disk(struct block_device *bdev) } /** - * check_disk_size_change - checks for disk size change and adjusts - * bdev size. - * + * check_disk_size_change - checks for disk size change and adjusts bdev size. * @disk: struct gendisk to check * @bdev: struct bdev to adjust. * @@ -908,9 +906,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) EXPORT_SYMBOL(check_disk_size_change); /** - * revalidate_disk - wrapper for lower-level driver's revalidate_disk - * call-back - * + * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back * @disk: struct gendisk to be revalidated * * This routine is a wrapper for lower-level driver's revalidate_disk @@ -1266,10 +1262,9 @@ EXPORT_SYMBOL(ioctl_by_bdev); /** * lookup_bdev - lookup a struct block_device by name + * @pathname: special file representing the block device * - * @path: special file representing the block device - * - * Get a reference to the blockdevice at @path in the current + * Get a reference to the blockdevice at @pathname in the current * namespace if possible and return it. Return ERR_PTR(error) * otherwise. */ -- cgit v1.2.3