From 83121942b28daffc9526b14b7843d8cdbd3db641 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 22 Jul 2009 16:52:13 -0400 Subject: Btrfs: Fix crash on read failures at mount If the tree roots hit read errors during mount, btrfs is not properly erroring out. We need to check the uptodate bits after reading in the tree root node. Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0d50d49d990..55d9d188e69 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1783,6 +1783,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_super_chunk_root(disk_super), blocksize, generation); BUG_ON(!chunk_root->node); + if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { + printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", + sb->s_id); + goto fail_chunk_root; + } btrfs_set_root_node(&chunk_root->root_item, chunk_root->node); chunk_root->commit_root = btrfs_root_node(chunk_root); @@ -1810,6 +1815,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, blocksize, generation); if (!tree_root->node) goto fail_chunk_root; + if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { + printk(KERN_WARNING "btrfs: failed to read tree root on %s\n", + sb->s_id); + goto fail_tree_root; + } btrfs_set_root_node(&tree_root->root_item, tree_root->node); tree_root->commit_root = btrfs_root_node(tree_root); -- cgit v1.2.3 From 817d52f8dba26d0295c26035531c30ce5f1e3c3e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 13 Jul 2009 21:29:25 -0400 Subject: Btrfs: async block group caching This patch moves the caching of the block group off to a kthread in order to allow people to allocate sooner. Instead of blocking up behind the caching mutex, we instead kick of the caching kthread, and then attempt to make an allocation. If we cannot, we wait on the block groups caching waitqueue, which the caching kthread will wake the waiting threads up everytime it finds 2 meg worth of space, and then again when its finished caching. This is how I tested the speedup from this mkfs the disk mount the disk fill the disk up with fs_mark unmount the disk mount the disk time touch /mnt/foo Without my changes this took 11 seconds on my box, with these changes it now takes 1 second. Another change thats been put in place is we lock the super mirror's in the pinned extent map in order to keep us from adding that stuff as free space when caching the block group. This doesn't really change anything else as far as the pinned extent map is concerned, since for actual pinned extents we use EXTENT_DIRTY, but it does mean that when we unmount we have to go in and unlock those extents to keep from leaking memory. I've also added a check where when we are reading block groups from disk, if the amount of space used == the size of the block group, we go ahead and mark the block group as cached. This drastically reduces the amount of time it takes to cache the block groups. Using the same test as above, except doing a dd to a file and then unmounting, it used to take 33 seconds to umount, now it takes 3 seconds. This version uses the commit_root in the caching kthread, and then keeps track of how many async caching threads are running at any given time so if one of the async threads is still running as we cross transactions we can wait until its finished before handling the pinned extents. Thank you, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 55d9d188e69..ec2c915f7f4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -907,6 +907,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->inode_lock); mutex_init(&root->objectid_mutex); mutex_init(&root->log_mutex); + init_rwsem(&root->commit_root_sem); init_waitqueue_head(&root->log_writer_wait); init_waitqueue_head(&root->log_commit_wait[0]); init_waitqueue_head(&root->log_commit_wait[1]); @@ -1566,6 +1567,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, atomic_set(&fs_info->async_delalloc_pages, 0); atomic_set(&fs_info->async_submit_draining, 0); atomic_set(&fs_info->nr_async_bios, 0); + atomic_set(&fs_info->async_caching_threads, 0); fs_info->sb = sb; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; @@ -2337,6 +2339,7 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(root->fs_info->csum_root->commit_root); btrfs_free_block_groups(root->fs_info); + btrfs_free_super_mirror_extents(root->fs_info); del_fs_roots(fs_info); -- cgit v1.2.3 From 68b38550ddbea13d296184bf69edff387618b1d3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 27 Jul 2009 13:57:01 -0400 Subject: Btrfs: change how we unpin extents We are racy with async block caching and unpinning extents. This patch makes things much less complicated by only unpinning the extent if the block group is cached. We check the block_group->cached var under the block_group->lock spin lock. If it is set to BTRFS_CACHE_FINISHED then we update the pinned counters, and unpin the extent and add the free space back. If it is not set to this, we start the caching of the block group so the next time we unpin extents we can unpin the extent. This keeps us from racing with the async caching threads, lets us kill the fs wide async thread counter, and keeps us from having to set DELALLOC bits for every extent we hit if there are caching kthreads going. One thing that needed to be changed was btrfs_free_super_mirror_extents. Now instead of just looking for LOCKED extents, we also look for DIRTY extents, since we could have left some extents pinned in the previous transaction that will never get freed now that we are unmounting, which would cause us to leak memory. So btrfs_free_super_mirror_extents has been changed to btrfs_free_pinned_extents, and it will clear the extents locked for the super mirror, and any remaining pinned extents that may be present. Thank you, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ec2c915f7f4..c658397c747 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1567,7 +1567,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, atomic_set(&fs_info->async_delalloc_pages, 0); atomic_set(&fs_info->async_submit_draining, 0); atomic_set(&fs_info->nr_async_bios, 0); - atomic_set(&fs_info->async_caching_threads, 0); fs_info->sb = sb; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; @@ -2339,7 +2338,7 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(root->fs_info->csum_root->commit_root); btrfs_free_block_groups(root->fs_info); - btrfs_free_super_mirror_extents(root->fs_info); + btrfs_free_pinned_extents(root->fs_info); del_fs_roots(fs_info); -- cgit v1.2.3 From f25784b35f590c81d5fb8245a8cd45e1afb6f1b2 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Tue, 28 Jul 2009 08:41:57 -0400 Subject: Btrfs: Fix async caching interaction with unmount - don't stop the caching thread until btrfs_commit_super return. - if caching is interrupted by umount, set last to (u64)-1. otherwise the un-scanned range of block group will be considered as free extent. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c658397c747..3a9b8875988 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2317,6 +2317,9 @@ int close_ctree(struct btrfs_root *root) printk(KERN_ERR "btrfs: commit super ret %d\n", ret); } + fs_info->closing = 2; + smp_mb(); + if (fs_info->delalloc_bytes) { printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", (unsigned long long)fs_info->delalloc_bytes); -- cgit v1.2.3