aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Zheng Yan <zheng.yan@oracle.com> 2008-09-26 10:05:38 -0400
committer Chris Mason <chris.mason@oracle.com> 2008-09-26 10:05:38 -0400
commit 5b21f2ed3f2947b5195b65c9fdbdd9e52904cc03 (patch)
tree 9af8f539ac487c163f3207bc065767c3c8b37ae7
parent e465768938f95388723b0fd3c50a0ae48173edb9 (diff)
Btrfs: extent_map and data=ordered fixes for space balancing
* Add an EXTENT_BOUNDARY state bit to keep the writepage code from merging data extents that are in the process of being relocated. This allows us to do accounting for them properly. * The balancing code relocates data extents independent of the underlying inode. The extent_map code was modified to properly account for things moving around (invalidating extent_map caches in the inode). * Don't take the drop_mutex in the create_subvol ioctl. It isn't required. * Fix walking of the ordered extent list to avoid races with sys_unlink. * Change the lock ordering rules. Transaction start goes outside the drop_mutex. This allows btrfs_commit_transaction to directly drop the relocation trees. Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- fs/btrfs/ctree.c 9
-rw-r--r-- fs/btrfs/ctree.h 11
-rw-r--r-- fs/btrfs/extent_io.c 13
-rw-r--r-- fs/btrfs/extent_io.h 1
-rw-r--r-- fs/btrfs/file.c 31
-rw-r--r-- fs/btrfs/inode-map.c 4
-rw-r--r-- fs/btrfs/inode.c 52
-rw-r--r-- fs/btrfs/ioctl.c 2
-rw-r--r-- fs/btrfs/ordered-data.c 26
-rw-r--r-- fs/btrfs/transaction.c 8
10 files changed, 108 insertions, 49 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 50aea8cb653..f9cd40967d0 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -290,7 +290,6 @@ int noinline btrfs_cow_block(struct btrfs_trans_handle *trans,
struct extent_buffer **cow_ret, u64 prealloc_dest)
{
u64 search_start;
- u64 header_trans;
int ret;
if (trans->transaction != root->fs_info->running_transaction) {
@@ -304,9 +303,9 @@ int noinline btrfs_cow_block(struct btrfs_trans_handle *trans,
WARN_ON(1);
}
- header_trans = btrfs_header_generation(buf);
spin_lock(&root->fs_info->hash_lock);
- if (header_trans == trans->transid &&
+ if (btrfs_header_generation(buf) == trans->transid &&
+ btrfs_header_owner(buf) == root->root_key.objectid &&
!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
*cow_ret = buf;
spin_unlock(&root->fs_info->hash_lock);
@@ -1300,6 +1299,7 @@ again:
/* is a cow on this block not required */
spin_lock(&root->fs_info->hash_lock);
if (btrfs_header_generation(b) == trans->transid &&
+ btrfs_header_owner(b) == root->root_key.objectid &&
!btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
spin_unlock(&root->fs_info->hash_lock);
goto cow_done;
@@ -1396,7 +1396,8 @@ cow_done:
/* this is only true while dropping a snapshot */
if (level == lowest_level) {
- break;
+ ret = 0;
+ goto done;
}
blocknr = btrfs_node_blockptr(b, slot);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b9f9f815ed0..3e62a1b0a1f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1486,6 +1486,9 @@ static inline struct dentry *fdentry(struct file *file)
/* extent-tree.c */
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes, u32 *refs);
int btrfs_update_pinned_extents(struct btrfs_root *root,
u64 bytenr, u64 num, int pin);
int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
@@ -1812,6 +1815,8 @@ void btrfs_destroy_inode(struct inode *inode);
int btrfs_init_cachep(void);
void btrfs_destroy_cachep(void);
long btrfs_ioctl_trans_end(struct file *file);
+struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
+ struct btrfs_root *root, int wait);
struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
struct btrfs_root *root);
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
@@ -1824,13 +1829,17 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
int btrfs_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);
+int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
+int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
+void btrfs_orphan_cleanup(struct btrfs_root *root);
/* ioctl.c */
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
/* file.c */
int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync);
-int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end);
+int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
+ int skip_pinned);
int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
extern struct file_operations btrfs_file_operations;
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e3a25be5c66..8bd1b402f3f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -292,7 +292,7 @@ static int merge_state(struct extent_io_tree *tree,
struct extent_state *other;
struct rb_node *other_node;
- if (state->state & EXTENT_IOBITS)
+ if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
return 0;
other_node = rb_prev(&state->rb_node);
@@ -1070,7 +1070,8 @@ search_again:
while(1) {
state = rb_entry(node, struct extent_state, rb_node);
- if (found && state->start != cur_start) {
+ if (found && (state->start != cur_start ||
+ (state->state & EXTENT_BOUNDARY))) {
goto out;
}
if (!(state->state & EXTENT_DELALLOC)) {
@@ -1078,7 +1079,7 @@ search_again:
*end = state->end;
goto out;
}
- if (!found) {
+ if (!found && !(state->state & EXTENT_BOUNDARY)) {
struct extent_state *prev_state;
struct rb_node *prev_node = node;
while(1) {
@@ -1088,7 +1089,11 @@ search_again:
prev_state = rb_entry(prev_node,
struct extent_state,
rb_node);
- if (!(prev_state->state & EXTENT_DELALLOC))
+ if ((prev_state->end + 1 != state->start) ||
+ !(prev_state->state & EXTENT_DELALLOC))
+ break;
+ if ((cur_start - prev_state->start) * 2 >
+ max_bytes)
break;
state = prev_state;
node = prev_node;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 3cb411a5f4d..c9d1908a1ae 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -15,6 +15,7 @@
#define EXTENT_BUFFER_FILLED (1 << 8)
#define EXTENT_ORDERED (1 << 9)
#define EXTENT_ORDERED_METADATA (1 << 10)
+#define EXTENT_BOUNDARY (1 << 11)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
/*
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 8856570a0eb..1b7e51a9db0 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -294,7 +294,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
last_pos_in_file,
0, 0, hole_size, 0);
btrfs_drop_extent_cache(inode, last_pos_in_file,
- last_pos_in_file + hole_size -1);
+ last_pos_in_file + hole_size - 1, 0);
mutex_unlock(&BTRFS_I(inode)->extent_mutex);
btrfs_check_file(root, inode);
}
@@ -337,7 +337,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
inline_size -= start_pos;
err = insert_inline_extent(trans, root, inode, start_pos,
inline_size, pages, 0, num_pages);
- btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1);
+ btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1, 0);
BUG_ON(err);
mutex_unlock(&BTRFS_I(inode)->extent_mutex);
@@ -362,7 +362,8 @@ out_unlock:
return err;
}
-int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
+int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
+ int skip_pinned)
{
struct extent_map *em;
struct extent_map *split = NULL;
@@ -371,6 +372,7 @@ int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
u64 len = end - start + 1;
int ret;
int testend = 1;
+ unsigned long flags;
WARN_ON(end < start);
if (end == (u64)-1) {
@@ -389,6 +391,23 @@ int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
spin_unlock(&em_tree->lock);
break;
}
+ flags = em->flags;
+ if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
+ spin_unlock(&em_tree->lock);
+ if (em->start <= start &&
+ (!testend || em->start + em->len >= start + len)) {
+ free_extent_map(em);
+ break;
+ }
+ if (start < em->start) {
+ len = em->start - start;
+ } else {
+ len = start + len - (em->start + em->len);
+ start = em->start + em->len;
+ }
+ free_extent_map(em);
+ continue;
+ }
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
remove_extent_mapping(em_tree, em);
@@ -398,7 +417,7 @@ int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
split->len = start - em->start;
split->block_start = em->block_start;
split->bdev = em->bdev;
- split->flags = em->flags;
+ split->flags = flags;
ret = add_extent_mapping(em_tree, split);
BUG_ON(ret);
free_extent_map(split);
@@ -412,7 +431,7 @@ int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
split->start = start + len;
split->len = em->start + em->len - (start + len);
split->bdev = em->bdev;
- split->flags = em->flags;
+ split->flags = flags;
split->block_start = em->block_start + diff;
@@ -541,7 +560,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
int recow;
int ret;
- btrfs_drop_extent_cache(inode, start, end - 1);
+ btrfs_drop_extent_cache(inode, start, end - 1, 0);
path = btrfs_alloc_path();
if (!path)
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index cd6171c2da4..80038c5ef7c 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -117,10 +117,14 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
*objectid = last_ino;
goto found;
}
+ } else if (key.objectid > search_start) {
+ *objectid = search_start;
+ goto found;
}
}
if (key.objectid >= BTRFS_LAST_FREE_OBJECTID)
break;
+
start_found = 1;
last_ino = key.objectid + 1;
path->slots[0]++;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 48a3dc03080..4516fbf0167 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -135,7 +135,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy));
mutex_lock(&BTRFS_I(inode)->extent_mutex);
- btrfs_drop_extent_cache(inode, start, start + num_bytes - 1);
+ btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
mutex_unlock(&BTRFS_I(inode)->extent_mutex);
while(num_bytes > 0) {
@@ -163,7 +163,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
break;
}
btrfs_drop_extent_cache(inode, start,
- start + ins.offset - 1);
+ start + ins.offset - 1, 0);
}
mutex_unlock(&BTRFS_I(inode)->extent_mutex);
@@ -587,7 +587,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
btrfs_drop_extent_cache(inode, ordered_extent->file_offset,
ordered_extent->file_offset +
- ordered_extent->len - 1);
+ ordered_extent->len - 1, 0);
mutex_unlock(&BTRFS_I(inode)->extent_mutex);
ins.objectid = ordered_extent->start;
@@ -880,7 +880,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
int ret = 0, nr_unlink = 0, nr_truncate = 0;
/* don't do orphan cleanup if the fs is readonly. */
- if (root->inode->i_sb->s_flags & MS_RDONLY)
+ if (root->fs_info->sb->s_flags & MS_RDONLY)
return;
path = btrfs_alloc_path();
@@ -892,8 +892,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
key.offset = (u64)-1;
- trans = btrfs_start_transaction(root, 1);
- btrfs_set_trans_block_group(trans, root->inode);
while (1) {
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -933,7 +931,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
* crossing root thing. we store the inode number in the
* offset of the orphan item.
*/
- inode = btrfs_iget_locked(root->inode->i_sb,
+ inode = btrfs_iget_locked(root->fs_info->sb,
found_key.offset, root);
if (!inode)
break;
@@ -965,7 +963,9 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
* do a destroy_inode
*/
if (is_bad_inode(inode)) {
+ trans = btrfs_start_transaction(root, 1);
btrfs_orphan_del(trans, inode);
+ btrfs_end_transaction(trans, root);
iput(inode);
continue;
}
@@ -988,7 +988,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
btrfs_free_path(path);
- btrfs_end_transaction(trans, root);
}
void btrfs_read_locked_inode(struct inode *inode)
@@ -1343,8 +1342,7 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u64 mask = root->sectorsize - 1;
if (root->ref_cows)
- btrfs_drop_extent_cache(inode,
- new_size & (~mask), (u64)-1);
+ btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
path = btrfs_alloc_path();
path->reada = -1;
BUG_ON(!path);
@@ -1677,7 +1675,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
hole_start, 0, 0,
hole_size, 0);
btrfs_drop_extent_cache(inode, hole_start,
- (u64)-1);
+ (u64)-1, 0);
btrfs_check_file(root, inode);
}
mutex_unlock(&BTRFS_I(inode)->extent_mutex);
@@ -1843,6 +1841,24 @@ static int btrfs_find_actor(struct inode *inode, void *opaque)
args->root == BTRFS_I(inode)->root);
}
+struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
+ struct btrfs_root *root, int wait)
+{
+ struct inode *inode;
+ struct btrfs_iget_args args;
+ args.ino = objectid;
+ args.root = root;
+
+ if (wait) {
+ inode = ilookup5(s, objectid, btrfs_find_actor,
+ (void *)&args);
+ } else {
+ inode = ilookup5_nowait(s, objectid, btrfs_find_actor,
+ (void *)&args);
+ }
+ return inode;
+}
+
struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
struct btrfs_root *root)
{
@@ -3266,7 +3282,7 @@ void btrfs_destroy_inode(struct inode *inode)
btrfs_put_ordered_extent(ordered);
}
}
- btrfs_drop_extent_cache(inode, 0, (u64)-1);
+ btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
}
@@ -3412,16 +3428,22 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
{
struct list_head *head = &root->fs_info->delalloc_inodes;
struct btrfs_inode *binode;
+ struct inode *inode;
unsigned long flags;
spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
while(!list_empty(head)) {
binode = list_entry(head->next, struct btrfs_inode,
delalloc_inodes);
- atomic_inc(&binode->vfs_inode.i_count);
+ inode = igrab(&binode->vfs_inode);
+ if (!inode)
+ list_del_init(&binode->delalloc_inodes);
spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
- filemap_write_and_wait(binode->vfs_inode.i_mapping);
- iput(&binode->vfs_inode);
+ if (inode) {
+ filemap_write_and_wait(inode->i_mapping);
+ iput(inode);
+ }
+ cond_resched();
spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
}
spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4c6e0c15754..04de767a8db 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -444,12 +444,10 @@ static noinline int btrfs_ioctl_snap_create(struct btrfs_root *root,
goto out;
}
- mutex_lock(&root->fs_info->drop_mutex);
if (root == root->fs_info->tree_root)
ret = create_subvol(root, vol_args->name, namelen);
else
ret = create_snapshot(root, vol_args->name, namelen);
- mutex_unlock(&root->fs_info->drop_mutex);
out:
kfree(vol_args);
return ret;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index da6d43eb41d..951eacff242 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -309,7 +309,6 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
{
struct list_head splice;
struct list_head *cur;
- struct list_head *tmp;
struct btrfs_ordered_extent *ordered;
struct inode *inode;
@@ -317,37 +316,38 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
spin_lock(&root->fs_info->ordered_extent_lock);
list_splice_init(&root->fs_info->ordered_extents, &splice);
- list_for_each_safe(cur, tmp, &splice) {
+ while (!list_empty(&splice)) {
cur = splice.next;
ordered = list_entry(cur, struct btrfs_ordered_extent,
root_extent_list);
if (nocow_only &&
!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
+ list_move(&ordered->root_extent_list,
+ &root->fs_info->ordered_extents);
cond_resched_lock(&root->fs_info->ordered_extent_lock);
continue;
}
list_del_init(&ordered->root_extent_list);
atomic_inc(&ordered->refs);
- inode = ordered->inode;
/*
- * the inode can't go away until all the pages are gone
- * and the pages won't go away while there is still
- * an ordered extent and the ordered extent won't go
- * away until it is off this list. So, we can safely
- * increment i_count here and call iput later
+ * the inode may be getting freed (in sys_unlink path).
*/
- atomic_inc(&inode->i_count);
+ inode = igrab(ordered->inode);
+
spin_unlock(&root->fs_info->ordered_extent_lock);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- iput(inode);
+ if (inode) {
+ btrfs_start_ordered_extent(inode, ordered, 1);
+ btrfs_put_ordered_extent(ordered);
+ iput(inode);
+ } else {
+ btrfs_put_ordered_extent(ordered);
+ }
spin_lock(&root->fs_info->ordered_extent_lock);
}
- list_splice_init(&splice, &root->fs_info->ordered_extents);
spin_unlock(&root->fs_info->ordered_extent_lock);
return 0;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 656baefa525..8c83cf464c8 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -109,6 +109,7 @@ noinline int btrfs_record_root_in_trans(struct btrfs_root *root)
spin_lock_init(&dirty->root->node_lock);
spin_lock_init(&dirty->root->list_lock);
mutex_init(&dirty->root->objectid_mutex);
+ mutex_init(&dirty->root->log_mutex);
INIT_LIST_HEAD(&dirty->root->dead_list);
dirty->root->node = root->commit_root;
dirty->root->commit_root = NULL;
@@ -590,13 +591,14 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
root = dirty->latest_root;
atomic_inc(&root->fs_info->throttles);
- mutex_lock(&root->fs_info->drop_mutex);
while(1) {
trans = btrfs_start_transaction(tree_root, 1);
+ mutex_lock(&root->fs_info->drop_mutex);
ret = btrfs_drop_snapshot(trans, dirty->root);
if (ret != -EAGAIN) {
break;
}
+ mutex_unlock(&root->fs_info->drop_mutex);
err = btrfs_update_root(trans,
tree_root,
@@ -608,10 +610,8 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
ret = btrfs_end_transaction(trans, tree_root);
BUG_ON(ret);
- mutex_unlock(&root->fs_info->drop_mutex);
btrfs_btree_balance_dirty(tree_root, nr);
cond_resched();
- mutex_lock(&root->fs_info->drop_mutex);
}
BUG_ON(ret);
atomic_dec(&root->fs_info->throttles);
@@ -689,7 +689,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
key.objectid = objectid;
- key.offset = 1;
+ key.offset = trans->transid;
btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
old = btrfs_lock_root_node(root);