From 231b87d10920e024efaf0f9e86e1bab7bced1620 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 18 Aug 2008 17:38:42 +0800 Subject: ocfs2: Modify ocfs2_num_free_extents for future xattr usage. ocfs2_num_free_extents() is used to find the number of free extent records in an inode btree. Hence, it takes an "ocfs2_dinode" parameter. We want to use this for extended attribute trees in the future, so genericize the interface the take a buffer head. A future patch will allow that buffer_head to contain any structure rooting an ocfs2 btree. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/aops.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2/aops.c') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index a53da146627..e2008dcec75 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1712,8 +1712,9 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, * ocfs2_lock_allocators(). It greatly over-estimates * the work to be done. */ - ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc, - extents_to_split, &data_ac, &meta_ac); + ret = ocfs2_lock_allocators(inode, wc->w_di_bh, + clusters_to_alloc, extents_to_split, + &data_ac, &meta_ac); if (ret) { mlog_errno(ret); goto out; -- cgit v1.2.3 From 811f933df1e55615fd0bb4818f31e3868a8e6e23 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 18 Aug 2008 17:38:43 +0800 Subject: ocfs2: Use ocfs2_extent_list instead of ocfs2_dinode. ocfs2_extend_meta_needed(), ocfs2_calc_extend_credits() and ocfs2_reserve_new_metadata() are all useful for extent tree operations. But they are all limited to an inode btree because they use a struct ocfs2_dinode parameter. Change their parameter to struct ocfs2_extent_list (the part of an ocfs2_dinode they actually use) so that the xattr btree code can use these functions. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/aops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2/aops.c') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index e2008dcec75..bbe3f8b2d0e 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1720,7 +1720,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, goto out; } - credits = ocfs2_calc_extend_credits(inode->i_sb, di, + credits = ocfs2_calc_extend_credits(inode->i_sb, + &di->id2.i_list, clusters_to_alloc); } -- cgit v1.2.3 From e7d4cb6bc19658646357eeff134645cd9bc3479f Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 18 Aug 2008 17:38:44 +0800 Subject: ocfs2: Abstract ocfs2_extent_tree in b-tree operations. In the old extent tree operation, we take the hypothesis that we are using the ocfs2_extent_list in ocfs2_dinode as the tree root. As xattr will also use ocfs2_extent_list to store large value for a xattr entry, we refactor the tree operation so that xattr can use it directly. The refactoring includes 4 steps: 1. Abstract set/get of last_eb_blk and update_clusters since they may be stored in different location for dinode and xattr. 2. Add a new structure named ocfs2_extent_tree to indicate the extent tree the operation will work on. 3. Remove all the use of fe_bh and di, use root_bh and root_el in extent tree instead. So now all the fe_bh is replaced with et->root_bh, el with root_el accordingly. 4. Make ocfs2_lock_allocators generic. Now it is limited to be only used in file extend allocation. But the whole function is useful when we want to store large EAs. Note: This patch doesn't touch ocfs2_commit_truncate() since it is not used for anything other than truncate inode data btrees. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/aops.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2/aops.c') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index bbe3f8b2d0e..44ea5eb3fdc 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1278,7 +1278,8 @@ static int ocfs2_write_cluster(struct address_space *mapping, } else if (unwritten) { ret = ocfs2_mark_extent_written(inode, wc->w_di_bh, wc->w_handle, cpos, 1, phys, - meta_ac, &wc->w_dealloc); + meta_ac, &wc->w_dealloc, + OCFS2_DINODE_EXTENT); if (ret < 0) { mlog_errno(ret); goto out; @@ -1712,7 +1713,13 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, * ocfs2_lock_allocators(). It greatly over-estimates * the work to be done. */ - ret = ocfs2_lock_allocators(inode, wc->w_di_bh, + mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u," + " clusters_to_add = %u, extents_to_split = %u\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (long long)i_size_read(inode), le32_to_cpu(di->i_clusters), + clusters_to_alloc, extents_to_split); + + ret = ocfs2_lock_allocators(inode, wc->w_di_bh, &di->id2.i_list, clusters_to_alloc, extents_to_split, &data_ac, &meta_ac); if (ret) { -- cgit v1.2.3 From 0eb8d47e69a2211a36643b180f1843ef45f6017d Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 18 Aug 2008 17:38:45 +0800 Subject: ocfs2: Make high level btree extend code generic Factor out the non-inode specifics of ocfs2_do_extend_allocation() into a more generic function, ocfs2_do_cluster_allocation(). ocfs2_do_extend_allocation calls ocfs2_do_cluster_allocation() now, but the latter can be used for other btree types as well. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/aops.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/ocfs2/aops.c') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 44ea5eb3fdc..e7acd286790 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1255,10 +1255,10 @@ static int ocfs2_write_cluster(struct address_space *mapping, * any additional semaphores or cluster locks. */ tmp_pos = cpos; - ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode, - &tmp_pos, 1, 0, wc->w_di_bh, - wc->w_handle, data_ac, - meta_ac, NULL); + ret = ocfs2_add_inode_data(OCFS2_SB(inode->i_sb), inode, + &tmp_pos, 1, 0, wc->w_di_bh, + wc->w_handle, data_ac, + meta_ac, NULL); /* * This shouldn't happen because we must have already * calculated the correct meta data allocation required. The -- cgit v1.2.3 From f56654c435c06f2b2bd5751889b1a08a3add7d6c Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 18 Aug 2008 17:38:48 +0800 Subject: ocfs2: Add extent tree operation for xattr value btrees Add some thin wrappers around ocfs2_insert_extent() for each of the 3 different btree types, ocfs2_inode_insert_extent(), ocfs2_xattr_value_insert_extent() and ocfs2_xattr_tree_insert_extent(). The last is for the xattr index btree, which will be used in a followup patch. All the old callers in file.c etc will call ocfs2_dinode_insert_extent(), while the other two handle the xattr issue. And the init of extent tree are handled by these functions. When storing xattr value which is too large, we will allocate some clusters for it and here ocfs2_extent_list and ocfs2_extent_rec will also be used. In order to re-use the b-tree operation code, a new parameter named "private" is added into ocfs2_extent_tree and it is used to indicate the root of ocfs2_exent_list. The reason is that we can't deduce the root from the buffer_head now. It may be in an inode, an ocfs2_xattr_block or even worse, in any place in an ocfs2_xattr_bucket. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/aops.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2/aops.c') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index e7acd286790..530b1ff599c 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1279,7 +1279,7 @@ static int ocfs2_write_cluster(struct address_space *mapping, ret = ocfs2_mark_extent_written(inode, wc->w_di_bh, wc->w_handle, cpos, 1, phys, meta_ac, &wc->w_dealloc, - OCFS2_DINODE_EXTENT); + OCFS2_DINODE_EXTENT, NULL); if (ret < 0) { mlog_errno(ret); goto out; @@ -1721,7 +1721,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, ret = ocfs2_lock_allocators(inode, wc->w_di_bh, &di->id2.i_list, clusters_to_alloc, extents_to_split, - &data_ac, &meta_ac); + &data_ac, &meta_ac, + OCFS2_DINODE_EXTENT, NULL); if (ret) { mlog_errno(ret); goto out; -- cgit v1.2.3 From f99b9b7ccf6a691f653cec45f36bfdd1e94769c7 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 20 Aug 2008 19:36:33 -0700 Subject: ocfs2: Make ocfs2_extent_tree the first-class representation of a tree. We now have three different kinds of extent trees in ocfs2: inode data (dinode), extended attributes (xattr_tree), and extended attribute values (xattr_value). There is a nice abstraction for them, ocfs2_extent_tree, but it is hidden in alloc.c. All the calling functions have to pick amongst a varied API and pass in type bits and often extraneous pointers. A better way is to make ocfs2_extent_tree a first-class object. Everyone converts their object to an ocfs2_extent_tree() via the ocfs2_get_*_extent_tree() calls, then uses the ocfs2_extent_tree for all tree calls to alloc.c. This simplifies a lot of callers, making for readability. It also provides an easy way to add additional extent tree types, as they only need to be defined in alloc.c with a ocfs2_get__extent_tree() function. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/aops.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'fs/ocfs2/aops.c') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 530b1ff599c..ed937fa9e4e 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1242,6 +1242,7 @@ static int ocfs2_write_cluster(struct address_space *mapping, int ret, i, new, should_zero = 0; u64 v_blkno, p_blkno; struct inode *inode = mapping->host; + struct ocfs2_extent_tree et; new = phys == 0 ? 1 : 0; if (new || unwritten) @@ -1276,10 +1277,11 @@ static int ocfs2_write_cluster(struct address_space *mapping, goto out; } } else if (unwritten) { - ret = ocfs2_mark_extent_written(inode, wc->w_di_bh, + ocfs2_get_dinode_extent_tree(&et, inode, wc->w_di_bh); + ret = ocfs2_mark_extent_written(inode, &et, wc->w_handle, cpos, 1, phys, - meta_ac, &wc->w_dealloc, - OCFS2_DINODE_EXTENT, NULL); + meta_ac, &wc->w_dealloc); + ocfs2_put_extent_tree(&et); if (ret < 0) { mlog_errno(ret); goto out; @@ -1666,6 +1668,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, struct ocfs2_alloc_context *data_ac = NULL; struct ocfs2_alloc_context *meta_ac = NULL; handle_t *handle; + struct ocfs2_extent_tree et; ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh); if (ret) { @@ -1719,10 +1722,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, (long long)i_size_read(inode), le32_to_cpu(di->i_clusters), clusters_to_alloc, extents_to_split); - ret = ocfs2_lock_allocators(inode, wc->w_di_bh, &di->id2.i_list, + ocfs2_get_dinode_extent_tree(&et, inode, wc->w_di_bh); + ret = ocfs2_lock_allocators(inode, &et, clusters_to_alloc, extents_to_split, - &data_ac, &meta_ac, - OCFS2_DINODE_EXTENT, NULL); + &data_ac, &meta_ac); + ocfs2_put_extent_tree(&et); if (ret) { mlog_errno(ret); goto out; -- cgit v1.2.3 From 8d6220d6a74a33552cf877bcea25503d7f6a59e6 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 22 Aug 2008 12:46:09 -0700 Subject: ocfs2: Change ocfs2_get_*_extent_tree() to ocfs2_init_*_extent_tree() The original get/put_extent_tree() functions held a reference on et_root_bh. However, every single caller already has a safe reference, making the get/put cycle irrelevant. We change ocfs2_get_*_extent_tree() to ocfs2_init_*_extent_tree(). It no longer gets a reference on et_root_bh. ocfs2_put_extent_tree() is removed. Callers now have a simpler init+use pattern. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/aops.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/ocfs2/aops.c') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index ed937fa9e4e..259775eedb8 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1277,11 +1277,10 @@ static int ocfs2_write_cluster(struct address_space *mapping, goto out; } } else if (unwritten) { - ocfs2_get_dinode_extent_tree(&et, inode, wc->w_di_bh); + ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh); ret = ocfs2_mark_extent_written(inode, &et, wc->w_handle, cpos, 1, phys, meta_ac, &wc->w_dealloc); - ocfs2_put_extent_tree(&et); if (ret < 0) { mlog_errno(ret); goto out; @@ -1722,11 +1721,10 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, (long long)i_size_read(inode), le32_to_cpu(di->i_clusters), clusters_to_alloc, extents_to_split); - ocfs2_get_dinode_extent_tree(&et, inode, wc->w_di_bh); + ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh); ret = ocfs2_lock_allocators(inode, &et, clusters_to_alloc, extents_to_split, &data_ac, &meta_ac); - ocfs2_put_extent_tree(&et); if (ret) { mlog_errno(ret); goto out; -- cgit v1.2.3 From 2b4e30fbde425828b17f0e9c8f8e3fd3ecb2bc75 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 3 Sep 2008 20:03:41 -0700 Subject: ocfs2: Switch over to JBD2. ocfs2 wants JBD2 for many reasons, not the least of which is that JBD is limiting our maximum filesystem size. It's a pretty trivial change. Most functions are just renamed. The only functional change is moving to Jan's inode-based ordered data mode. It's better, too. Because JBD2 reads and writes JBD journals, this is compatible with any existing filesystem. It can even interact with JBD-based ocfs2 as long as the journal is formated for JBD. We provide a compatibility option so that paranoid people can still use JBD for the time being. This will go away shortly. [ Moved call of ocfs2_begin_ordered_truncate() from ocfs2_delete_inode() to ocfs2_truncate_for_delete(). --Mark ] Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/aops.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'fs/ocfs2/aops.c') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 259775eedb8..de179054a74 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -485,11 +485,14 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode, } if (ocfs2_should_order_data(inode)) { + ret = ocfs2_jbd2_file_inode(handle, inode); +#ifdef CONFIG_OCFS2_COMPAT_JBD ret = walk_page_buffers(handle, page_buffers(page), from, to, NULL, ocfs2_journal_dirty_data); - if (ret < 0) +#endif + if (ret < 0) mlog_errno(ret); } out: @@ -669,7 +672,7 @@ static void ocfs2_invalidatepage(struct page *page, unsigned long offset) { journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; - journal_invalidatepage(journal, page, offset); + jbd2_journal_invalidatepage(journal, page, offset); } static int ocfs2_releasepage(struct page *page, gfp_t wait) @@ -678,7 +681,7 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait) if (!page_has_buffers(page)) return 0; - return journal_try_to_free_buffers(journal, page, wait); + return jbd2_journal_try_to_free_buffers(journal, page, wait); } static ssize_t ocfs2_direct_IO(int rw, @@ -1074,11 +1077,15 @@ static void ocfs2_write_failure(struct inode *inode, tmppage = wc->w_pages[i]; if (page_has_buffers(tmppage)) { - if (ocfs2_should_order_data(inode)) + if (ocfs2_should_order_data(inode)) { + ocfs2_jbd2_file_inode(wc->w_handle, inode); +#ifdef CONFIG_OCFS2_COMPAT_JBD walk_page_buffers(wc->w_handle, page_buffers(tmppage), from, to, NULL, ocfs2_journal_dirty_data); +#endif + } block_commit_write(tmppage, from, to); } @@ -1917,11 +1924,15 @@ int ocfs2_write_end_nolock(struct address_space *mapping, } if (page_has_buffers(tmppage)) { - if (ocfs2_should_order_data(inode)) + if (ocfs2_should_order_data(inode)) { + ocfs2_jbd2_file_inode(wc->w_handle, inode); +#ifdef CONFIG_OCFS2_COMPAT_JBD walk_page_buffers(wc->w_handle, page_buffers(tmppage), from, to, NULL, ocfs2_journal_dirty_data); +#endif + } block_commit_write(tmppage, from, to); } } -- cgit v1.2.3 From a81cb88b64a479b78c6dd5666678d50171865db8 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 7 Oct 2008 14:25:16 -0700 Subject: ocfs2: Don't check for NULL before brelse() This is pointless as brelse() already does the check. Signed-off-by: Mark Fasheh --- fs/ocfs2/aops.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/ocfs2/aops.c') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index de179054a74..98e16fb49e4 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -128,8 +128,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, err = 0; bail: - if (bh) - brelse(bh); + brelse(bh); mlog_exit(err); return err; -- cgit v1.2.3 From 31d33073ca38603dea705dae45e094a64ca062d6 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 9 Oct 2008 17:20:30 -0700 Subject: ocfs2: Require an inode for ocfs2_read_block(s)(). Now that synchronous readers are using ocfs2_read_blocks_sync(), all callers of ocfs2_read_blocks() are passing an inode. Use it unconditionally. Since it's there, we don't need to pass the ocfs2_super either. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/aops.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/ocfs2/aops.c') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 98e16fb49e4..f232a0e3c30 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -68,9 +68,8 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, goto bail; } - status = ocfs2_read_block(OCFS2_SB(inode->i_sb), - OCFS2_I(inode)->ip_blkno, - &bh, OCFS2_BH_CACHED, inode); + status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, + &bh, OCFS2_BH_CACHED); if (status < 0) { mlog_errno(status); goto bail; @@ -260,13 +259,12 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page) { int ret; struct buffer_head *di_bh = NULL; - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); BUG_ON(!PageLocked(page)); BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); - ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh, - OCFS2_BH_CACHED, inode); + ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh, + OCFS2_BH_CACHED); if (ret) { mlog_errno(ret); goto out; -- cgit v1.2.3 From 0fcaa56a2a020dd6f90c202b7084e6f4cbedb6c2 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 9 Oct 2008 17:20:31 -0700 Subject: ocfs2: Simplify ocfs2_read_block() More than 30 callers of ocfs2_read_block() pass exactly OCFS2_BH_CACHED. Only six pass a different flag set. Rather than have every caller care, let's make ocfs2_read_block() take no flags and always do a cached read. The remaining six places can call ocfs2_read_blocks() directly. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/aops.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/ocfs2/aops.c') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index f232a0e3c30..c22543b3342 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -68,8 +68,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, goto bail; } - status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, - &bh, OCFS2_BH_CACHED); + status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); if (status < 0) { mlog_errno(status); goto bail; @@ -263,8 +262,7 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page) BUG_ON(!PageLocked(page)); BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); - ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh, - OCFS2_BH_CACHED); + ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); if (ret) { mlog_errno(ret); goto out; -- cgit v1.2.3