From 04da11bfcf511544ae19e0a7e5f994b3237752ac Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 20 Aug 2008 17:16:34 +0300 Subject: UBIFS: fix zero-length truncations Always allow truncations to zero, even if budgeting thinks there is no space. UBIFS reserves some space for deletions anyway. Otherwise, the following happens: 1. create a file, and write as much as possible there, until ENOSPC 2. truncate the file, which fails with ENOSPC, which is not good. Signed-off-by: Artem Bityutskiy --- fs/ubifs/dir.c | 1 - fs/ubifs/file.c | 20 ++++++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 5c96f1fb701..2b267c9a180 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -587,7 +587,6 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) if (err) { if (err != -ENOSPC) return err; - err = 0; budgeted = 0; } diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 4071d1cae29..3d698e2022b 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -793,7 +793,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, int err; struct ubifs_budget_req req; loff_t old_size = inode->i_size, new_size = attr->ia_size; - int offset = new_size & (UBIFS_BLOCK_SIZE - 1); + int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1; struct ubifs_inode *ui = ubifs_inode(inode); dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); @@ -811,8 +811,15 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, /* A funny way to budget for truncation node */ req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; err = ubifs_budget_space(c, &req); - if (err) - return err; + if (err) { + /* + * Treat truncations to zero as deletion and always allow them, + * just like we do for '->unlink()'.
+ */ + if (new_size || err != -ENOSPC) + return err; + budgeted = 0; + } err = vmtruncate(inode, new_size); if (err) @@ -869,7 +876,12 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, err = ubifs_jnl_truncate(c, inode, old_size, new_size); mutex_unlock(&ui->ui_mutex); out_budg: - ubifs_release_budget(c, &req); + if (budgeted) + ubifs_release_budget(c, &req); + else { + c->nospace = c->nospace_rp = 0; + smp_wmb(); + } return err; } -- cgit v1.2.3 From 761e29f3bb19b05bea55285dfdf2d28e001a63b8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 20 Aug 2008 16:32:40 +0300 Subject: UBIFS: always read hashed-key nodes under TNC mutex Leaf-nodes that have a hashed key are stored in the leaf-node-cache (LNC) which is protected by the TNC mutex. Consequently, when reading a leaf node with a hashed key (i.e. directory entries, xattr entries) the TNC mutex is always required. Signed-off-by: Adrian Hunter --- fs/ubifs/tnc.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index e909f4a9644..4fbc5921688 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -1498,7 +1498,6 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, { int found, n, err; struct ubifs_znode *znode; - struct ubifs_zbranch zbr; dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); mutex_lock(&c->tnc_mutex); @@ -1522,11 +1521,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, goto out_unlock; } - zbr = znode->zbranch[n]; - mutex_unlock(&c->tnc_mutex); - - err = tnc_read_node_nm(c, &zbr, node); - return err; + err = tnc_read_node_nm(c, &znode->zbranch[n], node); out_unlock: mutex_unlock(&c->tnc_mutex); -- cgit v1.2.3 From 601c0bc46753007be011b513ba4fc50ed8e30aef Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 22 Aug 2008 14:23:35 +0300 Subject: UBIFS: allow for racing between GC and TNC The TNC mutex is unlocked prematurely when reading leaf 
nodes with non-hashed keys. This is unsafe because the node may be moved by garbage collection and the eraseblock unmapped, although that has never actually happened during stress testing. This patch fixes the flaw by detecting the race and retrying with the TNC mutex locked. Signed-off-by: Adrian Hunter --- fs/ubifs/gc.c | 6 +++ fs/ubifs/misc.h | 17 +++++++++ fs/ubifs/tnc.c | 109 ++++++++++++++++++++++++++++++------------------------- fs/ubifs/ubifs.h | 6 ++- 4 files changed, 87 insertions(+), 51 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index d0f3dac2908..13f1019c859 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -344,6 +344,12 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) if (err) goto out; + /* Allow for races with TNC */ + c->gced_lnum = lnum; + smp_wmb(); + c->gc_seq += 1; + smp_wmb(); + if (c->gc_lnum == -1) { c->gc_lnum = lnum; err = LEB_RETAINED; diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 87dabf9fe74..87ced4c74a6 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -325,4 +325,21 @@ static inline struct timespec ubifs_current_time(struct inode *inode) current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; } +/** + * ubifs_tnc_lookup - look up a file-system node. + * @c: UBIFS file-system description object + * @key: node key to lookup + * @node: the node is returned here + * + * This function look up and reads node with key @key. The caller has to make + * sure the @node buffer is large enough to fit the node. Returns zero in case + * of success, %-ENOENT if the node was not found, and a negative error code in + * case of failure. 
+ */ +static inline int ubifs_tnc_lookup(struct ubifs_info *c, + const union ubifs_key *key, void *node) +{ + return ubifs_tnc_locate(c, key, node, NULL, NULL); +} + #endif /* __UBIFS_MISC_H__ */ diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 4fbc5921688..7da209ab937 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -506,7 +506,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, if (keys_cmp(c, key, &node_key) != 0) ret = 0; } - if (ret == 0) + if (ret == 0 && c->replaying) dbg_mnt("dangling branch LEB %d:%d len %d, key %s", zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); return ret; @@ -1382,50 +1382,39 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, } /** - * ubifs_tnc_lookup - look up a file-system node. + * maybe_leb_gced - determine if a LEB may have been garbage collected. * @c: UBIFS file-system description object - * @key: node key to lookup - * @node: the node is returned here + * @lnum: LEB number + * @gc_seq1: garbage collection sequence number * - * This function look up and reads node with key @key. The caller has to make - * sure the @node buffer is large enough to fit the node. Returns zero in case - * of success, %-ENOENT if the node was not found, and a negative error code in - * case of failure. + * This function determines if @lnum may have been garbage collected since + * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise + * %0 is returned. 
*/ -int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, - void *node) +static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) { - int found, n, err; - struct ubifs_znode *znode; - struct ubifs_zbranch zbr, *zt; - - mutex_lock(&c->tnc_mutex); - found = ubifs_lookup_level0(c, key, &znode, &n); - if (!found) { - err = -ENOENT; - goto out; - } else if (found < 0) { - err = found; - goto out; - } - zt = &znode->zbranch[n]; - if (is_hash_key(c, key)) { - /* - * In this case the leaf node cache gets used, so we pass the - * address of the zbranch and keep the mutex locked - */ - err = tnc_read_node_nm(c, zt, node); - goto out; - } - zbr = znode->zbranch[n]; - mutex_unlock(&c->tnc_mutex); - - err = ubifs_tnc_read_node(c, &zbr, node); - return err; + int gc_seq2, gced_lnum; -out: - mutex_unlock(&c->tnc_mutex); - return err; + gced_lnum = c->gced_lnum; + smp_rmb(); + gc_seq2 = c->gc_seq; + /* Same seq means no GC */ + if (gc_seq1 == gc_seq2) + return 0; + /* Different by more than 1 means we don't know */ + if (gc_seq1 + 1 != gc_seq2) + return 1; + /* + * We have seen the sequence number has increased by 1. Now we need to + * be sure we read the right LEB number, so read it again. + */ + smp_rmb(); + if (gced_lnum != c->gced_lnum) + return 1; + /* Finally we can check lnum */ + if (gced_lnum == lnum) + return 1; + return 0; } /** @@ -1436,16 +1425,19 @@ out: * @lnum: LEB number is returned here * @offs: offset is returned here * - * This function is the same as 'ubifs_tnc_lookup()' but it returns the node - * location also. See 'ubifs_tnc_lookup()'. + * This function look up and reads node with key @key. The caller has to make + * sure the @node buffer is large enough to fit the node. Returns zero in case + * of success, %-ENOENT if the node was not found, and a negative error code in + * case of failure. The node location can be returned in @lnum and @offs. 
*/ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, void *node, int *lnum, int *offs) { - int found, n, err; + int found, n, err, safely = 0, gc_seq1; struct ubifs_znode *znode; struct ubifs_zbranch zbr, *zt; +again: mutex_lock(&c->tnc_mutex); found = ubifs_lookup_level0(c, key, &znode, &n); if (!found) { @@ -1456,24 +1448,43 @@ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, goto out; } zt = &znode->zbranch[n]; + if (lnum) { + *lnum = zt->lnum; + *offs = zt->offs; + } if (is_hash_key(c, key)) { /* * In this case the leaf node cache gets used, so we pass the * address of the zbranch and keep the mutex locked */ - *lnum = zt->lnum; - *offs = zt->offs; err = tnc_read_node_nm(c, zt, node); goto out; } + if (safely) { + err = ubifs_tnc_read_node(c, zt, node); + goto out; + } + /* Drop the TNC mutex prematurely and race with garbage collection */ zbr = znode->zbranch[n]; + gc_seq1 = c->gc_seq; mutex_unlock(&c->tnc_mutex); - *lnum = zbr.lnum; - *offs = zbr.offs; + if (ubifs_get_wbuf(c, zbr.lnum)) { + /* We do not GC journal heads */ + err = ubifs_tnc_read_node(c, &zbr, node); + return err; + } - err = ubifs_tnc_read_node(c, &zbr, node); - return err; + err = fallible_read_node(c, key, &zbr, node); + if (maybe_leb_gced(c, zbr.lnum, gc_seq1)) { + /* + * The node may have been GC'ed out from under us so try again + * while keeping the TNC mutex locked. 
+ */ + safely = 1; + goto again; + } + return 0; out: mutex_unlock(&c->tnc_mutex); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index d7f706f7a30..7828d69ca4f 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1028,6 +1028,8 @@ struct ubifs_mount_opts { * @sbuf: a buffer of LEB size used by GC and replay for scanning * @idx_gc: list of index LEBs that have been garbage collected * @idx_gc_cnt: number of elements on the idx_gc list + * @gc_seq: incremented for every non-index LEB garbage collected + * @gced_lnum: last non-index LEB that was garbage collected * * @infos_list: links all 'ubifs_info' objects * @umount_mutex: serializes shrinker and un-mount @@ -1257,6 +1259,8 @@ struct ubifs_info { void *sbuf; struct list_head idx_gc; int idx_gc_cnt; + volatile int gc_seq; + volatile int gced_lnum; struct list_head infos_list; struct mutex umount_mutex; @@ -1451,8 +1455,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c); /* tnc.c */ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_znode **zn, int *n); -int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, - void *node); int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, void *node, const struct qstr *nm); int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, -- cgit v1.2.3 From 8191e1fa8131a422f4bf7b0f2dc1f8543fd17783 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 22 Aug 2008 12:26:40 +0300 Subject: UBIFS: do not update min_idx_lebs in statfs This is bad because the rest of the code should not depend on it, and this may hide bugs, instead of revealing them.
Signed-off-by: Artem Bityutskiy --- fs/ubifs/budget.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 15409815747..ac0d2e1e73b 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -741,7 +741,6 @@ long long ubifs_budg_get_free_space(struct ubifs_info *c) available = ubifs_calc_available(c, min_idx_lebs); outstanding = c->budg_data_growth + c->budg_dd_growth; - c->min_idx_lebs = min_idx_lebs; spin_unlock(&c->space_lock); if (available > outstanding) -- cgit v1.2.3 From 9e5de3549615818cae9c20a0ee1fd3ad4a747758 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 25 Aug 2008 17:29:43 +0300 Subject: UBIFS: push empty flash hack down We have a hack which forces the amount of flash space to be equivalent to 'c->block_cnt' in case of empty FS. This is to make users happy and see '0%' used in 'df' when they mount an empty FS. This hack is not needed in 'ubifs_calc_available()', but it is only needed in the caller, in 'ubifs_budg_get_free_space()'. So push it down there. Signed-off-by: Artem Bityutskiy --- fs/ubifs/budget.c | 24 +++++++++++------------- fs/ubifs/super.c | 2 -- 2 files changed, 11 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index ac0d2e1e73b..f6d2eaa7a06 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -302,18 +302,6 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) int subtract_lebs; long long available; - /* - * Force the amount available to the total size reported if the used - * space is zero.
- */ - if (c->lst.total_used <= UBIFS_INO_NODE_SZ && - c->budg_data_growth + c->budg_dd_growth == 0) { - /* Do the same calculation as for c->block_cnt */ - available = c->main_lebs - 2; - available *= c->leb_size - c->dark_wm; - return available; - } - available = c->main_bytes - c->lst.total_used; /* @@ -739,8 +727,18 @@ long long ubifs_budg_get_free_space(struct ubifs_info *c) return 0; } - available = ubifs_calc_available(c, min_idx_lebs); outstanding = c->budg_data_growth + c->budg_dd_growth; + + /* + * Force the amount available to the total size reported if the used + * space is zero. + */ + if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) { + spin_unlock(&c->space_lock); + return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT; + } + + available = ubifs_calc_available(c, min_idx_lebs); spin_unlock(&c->space_lock); if (available > outstanding) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index f71e6b8822c..1018053519e 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -649,8 +649,6 @@ static int init_constants_late(struct ubifs_info *c) * * Subtract the LEB reserved for GC and the LEB which is reserved for * deletions. - * - * Review 'ubifs_calc_available()' if changing this calculation. */ tmp64 = c->main_lebs - 2; tmp64 *= (uint64_t)c->leb_size - c->dark_wm; -- cgit v1.2.3 From 8aabb75017291ba68c09ff5fdb998ef0a1fdaaf9 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 25 Aug 2008 16:02:31 +0300 Subject: UBIFS: remove incorrect index space check When we report free space to user-space, we should not report 0 if the amount of empty LEBs is too low, because they would be produced by GC when needed. Thus, just call 'ubifs_calc_available()' straight away which would take 'min_idx_lebs' into account anyway. 
Signed-off-by: Artem Bityutskiy --- fs/ubifs/budget.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index f6d2eaa7a06..9ef630a594c 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -709,24 +709,11 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, */ long long ubifs_budg_get_free_space(struct ubifs_info *c) { - int min_idx_lebs, rsvd_idx_lebs; + int min_idx_lebs; long long available, outstanding, free; - /* Do exactly the same calculations as in 'do_budget_space()' */ spin_lock(&c->space_lock); min_idx_lebs = ubifs_calc_min_idx_lebs(c); - - if (min_idx_lebs > c->lst.idx_lebs) - rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; - else - rsvd_idx_lebs = 0; - - if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - - c->lst.taken_empty_lebs) { - spin_unlock(&c->space_lock); - return 0; - } - outstanding = c->budg_data_growth + c->budg_dd_growth; /* -- cgit v1.2.3 From 4b5f2762ec914c9dfd0e9d2377c0574f2ee9a8f9 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 25 Aug 2008 16:15:56 +0300 Subject: UBIFS: improve statfs reporting Make free space calculation less pessimistic and more realistic, which in turn improves 'statfs()' reports. Now it lies by 10%-20%, instead of 20%-30% (10% more honest). Results of "freespace" test (120MiB volume, 16KiB LEB size, 512 bytes page size). 
Before the change: freespace: Test 1: fill the space we have 3 times freespace: was free: 78274560 bytes 74.6 MiB, wrote: 96489472 bytes 92.0 MiB, delta: 18214912 bytes 17.4 MiB, wrote 23.3% more than predicted freespace: was free: 76754944 bytes 73.2 MiB, wrote: 96493568 bytes 92.0 MiB, delta: 19738624 bytes 18.8 MiB, wrote 25.7% more than predicted freespace: was free: 76759040 bytes 73.2 MiB, wrote: 96489472 bytes 92.0 MiB, delta: 19730432 bytes 18.8 MiB, wrote 25.7% more than predicted freespace: Test 1 finished freespace: Test 2: gradually lessen amount of free space and fill the FS freespace: do 10 steps, lessen free space by 6977722 bytes 6.7 MiB each time freespace: was free: 72273920 bytes 68.9 MiB, wrote: 88891392 bytes 84.8 MiB, delta: 16617472 bytes 15.8 MiB, wrote 23.0% more than predicted freespace: was free: 66154496 bytes 63.1 MiB, wrote: 81506304 bytes 77.7 MiB, delta: 15351808 bytes 14.6 MiB, wrote 23.2% more than predicted freespace: was free: 58732544 bytes 56.0 MiB, wrote: 72572928 bytes 69.2 MiB, delta: 13840384 bytes 13.2 MiB, wrote 23.6% more than predicted freespace: was free: 51552256 bytes 49.2 MiB, wrote: 63754240 bytes 60.8 MiB, delta: 12201984 bytes 11.6 MiB, wrote 23.7% more than predicted freespace: was free: 44404736 bytes 42.3 MiB, wrote: 54943744 bytes 52.4 MiB, delta: 10539008 bytes 10.1 MiB, wrote 23.7% more than predicted freespace: was free: 37285888 bytes 35.6 MiB, wrote: 46161920 bytes 44.0 MiB, delta: 8876032 bytes 8.5 MiB, wrote 23.8% more than predicted freespace: was free: 30171136 bytes 28.8 MiB, wrote: 37384192 bytes 35.7 MiB, delta: 7213056 bytes 6.9 MiB, wrote 23.9% more than predicted freespace: was free: 23048192 bytes 22.0 MiB, wrote: 28606464 bytes 27.3 MiB, delta: 5558272 bytes 5.3 MiB, wrote 24.1% more than predicted freespace: was free: 15941632 bytes 15.2 MiB, wrote: 19828736 bytes 18.9 MiB, delta: 3887104 bytes 3.7 MiB, wrote 24.4% more than predicted freespace: was free: 8830976 bytes 8.4 MiB, wrote: 
11063296 bytes 10.6 MiB, delta: 2232320 bytes 2.1 MiB, wrote 25.3% more than predicted freespace: Test 2 finished freespace: Test 3: gradually lessen amount of free space by trashing and fill the FS freespace: do 10 steps, lessen free space by 6985541 bytes 6.7 MiB each time freespace: trashing: was free: 76840960 bytes 73.3 MiB, need free: 6985550 bytes 6.7 MiB, files created: 248311, delete 225737 (90.9% of them) freespace: was free: 65228800 bytes 62.2 MiB, wrote: 82530304 bytes 78.7 MiB, delta: 17301504 bytes 16.5 MiB, wrote 26.5% more than predicted freespace: trashing: was free: 74485760 bytes 71.0 MiB, need free: 13971091 bytes 13.3 MiB, files created: 248712, delete 202061 (81.2% of them) freespace: was free: 55025664 bytes 52.5 MiB, wrote: 71925760 bytes 68.6 MiB, delta: 16900096 bytes 16.1 MiB, wrote 30.7% more than predicted freespace: trashing: was free: 75550720 bytes 72.1 MiB, need free: 20956632 bytes 20.0 MiB, files created: 248849, delete 179822 (72.3% of them) freespace: was free: 46669824 bytes 44.5 MiB, wrote: 63197184 bytes 60.3 MiB, delta: 16527360 bytes 15.8 MiB, wrote 35.4% more than predicted freespace: trashing: was free: 76214272 bytes 72.7 MiB, need free: 27942173 bytes 26.6 MiB, files created: 248789, delete 157576 (63.3% of them) freespace: was free: 39129088 bytes 37.3 MiB, wrote: 55164928 bytes 52.6 MiB, delta: 16035840 bytes 15.3 MiB, wrote 41.0% more than predicted freespace: trashing: was free: 77398016 bytes 73.8 MiB, need free: 34927714 bytes 33.3 MiB, files created: 248711, delete 136474 (54.9% of them) freespace: was free: 32325632 bytes 30.8 MiB, wrote: 48234496 bytes 46.0 MiB, delta: 15908864 bytes 15.2 MiB, wrote 49.2% more than predicted freespace: trashing: was free: 75796480 bytes 72.3 MiB, need free: 41913255 bytes 40.0 MiB, files created: 248674, delete 111164 (44.7% of them) freespace: was free: 25079808 bytes 23.9 MiB, wrote: 40775680 bytes 38.9 MiB, delta: 15695872 bytes 15.0 MiB, wrote 62.6% more than predicted 
freespace: trashing: was free: 78209024 bytes 74.6 MiB, need free: 48898796 bytes 46.6 MiB, files created: 248708, delete 93207 (37.5% of them) freespace: was free: 20582400 bytes 19.6 MiB, wrote: 34844672 bytes 33.2 MiB, delta: 14262272 bytes 13.6 MiB, wrote 69.3% more than predicted freespace: trashing: was free: 77328384 bytes 73.7 MiB, need free: 55884337 bytes 53.3 MiB, files created: 248644, delete 68951 (27.7% of them) freespace: was free: 14368768 bytes 13.7 MiB, wrote: 28278784 bytes 27.0 MiB, delta: 13910016 bytes 13.3 MiB, wrote 96.8% more than predicted freespace: trashing: was free: 77434880 bytes 73.8 MiB, need free: 62869878 bytes 60.0 MiB, files created: 248640, delete 46767 (18.8% of them) freespace: was free: 8286208 bytes 7.9 MiB, wrote: 21811200 bytes 20.8 MiB, delta: 13524992 bytes 12.9 MiB, wrote 163.2% more than predicted freespace: trashing: was free: 77856768 bytes 74.2 MiB, need free: 69855419 bytes 66.6 MiB, files created: 248576, delete 25546 (10.3% of them) freespace: was free: 5570560 bytes 5.3 MiB, wrote: 8187904 bytes 7.8 MiB, delta: 2617344 bytes 2.5 MiB, wrote 47.0% more than predicted freespace: Test 3 finished freespace: finished successfully After the change: freespace: Test 1: fill the space we have 3 times freespace: was free: 85204992 bytes 81.3 MiB, wrote: 96489472 bytes 92.0 MiB, delta: 11284480 bytes 10.8 MiB, wrote 13.2% more than predicted freespace: was free: 83554304 bytes 79.7 MiB, wrote: 96489472 bytes 92.0 MiB, delta: 12935168 bytes 12.3 MiB, wrote 15.5% more than predicted freespace: was free: 83554304 bytes 79.7 MiB, wrote: 96493568 bytes 92.0 MiB, delta: 12939264 bytes 12.3 MiB, wrote 15.5% more than predicted freespace: Test 1 finished freespace: Test 2: gradually lessen amount of free space and fill the FS freespace: do 10 steps, lessen free space by 7596218 bytes 7.2 MiB each time freespace: was free: 78675968 bytes 75.0 MiB, wrote: 88903680 bytes 84.8 MiB, delta: 10227712 bytes 9.8 MiB, wrote 13.0% more than 
predicted freespace: was free: 72015872 bytes 68.7 MiB, wrote: 81514496 bytes 77.7 MiB, delta: 9498624 bytes 9.1 MiB, wrote 13.2% more than predicted freespace: was free: 63938560 bytes 61.0 MiB, wrote: 72589312 bytes 69.2 MiB, delta: 8650752 bytes 8.2 MiB, wrote 13.5% more than predicted freespace: was free: 56127488 bytes 53.5 MiB, wrote: 63762432 bytes 60.8 MiB, delta: 7634944 bytes 7.3 MiB, wrote 13.6% more than predicted freespace: was free: 48336896 bytes 46.1 MiB, wrote: 54935552 bytes 52.4 MiB, delta: 6598656 bytes 6.3 MiB, wrote 13.7% more than predicted freespace: was free: 40587264 bytes 38.7 MiB, wrote: 46157824 bytes 44.0 MiB, delta: 5570560 bytes 5.3 MiB, wrote 13.7% more than predicted freespace: was free: 32841728 bytes 31.3 MiB, wrote: 37384192 bytes 35.7 MiB, delta: 4542464 bytes 4.3 MiB, wrote 13.8% more than predicted freespace: was free: 25100288 bytes 23.9 MiB, wrote: 28618752 bytes 27.3 MiB, delta: 3518464 bytes 3.4 MiB, wrote 14.0% more than predicted freespace: was free: 17342464 bytes 16.5 MiB, wrote: 19841024 bytes 18.9 MiB, delta: 2498560 bytes 2.4 MiB, wrote 14.4% more than predicted freespace: was free: 9605120 bytes 9.2 MiB, wrote: 11063296 bytes 10.6 MiB, delta: 1458176 bytes 1.4 MiB, wrote 15.2% more than predicted freespace: Test 2 finished freespace: Test 3: gradually lessen amount of free space by trashing and fill the FS freespace: do 10 steps, lessen free space by 7606272 bytes 7.3 MiB each time freespace: trashing: was free: 83668992 bytes 79.8 MiB, need free: 7606272 bytes 7.3 MiB, files created: 248297, delete 225724 (90.9% of them) freespace: was free: 70803456 bytes 67.5 MiB, wrote: 82485248 bytes 78.7 MiB, delta: 11681792 bytes 11.1 MiB, wrote 16.5% more than predicted freespace: trashing: was free: 81080320 bytes 77.3 MiB, need free: 15212544 bytes 14.5 MiB, files created: 248711, delete 202047 (81.2% of them) freespace: was free: 59867136 bytes 57.1 MiB, wrote: 71897088 bytes 68.6 MiB, delta: 12029952 bytes 11.5 MiB, 
wrote 20.1% more than predicted freespace: trashing: was free: 82243584 bytes 78.4 MiB, need free: 22818816 bytes 21.8 MiB, files created: 248866, delete 179817 (72.3% of them) freespace: was free: 50905088 bytes 48.5 MiB, wrote: 63168512 bytes 60.2 MiB, delta: 12263424 bytes 11.7 MiB, wrote 24.1% more than predicted freespace: trashing: was free: 83402752 bytes 79.5 MiB, need free: 30425088 bytes 29.0 MiB, files created: 248920, delete 158114 (63.5% of them) freespace: was free: 42651648 bytes 40.7 MiB, wrote: 55406592 bytes 52.8 MiB, delta: 12754944 bytes 12.2 MiB, wrote 29.9% more than predicted freespace: trashing: was free: 84402176 bytes 80.5 MiB, need free: 38031360 bytes 36.3 MiB, files created: 248709, delete 136641 (54.9% of them) freespace: was free: 35233792 bytes 33.6 MiB, wrote: 48250880 bytes 46.0 MiB, delta: 13017088 bytes 12.4 MiB, wrote 36.9% more than predicted freespace: trashing: was free: 82530304 bytes 78.7 MiB, need free: 45637632 bytes 43.5 MiB, files created: 248778, delete 111208 (44.7% of them) freespace: was free: 27287552 bytes 26.0 MiB, wrote: 40267776 bytes 38.4 MiB, delta: 12980224 bytes 12.4 MiB, wrote 47.6% more than predicted freespace: trashing: was free: 85114880 bytes 81.2 MiB, need free: 53243904 bytes 50.8 MiB, files created: 248508, delete 93052 (37.4% of them) freespace: was free: 22437888 bytes 21.4 MiB, wrote: 35328000 bytes 33.7 MiB, delta: 12890112 bytes 12.3 MiB, wrote 57.4% more than predicted freespace: trashing: was free: 84103168 bytes 80.2 MiB, need free: 60850176 bytes 58.0 MiB, files created: 248637, delete 68743 (27.6% of them) freespace: was free: 15536128 bytes 14.8 MiB, wrote: 28319744 bytes 27.0 MiB, delta: 12783616 bytes 12.2 MiB, wrote 82.3% more than predicted freespace: trashing: was free: 84357120 bytes 80.4 MiB, need free: 68456448 bytes 65.3 MiB, files created: 248567, delete 46852 (18.8% of them) freespace: was free: 9015296 bytes 8.6 MiB, wrote: 22044672 bytes 21.0 MiB, delta: 13029376 bytes 12.4 
MiB, wrote 144.5% more than predicted freespace: trashing: was free: 84942848 bytes 81.0 MiB, need free: 76062720 bytes 72.5 MiB, files created: 248636, delete 25993 (10.5% of them) freespace: was free: 6086656 bytes 5.8 MiB, wrote: 8331264 bytes 7.9 MiB, delta: 2244608 bytes 2.1 MiB, wrote 36.9% more than predicted freespace: Test 3 finished freespace: finished successfully Signed-off-by: Artem Bityutskiy --- fs/ubifs/budget.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ fs/ubifs/misc.h | 32 -------------------------------- fs/ubifs/ubifs.h | 1 + 3 files changed, 46 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 9ef630a594c..7851480a6ce 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -701,6 +701,51 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, ubifs_release_budget(c, &req); } +/** + * ubifs_reported_space - calculate reported free space. + * @c: the UBIFS file-system description object + * @free: amount of free space + * + * This function calculates amount of free space which will be reported to + * user-space. User-space application tend to expect that if the file-system + * (e.g., via the 'statfs()' call) reports that it has N bytes available, they + * are able to write a file of size N. UBIFS attaches node headers to each data + * node and it has to write indexind nodes as well. This introduces additional + * overhead, and UBIFS it has to report sligtly less free space to meet the + * above expectetion. + * + * This function assumes free space is made up of uncompressed data nodes and + * full index nodes (one per data node, tripled because we always allow enough + * space to write the index thrice). + * + * Note, the calculation is pessimistic, which means that most of the time + * UBIFS reports less space than it actually has. 
+ */ +long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) +{ + int divisor, factor; + + /* + * Reported space size is @free * X, where X is UBIFS block size + * divided by UBIFS block size + all overhead one data block + * introduces. The overhead is the node header + indexing overhead. + * + * Indexing overhead is calculations are based on the following + * formula: I = N/(f - 1) + 1, where I - number of indexing nodes, N - + * number of data nodes, f - fanout. Because effective UBIFS fanout is + * twice as less than maximum fanout, we assume that each data node + * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. + * Note, the multiplier 3 is because UBIFS reseves thrice as more space + * for the index. + */ + factor = UBIFS_BLOCK_SIZE; + divisor = UBIFS_MAX_DATA_NODE_SZ; + divisor += (c->max_idx_node_sz * 3) / ((c->fanout >> 1) - 1); + free *= factor; + do_div(free, divisor); + return free; +} + /** * ubifs_budg_get_free_space - return amount of free space. * @c: UBIFS file-system description object diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 87ced4c74a6..4c12a9215d7 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -283,38 +283,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c, return (void *)((struct ubifs_branch *)idx->branches)->key; } -/** - * ubifs_reported_space - calculate reported free space. - * @c: the UBIFS file-system description object - * @free: amount of free space - * - * This function calculates amount of free space which will be reported to - * user-space. User-space application tend to expect that if the file-system - * (e.g., via the 'statfs()' call) reports that it has N bytes available, they - * are able to write a file of size N. UBIFS attaches node headers to each data - * node and it has to write indexind nodes as well. This introduces additional - * overhead, and UBIFS it has to report sligtly less free space to meet the - * above expectetion. 
- * - * This function assumes free space is made up of uncompressed data nodes and - * full index nodes (one per data node, doubled because we always allow enough - * space to write the index twice). - * - * Note, the calculation is pessimistic, which means that most of the time - * UBIFS reports less space than it actually has. - */ -static inline long long ubifs_reported_space(const struct ubifs_info *c, - uint64_t free) -{ - int divisor, factor; - - divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3); - factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ; - do_div(free, divisor); - - return free * factor; -} - /** * ubifs_current_time - round current time to time granularity. * @inode: inode diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 7828d69ca4f..681d46e1628 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1441,6 +1441,7 @@ void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, long long ubifs_budg_get_free_space(struct ubifs_info *c); int ubifs_calc_min_idx_lebs(struct ubifs_info *c); void ubifs_convert_page_budget(struct ubifs_info *c); +long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free); long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); /* find.c */ -- cgit v1.2.3 From ad507653a39e0d27404291e5d813683265388a20 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 25 Aug 2008 18:32:57 +0300 Subject: UBIFS: fix assertion The assertion was incorrect, because it did not take into account free space. This patch also amends the comments correspondingly, and cleans them up a little. 
Signed-off-by: Artem Bityutskiy --- fs/ubifs/find.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index adee7b5ddea..9fc55ae7b03 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -211,14 +211,8 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, * dirty index heap, and it falls-back to LPT scanning if the heaps are empty * or do not have an LEB which satisfies the @min_space criteria. * - * Note: - * o LEBs which have less than dead watermark of dirty space are never picked - * by this function; - * - * Returns zero and the LEB properties of - * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a - * negative error code in case of other failures. The returned LEB is marked as - * "taken". + * Note, LEBs which have less than dead watermark of free + dirty space are + * never picked by this function. * * The additional @pick_free argument controls if this function has to return a * free or freeable LEB if one is present. For example, GC must to set it to %1, @@ -231,6 +225,10 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, * * In addition @pick_free is set to %2 by the recovery process in order to * recover gc_lnum in which case an index LEB must not be returned. + * + * This function returns zero and the LEB properties of found dirty LEB in case + * of success, %-ENOSPC if no dirty LEB was found and a negative error code in + * case of other failures. The returned LEB is marked as "taken". 
*/ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, int min_space, int pick_free) @@ -317,7 +315,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, lp = idx_lp; if (lp) { - ubifs_assert(lp->dirty >= c->dead_wm); + ubifs_assert(lp->free + lp->dirty >= c->dead_wm); goto found; } -- cgit v1.2.3 From 131130b9a1e6e523c64b34137b14f88ae1382a6a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 25 Aug 2008 18:34:45 +0300 Subject: UBIFS: add forgotten gc_idx_lebs component We add this component at other similar places, but not in this one. Signed-off-by: Artem Bityutskiy --- fs/ubifs/find.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 9fc55ae7b03..e045c8b5542 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -243,7 +243,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, int lebs, rsvd_idx_lebs = 0; spin_lock(&c->space_lock); - lebs = c->lst.empty_lebs; + lebs = c->lst.empty_lebs + c->idx_gc_cnt; lebs += c->freeable_cnt - c->lst.taken_empty_lebs; /* -- cgit v1.2.3 From 9bbb5726efb64e2cfed42f6eec07db80cd87e63b Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 22 Aug 2008 18:23:22 +0300 Subject: UBIFS: introduce LEB overhead This is a preparational patch for the following statfs() report fix. Signed-off-by: Artem Bityutskiy --- fs/ubifs/super.c | 6 ++++++ fs/ubifs/ubifs.h | 5 +++++ 2 files changed, 11 insertions(+) (limited to 'fs') diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 1018053519e..be23fd3cfd8 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -530,6 +530,12 @@ static int init_constants_early(struct ubifs_info *c) c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); + /* + * Calculate how many bytes would be wasted at the end of LEB if it was + * fully filled with data nodes of maximum size. 
This is used in + * calculations when reporting free space. + */ + c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; return 0; } diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 681d46e1628..57e58541de2 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -995,6 +995,9 @@ struct ubifs_mount_opts { * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary * @max_inode_sz: maximum possible inode size in bytes * @max_znode_sz: size of znode in bytes + * + * @leb_overhead: how many bytes are wasted in an LEB when it is filled with + * data nodes of maximum size - used in free space reporting * @dead_wm: LEB dead space watermark * @dark_wm: LEB dark space watermark * @block_cnt: count of 4KiB blocks on the FS @@ -1226,6 +1229,8 @@ struct ubifs_info { int max_idx_node_sz; long long max_inode_sz; int max_znode_sz; + + int leb_overhead; int dead_wm; int dark_wm; int block_cnt; -- cgit v1.2.3 From 7dad181bbe58b8fe9e170da28bcd5f6ec9addd6d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 25 Aug 2008 18:58:19 +0300 Subject: UBIFS: improve statfs reporting even more Since free space we report in statfs is file size which should fit to the FS - change the way we calculate free space and use leb_overhead instead of dark_wm in calculations. Results of "freespace" test (120MiB volume, 16KiB LEB size, 512 bytes page size). 
Before the change: freespace: Test 1: fill the space we have 3 times freespace: was free: 85204992 bytes 81.3 MiB, wrote: 96489472 bytes 92.0 MiB, delta: 11284480 bytes 10.8 MiB, wrote 13.2% more than predicted freespace: was free: 83554304 bytes 79.7 MiB, wrote: 96489472 bytes 92.0 MiB, delta: 12935168 bytes 12.3 MiB, wrote 15.5% more than predicted freespace: was free: 83554304 bytes 79.7 MiB, wrote: 96493568 bytes 92.0 MiB, delta: 12939264 bytes 12.3 MiB, wrote 15.5% more than predicted freespace: Test 1 finished freespace: Test 2: gradually lessen amount of free space and fill the FS freespace: do 10 steps, lessen free space by 7596218 bytes 7.2 MiB each time freespace: was free: 78675968 bytes 75.0 MiB, wrote: 88903680 bytes 84.8 MiB, delta: 10227712 bytes 9.8 MiB, wrote 13.0% more than predicted freespace: was free: 72015872 bytes 68.7 MiB, wrote: 81514496 bytes 77.7 MiB, delta: 9498624 bytes 9.1 MiB, wrote 13.2% more than predicted freespace: was free: 63938560 bytes 61.0 MiB, wrote: 72589312 bytes 69.2 MiB, delta: 8650752 bytes 8.2 MiB, wrote 13.5% more than predicted freespace: was free: 56127488 bytes 53.5 MiB, wrote: 63762432 bytes 60.8 MiB, delta: 7634944 bytes 7.3 MiB, wrote 13.6% more than predicted freespace: was free: 48336896 bytes 46.1 MiB, wrote: 54935552 bytes 52.4 MiB, delta: 6598656 bytes 6.3 MiB, wrote 13.7% more than predicted freespace: was free: 40587264 bytes 38.7 MiB, wrote: 46157824 bytes 44.0 MiB, delta: 5570560 bytes 5.3 MiB, wrote 13.7% more than predicted freespace: was free: 32841728 bytes 31.3 MiB, wrote: 37384192 bytes 35.7 MiB, delta: 4542464 bytes 4.3 MiB, wrote 13.8% more than predicted freespace: was free: 25100288 bytes 23.9 MiB, wrote: 28618752 bytes 27.3 MiB, delta: 3518464 bytes 3.4 MiB, wrote 14.0% more than predicted freespace: was free: 17342464 bytes 16.5 MiB, wrote: 19841024 bytes 18.9 MiB, delta: 2498560 bytes 2.4 MiB, wrote 14.4% more than predicted freespace: was free: 9605120 bytes 9.2 MiB, wrote: 11063296 bytes 
10.6 MiB, delta: 1458176 bytes 1.4 MiB, wrote 15.2% more than predicted freespace: Test 2 finished freespace: Test 3: gradually lessen amount of free space by trashing and fill the FS freespace: do 10 steps, lessen free space by 7606272 bytes 7.3 MiB each time freespace: trashing: was free: 83668992 bytes 79.8 MiB, need free: 7606272 bytes 7.3 MiB, files created: 248297, delete 225724 (90.9% of them) freespace: was free: 70803456 bytes 67.5 MiB, wrote: 82485248 bytes 78.7 MiB, delta: 11681792 bytes 11.1 MiB, wrote 16.5% more than predicted freespace: trashing: was free: 81080320 bytes 77.3 MiB, need free: 15212544 bytes 14.5 MiB, files created: 248711, delete 202047 (81.2% of them) freespace: was free: 59867136 bytes 57.1 MiB, wrote: 71897088 bytes 68.6 MiB, delta: 12029952 bytes 11.5 MiB, wrote 20.1% more than predicted freespace: trashing: was free: 82243584 bytes 78.4 MiB, need free: 22818816 bytes 21.8 MiB, files created: 248866, delete 179817 (72.3% of them) freespace: was free: 50905088 bytes 48.5 MiB, wrote: 63168512 bytes 60.2 MiB, delta: 12263424 bytes 11.7 MiB, wrote 24.1% more than predicted freespace: trashing: was free: 83402752 bytes 79.5 MiB, need free: 30425088 bytes 29.0 MiB, files created: 248920, delete 158114 (63.5% of them) freespace: was free: 42651648 bytes 40.7 MiB, wrote: 55406592 bytes 52.8 MiB, delta: 12754944 bytes 12.2 MiB, wrote 29.9% more than predicted freespace: trashing: was free: 84402176 bytes 80.5 MiB, need free: 38031360 bytes 36.3 MiB, files created: 248709, delete 136641 (54.9% of them) freespace: was free: 35233792 bytes 33.6 MiB, wrote: 48250880 bytes 46.0 MiB, delta: 13017088 bytes 12.4 MiB, wrote 36.9% more than predicted freespace: trashing: was free: 82530304 bytes 78.7 MiB, need free: 45637632 bytes 43.5 MiB, files created: 248778, delete 111208 (44.7% of them) freespace: was free: 27287552 bytes 26.0 MiB, wrote: 40267776 bytes 38.4 MiB, delta: 12980224 bytes 12.4 MiB, wrote 47.6% more than predicted freespace: 
trashing: was free: 85114880 bytes 81.2 MiB, need free: 53243904 bytes 50.8 MiB, files created: 248508, delete 93052 (37.4% of them) freespace: was free: 22437888 bytes 21.4 MiB, wrote: 35328000 bytes 33.7 MiB, delta: 12890112 bytes 12.3 MiB, wrote 57.4% more than predicted freespace: trashing: was free: 84103168 bytes 80.2 MiB, need free: 60850176 bytes 58.0 MiB, files created: 248637, delete 68743 (27.6% of them) freespace: was free: 15536128 bytes 14.8 MiB, wrote: 28319744 bytes 27.0 MiB, delta: 12783616 bytes 12.2 MiB, wrote 82.3% more than predicted freespace: trashing: was free: 84357120 bytes 80.4 MiB, need free: 68456448 bytes 65.3 MiB, files created: 248567, delete 46852 (18.8% of them) freespace: was free: 9015296 bytes 8.6 MiB, wrote: 22044672 bytes 21.0 MiB, delta: 13029376 bytes 12.4 MiB, wrote 144.5% more than predicted freespace: trashing: was free: 84942848 bytes 81.0 MiB, need free: 76062720 bytes 72.5 MiB, files created: 248636, delete 25993 (10.5% of them) freespace: was free: 6086656 bytes 5.8 MiB, wrote: 8331264 bytes 7.9 MiB, delta: 2244608 bytes 2.1 MiB, wrote 36.9% more than predicted freespace: Test 3 finished freespace: finished successfully After the change: freespace: Test 1: fill the space we have 3 times freespace: was free: 94048256 bytes 89.7 MiB, wrote: 96489472 bytes 92.0 MiB, delta: 2441216 bytes 2.3 MiB, wrote 2.6% more than predicted freespace: was free: 92246016 bytes 88.0 MiB, wrote: 96493568 bytes 92.0 MiB, delta: 4247552 bytes 4.1 MiB, wrote 4.6% more than predicted freespace: was free: 92254208 bytes 88.0 MiB, wrote: 96489472 bytes 92.0 MiB, delta: 4235264 bytes 4.0 MiB, wrote 4.6% more than predicted freespace: Test 1 finished freespace: Test 2: gradually lessen amount of free space and fill the FS freespace: do 10 steps, lessen free space by 8386001 bytes 8.0 MiB each time freespace: was free: 86605824 bytes 82.6 MiB, wrote: 88252416 bytes 84.2 MiB, delta: 1646592 bytes 1.6 MiB, wrote 1.9% more than predicted freespace: 
was free: 78667776 bytes 75.0 MiB, wrote: 80715776 bytes 77.0 MiB, delta: 2048000 bytes 2.0 MiB, wrote 2.6% more than predicted freespace: was free: 69615616 bytes 66.4 MiB, wrote: 71630848 bytes 68.3 MiB, delta: 2015232 bytes 1.9 MiB, wrote 2.9% more than predicted freespace: was free: 61018112 bytes 58.2 MiB, wrote: 62783488 bytes 59.9 MiB, delta: 1765376 bytes 1.7 MiB, wrote 2.9% more than predicted freespace: was free: 52424704 bytes 50.0 MiB, wrote: 53968896 bytes 51.5 MiB, delta: 1544192 bytes 1.5 MiB, wrote 2.9% more than predicted freespace: was free: 43880448 bytes 41.8 MiB, wrote: 45199360 bytes 43.1 MiB, delta: 1318912 bytes 1.3 MiB, wrote 3.0% more than predicted freespace: was free: 35332096 bytes 33.7 MiB, wrote: 36425728 bytes 34.7 MiB, delta: 1093632 bytes 1.0 MiB, wrote 3.1% more than predicted freespace: was free: 26771456 bytes 25.5 MiB, wrote: 27643904 bytes 26.4 MiB, delta: 872448 bytes 852.0 KiB, wrote 3.3% more than predicted freespace: was free: 18231296 bytes 17.4 MiB, wrote: 18878464 bytes 18.0 MiB, delta: 647168 bytes 632.0 KiB, wrote 3.5% more than predicted freespace: was free: 9674752 bytes 9.2 MiB, wrote: 10088448 bytes 9.6 MiB, delta: 413696 bytes 404.0 KiB, wrote 4.3% more than predicted freespace: Test 2 finished freespace: Test 3: gradually lessen amount of free space by trashing and fill the FS freespace: do 10 steps, lessen free space by 8397544 bytes 8.0 MiB each time freespace: trashing: was free: 92372992 bytes 88.1 MiB, need free: 8397552 bytes 8.0 MiB, files created: 248296, delete 225723 (90.9% of them) freespace: was free: 71909376 bytes 68.6 MiB, wrote: 82472960 bytes 78.7 MiB, delta: 10563584 bytes 10.1 MiB, wrote 14.7% more than predicted freespace: trashing: was free: 88989696 bytes 84.9 MiB, need free: 16795096 bytes 16.0 MiB, files created: 248794, delete 201838 (81.1% of them) freespace: was free: 60354560 bytes 57.6 MiB, wrote: 71782400 bytes 68.5 MiB, delta: 11427840 bytes 10.9 MiB, wrote 18.9% more than 
predicted freespace: trashing: was free: 90304512 bytes 86.1 MiB, need free: 25192640 bytes 24.0 MiB, files created: 248733, delete 179342 (72.1% of them) freespace: was free: 51187712 bytes 48.8 MiB, wrote: 62943232 bytes 60.0 MiB, delta: 11755520 bytes 11.2 MiB, wrote 23.0% more than predicted freespace: trashing: was free: 91209728 bytes 87.0 MiB, need free: 33590184 bytes 32.0 MiB, files created: 248779, delete 157160 (63.2% of them) freespace: was free: 42704896 bytes 40.7 MiB, wrote: 55050240 bytes 52.5 MiB, delta: 12345344 bytes 11.8 MiB, wrote 28.9% more than predicted freespace: trashing: was free: 92700672 bytes 88.4 MiB, need free: 41987728 bytes 40.0 MiB, files created: 248848, delete 136135 (54.7% of them) freespace: was free: 35250176 bytes 33.6 MiB, wrote: 48115712 bytes 45.9 MiB, delta: 12865536 bytes 12.3 MiB, wrote 36.5% more than predicted freespace: trashing: was free: 93986816 bytes 89.6 MiB, need free: 50385272 bytes 48.1 MiB, files created: 248723, delete 115385 (46.4% of them) freespace: was free: 29995008 bytes 28.6 MiB, wrote: 41582592 bytes 39.7 MiB, delta: 11587584 bytes 11.1 MiB, wrote 38.6% more than predicted freespace: trashing: was free: 91881472 bytes 87.6 MiB, need free: 58782816 bytes 56.1 MiB, files created: 248645, delete 89569 (36.0% of them) freespace: was free: 22511616 bytes 21.5 MiB, wrote: 34705408 bytes 33.1 MiB, delta: 12193792 bytes 11.6 MiB, wrote 54.2% more than predicted freespace: trashing: was free: 91774976 bytes 87.5 MiB, need free: 67180360 bytes 64.1 MiB, files created: 248580, delete 66616 (26.8% of them) freespace: was free: 16908288 bytes 16.1 MiB, wrote: 26898432 bytes 25.7 MiB, delta: 9990144 bytes 9.5 MiB, wrote 59.1% more than predicted freespace: trashing: was free: 92450816 bytes 88.2 MiB, need free: 75577904 bytes 72.1 MiB, files created: 248654, delete 45381 (18.3% of them) freespace: was free: 10170368 bytes 9.7 MiB, wrote: 19111936 bytes 18.2 MiB, delta: 8941568 bytes 8.5 MiB, wrote 87.9% more 
than predicted freespace: trashing: was free: 93282304 bytes 89.0 MiB, need free: 83975448 bytes 80.1 MiB, files created: 248513, delete 24794 (10.0% of them) freespace: was free: 3911680 bytes 3.7 MiB, wrote: 7872512 bytes 7.5 MiB, delta: 3960832 bytes 3.8 MiB, wrote 101.3% more than predicted freespace: Test 3 finished freespace: finished successfully Signed-off-by: Artem Bityutskiy --- fs/ubifs/budget.c | 38 ++++++++++++++++++++++++++++++++++---- fs/ubifs/super.c | 10 +++++----- fs/ubifs/ubifs.h | 2 +- 3 files changed, 40 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 7851480a6ce..101d278c591 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -747,14 +747,24 @@ long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) } /** - * ubifs_budg_get_free_space - return amount of free space. + * ubifs_get_free_space - return amount of free space. * @c: UBIFS file-system description object * - * This function returns amount of free space on the file-system. + * This function calculates amount of free space to report to user-space. + * + * Because UBIFS may introduce substantial overhead (the index, node headers, + * alighment, wastage at the end of eraseblocks, etc), it cannot report real + * amount of free flash space it has (well, because not all dirty space is + * reclamable, UBIFS does not actually know the real amount). If UBIFS did so, + * it would bread user expectetion about what free space is. Users seem to + * accustomed to assume that if the file-system reports N bytes of free space, + * they would be able to fit a file of N bytes to the FS. This almost works for + * traditional file-systems, because they have way less overhead than UBIFS. + * So, to keep users happy, UBIFS tries to take the overhead into account. 
*/ -long long ubifs_budg_get_free_space(struct ubifs_info *c) +long long ubifs_get_free_space(struct ubifs_info *c) { - int min_idx_lebs; + int min_idx_lebs, rsvd_idx_lebs, lebs; long long available, outstanding, free; spin_lock(&c->space_lock); @@ -771,6 +781,26 @@ long long ubifs_budg_get_free_space(struct ubifs_info *c) } available = ubifs_calc_available(c, min_idx_lebs); + + /* + * When reporting free space to user-space, UBIFS guarantees that it is + * possible to write a file of free space size. This means that for + * empty LEBs we may use more precise calculations than + * 'ubifs_calc_available()' is using. Namely, we know that in empty + * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm. + * Thus, amend the available space. + * + * Note, the calculations below are similar to what we have in + * 'do_budget_space()', so refer there for comments. + */ + if (min_idx_lebs > c->lst.idx_lebs) + rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; + else + rsvd_idx_lebs = 0; + lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - + c->lst.taken_empty_lebs; + lebs -= rsvd_idx_lebs; + available += lebs * (c->dark_wm - c->leb_overhead); spin_unlock(&c->space_lock); if (available > outstanding) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index be23fd3cfd8..1207bd51ead 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -371,7 +371,7 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) struct ubifs_info *c = dentry->d_sb->s_fs_info; unsigned long long free; - free = ubifs_budg_get_free_space(c); + free = ubifs_get_free_space(c); dbg_gen("free space %lld bytes (%lld blocks)", free, free >> UBIFS_BLOCK_SHIFT); @@ -653,11 +653,11 @@ static int init_constants_late(struct ubifs_info *c) * internally because it does not make much sense for UBIFS, but it is * necessary to report something for the 'statfs()' call. * - * Subtract the LEB reserved for GC and the LEB which is reserved for - * deletions. 
+ * Subtract the LEB reserved for GC, the LEB which is reserved for + * deletions, and assume only one journal head is available. */ - tmp64 = c->main_lebs - 2; - tmp64 *= (uint64_t)c->leb_size - c->dark_wm; + tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1; + tmp64 *= (uint64_t)c->leb_size - c->leb_overhead; tmp64 = ubifs_reported_space(c, tmp64); c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 57e58541de2..17c620b93ee 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1443,7 +1443,7 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, struct ubifs_budget_req *req); void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, struct ubifs_budget_req *req); -long long ubifs_budg_get_free_space(struct ubifs_info *c); +long long ubifs_get_free_space(struct ubifs_info *c); int ubifs_calc_min_idx_lebs(struct ubifs_info *c); void ubifs_convert_page_budget(struct ubifs_info *c); long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free); -- cgit v1.2.3 From b3385c278d3c32aec68d4900b35bc07df1b2240c Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 31 Aug 2008 17:13:18 +0300 Subject: UBIFS: fill f_fsid UBIFS stores a 16-byte UUID in the superblock, and it is a good idea to return part of it in the 'f_fsid' field of the kstatfs structure. 
Signed-off-by: Artem Bityutskiy --- fs/ubifs/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 1207bd51ead..0dee4042c6c 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -386,6 +386,7 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = 0; buf->f_ffree = 0; buf->f_namelen = UBIFS_MAX_NLEN; + memcpy(&buf->f_fsid, c->uuid, sizeof(__kernel_fsid_t)); return 0; } -- cgit v1.2.3 From 7c7cbadf7341a0792879c67d6e3020f040d6cd7f Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 3 Sep 2008 14:16:42 +0300 Subject: UBIFS: amend f_fsid David Woodhouse suggested to be consistent with other FSes and xor the beginning and the end of the UUID. Signed-off-by: Artem Bityutskiy --- fs/ubifs/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 0dee4042c6c..7562464ac83 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -370,6 +370,7 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct ubifs_info *c = dentry->d_sb->s_fs_info; unsigned long long free; + __le32 *uuid = (__le32 *)c->uuid; free = ubifs_get_free_space(c); dbg_gen("free space %lld bytes (%lld blocks)", @@ -386,8 +387,8 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = 0; buf->f_ffree = 0; buf->f_namelen = UBIFS_MAX_NLEN; - memcpy(&buf->f_fsid, c->uuid, sizeof(__kernel_fsid_t)); - + buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]); + buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]); return 0; } -- cgit v1.2.3 From 49048622eae698e5c4ae61f7e71200f265ccc529 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Fri, 5 Sep 2008 18:12:23 +0200 Subject: sched: fix process time monotonicity Spencer reported a problem where utime and stime were going negative despite the fixes in commit b27f03d4bdc145a09fb7b0c0e004b29f1ee555fa. 
The suspected reason for the problem is that signal_struct maintains its own utime and stime (of exited tasks); these are not updated using the new task_utime() routine, hence sig->utime can go backwards and cause the same problem to occur (sig->utime adds tsk->utime and not task_utime()). This patch fixes the problem. TODO: using max(task->prev_utime, derived utime) works for now, but a more generic solution is to implement cputime_max() and use the cputime_gt() function for comparison. Reported-by: spencer@bluehost.com Signed-off-by: Balbir Singh Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- fs/proc/array.c | 59 --------------------------------------------------------- 1 file changed, 59 deletions(-) (limited to 'fs') diff --git a/fs/proc/array.c b/fs/proc/array.c index 0d6eb33597c..71c9be59c9c 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -337,65 +337,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, return 0; } -/* - * Use precise platform statistics if available: - */ -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -static cputime_t task_utime(struct task_struct *p) -{ - return p->utime; -} - -static cputime_t task_stime(struct task_struct *p) -{ - return p->stime; -} -#else -static cputime_t task_utime(struct task_struct *p) -{ - clock_t utime = cputime_to_clock_t(p->utime), - total = utime + cputime_to_clock_t(p->stime); - u64 temp; - - /* - * Use CFS's precise accounting: - */ - temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); - - if (total) { - temp *= utime; - do_div(temp, total); - } - utime = (clock_t)temp; - - p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); - return p->prev_utime; -} - -static cputime_t task_stime(struct task_struct *p) -{ - clock_t stime; - - /* - * Use CFS's precise accounting. 
(we subtract utime from - * the total, to make sure the total observed by userspace - * grows monotonically - apps rely on that): - */ - stime = nsec_to_clock_t(p->se.sum_exec_runtime) - - cputime_to_clock_t(task_utime(p)); - - if (stime >= 0) - p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); - - return p->prev_stime; -} -#endif - -static cputime_t task_gtime(struct task_struct *p) -{ - return p->gtime; -} - static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task, int whole) { -- cgit v1.2.3 From f171d4d769c8ccac6675892960e37f6485837fae Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 3 Sep 2008 16:17:14 +0300 Subject: UBIFS: fix division by zero If fanout is 3, we have division by zero in 'ubifs_read_superblock()': divide error: 0000 [#1] PREEMPT SMP Pid: 28744, comm: mount Not tainted (2.6.27-rc4-ubifs-2.6 #23) EIP: 0060:[] EFLAGS: 00010202 CPU: 0 EIP is at ubifs_reported_space+0x2d/0x69 [ubifs] EAX: 00000000 EBX: 00000000 ECX: 00000000 EDX: 00000000 ESI: 00000000 EDI: f0ae64b0 EBP: f1f9fcf4 ESP: f1f9fce0 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 Signed-off-by: Artem Bityutskiy --- fs/ubifs/budget.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 101d278c591..73db464cd08 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -723,24 +723,25 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, */ long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) { - int divisor, factor; + int divisor, factor, f; /* * Reported space size is @free * X, where X is UBIFS block size * divided by UBIFS block size + all overhead one data block * introduces. The overhead is the node header + indexing overhead. 
* - * Indexing overhead is calculations are based on the following - * formula: I = N/(f - 1) + 1, where I - number of indexing nodes, N - - * number of data nodes, f - fanout. Because effective UBIFS fanout is - * twice as less than maximum fanout, we assume that each data node + * Indexing overhead calculations are based on the following formula: + * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number + * of data nodes, f - fanout. Because effective UBIFS fanout is twice + * as less than maximum fanout, we assume that each data node * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. * Note, the multiplier 3 is because UBIFS reseves thrice as more space * for the index. */ + f = c->fanout > 3 ? c->fanout >> 1 : 2; factor = UBIFS_BLOCK_SIZE; divisor = UBIFS_MAX_DATA_NODE_SZ; - divisor += (c->max_idx_node_sz * 3) / ((c->fanout >> 1) - 1); + divisor += (c->max_idx_node_sz * 3) / (f - 1); free *= factor; do_div(free, divisor); return free; -- cgit v1.2.3 From a5cb562d6977d9d7989c346b7b153cef31ec0228 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 3 Sep 2008 18:26:47 +0300 Subject: UBIFS: make minimum fanout 3 UBIFS does not really work correctly when fanout is 2, because of the way we manage the indexing tree. It may just become a list and UBIFS screws up. 
Signed-off-by: Artem Bityutskiy --- fs/ubifs/ubifs-media.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index bd2121f3426..a9ecbd9af20 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h @@ -87,7 +87,7 @@ #define UBIFS_SK_LEN 8 /* Minimum index tree fanout */ -#define UBIFS_MIN_FANOUT 2 +#define UBIFS_MIN_FANOUT 3 /* Maximum number of levels in UBIFS indexing B-tree */ #define UBIFS_MAX_LEVELS 512 -- cgit v1.2.3 From af904deaf6da3f3285eb0a06a3dc6a1af0251030 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 8 Sep 2008 11:58:13 -0400 Subject: NFS: Restore missing hunk in NFS mount option parser Automounter maps can contain mount options valid for other NFS implementations but not for Linux. The Linux automounter uses the mount command's "-s" command line option ("s" for "sloppy") so that mount requests containing such options are not rejected. Commit f45663ce5fb30f76a3414ab3ac69f4dd320e760a attempted to address a known regression with text-based NFS mount option parsing. Unrecognized mount options would cause mount requests to fail, even if the "-s" option was used on the mount command line. Unfortunately, this commit was not complete as submitted. It adds a new mount option, "sloppy". But it is missing a hunk, so it now allows NFS mounts with unrecognized mount options, even if the "sloppy" option is not present. This could be a problem if a required critical mount option such as "sync" is misspelled, for example, and is considered a regression from 2.6.26. This patch restores the missing hunk. Now, the default behavior of text-based NFS mount options is as before: any unrecognized mount option will cause the mount to fail. Please include this in 2.6.27-rc. Thanks to Neil Brown for reporting this. Signed-off-by: Chuck Lever Acked-by: J. 
Bruce Fields Signed-off-by: Linus Torvalds --- fs/nfs/super.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 9abcd2b329f..e9b20173fef 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1279,6 +1279,12 @@ static int nfs_parse_mount_options(char *raw, } } + if (errors > 0) { + dfprintk(MOUNT, "NFS: parsing encountered %d error%s\n", + errors, (errors == 1 ? "" : "s")); + if (!sloppy) + return 0; + } return 1; out_nomem: -- cgit v1.2.3