aboutsummaryrefslogtreecommitdiff
path: root/fs/ubifs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ubifs')
-rw-r--r--fs/ubifs/Kconfig4
-rw-r--r--fs/ubifs/budget.c72
-rw-r--r--fs/ubifs/debug.c128
-rw-r--r--fs/ubifs/debug.h36
-rw-r--r--fs/ubifs/dir.c96
-rw-r--r--fs/ubifs/file.c34
-rw-r--r--fs/ubifs/find.c12
-rw-r--r--fs/ubifs/gc.c454
-rw-r--r--fs/ubifs/io.c22
-rw-r--r--fs/ubifs/journal.c9
-rw-r--r--fs/ubifs/key.h6
-rw-r--r--fs/ubifs/log.c5
-rw-r--r--fs/ubifs/lprops.c12
-rw-r--r--fs/ubifs/lpt_commit.c78
-rw-r--r--fs/ubifs/master.c2
-rw-r--r--fs/ubifs/orphan.c38
-rw-r--r--fs/ubifs/recovery.c70
-rw-r--r--fs/ubifs/replay.c2
-rw-r--r--fs/ubifs/sb.c36
-rw-r--r--fs/ubifs/shrinker.c6
-rw-r--r--fs/ubifs/super.c225
-rw-r--r--fs/ubifs/tnc.c14
-rw-r--r--fs/ubifs/ubifs-media.h30
-rw-r--r--fs/ubifs/ubifs.h39
24 files changed, 879 insertions, 551 deletions
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index e35b54d5059..830e3f76f44 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -22,7 +22,7 @@ config UBIFS_FS_ADVANCED_COMPR
depends on UBIFS_FS
help
This option allows to explicitly choose which compressions, if any,
- are enabled in UBIFS. Removing compressors means inbility to read
+ are enabled in UBIFS. Removing compressors means inability to read
existing file systems.
If unsure, say 'N'.
@@ -32,7 +32,7 @@ config UBIFS_FS_LZO
depends on UBIFS_FS
default y
help
- LZO compressor is generally faster then zlib but compresses worse.
+ LZO compressor is generally faster than zlib but compresses worse.
Say 'Y' if unsure.
config UBIFS_FS_ZLIB
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 175f9c590b7..af1914462f0 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -194,29 +194,26 @@ static int make_free_space(struct ubifs_info *c)
}
/**
- * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index.
+ * ubifs_calc_min_idx_lebs - calculate amount of LEBs for the index.
* @c: UBIFS file-system description object
*
- * This function calculates and returns the number of eraseblocks which should
- * be kept for index usage.
+ * This function calculates and returns the number of LEBs which should be kept
+ * for index usage.
*/
int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
{
- int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz;
+ int idx_lebs;
long long idx_size;
idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
-
/* And make sure we have thrice the index size of space reserved */
- idx_size = idx_size + (idx_size << 1);
-
+ idx_size += idx_size << 1;
/*
* We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes'
* pair, nor similarly the two variables for the new index size, so we
* have to do this costly 64-bit division on fast-path.
*/
- idx_size += eff_leb_size - 1;
- idx_lebs = div_u64(idx_size, eff_leb_size);
+ idx_lebs = div_u64(idx_size + c->idx_leb_size - 1, c->idx_leb_size);
/*
* The index head is not available for the in-the-gaps method, so add an
* extra LEB to compensate.
@@ -310,23 +307,23 @@ static int can_use_rp(struct ubifs_info *c)
* do_budget_space - reserve flash space for index and data growth.
* @c: UBIFS file-system description object
*
- * This function makes sure UBIFS has enough free eraseblocks for index growth
- * and data.
+ * This function makes sure UBIFS has enough free LEBs for index growth and
+ * data.
*
* When budgeting index space, UBIFS reserves thrice as many LEBs as the index
* would take if it was consolidated and written to the flash. This guarantees
* that the "in-the-gaps" commit method always succeeds and UBIFS will always
* be able to commit dirty index. So this function basically adds amount of
* budgeted index space to the size of the current index, multiplies this by 3,
- * and makes sure this does not exceed the amount of free eraseblocks.
+ * and makes sure this does not exceed the amount of free LEBs.
*
* Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
* o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might
* be large, because UBIFS does not do any index consolidation as long as
* there is free space. IOW, the index may take a lot of LEBs, but the LEBs
* will contain a lot of dirt.
- * o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be
- * consolidated to take up to @c->min_idx_lebs LEBs.
+ * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW,
+ * the index may be consolidated to take up to @c->min_idx_lebs LEBs.
*
* This function returns zero in case of success, and %-ENOSPC in case of
* failure.
@@ -689,31 +686,29 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free)
}
/**
- * ubifs_get_free_space - return amount of free space.
+ * ubifs_get_free_space_nolock - return amount of free space.
* @c: UBIFS file-system description object
*
* This function calculates amount of free space to report to user-space.
*
* Because UBIFS may introduce substantial overhead (the index, node headers,
- * alignment, wastage at the end of eraseblocks, etc), it cannot report real
- * amount of free flash space it has (well, because not all dirty space is
- * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so,
- * it would bread user expectations about what free space is. Users seem to
- * accustomed to assume that if the file-system reports N bytes of free space,
- * they would be able to fit a file of N bytes to the FS. This almost works for
+ * alignment, wastage at the end of LEBs, etc), it cannot report real amount of
+ * free flash space it has (well, because not all dirty space is reclaimable,
+ * UBIFS does not actually know the real amount). If UBIFS did so, it would
+ * bread user expectations about what free space is. Users seem to accustomed
+ * to assume that if the file-system reports N bytes of free space, they would
+ * be able to fit a file of N bytes to the FS. This almost works for
* traditional file-systems, because they have way less overhead than UBIFS.
* So, to keep users happy, UBIFS tries to take the overhead into account.
*/
-long long ubifs_get_free_space(struct ubifs_info *c)
+long long ubifs_get_free_space_nolock(struct ubifs_info *c)
{
- int min_idx_lebs, rsvd_idx_lebs, lebs;
+ int rsvd_idx_lebs, lebs;
long long available, outstanding, free;
- spin_lock(&c->space_lock);
- min_idx_lebs = c->min_idx_lebs;
- ubifs_assert(min_idx_lebs == ubifs_calc_min_idx_lebs(c));
+ ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c));
outstanding = c->budg_data_growth + c->budg_dd_growth;
- available = ubifs_calc_available(c, min_idx_lebs);
+ available = ubifs_calc_available(c, c->min_idx_lebs);
/*
* When reporting free space to user-space, UBIFS guarantees that it is
@@ -726,15 +721,14 @@ long long ubifs_get_free_space(struct ubifs_info *c)
* Note, the calculations below are similar to what we have in
* 'do_budget_space()', so refer there for comments.
*/
- if (min_idx_lebs > c->lst.idx_lebs)
- rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
+ if (c->min_idx_lebs > c->lst.idx_lebs)
+ rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
else
rsvd_idx_lebs = 0;
lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
c->lst.taken_empty_lebs;
lebs -= rsvd_idx_lebs;
available += lebs * (c->dark_wm - c->leb_overhead);
- spin_unlock(&c->space_lock);
if (available > outstanding)
free = ubifs_reported_space(c, available - outstanding);
@@ -742,3 +736,21 @@ long long ubifs_get_free_space(struct ubifs_info *c)
free = 0;
return free;
}
+
+/**
+ * ubifs_get_free_space - return amount of free space.
+ * @c: UBIFS file-system description object
+ *
+ * This function calculates and retuns amount of free space to report to
+ * user-space.
+ */
+long long ubifs_get_free_space(struct ubifs_info *c)
+{
+ long long free;
+
+ spin_lock(&c->space_lock);
+ free = ubifs_get_free_space_nolock(c);
+ spin_unlock(&c->space_lock);
+
+ return free;
+}
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 792c5a16c18..ce2cd834361 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -479,9 +479,9 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
"bad or corrupted node)");
else {
for (i = 0; i < nlen && dent->name[i]; i++)
- printk("%c", dent->name[i]);
+ printk(KERN_CONT "%c", dent->name[i]);
}
- printk("\n");
+ printk(KERN_CONT "\n");
break;
}
@@ -620,9 +620,11 @@ void dbg_dump_budg(struct ubifs_info *c)
c->dark_wm, c->dead_wm, c->max_idx_node_sz);
printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n",
c->gc_lnum, c->ihead_lnum);
- for (i = 0; i < c->jhead_cnt; i++)
- printk(KERN_DEBUG "\tjhead %d\t LEB %d\n",
- c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum);
+ /* If we are in R/O mode, journal heads do not exist */
+ if (c->jheads)
+ for (i = 0; i < c->jhead_cnt; i++)
+ printk(KERN_DEBUG "\tjhead %d\t LEB %d\n",
+ c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum);
for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) {
bud = rb_entry(rb, struct ubifs_bud, rb);
printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum);
@@ -637,10 +639,7 @@ void dbg_dump_budg(struct ubifs_info *c)
/* Print budgeting predictions */
available = ubifs_calc_available(c, c->min_idx_lebs);
outstanding = c->budg_data_growth + c->budg_dd_growth;
- if (available > outstanding)
- free = ubifs_reported_space(c, available - outstanding);
- else
- free = 0;
+ free = ubifs_get_free_space_nolock(c);
printk(KERN_DEBUG "Budgeting predictions:\n");
printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n",
available, outstanding, free);
@@ -861,6 +860,65 @@ void dbg_dump_index(struct ubifs_info *c)
}
/**
+ * dbg_save_space_info - save information about flash space.
+ * @c: UBIFS file-system description object
+ *
+ * This function saves information about UBIFS free space, dirty space, etc, in
+ * order to check it later.
+ */
+void dbg_save_space_info(struct ubifs_info *c)
+{
+ struct ubifs_debug_info *d = c->dbg;
+
+ ubifs_get_lp_stats(c, &d->saved_lst);
+
+ spin_lock(&c->space_lock);
+ d->saved_free = ubifs_get_free_space_nolock(c);
+ spin_unlock(&c->space_lock);
+}
+
+/**
+ * dbg_check_space_info - check flash space information.
+ * @c: UBIFS file-system description object
+ *
+ * This function compares current flash space information with the information
+ * which was saved when the 'dbg_save_space_info()' function was called.
+ * Returns zero if the information has not changed, and %-EINVAL it it has
+ * changed.
+ */
+int dbg_check_space_info(struct ubifs_info *c)
+{
+ struct ubifs_debug_info *d = c->dbg;
+ struct ubifs_lp_stats lst;
+ long long avail, free;
+
+ spin_lock(&c->space_lock);
+ avail = ubifs_calc_available(c, c->min_idx_lebs);
+ spin_unlock(&c->space_lock);
+ free = ubifs_get_free_space(c);
+
+ if (free != d->saved_free) {
+ ubifs_err("free space changed from %lld to %lld",
+ d->saved_free, free);
+ goto out;
+ }
+
+ return 0;
+
+out:
+ ubifs_msg("saved lprops statistics dump");
+ dbg_dump_lstats(&d->saved_lst);
+ ubifs_get_lp_stats(c, &lst);
+ ubifs_msg("current lprops statistics dump");
+ dbg_dump_lstats(&d->saved_lst);
+ spin_lock(&c->space_lock);
+ dbg_dump_budg(c);
+ spin_unlock(&c->space_lock);
+ dump_stack();
+ return -EINVAL;
+}
+
+/**
* dbg_check_synced_i_size - check synchronized inode size.
* @inode: inode to check
*
@@ -1156,7 +1214,7 @@ static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr)
/*
* Make sure the last key in our znode is less or
- * equivalent than the the key in zbranch which goes
+ * equivalent than the key in the zbranch which goes
* after our pointing zbranch.
*/
cmp = keys_cmp(c, max,
@@ -1349,7 +1407,7 @@ int dbg_check_tnc(struct ubifs_info *c, int extra)
* @c: UBIFS file-system description object
* @leaf_cb: called for each leaf node
* @znode_cb: called for each indexing node
- * @priv: private date which is passed to callbacks
+ * @priv: private data which is passed to callbacks
*
* This function walks the UBIFS index and calls the @leaf_cb for each leaf
* node and @znode_cb for each indexing node. Returns zero in case of success
@@ -2409,7 +2467,7 @@ void ubifs_debugging_exit(struct ubifs_info *c)
* Root directory for UBIFS stuff in debugfs. Contains sub-directories which
* contain the stuff specific to particular file-system mounts.
*/
-static struct dentry *debugfs_rootdir;
+static struct dentry *dfs_rootdir;
/**
* dbg_debugfs_init - initialize debugfs file-system.
@@ -2421,9 +2479,9 @@ static struct dentry *debugfs_rootdir;
*/
int dbg_debugfs_init(void)
{
- debugfs_rootdir = debugfs_create_dir("ubifs", NULL);
- if (IS_ERR(debugfs_rootdir)) {
- int err = PTR_ERR(debugfs_rootdir);
+ dfs_rootdir = debugfs_create_dir("ubifs", NULL);
+ if (IS_ERR(dfs_rootdir)) {
+ int err = PTR_ERR(dfs_rootdir);
ubifs_err("cannot create \"ubifs\" debugfs directory, "
"error %d\n", err);
return err;
@@ -2437,7 +2495,7 @@ int dbg_debugfs_init(void)
*/
void dbg_debugfs_exit(void)
{
- debugfs_remove(debugfs_rootdir);
+ debugfs_remove(dfs_rootdir);
}
static int open_debugfs_file(struct inode *inode, struct file *file)
@@ -2452,13 +2510,13 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
struct ubifs_info *c = file->private_data;
struct ubifs_debug_info *d = c->dbg;
- if (file->f_path.dentry == d->dump_lprops)
+ if (file->f_path.dentry == d->dfs_dump_lprops)
dbg_dump_lprops(c);
- else if (file->f_path.dentry == d->dump_budg) {
+ else if (file->f_path.dentry == d->dfs_dump_budg) {
spin_lock(&c->space_lock);
dbg_dump_budg(c);
spin_unlock(&c->space_lock);
- } else if (file->f_path.dentry == d->dump_tnc) {
+ } else if (file->f_path.dentry == d->dfs_dump_tnc) {
mutex_lock(&c->tnc_mutex);
dbg_dump_tnc(c);
mutex_unlock(&c->tnc_mutex);
@@ -2469,7 +2527,7 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
return count;
}
-static const struct file_operations debugfs_fops = {
+static const struct file_operations dfs_fops = {
.open = open_debugfs_file,
.write = write_debugfs_file,
.owner = THIS_MODULE,
@@ -2494,36 +2552,32 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)
struct dentry *dent;
struct ubifs_debug_info *d = c->dbg;
- sprintf(d->debugfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
- d->debugfs_dir = debugfs_create_dir(d->debugfs_dir_name,
- debugfs_rootdir);
- if (IS_ERR(d->debugfs_dir)) {
- err = PTR_ERR(d->debugfs_dir);
+ sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
+ d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir);
+ if (IS_ERR(d->dfs_dir)) {
+ err = PTR_ERR(d->dfs_dir);
ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
- d->debugfs_dir_name, err);
+ d->dfs_dir_name, err);
goto out;
}
fname = "dump_lprops";
- dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c,
- &debugfs_fops);
+ dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops);
if (IS_ERR(dent))
goto out_remove;
- d->dump_lprops = dent;
+ d->dfs_dump_lprops = dent;
fname = "dump_budg";
- dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c,
- &debugfs_fops);
+ dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops);
if (IS_ERR(dent))
goto out_remove;
- d->dump_budg = dent;
+ d->dfs_dump_budg = dent;
fname = "dump_tnc";
- dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c,
- &debugfs_fops);
+ dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops);
if (IS_ERR(dent))
goto out_remove;
- d->dump_tnc = dent;
+ d->dfs_dump_tnc = dent;
return 0;
@@ -2531,7 +2585,7 @@ out_remove:
err = PTR_ERR(dent);
ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
fname, err);
- debugfs_remove_recursive(d->debugfs_dir);
+ debugfs_remove_recursive(d->dfs_dir);
out:
return err;
}
@@ -2542,7 +2596,7 @@ out:
*/
void dbg_debugfs_exit_fs(struct ubifs_info *c)
{
- debugfs_remove_recursive(c->dbg->debugfs_dir);
+ debugfs_remove_recursive(c->dbg->dfs_dir);
}
#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 9820d6999f7..c1cd73b2e06 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -41,15 +41,17 @@
* @chk_lpt_wastage: used by LPT tree size checker
* @chk_lpt_lebs: used by LPT tree size checker
* @new_nhead_offs: used by LPT tree size checker
- * @new_ihead_lnum: used by debugging to check ihead_lnum
- * @new_ihead_offs: used by debugging to check ihead_offs
+ * @new_ihead_lnum: used by debugging to check @c->ihead_lnum
+ * @new_ihead_offs: used by debugging to check @c->ihead_offs
*
- * debugfs_dir_name: name of debugfs directory containing this file-system's
- * files
- * debugfs_dir: direntry object of the file-system debugfs directory
- * dump_lprops: "dump lprops" debugfs knob
- * dump_budg: "dump budgeting information" debugfs knob
- * dump_tnc: "dump TNC" debugfs knob
+ * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()')
+ * @saved_free: saved free space (used by 'dbg_save_space_info()')
+ *
+ * dfs_dir_name: name of debugfs directory containing this file-system's files
+ * dfs_dir: direntry object of the file-system debugfs directory
+ * dfs_dump_lprops: "dump lprops" debugfs knob
+ * dfs_dump_budg: "dump budgeting information" debugfs knob
+ * dfs_dump_tnc: "dump TNC" debugfs knob
*/
struct ubifs_debug_info {
void *buf;
@@ -69,11 +71,14 @@ struct ubifs_debug_info {
int new_ihead_lnum;
int new_ihead_offs;
- char debugfs_dir_name[100];
- struct dentry *debugfs_dir;
- struct dentry *dump_lprops;
- struct dentry *dump_budg;
- struct dentry *dump_tnc;
+ struct ubifs_lp_stats saved_lst;
+ long long saved_free;
+
+ char dfs_dir_name[100];
+ struct dentry *dfs_dir;
+ struct dentry *dfs_dump_lprops;
+ struct dentry *dfs_dump_budg;
+ struct dentry *dfs_dump_tnc;
};
#define ubifs_assert(expr) do { \
@@ -297,7 +302,8 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
dbg_znode_callback znode_cb, void *priv);
/* Checking functions */
-
+void dbg_save_space_info(struct ubifs_info *c);
+int dbg_check_space_info(struct ubifs_info *c);
int dbg_check_lprops(struct ubifs_info *c);
int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot);
int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot);
@@ -439,6 +445,8 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
#define dbg_old_index_check_init(c, zroot) 0
+#define dbg_save_space_info(c) ({})
+#define dbg_check_space_info(c) 0
#define dbg_check_old_index(c, zroot) 0
#define dbg_check_cats(c) 0
#define dbg_check_ltab(c) 0
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index f448ab1f9c3..f55d523c52b 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -482,30 +482,29 @@ static int ubifs_dir_release(struct inode *dir, struct file *file)
}
/**
- * lock_2_inodes - lock two UBIFS inodes.
+ * lock_2_inodes - a wrapper for locking two UBIFS inodes.
* @inode1: first inode
* @inode2: second inode
+ *
+ * We do not implement any tricks to guarantee strict lock ordering, because
+ * VFS has already done it for us on the @i_mutex. So this is just a simple
+ * wrapper function.
*/
static void lock_2_inodes(struct inode *inode1, struct inode *inode2)
{
- if (inode1->i_ino < inode2->i_ino) {
- mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_2);
- mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_3);
- } else {
- mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2);
- mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_3);
- }
+ mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
+ mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2);
}
/**
- * unlock_2_inodes - unlock two UBIFS inodes inodes.
+ * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes.
* @inode1: first inode
* @inode2: second inode
*/
static void unlock_2_inodes(struct inode *inode1, struct inode *inode2)
{
- mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
+ mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
}
static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
@@ -527,6 +526,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu",
dentry->d_name.len, dentry->d_name.name, inode->i_ino,
inode->i_nlink, dir->i_ino);
+ ubifs_assert(mutex_is_locked(&dir->i_mutex));
+ ubifs_assert(mutex_is_locked(&inode->i_mutex));
err = dbg_check_synced_i_size(inode);
if (err)
return err;
@@ -580,6 +581,8 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu",
dentry->d_name.len, dentry->d_name.name, inode->i_ino,
inode->i_nlink, dir->i_ino);
+ ubifs_assert(mutex_is_locked(&dir->i_mutex));
+ ubifs_assert(mutex_is_locked(&inode->i_mutex));
err = dbg_check_synced_i_size(inode);
if (err)
return err;
@@ -667,7 +670,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len,
dentry->d_name.name, inode->i_ino, dir->i_ino);
-
+ ubifs_assert(mutex_is_locked(&dir->i_mutex));
+ ubifs_assert(mutex_is_locked(&inode->i_mutex));
err = check_dir_empty(c, dentry->d_inode);
if (err)
return err;
@@ -922,59 +926,30 @@ out_budg:
}
/**
- * lock_3_inodes - lock three UBIFS inodes for rename.
+ * lock_3_inodes - a wrapper for locking three UBIFS inodes.
* @inode1: first inode
* @inode2: second inode
* @inode3: third inode
*
- * For 'ubifs_rename()', @inode1 may be the same as @inode2 whereas @inode3 may
- * be null.
+ * This function is used for 'ubifs_rename()' and @inode1 may be the same as
+ * @inode2 whereas @inode3 may be %NULL.
+ *
+ * We do not implement any tricks to guarantee strict lock ordering, because
+ * VFS has already done it for us on the @i_mutex. So this is just a simple
+ * wrapper function.
*/
static void lock_3_inodes(struct inode *inode1, struct inode *inode2,
struct inode *inode3)
{
- struct inode *i1, *i2, *i3;
-
- if (!inode3) {
- if (inode1 != inode2) {
- lock_2_inodes(inode1, inode2);
- return;
- }
- mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
- return;
- }
-
- if (inode1 == inode2) {
- lock_2_inodes(inode1, inode3);
- return;
- }
-
- /* 3 different inodes */
- if (inode1 < inode2) {
- i3 = inode2;
- if (inode1 < inode3) {
- i1 = inode1;
- i2 = inode3;
- } else {
- i1 = inode3;
- i2 = inode1;
- }
- } else {
- i3 = inode1;
- if (inode2 < inode3) {
- i1 = inode2;
- i2 = inode3;
- } else {
- i1 = inode3;
- i2 = inode2;
- }
- }
- mutex_lock_nested(&ubifs_inode(i1)->ui_mutex, WB_MUTEX_1);
- lock_2_inodes(i2, i3);
+ mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
+ if (inode2 != inode1)
+ mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2);
+ if (inode3)
+ mutex_lock_nested(&ubifs_inode(inode3)->ui_mutex, WB_MUTEX_3);
}
/**
- * unlock_3_inodes - unlock three UBIFS inodes for rename.
+ * unlock_3_inodes - a wrapper for unlocking three UBIFS inodes for rename.
* @inode1: first inode
* @inode2: second inode
* @inode3: third inode
@@ -982,11 +957,11 @@ static void lock_3_inodes(struct inode *inode1, struct inode *inode2,
static void unlock_3_inodes(struct inode *inode1, struct inode *inode2,
struct inode *inode3)
{
- mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
- if (inode1 != inode2)
- mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
if (inode3)
mutex_unlock(&ubifs_inode(inode3)->ui_mutex);
+ if (inode1 != inode2)
+ mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
+ mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
}
static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
@@ -1020,6 +995,11 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
"dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name,
old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len,
new_dentry->d_name.name, new_dir->i_ino);
+ ubifs_assert(mutex_is_locked(&old_dir->i_mutex));
+ ubifs_assert(mutex_is_locked(&new_dir->i_mutex));
+ if (unlink)
+ ubifs_assert(mutex_is_locked(&new_inode->i_mutex));
+
if (unlink && is_dir) {
err = check_dir_empty(c, new_inode);
@@ -1199,7 +1179,7 @@ int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
return 0;
}
-struct inode_operations ubifs_dir_inode_operations = {
+const struct inode_operations ubifs_dir_inode_operations = {
.lookup = ubifs_lookup,
.create = ubifs_create,
.link = ubifs_link,
@@ -1219,7 +1199,7 @@ struct inode_operations ubifs_dir_inode_operations = {
#endif
};
-struct file_operations ubifs_dir_operations = {
+const struct file_operations ubifs_dir_operations = {
.llseek = ubifs_dir_llseek,
.release = ubifs_dir_release,
.read = generic_read_dir,
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index bf37374567f..6d34dc7e33e 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -430,9 +430,9 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
struct ubifs_inode *ui = ubifs_inode(inode);
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
+ int skipped_read = 0;
struct page *page;
-
ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size);
if (unlikely(c->ro_media))
@@ -445,7 +445,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
if (!PageUptodate(page)) {
/* The page is not loaded from the flash */
- if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
+ if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) {
/*
* We change whole page so no need to load it. But we
* have to set the @PG_checked flag to make the further
@@ -454,7 +454,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
* the media.
*/
SetPageChecked(page);
- else {
+ skipped_read = 1;
+ } else {
err = do_readpage(page);
if (err) {
unlock_page(page);
@@ -471,6 +472,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
if (unlikely(err)) {
ubifs_assert(err == -ENOSPC);
/*
+ * If we skipped reading the page because we were going to
+ * write all of it, then it is not up to date.
+ */
+ if (skipped_read) {
+ ClearPageChecked(page);
+ ClearPageUptodate(page);
+ }
+ /*
* Budgeting failed which means it would have to force
* write-back but didn't, because we set the @fast flag in the
* request. Write-back cannot be done now, while we have the
@@ -950,7 +959,7 @@ static int do_writepage(struct page *page, int len)
* whole index and correct all inode sizes, which is long an unacceptable.
*
* To prevent situations like this, UBIFS writes pages back only if they are
- * within last synchronized inode size, i.e. the the size which has been
+ * within the last synchronized inode size, i.e. the size which has been
* written to the flash media last time. Otherwise, UBIFS forces inode
* write-back, thus making sure the on-flash inode contains current inode size,
* and then keeps writing pages back.
@@ -1435,8 +1444,9 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
* mmap()d file has taken write protection fault and is being made
* writable. UBIFS must ensure page is budgeted for.
*/
-static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
+ struct page *page = vmf->page;
struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct timespec now = ubifs_current_time(inode);
@@ -1448,7 +1458,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY));
if (unlikely(c->ro_media))
- return -EROFS;
+ return VM_FAULT_SIGBUS; /* -EROFS */
/*
* We have not locked @page so far so we may budget for changing the
@@ -1481,7 +1491,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
if (err == -ENOSPC)
ubifs_warn("out of space for mmapped file "
"(inode number %lu)", inode->i_ino);
- return err;
+ return VM_FAULT_SIGBUS;
}
lock_page(page);
@@ -1521,6 +1531,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
out_unlock:
unlock_page(page);
ubifs_release_budget(c, &req);
+ if (err)
+ err = VM_FAULT_SIGBUS;
return err;
}
@@ -1541,7 +1553,7 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
-struct address_space_operations ubifs_file_address_operations = {
+const struct address_space_operations ubifs_file_address_operations = {
.readpage = ubifs_readpage,
.writepage = ubifs_writepage,
.write_begin = ubifs_write_begin,
@@ -1551,7 +1563,7 @@ struct address_space_operations ubifs_file_address_operations = {
.releasepage = ubifs_releasepage,
};
-struct inode_operations ubifs_file_inode_operations = {
+const struct inode_operations ubifs_file_inode_operations = {
.setattr = ubifs_setattr,
.getattr = ubifs_getattr,
#ifdef CONFIG_UBIFS_FS_XATTR
@@ -1562,14 +1574,14 @@ struct inode_operations ubifs_file_inode_operations = {
#endif
};
-struct inode_operations ubifs_symlink_inode_operations = {
+const struct inode_operations ubifs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = ubifs_follow_link,
.setattr = ubifs_setattr,
.getattr = ubifs_getattr,
};
-struct file_operations ubifs_file_operations = {
+const struct file_operations ubifs_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 717d79c97c5..1d54383d126 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -478,7 +478,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c,
* ubifs_find_free_space - find a data LEB with free space.
* @c: the UBIFS file-system description object
* @min_space: minimum amount of required free space
- * @free: contains amount of free space in the LEB on exit
+ * @offs: contains offset of where free space starts on exit
* @squeeze: whether to try to find space in a non-empty LEB first
*
* This function looks for an LEB with at least @min_space bytes of free space.
@@ -490,7 +490,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c,
* failed to find a LEB with @min_space bytes of free space and other a negative
* error codes in case of failure.
*/
-int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
+int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
int squeeze)
{
const struct ubifs_lprops *lprops;
@@ -558,10 +558,10 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
spin_unlock(&c->space_lock);
}
- *free = lprops->free;
+ *offs = c->leb_size - lprops->free;
ubifs_release_lprops(c);
- if (*free == c->leb_size) {
+ if (*offs == 0) {
/*
* Ensure that empty LEBs have been unmapped. They may not have
* been, for example, because of an unclean unmount. Also
@@ -573,8 +573,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
return err;
}
- dbg_find("found LEB %d, free %d", lnum, *free);
- ubifs_assert(*free >= min_space);
+ dbg_find("found LEB %d, free %d", lnum, c->leb_size - *offs);
+ ubifs_assert(*offs <= c->leb_size - min_space);
return lnum;
out:
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 9832f9abe28..f0f5f15d384 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -31,20 +31,32 @@
* to be reused. Garbage collection will cause the number of dirty index nodes
* to grow, however sufficient space is reserved for the index to ensure the
* commit will never run out of space.
+ *
+ * Notes about dead watermark. At current UBIFS implementation we assume that
+ * LEBs which have less than @c->dead_wm bytes of free + dirty space are full
+ * and not worth garbage-collecting. The dead watermark is one min. I/O unit
+ * size, or min. UBIFS node size, depending on what is greater. Indeed, UBIFS
+ * Garbage Collector has to synchronize the GC head's write buffer before
+ * returning, so this is about wasting one min. I/O unit. However, UBIFS GC can
+ * actually reclaim even very small pieces of dirty space by garbage collecting
+ * enough dirty LEBs, but we do not bother doing this at this implementation.
+ *
+ * Notes about dark watermark. The results of GC work depends on how big are
+ * the UBIFS nodes GC deals with. Large nodes make GC waste more space. Indeed,
+ * if GC move data from LEB A to LEB B and nodes in LEB A are large, GC would
+ * have to waste large pieces of free space at the end of LEB B, because nodes
+ * from LEB A would not fit. And the worst situation is when all nodes are of
+ * maximum size. So dark watermark is the amount of free + dirty space in LEB
+ * which are guaranteed to be reclaimable. If LEB has less space, the GC might
+ * be unable to reclaim it. So, LEBs with free + dirty greater than dark
+ * watermark are "good" LEBs from GC's point of few. The other LEBs are not so
+ * good, and GC takes extra care when moving them.
*/
#include <linux/pagemap.h>
#include "ubifs.h"
/*
- * GC tries to optimize the way it fit nodes to available space, and it sorts
- * nodes a little. The below constants are watermarks which define "large",
- * "medium", and "small" nodes.
- */
-#define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4)
-#define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ
-
-/*
* GC may need to move more than one LEB to make progress. The below constants
* define "soft" and "hard" limits on the number of LEBs the garbage collector
* may move.
@@ -96,83 +108,222 @@ static int switch_gc_head(struct ubifs_info *c)
}
/**
- * joinup - bring data nodes for an inode together.
- * @c: UBIFS file-system description object
- * @sleb: describes scanned LEB
- * @inum: inode number
- * @blk: block number
- * @data: list to which to add data nodes
+ * list_sort - sort a list.
+ * @priv: private data, passed to @cmp
+ * @head: the list to sort
+ * @cmp: the elements comparison function
+ *
+ * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
+ * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
+ * in ascending order.
*
- * This function looks at the first few nodes in the scanned LEB @sleb and adds
- * them to @data if they are data nodes from @inum and have a larger block
- * number than @blk. This function returns %0 on success and a negative error
- * code on failure.
+ * The comparison function @cmp is supposed to return a negative value if @a is
+ * than @b, and a positive value if @a is greater than @b. If @a and @b are
+ * equivalent, then it does not matter what this function returns.
*/
-static int joinup(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ino_t inum,
- unsigned int blk, struct list_head *data)
+static void list_sort(void *priv, struct list_head *head,
+ int (*cmp)(void *priv, struct list_head *a,
+ struct list_head *b))
{
- int err, cnt = 6, lnum = sleb->lnum, offs;
- struct ubifs_scan_node *snod, *tmp;
- union ubifs_key *key;
+ struct list_head *p, *q, *e, *list, *tail, *oldhead;
+ int insize, nmerges, psize, qsize, i;
+
+ if (list_empty(head))
+ return;
+
+ list = head->next;
+ list_del(head);
+ insize = 1;
+ for (;;) {
+ p = oldhead = list;
+ list = tail = NULL;
+ nmerges = 0;
+
+ while (p) {
+ nmerges++;
+ q = p;
+ psize = 0;
+ for (i = 0; i < insize; i++) {
+ psize++;
+ q = q->next == oldhead ? NULL : q->next;
+ if (!q)
+ break;
+ }
- list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
- key = &snod->key;
- if (key_inum(c, key) == inum &&
- key_type(c, key) == UBIFS_DATA_KEY &&
- key_block(c, key) > blk) {
- offs = snod->offs;
- err = ubifs_tnc_has_node(c, key, 0, lnum, offs, 0);
- if (err < 0)
- return err;
- list_del(&snod->list);
- if (err) {
- list_add_tail(&snod->list, data);
- blk = key_block(c, key);
- } else
- kfree(snod);
- cnt = 6;
- } else if (--cnt == 0)
+ qsize = insize;
+ while (psize > 0 || (qsize > 0 && q)) {
+ if (!psize) {
+ e = q;
+ q = q->next;
+ qsize--;
+ if (q == oldhead)
+ q = NULL;
+ } else if (!qsize || !q) {
+ e = p;
+ p = p->next;
+ psize--;
+ if (p == oldhead)
+ p = NULL;
+ } else if (cmp(priv, p, q) <= 0) {
+ e = p;
+ p = p->next;
+ psize--;
+ if (p == oldhead)
+ p = NULL;
+ } else {
+ e = q;
+ q = q->next;
+ qsize--;
+ if (q == oldhead)
+ q = NULL;
+ }
+ if (tail)
+ tail->next = e;
+ else
+ list = e;
+ e->prev = tail;
+ tail = e;
+ }
+ p = q;
+ }
+
+ tail->next = list;
+ list->prev = tail;
+
+ if (nmerges <= 1)
break;
+
+ insize *= 2;
}
- return 0;
+
+ head->next = list;
+ head->prev = list->prev;
+ list->prev->next = head;
+ list->prev = head;
}
/**
- * move_nodes - move nodes.
+ * data_nodes_cmp - compare 2 data nodes.
+ * @priv: UBIFS file-system description object
+ * @a: first data node
+ * @a: second data node
+ *
+ * This function compares data nodes @a and @b. Returns %1 if @a has greater
+ * inode or block number, and %-1 otherwise.
+ */
+int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+ ino_t inuma, inumb;
+ struct ubifs_info *c = priv;
+ struct ubifs_scan_node *sa, *sb;
+
+ cond_resched();
+ sa = list_entry(a, struct ubifs_scan_node, list);
+ sb = list_entry(b, struct ubifs_scan_node, list);
+ ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY);
+ ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY);
+
+ inuma = key_inum(c, &sa->key);
+ inumb = key_inum(c, &sb->key);
+
+ if (inuma == inumb) {
+ unsigned int blka = key_block(c, &sa->key);
+ unsigned int blkb = key_block(c, &sb->key);
+
+ if (blka <= blkb)
+ return -1;
+ } else if (inuma <= inumb)
+ return -1;
+
+ return 1;
+}
+
+/*
+ * nondata_nodes_cmp - compare 2 non-data nodes.
+ * @priv: UBIFS file-system description object
+ * @a: first node
+ * @a: second node
+ *
+ * This function compares nodes @a and @b. It makes sure that inode nodes go
+ * first and sorted by length in descending order. Directory entry nodes go
+ * after inode nodes and are sorted in ascending hash valuer order.
+ */
+int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+ int typea, typeb;
+ ino_t inuma, inumb;
+ struct ubifs_info *c = priv;
+ struct ubifs_scan_node *sa, *sb;
+
+ cond_resched();
+ sa = list_entry(a, struct ubifs_scan_node, list);
+ sb = list_entry(b, struct ubifs_scan_node, list);
+ typea = key_type(c, &sa->key);
+ typeb = key_type(c, &sb->key);
+ ubifs_assert(typea != UBIFS_DATA_KEY && typeb != UBIFS_DATA_KEY);
+
+ /* Inodes go before directory entries */
+ if (typea == UBIFS_INO_KEY) {
+ if (typeb == UBIFS_INO_KEY)
+ return sb->len - sa->len;
+ return -1;
+ }
+ if (typeb == UBIFS_INO_KEY)
+ return 1;
+
+ ubifs_assert(typea == UBIFS_DENT_KEY && typeb == UBIFS_DENT_KEY);
+ inuma = key_inum(c, &sa->key);
+ inumb = key_inum(c, &sb->key);
+
+ if (inuma == inumb) {
+ uint32_t hasha = key_hash(c, &sa->key);
+ uint32_t hashb = key_hash(c, &sb->key);
+
+ if (hasha <= hashb)
+ return -1;
+ } else if (inuma <= inumb)
+ return -1;
+
+ return 1;
+}
+
+/**
+ * sort_nodes - sort nodes for GC.
* @c: UBIFS file-system description object
- * @sleb: describes nodes to move
+ * @sleb: describes nodes to sort and contains the result on exit
+ * @nondata: contains non-data nodes on exit
+ * @min: minimum node size is returned here
*
- * This function moves valid nodes from data LEB described by @sleb to the GC
- * journal head. The obsolete nodes are dropped.
+ * This function sorts the list of inodes to garbage collect. First of all, it
+ * kills obsolete nodes and separates data and non-data nodes to the
+ * @sleb->nodes and @nondata lists correspondingly.
+ *
+ * Data nodes are then sorted in block number order - this is important for
+ * bulk-read; data nodes with lower inode number go before data nodes with
+ * higher inode number, and data nodes with lower block number go before data
+ * nodes with higher block number;
*
- * When moving nodes we have to deal with classical bin-packing problem: the
- * space in the current GC journal head LEB and in @c->gc_lnum are the "bins",
- * where the nodes in the @sleb->nodes list are the elements which should be
- * fit optimally to the bins. This function uses the "first fit decreasing"
- * strategy, although it does not really sort the nodes but just split them on
- * 3 classes - large, medium, and small, so they are roughly sorted.
+ * Non-data nodes are sorted as follows.
+ * o First go inode nodes - they are sorted in descending length order.
+ * o Then go directory entry nodes - they are sorted in hash order, which
+ * should supposedly optimize 'readdir()'. Direntry nodes with lower parent
+ * inode number go before direntry nodes with higher parent inode number,
+ * and direntry nodes with lower name hash values go before direntry nodes
+ * with higher name hash values.
*
- * This function returns zero in case of success, %-EAGAIN if commit is
- * required, and other negative error codes in case of other failures.
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
*/
-static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
+static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
+ struct list_head *nondata, int *min)
{
struct ubifs_scan_node *snod, *tmp;
- struct list_head data, large, medium, small;
- struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
- int avail, err, min = INT_MAX;
- unsigned int blk = 0;
- ino_t inum = 0;
- INIT_LIST_HEAD(&data);
- INIT_LIST_HEAD(&large);
- INIT_LIST_HEAD(&medium);
- INIT_LIST_HEAD(&small);
+ *min = INT_MAX;
- while (!list_empty(&sleb->nodes)) {
- struct list_head *lst = sleb->nodes.next;
-
- snod = list_entry(lst, struct ubifs_scan_node, list);
+ /* Separate data nodes and non-data nodes */
+ list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
+ int err;
ubifs_assert(snod->type != UBIFS_IDX_NODE);
ubifs_assert(snod->type != UBIFS_REF_NODE);
@@ -181,53 +332,72 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum,
snod->offs, 0);
if (err < 0)
- goto out;
+ return err;
- list_del(lst);
if (!err) {
/* The node is obsolete, remove it from the list */
+ list_del(&snod->list);
kfree(snod);
continue;
}
- /*
- * Sort the list of nodes so that data nodes go first, large
- * nodes go second, and small nodes go last.
- */
- if (key_type(c, &snod->key) == UBIFS_DATA_KEY) {
- if (inum != key_inum(c, &snod->key)) {
- if (inum) {
- /*
- * Try to move data nodes from the same
- * inode together.
- */
- err = joinup(c, sleb, inum, blk, &data);
- if (err)
- goto out;
- }
- inum = key_inum(c, &snod->key);
- blk = key_block(c, &snod->key);
- }
- list_add_tail(lst, &data);
- } else if (snod->len > MEDIUM_NODE_WM)
- list_add_tail(lst, &large);
- else if (snod->len > SMALL_NODE_WM)
- list_add_tail(lst, &medium);
- else
- list_add_tail(lst, &small);
-
- /* And find the smallest node */
- if (snod->len < min)
- min = snod->len;
+ if (snod->len < *min)
+ *min = snod->len;
+
+ if (key_type(c, &snod->key) != UBIFS_DATA_KEY)
+ list_move_tail(&snod->list, nondata);
}
- /*
- * Join the tree lists so that we'd have one roughly sorted list
- * ('large' will be the head of the joined list).
- */
- list_splice(&data, &large);
- list_splice(&medium, large.prev);
- list_splice(&small, large.prev);
+ /* Sort data and non-data nodes */
+ list_sort(c, &sleb->nodes, &data_nodes_cmp);
+ list_sort(c, nondata, &nondata_nodes_cmp);
+ return 0;
+}
+
+/**
+ * move_node - move a node.
+ * @c: UBIFS file-system description object
+ * @sleb: describes the LEB to move nodes from
+ * @snod: the mode to move
+ * @wbuf: write-buffer to move node to
+ *
+ * This function moves node @snod to @wbuf, changes TNC correspondingly, and
+ * destroys @snod. Returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int move_node(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
+ struct ubifs_scan_node *snod, struct ubifs_wbuf *wbuf)
+{
+ int err, new_lnum = wbuf->lnum, new_offs = wbuf->offs + wbuf->used;
+
+ cond_resched();
+ err = ubifs_wbuf_write_nolock(wbuf, snod->node, snod->len);
+ if (err)
+ return err;
+
+ err = ubifs_tnc_replace(c, &snod->key, sleb->lnum,
+ snod->offs, new_lnum, new_offs,
+ snod->len);
+ list_del(&snod->list);
+ kfree(snod);
+ return err;
+}
+
+/**
+ * move_nodes - move nodes.
+ * @c: UBIFS file-system description object
+ * @sleb: describes the LEB to move nodes from
+ *
+ * This function moves valid nodes from data LEB described by @sleb to the GC
+ * journal head. This function returns zero in case of success, %-EAGAIN if
+ * commit is required, and other negative error codes in case of other
+ * failures.
+ */
+static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
+{
+ int err, min;
+ LIST_HEAD(nondata);
+ struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
if (wbuf->lnum == -1) {
/*
@@ -236,42 +406,59 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
*/
err = switch_gc_head(c);
if (err)
- goto out;
+ return err;
}
+ err = sort_nodes(c, sleb, &nondata, &min);
+ if (err)
+ goto out;
+
/* Write nodes to their new location. Use the first-fit strategy */
while (1) {
- avail = c->leb_size - wbuf->offs - wbuf->used;
- list_for_each_entry_safe(snod, tmp, &large, list) {
- int new_lnum, new_offs;
+ int avail;
+ struct ubifs_scan_node *snod, *tmp;
+
+ /* Move data nodes */
+ list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
+ avail = c->leb_size - wbuf->offs - wbuf->used;
+ if (snod->len > avail)
+ /*
+ * Do not skip data nodes in order to optimize
+ * bulk-read.
+ */
+ break;
+
+ err = move_node(c, sleb, snod, wbuf);
+ if (err)
+ goto out;
+ }
+ /* Move non-data nodes */
+ list_for_each_entry_safe(snod, tmp, &nondata, list) {
+ avail = c->leb_size - wbuf->offs - wbuf->used;
if (avail < min)
break;
- if (snod->len > avail)
- /* This node does not fit */
+ if (snod->len > avail) {
+ /*
+ * Keep going only if this is an inode with
+ * some data. Otherwise stop and switch the GC
+ * head. IOW, we assume that data-less inode
+ * nodes and direntry nodes are roughly of the
+ * same size.
+ */
+ if (key_type(c, &snod->key) == UBIFS_DENT_KEY ||
+ snod->len == UBIFS_INO_NODE_SZ)
+ break;
continue;
+ }
- cond_resched();
-
- new_lnum = wbuf->lnum;
- new_offs = wbuf->offs + wbuf->used;
- err = ubifs_wbuf_write_nolock(wbuf, snod->node,
- snod->len);
- if (err)
- goto out;
- err = ubifs_tnc_replace(c, &snod->key, sleb->lnum,
- snod->offs, new_lnum, new_offs,
- snod->len);
+ err = move_node(c, sleb, snod, wbuf);
if (err)
goto out;
-
- avail = c->leb_size - wbuf->offs - wbuf->used;
- list_del(&snod->list);
- kfree(snod);
}
- if (list_empty(&large))
+ if (list_empty(&sleb->nodes) && list_empty(&nondata))
break;
/*
@@ -286,10 +473,7 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
return 0;
out:
- list_for_each_entry_safe(snod, tmp, &large, list) {
- list_del(&snod->list);
- kfree(snod);
- }
+ list_splice_tail(&nondata, &sleb->nodes);
return err;
}
@@ -381,7 +565,7 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
/*
* Don't release the LEB until after the next commit, because
- * it may contain date which is needed for recovery. So
+ * it may contain data which is needed for recovery. So
* although we freed this LEB, it will become usable only after
* the commit.
*/
@@ -810,8 +994,9 @@ out:
* ubifs_destroy_idx_gc - destroy idx_gc list.
* @c: UBIFS file-system description object
*
- * This function destroys the idx_gc list. It is called when unmounting or
- * remounting read-only so locks are not needed.
+ * This function destroys the @c->idx_gc list. It is called when unmounting
+ * so locks are not needed. Returns zero in case of success and a negative
+ * error code in case of failure.
*/
void ubifs_destroy_idx_gc(struct ubifs_info *c)
{
@@ -824,7 +1009,6 @@ void ubifs_destroy_idx_gc(struct ubifs_info *c)
list_del(&idx_gc->list);
kfree(idx_gc);
}
-
}
/**
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 01682713af6..e8e632a1dcd 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -29,7 +29,7 @@
* would have been wasted for padding to the nearest minimal I/O unit boundary.
* Instead, data first goes to the write-buffer and is flushed when the
* buffer is full or when it is not used for some time (by timer). This is
- * similarto the mechanism is used by JFFS2.
+ * similar to the mechanism is used by JFFS2.
*
* Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
* mutexes defined inside these objects. Since sometimes upper-level code
@@ -75,7 +75,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
* @lnum: logical eraseblock number
* @offs: offset within the logical eraseblock
* @quiet: print no messages
- * @chk_crc: indicates whether to always check the CRC
+ * @must_chk_crc: indicates whether to always check the CRC
*
* This function checks node magic number and CRC checksum. This function also
* validates node length to prevent UBIFS from becoming crazy when an attacker
@@ -83,11 +83,17 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
* node length in the common header could cause UBIFS to read memory outside of
* allocated buffer when checking the CRC checksum.
*
- * This function returns zero in case of success %-EUCLEAN in case of bad CRC
- * or magic.
+ * This function may skip data nodes CRC checking if @c->no_chk_data_crc is
+ * true, which is controlled by corresponding UBIFS mount option. However, if
+ * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is
+ * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is
+ * ignored and CRC is checked.
+ *
+ * This function returns zero in case of success and %-EUCLEAN in case of bad
+ * CRC or magic.
*/
int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
- int offs, int quiet, int chk_crc)
+ int offs, int quiet, int must_chk_crc)
{
int err = -EINVAL, type, node_len;
uint32_t crc, node_crc, magic;
@@ -123,9 +129,9 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
node_len > c->ranges[type].max_len)
goto out_len;
- if (!chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc)
- if (c->no_chk_data_crc)
- return 0;
+ if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc &&
+ c->no_chk_data_crc)
+ return 0;
crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
node_crc = le32_to_cpu(ch->crc);
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 9b7c54e0cd2..64b5f3a309f 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -114,7 +114,7 @@ static inline void zero_trun_node_unused(struct ubifs_trun_node *trun)
*/
static int reserve_space(struct ubifs_info *c, int jhead, int len)
{
- int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze;
+ int err = 0, err1, retries = 0, avail, lnum, offs, squeeze;
struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
/*
@@ -139,10 +139,9 @@ again:
* Write buffer wasn't seek'ed or there is no enough space - look for an
* LEB with some empty space.
*/
- lnum = ubifs_find_free_space(c, len, &free, squeeze);
+ lnum = ubifs_find_free_space(c, len, &offs, squeeze);
if (lnum >= 0) {
/* Found an LEB, add it to the journal head */
- offs = c->leb_size - free;
err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
if (err)
goto out_return;
@@ -208,7 +207,7 @@ again:
offs = 0;
out:
- err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, UBI_SHORTTERM);
+ err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype);
if (err)
goto out_unlock;
@@ -1366,7 +1365,7 @@ out_ro:
* @host: host inode
*
* This function writes the updated version of an extended attribute inode and
- * the host inode tho the journal (to the base head). The host inode is written
+ * the host inode to the journal (to the base head). The host inode is written
* after the extended attribute inode in order to guarantee that the extended
* attribute will be flushed when the inode is synchronized by 'fsync()' and
* consequently, the write-buffer is synchronized. This function returns zero
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index efb3430a258..5fa27ea031b 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -381,8 +381,8 @@ static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k)
* @c: UBIFS file-system description object
* @key: the key to get hash from
*/
-static inline int key_hash(const struct ubifs_info *c,
- const union ubifs_key *key)
+static inline uint32_t key_hash(const struct ubifs_info *c,
+ const union ubifs_key *key)
{
return key->u32[1] & UBIFS_S_KEY_HASH_MASK;
}
@@ -392,7 +392,7 @@ static inline int key_hash(const struct ubifs_info *c,
* @c: UBIFS file-system description object
* @k: the key to get hash from
*/
-static inline int key_hash_flash(const struct ubifs_info *c, const void *k)
+static inline uint32_t key_hash_flash(const struct ubifs_info *c, const void *k)
{
const union ubifs_key *key = k;
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 3e0aa736755..56e33772a1e 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -239,7 +239,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
}
/*
- * Make sure the the amount of space in buds will not exceed
+ * Make sure the amount of space in buds will not exceed the
* 'c->max_bud_bytes' limit, because we want to guarantee mount time
* limits.
*
@@ -367,7 +367,6 @@ static void remove_buds(struct ubifs_info *c)
bud->jhead, c->leb_size - bud->start,
c->cmt_bud_bytes);
rb_erase(p1, &c->buds);
- list_del(&bud->list);
/*
* If the commit does not finish, the recovery will need
* to replay the journal, in which case the old buds
@@ -375,7 +374,7 @@ static void remove_buds(struct ubifs_info *c)
* commit i.e. do not allow them to be garbage
* collected.
*/
- list_add(&bud->list, &c->old_buds);
+ list_move(&bud->list, &c->old_buds);
}
}
spin_unlock(&c->buds_lock);
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index dfd2bcece27..4cdd284dea5 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -635,10 +635,10 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
* @c: UBIFS file-system description object
* @st: return statistics
*/
-void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *st)
+void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst)
{
spin_lock(&c->space_lock);
- memcpy(st, &c->lst, sizeof(struct ubifs_lp_stats));
+ memcpy(lst, &c->lst, sizeof(struct ubifs_lp_stats));
spin_unlock(&c->space_lock);
}
@@ -678,6 +678,9 @@ int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
out:
ubifs_release_lprops(c);
+ if (err)
+ ubifs_err("cannot change properties of LEB %d, error %d",
+ lnum, err);
return err;
}
@@ -714,6 +717,9 @@ int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
out:
ubifs_release_lprops(c);
+ if (err)
+ ubifs_err("cannot update properties of LEB %d, error %d",
+ lnum, err);
return err;
}
@@ -737,6 +743,8 @@ int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp)
lpp = ubifs_lpt_lookup(c, lnum);
if (IS_ERR(lpp)) {
err = PTR_ERR(lpp);
+ ubifs_err("cannot read properties of LEB %d, error %d",
+ lnum, err);
goto out;
}
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 96ca9570717..8cbfb824802 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -229,7 +229,7 @@ static int layout_cnodes(struct ubifs_info *c)
while (offs + len > c->leb_size) {
alen = ALIGN(offs, c->min_io_size);
upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
- dbg_chk_lpt_sz(c, 2, alen - offs);
+ dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
err = alloc_lpt_leb(c, &lnum);
if (err)
goto no_space;
@@ -272,7 +272,7 @@ static int layout_cnodes(struct ubifs_info *c)
if (offs + c->lsave_sz > c->leb_size) {
alen = ALIGN(offs, c->min_io_size);
upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
- dbg_chk_lpt_sz(c, 2, alen - offs);
+ dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
err = alloc_lpt_leb(c, &lnum);
if (err)
goto no_space;
@@ -292,7 +292,7 @@ static int layout_cnodes(struct ubifs_info *c)
if (offs + c->ltab_sz > c->leb_size) {
alen = ALIGN(offs, c->min_io_size);
upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
- dbg_chk_lpt_sz(c, 2, alen - offs);
+ dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
err = alloc_lpt_leb(c, &lnum);
if (err)
goto no_space;
@@ -416,14 +416,12 @@ static int write_cnodes(struct ubifs_info *c)
alen, UBI_SHORTTERM);
if (err)
return err;
- dbg_chk_lpt_sz(c, 4, alen - wlen);
}
- dbg_chk_lpt_sz(c, 2, 0);
+ dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
err = realloc_lpt_leb(c, &lnum);
if (err)
goto no_space;
- offs = 0;
- from = 0;
+ offs = from = 0;
ubifs_assert(lnum >= c->lpt_first &&
lnum <= c->lpt_last);
err = ubifs_leb_unmap(c, lnum);
@@ -477,11 +475,11 @@ static int write_cnodes(struct ubifs_info *c)
UBI_SHORTTERM);
if (err)
return err;
- dbg_chk_lpt_sz(c, 2, alen - wlen);
+ dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
err = realloc_lpt_leb(c, &lnum);
if (err)
goto no_space;
- offs = 0;
+ offs = from = 0;
ubifs_assert(lnum >= c->lpt_first &&
lnum <= c->lpt_last);
err = ubifs_leb_unmap(c, lnum);
@@ -504,11 +502,11 @@ static int write_cnodes(struct ubifs_info *c)
UBI_SHORTTERM);
if (err)
return err;
- dbg_chk_lpt_sz(c, 2, alen - wlen);
+ dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
err = realloc_lpt_leb(c, &lnum);
if (err)
goto no_space;
- offs = 0;
+ offs = from = 0;
ubifs_assert(lnum >= c->lpt_first &&
lnum <= c->lpt_last);
err = ubifs_leb_unmap(c, lnum);
@@ -556,23 +554,23 @@ no_space:
}
/**
- * next_pnode - find next pnode.
+ * next_pnode_to_dirty - find next pnode to dirty.
* @c: UBIFS file-system description object
* @pnode: pnode
*
- * This function returns the next pnode or %NULL if there are no more pnodes.
+ * This function returns the next pnode to dirty or %NULL if there are no more
+ * pnodes. Note that pnodes that have never been written (lnum == 0) are
+ * skipped.
*/
-static struct ubifs_pnode *next_pnode(struct ubifs_info *c,
- struct ubifs_pnode *pnode)
+static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c,
+ struct ubifs_pnode *pnode)
{
struct ubifs_nnode *nnode;
int iip;
/* Try to go right */
nnode = pnode->parent;
- iip = pnode->iip + 1;
- if (iip < UBIFS_LPT_FANOUT) {
- /* We assume here that LEB zero is never an LPT LEB */
+ for (iip = pnode->iip + 1; iip < UBIFS_LPT_FANOUT; iip++) {
if (nnode->nbranch[iip].lnum)
return ubifs_get_pnode(c, nnode, iip);
}
@@ -583,8 +581,11 @@ static struct ubifs_pnode *next_pnode(struct ubifs_info *c,
nnode = nnode->parent;
if (!nnode)
return NULL;
- /* We assume here that LEB zero is never an LPT LEB */
- } while (iip >= UBIFS_LPT_FANOUT || !nnode->nbranch[iip].lnum);
+ for (; iip < UBIFS_LPT_FANOUT; iip++) {
+ if (nnode->nbranch[iip].lnum)
+ break;
+ }
+ } while (iip >= UBIFS_LPT_FANOUT);
/* Go right */
nnode = ubifs_get_nnode(c, nnode, iip);
@@ -593,12 +594,29 @@ static struct ubifs_pnode *next_pnode(struct ubifs_info *c,
/* Go down to level 1 */
while (nnode->level > 1) {
- nnode = ubifs_get_nnode(c, nnode, 0);
+ for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++) {
+ if (nnode->nbranch[iip].lnum)
+ break;
+ }
+ if (iip >= UBIFS_LPT_FANOUT) {
+ /*
+ * Should not happen, but we need to keep going
+ * if it does.
+ */
+ iip = 0;
+ }
+ nnode = ubifs_get_nnode(c, nnode, iip);
if (IS_ERR(nnode))
return (void *)nnode;
}
- return ubifs_get_pnode(c, nnode, 0);
+ for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++)
+ if (nnode->nbranch[iip].lnum)
+ break;
+ if (iip >= UBIFS_LPT_FANOUT)
+ /* Should not happen, but we need to keep going if it does */
+ iip = 0;
+ return ubifs_get_pnode(c, nnode, iip);
}
/**
@@ -688,7 +706,7 @@ static int make_tree_dirty(struct ubifs_info *c)
pnode = pnode_lookup(c, 0);
while (pnode) {
do_make_pnode_dirty(c, pnode);
- pnode = next_pnode(c, pnode);
+ pnode = next_pnode_to_dirty(c, pnode);
if (IS_ERR(pnode))
return PTR_ERR(pnode);
}
@@ -1736,10 +1754,16 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
/**
* dbg_chk_lpt_sz - check LPT does not write more than LPT size.
* @c: the UBIFS file-system description object
- * @action: action
+ * @action: what to do
* @len: length written
*
* This function returns %0 on success and a negative error code on failure.
+ * The @action argument may be one of:
+ * o %0 - LPT debugging checking starts, initialize debugging variables;
+ * o %1 - wrote an LPT node, increase LPT size by @len bytes;
+ * o %2 - switched to a different LEB and wasted @len bytes;
+ * o %3 - check that we've written the right number of bytes.
+ * o %4 - wasted @len bytes;
*/
int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
{
@@ -1897,12 +1921,12 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
lnum, offs);
err = ubifs_unpack_nnode(c, buf, &nnode);
for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
- printk("%d:%d", nnode.nbranch[i].lnum,
+ printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum,
nnode.nbranch[i].offs);
if (i != UBIFS_LPT_FANOUT - 1)
- printk(", ");
+ printk(KERN_CONT ", ");
}
- printk("\n");
+ printk(KERN_CONT "\n");
break;
}
case UBIFS_LPT_LTAB:
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index 71d5493bf56..a88f33801b9 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -354,7 +354,7 @@ int ubifs_write_master(struct ubifs_info *c)
int err, lnum, offs, len;
if (c->ro_media)
- return -EINVAL;
+ return -EROFS;
lnum = UBIFS_MST_LNUM;
offs = c->mst_offs + c->mst_node_alsz;
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 9e6f403f170..152a7b34a14 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -46,7 +46,7 @@
* Orphans are accumulated in a rb-tree. When an inode's link count drops to
* zero, the inode number is added to the rb-tree. It is removed from the tree
* when the inode is deleted. Any new orphans that are in the orphan tree when
- * the commit is run, are written to the orphan area in 1 or more orph nodes.
+ * the commit is run, are written to the orphan area in 1 or more orphan nodes.
* If the orphan area is full, it is consolidated to make space. There is
* always enough space because validation prevents the user from creating more
* than the maximum number of orphans allowed.
@@ -231,7 +231,7 @@ static int tot_avail_orphs(struct ubifs_info *c)
}
/**
- * do_write_orph_node - write a node
+ * do_write_orph_node - write a node to the orphan head.
* @c: UBIFS file-system description object
* @len: length of node
* @atomic: write atomically
@@ -264,11 +264,11 @@ static int do_write_orph_node(struct ubifs_info *c, int len, int atomic)
}
/**
- * write_orph_node - write an orph node
+ * write_orph_node - write an orphan node.
* @c: UBIFS file-system description object
* @atomic: write atomically
*
- * This function builds an orph node from the cnext list and writes it to the
+ * This function builds an orphan node from the cnext list and writes it to the
* orphan head. On success, %0 is returned, otherwise a negative error code
* is returned.
*/
@@ -326,11 +326,11 @@ static int write_orph_node(struct ubifs_info *c, int atomic)
}
/**
- * write_orph_nodes - write orph nodes until there are no more to commit
+ * write_orph_nodes - write orphan nodes until there are no more to commit.
* @c: UBIFS file-system description object
* @atomic: write atomically
*
- * This function writes orph nodes for all the orphans to commit. On success,
+ * This function writes orphan nodes for all the orphans to commit. On success,
* %0 is returned, otherwise a negative error code is returned.
*/
static int write_orph_nodes(struct ubifs_info *c, int atomic)
@@ -478,14 +478,14 @@ int ubifs_orphan_end_commit(struct ubifs_info *c)
}
/**
- * clear_orphans - erase all LEBs used for orphans.
+ * ubifs_clear_orphans - erase all LEBs used for orphans.
* @c: UBIFS file-system description object
*
* If recovery is not required, then the orphans from the previous session
* are not needed. This function locates the LEBs used to record
* orphans, and un-maps them.
*/
-static int clear_orphans(struct ubifs_info *c)
+int ubifs_clear_orphans(struct ubifs_info *c)
{
int lnum, err;
@@ -547,9 +547,9 @@ static int insert_dead_orphan(struct ubifs_info *c, ino_t inum)
* do_kill_orphans - remove orphan inodes from the index.
* @c: UBIFS file-system description object
* @sleb: scanned LEB
- * @last_cmt_no: cmt_no of last orph node read is passed and returned here
+ * @last_cmt_no: cmt_no of last orphan node read is passed and returned here
* @outofdate: whether the LEB is out of date is returned here
- * @last_flagged: whether the end orph node is encountered
+ * @last_flagged: whether the end orphan node is encountered
*
* This function is a helper to the 'kill_orphans()' function. It goes through
* every orphan node in a LEB and for every inode number recorded, removes
@@ -580,8 +580,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
/*
* The commit number on the master node may be less, because
* of a failed commit. If there are several failed commits in a
- * row, the commit number written on orph nodes will continue to
- * increase (because the commit number is adjusted here) even
+ * row, the commit number written on orphan nodes will continue
+ * to increase (because the commit number is adjusted here) even
* though the commit number on the master node stays the same
* because the master node has not been re-written.
*/
@@ -589,9 +589,9 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
c->cmt_no = cmt_no;
if (cmt_no < *last_cmt_no && *last_flagged) {
/*
- * The last orph node had a higher commit number and was
- * flagged as the last written for that commit number.
- * That makes this orph node, out of date.
+ * The last orphan node had a higher commit number and
+ * was flagged as the last written for that commit
+ * number. That makes this orphan node, out of date.
*/
if (!first) {
ubifs_err("out of order commit number %llu in "
@@ -658,10 +658,10 @@ static int kill_orphans(struct ubifs_info *c)
/*
* Orph nodes always start at c->orph_first and are written to each
* successive LEB in turn. Generally unused LEBs will have been unmapped
- * but may contain out of date orph nodes if the unmap didn't go
- * through. In addition, the last orph node written for each commit is
+ * but may contain out of date orphan nodes if the unmap didn't go
+ * through. In addition, the last orphan node written for each commit is
* marked (top bit of orph->cmt_no is set to 1). It is possible that
- * there are orph nodes from the next commit (i.e. the commit did not
+ * there are orphan nodes from the next commit (i.e. the commit did not
* complete successfully). In that case, no orphans will have been lost
* due to the way that orphans are written, and any orphans added will
* be valid orphans anyway and so can be deleted.
@@ -718,7 +718,7 @@ int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only)
if (unclean)
err = kill_orphans(c);
else if (!read_only)
- err = clear_orphans(c);
+ err = ubifs_clear_orphans(c);
return err;
}
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 90acac603e6..10662975d2e 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -425,59 +425,35 @@ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
* @lnum: LEB number of the LEB from which @buf was read
* @offs: offset from which @buf was read
*
- * This function scans @buf for more nodes and returns %0 is a node is found and
- * %1 if no more nodes are found.
+ * This function ensures that the corrupted node at @offs is the last thing
+ * written to a LEB. This function returns %1 if more data is not found and
+ * %0 if more data is found.
*/
static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
int lnum, int offs)
{
- int skip, next_offs = 0;
+ struct ubifs_ch *ch = buf;
+ int skip, dlen = le32_to_cpu(ch->len);
- if (len > UBIFS_DATA_NODE_SZ) {
- struct ubifs_ch *ch = buf;
- int dlen = le32_to_cpu(ch->len);
-
- if (ch->node_type == UBIFS_DATA_NODE && dlen >= UBIFS_CH_SZ &&
- dlen <= UBIFS_MAX_DATA_NODE_SZ)
- /* The corrupt node looks like a data node */
- next_offs = ALIGN(offs + dlen, 8);
- }
-
- if (c->min_io_size == 1)
- skip = 8;
- else
- skip = ALIGN(offs + 1, c->min_io_size) - offs;
-
- offs += skip;
- buf += skip;
- len -= skip;
- while (len > 8) {
- struct ubifs_ch *ch = buf;
- uint32_t magic = le32_to_cpu(ch->magic);
- int ret;
-
- if (magic == UBIFS_NODE_MAGIC) {
- ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
- if (ret == SCANNED_A_NODE || ret > 0) {
- /*
- * There is a small chance this is just data in
- * a data node, so check that possibility. e.g.
- * this is part of a file that itself contains
- * a UBIFS image.
- */
- if (next_offs && offs + le32_to_cpu(ch->len) <=
- next_offs)
- continue;
- dbg_rcvry("unexpected node at %d:%d", lnum,
- offs);
- return 0;
- }
- }
- offs += 8;
- buf += 8;
- len -= 8;
+ /* Check for empty space after the corrupt node's common header */
+ skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs;
+ if (is_empty(buf + skip, len - skip))
+ return 1;
+ /*
+ * The area after the common header size is not empty, so the common
+ * header must be intact. Check it.
+ */
+ if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) {
+ dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs);
+ return 0;
}
- return 1;
+ /* Now we know the corrupt node's length we can skip over it */
+ skip = ALIGN(offs + dlen, c->min_io_size) - offs;
+ /* After which there should be empty space */
+ if (is_empty(buf + skip, len - skip))
+ return 1;
+ dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip);
+ return 0;
}
/**
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index ce42a7b0ca5..11cc80125a4 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -143,7 +143,7 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
dirty -= c->leb_size - lp->free;
/*
* If the replay order was perfect the dirty space would now be
- * zero. The order is not perfect because the the journal heads
+ * zero. The order is not perfect because the journal heads
* race with each other. This is not a problem but is does mean
* that the dirty space may temporarily exceed c->leb_size
* during the replay.
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index e070c643d1b..57085e43320 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -193,6 +193,7 @@ static int create_default_filesystem(struct ubifs_info *c)
if (tmp64 > DEFAULT_MAX_RP_SIZE)
tmp64 = DEFAULT_MAX_RP_SIZE;
sup->rp_size = cpu_to_le64(tmp64);
+ sup->ro_compat_version = cpu_to_le32(UBIFS_RO_COMPAT_VERSION);
err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM);
kfree(sup);
@@ -532,17 +533,39 @@ int ubifs_read_superblock(struct ubifs_info *c)
if (IS_ERR(sup))
return PTR_ERR(sup);
+ c->fmt_version = le32_to_cpu(sup->fmt_version);
+ c->ro_compat_version = le32_to_cpu(sup->ro_compat_version);
+
/*
* The software supports all previous versions but not future versions,
* due to the unavailability of time-travelling equipment.
*/
- c->fmt_version = le32_to_cpu(sup->fmt_version);
if (c->fmt_version > UBIFS_FORMAT_VERSION) {
- ubifs_err("on-flash format version is %d, but software only "
- "supports up to version %d", c->fmt_version,
- UBIFS_FORMAT_VERSION);
- err = -EINVAL;
- goto out;
+ struct super_block *sb = c->vfs_sb;
+ int mounting_ro = sb->s_flags & MS_RDONLY;
+
+ ubifs_assert(!c->ro_media || mounting_ro);
+ if (!mounting_ro ||
+ c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) {
+ ubifs_err("on-flash format version is w%d/r%d, but "
+ "software only supports up to version "
+ "w%d/r%d", c->fmt_version,
+ c->ro_compat_version, UBIFS_FORMAT_VERSION,
+ UBIFS_RO_COMPAT_VERSION);
+ if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) {
+ ubifs_msg("only R/O mounting is possible");
+ err = -EROFS;
+ } else
+ err = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * The FS is mounted R/O, and the media format is
+ * R/O-compatible with the UBIFS implementation, so we can
+ * mount.
+ */
+ c->rw_incompat = 1;
}
if (c->fmt_version < 3) {
@@ -623,7 +646,6 @@ int ubifs_read_superblock(struct ubifs_info *c)
c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS;
c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs;
c->main_first = c->leb_cnt - c->main_lebs;
- c->report_rp_size = ubifs_reported_space(c, c->rp_size);
err = validate_sb(c, sup);
out:
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index e7bab52a141..02feb59cefc 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -206,8 +206,7 @@ static int shrink_tnc_trees(int nr, int age, int *contention)
* Move this one to the end of the list to provide some
* fairness.
*/
- list_del(&c->infos_list);
- list_add_tail(&c->infos_list, &ubifs_infos);
+ list_move_tail(&c->infos_list, &ubifs_infos);
mutex_unlock(&c->umount_mutex);
if (freed >= nr)
break;
@@ -263,8 +262,7 @@ static int kick_a_thread(void)
}
if (i == 1) {
- list_del(&c->infos_list);
- list_add_tail(&c->infos_list, &ubifs_infos);
+ list_move_tail(&c->infos_list, &ubifs_infos);
spin_unlock(&ubifs_infos_lock);
ubifs_request_bg_commit(c);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 89556ee7251..faa44f90608 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -397,6 +397,7 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_namelen = UBIFS_MAX_NLEN;
buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]);
buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]);
+ ubifs_assert(buf->f_bfree <= c->block_cnt);
return 0;
}
@@ -420,8 +421,8 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt)
seq_printf(s, ",no_chk_data_crc");
if (c->mount_opts.override_compr) {
- seq_printf(s, ",compr=");
- seq_printf(s, ubifs_compr_name(c->mount_opts.compr_type));
+ seq_printf(s, ",compr=%s",
+ ubifs_compr_name(c->mount_opts.compr_type));
}
return 0;
@@ -432,33 +433,24 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
int i, err;
struct ubifs_info *c = sb->s_fs_info;
struct writeback_control wbc = {
- .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
+ .sync_mode = WB_SYNC_ALL,
.range_start = 0,
.range_end = LLONG_MAX,
.nr_to_write = LONG_MAX,
};
/*
- * Note by akpm about WB_SYNC_NONE used above: zero @wait is just an
- * advisory thing to help the file system shove lots of data into the
- * queues. If some gets missed then it'll be picked up on the second
+ * Zero @wait is just an advisory thing to help the file system shove
+ * lots of data into the queues, and there will be the second
* '->sync_fs()' call, with non-zero @wait.
*/
+ if (!wait)
+ return 0;
if (sb->s_flags & MS_RDONLY)
return 0;
/*
- * Synchronize write buffers, because 'ubifs_run_commit()' does not
- * do this if it waits for an already running commit.
- */
- for (i = 0; i < c->jhead_cnt; i++) {
- err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
- if (err)
- return err;
- }
-
- /*
* VFS calls '->sync_fs()' before synchronizing all dirty inodes and
* pages, so synchronize them first, then commit the journal. Strictly
* speaking, it is not necessary to commit the journal here,
@@ -469,6 +461,16 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
*/
generic_sync_sb_inodes(sb, &wbc);
+ /*
+ * Synchronize write buffers, because 'ubifs_run_commit()' does not
+ * do this if it waits for an already running commit.
+ */
+ for (i = 0; i < c->jhead_cnt; i++) {
+ err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
+ if (err)
+ return err;
+ }
+
err = ubifs_run_commit(c);
if (err)
return err;
@@ -572,15 +574,8 @@ static int init_constants_early(struct ubifs_info *c)
c->ranges[UBIFS_IDX_NODE].max_len = INT_MAX;
/*
- * Initialize dead and dark LEB space watermarks.
- *
- * Dead space is the space which cannot be used. Its watermark is
- * equivalent to min. I/O unit or minimum node size if it is greater
- * then min. I/O unit.
- *
- * Dark space is the space which might be used, or might not, depending
- * on which node should be written to the LEB. Its watermark is
- * equivalent to maximum UBIFS node size.
+ * Initialize dead and dark LEB space watermarks. See gc.c for comments
+ * about these values.
*/
c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size);
c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size);
@@ -705,6 +700,8 @@ static int init_constants_sb(struct ubifs_info *c)
if (err)
return err;
+ /* Initialize effective LEB size used in budgeting calculations */
+ c->idx_leb_size = c->leb_size - c->max_idx_node_sz;
return 0;
}
@@ -721,6 +718,7 @@ static void init_constants_master(struct ubifs_info *c)
long long tmp64;
c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+ c->report_rp_size = ubifs_reported_space(c, c->rp_size);
/*
* Calculate total amount of FS blocks. This number is not used
@@ -741,12 +739,12 @@ static void init_constants_master(struct ubifs_info *c)
* take_gc_lnum - reserve GC LEB.
* @c: UBIFS file-system description object
*
- * This function ensures that the LEB reserved for garbage collection is
- * unmapped and is marked as "taken" in lprops. We also have to set free space
- * to LEB size and dirty space to zero, because lprops may contain out-of-date
- * information if the file-system was un-mounted before it has been committed.
- * This function returns zero in case of success and a negative error code in
- * case of failure.
+ * This function ensures that the LEB reserved for garbage collection is marked
+ * as "taken" in lprops. We also have to set free space to LEB size and dirty
+ * space to zero, because lprops may contain out-of-date information if the
+ * file-system was un-mounted before it has been committed. This function
+ * returns zero in case of success and a negative error code in case of
+ * failure.
*/
static int take_gc_lnum(struct ubifs_info *c)
{
@@ -757,10 +755,6 @@ static int take_gc_lnum(struct ubifs_info *c)
return -EINVAL;
}
- err = ubifs_leb_unmap(c, c->gc_lnum);
- if (err)
- return err;
-
/* And we have to tell lprops that this LEB is taken */
err = ubifs_change_one_lp(c, c->gc_lnum, c->leb_size, 0,
LPROPS_TAKEN, 0, 0);
@@ -966,13 +960,16 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
token = match_token(p, tokens, args);
switch (token) {
+ /*
+ * %Opt_fast_unmount and %Opt_norm_unmount options are ignored.
+ * We accepte them in order to be backware-compatible. But this
+ * should be removed at some point.
+ */
case Opt_fast_unmount:
c->mount_opts.unmount_mode = 2;
- c->fast_unmount = 1;
break;
case Opt_norm_unmount:
c->mount_opts.unmount_mode = 1;
- c->fast_unmount = 0;
break;
case Opt_bulk_read:
c->mount_opts.bulk_read = 2;
@@ -1094,12 +1091,7 @@ static int check_free_space(struct ubifs_info *c)
ubifs_err("insufficient free space to mount in read/write mode");
dbg_dump_budg(c);
dbg_dump_lprops(c);
- /*
- * We return %-EINVAL instead of %-ENOSPC because it seems to
- * be the closest error code mentioned in the mount function
- * documentation.
- */
- return -EINVAL;
+ return -ENOSPC;
}
return 0;
}
@@ -1212,7 +1204,7 @@ static int mount_ubifs(struct ubifs_info *c)
goto out_cbuf;
/* Create background thread */
- c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
+ c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
if (IS_ERR(c->bgt)) {
err = PTR_ERR(c->bgt);
c->bgt = NULL;
@@ -1286,10 +1278,19 @@ static int mount_ubifs(struct ubifs_info *c)
if (err)
goto out_orphans;
err = ubifs_rcvry_gc_commit(c);
- } else
+ } else {
err = take_gc_lnum(c);
- if (err)
- goto out_orphans;
+ if (err)
+ goto out_orphans;
+
+ /*
+ * GC LEB may contain garbage if there was an unclean
+ * reboot, and it should be un-mapped.
+ */
+ err = ubifs_leb_unmap(c, c->gc_lnum);
+ if (err)
+ return err;
+ }
err = dbg_check_lprops(c);
if (err)
@@ -1298,6 +1299,16 @@ static int mount_ubifs(struct ubifs_info *c)
err = ubifs_recover_size(c);
if (err)
goto out_orphans;
+ } else {
+ /*
+ * Even if we mount read-only, we have to set space in GC LEB
+ * to proper value because this affects UBIFS free space
+ * reporting. We do not want to have a situation when
+ * re-mounting from R/O to R/W changes amount of free space.
+ */
+ err = take_gc_lnum(c);
+ if (err)
+ goto out_orphans;
}
spin_lock(&ubifs_infos_lock);
@@ -1310,14 +1321,21 @@ static int mount_ubifs(struct ubifs_info *c)
else {
c->need_recovery = 0;
ubifs_msg("recovery completed");
+ /*
+ * GC LEB has to be empty and taken at this point. But
+ * the journal head LEBs may also be accounted as
+ * "empty taken" if they are empty.
+ */
+ ubifs_assert(c->lst.taken_empty_lebs > 0);
}
- }
+ } else
+ ubifs_assert(c->lst.taken_empty_lebs > 0);
- err = dbg_debugfs_init_fs(c);
+ err = dbg_check_filesystem(c);
if (err)
goto out_infos;
- err = dbg_check_filesystem(c);
+ err = dbg_debugfs_init_fs(c);
if (err)
goto out_infos;
@@ -1333,8 +1351,9 @@ static int mount_ubifs(struct ubifs_info *c)
x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d "
"LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);
- ubifs_msg("media format: %d (latest is %d)",
- c->fmt_version, UBIFS_FORMAT_VERSION);
+ ubifs_msg("media format: w%d/r%d (latest is w%d/r%d)",
+ c->fmt_version, c->ro_compat_version,
+ UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION);
ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr));
ubifs_msg("reserved for root: %llu bytes (%llu KiB)",
c->report_rp_size, c->report_rp_size >> 10);
@@ -1351,7 +1370,6 @@ static int mount_ubifs(struct ubifs_info *c)
c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7],
c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11],
c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]);
- dbg_msg("fast unmount: %d", c->fast_unmount);
dbg_msg("big_lpt %d", c->big_lpt);
dbg_msg("log LEBs: %d (%d - %d)",
c->log_lebs, UBIFS_LOG_LNUM, c->log_last);
@@ -1475,10 +1493,17 @@ static int ubifs_remount_rw(struct ubifs_info *c)
{
int err, lnum;
- if (c->ro_media)
- return -EINVAL;
+ if (c->rw_incompat) {
+ ubifs_err("the file-system is not R/W-compatible");
+ ubifs_msg("on-flash format version is w%d/r%d, but software "
+ "only supports up to version w%d/r%d", c->fmt_version,
+ c->ro_compat_version, UBIFS_FORMAT_VERSION,
+ UBIFS_RO_COMPAT_VERSION);
+ return -EROFS;
+ }
mutex_lock(&c->umount_mutex);
+ dbg_save_space_info(c);
c->remounting_rw = 1;
c->always_chk_crc = 1;
@@ -1514,6 +1539,12 @@ static int ubifs_remount_rw(struct ubifs_info *c)
err = ubifs_recover_inl_heads(c, c->sbuf);
if (err)
goto out;
+ } else {
+ /* A readonly mount is not allowed to have orphans */
+ ubifs_assert(c->tot_orphans == 0);
+ err = ubifs_clear_orphans(c);
+ if (err)
+ goto out;
}
if (!(c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY))) {
@@ -1540,7 +1571,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
ubifs_create_buds_lists(c);
/* Create background thread */
- c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
+ c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
if (IS_ERR(c->bgt)) {
err = PTR_ERR(c->bgt);
c->bgt = NULL;
@@ -1569,7 +1600,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
if (c->need_recovery)
err = ubifs_rcvry_gc_commit(c);
else
- err = take_gc_lnum(c);
+ err = ubifs_leb_unmap(c, c->gc_lnum);
if (err)
goto out;
@@ -1582,8 +1613,9 @@ static int ubifs_remount_rw(struct ubifs_info *c)
c->vfs_sb->s_flags &= ~MS_RDONLY;
c->remounting_rw = 0;
c->always_chk_crc = 0;
+ err = dbg_check_space_info(c);
mutex_unlock(&c->umount_mutex);
- return 0;
+ return err;
out:
vfree(c->orph_buf);
@@ -1603,43 +1635,18 @@ out:
}
/**
- * commit_on_unmount - commit the journal when un-mounting.
- * @c: UBIFS file-system description object
- *
- * This function is called during un-mounting and re-mounting, and it commits
- * the journal unless the "fast unmount" mode is enabled.
- */
-static void commit_on_unmount(struct ubifs_info *c)
-{
- struct super_block *sb = c->vfs_sb;
- long long bud_bytes;
-
- /*
- * This function is called before the background thread is stopped, so
- * we may race with ongoing commit, which means we have to take
- * @c->bud_lock to access @c->bud_bytes.
- */
- spin_lock(&c->buds_lock);
- bud_bytes = c->bud_bytes;
- spin_unlock(&c->buds_lock);
-
- if (!c->fast_unmount && !(sb->s_flags & MS_RDONLY) && bud_bytes)
- ubifs_run_commit(c);
-}
-
-/**
* ubifs_remount_ro - re-mount in read-only mode.
* @c: UBIFS file-system description object
*
- * We rely on VFS to have stopped writing. Possibly the background thread could
- * be running a commit, however kthread_stop will wait in that case.
+ * We assume VFS has stopped writing. Possibly the background thread could be
+ * running a commit, however kthread_stop will wait in that case.
*/
static void ubifs_remount_ro(struct ubifs_info *c)
{
int i, err;
ubifs_assert(!c->need_recovery);
- commit_on_unmount(c);
+ ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
mutex_lock(&c->umount_mutex);
if (c->bgt) {
@@ -1647,27 +1654,29 @@ static void ubifs_remount_ro(struct ubifs_info *c)
c->bgt = NULL;
}
+ dbg_save_space_info(c);
+
for (i = 0; i < c->jhead_cnt; i++) {
ubifs_wbuf_sync(&c->jheads[i].wbuf);
del_timer_sync(&c->jheads[i].wbuf.timer);
}
- if (!c->ro_media) {
- c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
- c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
- c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
- err = ubifs_write_master(c);
- if (err)
- ubifs_ro_mode(c, err);
- }
+ c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
+ c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
+ c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
+ err = ubifs_write_master(c);
+ if (err)
+ ubifs_ro_mode(c, err);
- ubifs_destroy_idx_gc(c);
free_wbufs(c);
vfree(c->orph_buf);
c->orph_buf = NULL;
vfree(c->ileb_buf);
c->ileb_buf = NULL;
ubifs_lpt_free(c, 1);
+ err = dbg_check_space_info(c);
+ if (err)
+ ubifs_ro_mode(c, err);
mutex_unlock(&c->umount_mutex);
}
@@ -1760,11 +1769,20 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
}
if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
+ if (c->ro_media) {
+ ubifs_msg("cannot re-mount due to prior errors");
+ return -EROFS;
+ }
err = ubifs_remount_rw(c);
if (err)
return err;
- } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY))
+ } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
+ if (c->ro_media) {
+ ubifs_msg("cannot re-mount due to prior errors");
+ return -EROFS;
+ }
ubifs_remount_ro(c);
+ }
if (c->bulk_read == 1)
bu_init(c);
@@ -1774,10 +1792,11 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
c->bu.buf = NULL;
}
+ ubifs_assert(c->lst.taken_empty_lebs > 0);
return 0;
}
-struct super_operations ubifs_super_operations = {
+const struct super_operations ubifs_super_operations = {
.alloc_inode = ubifs_alloc_inode,
.destroy_inode = ubifs_destroy_inode,
.put_super = ubifs_put_super,
@@ -2032,7 +2051,8 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
/* 'fill_super()' opens ubi again so we must close it here */
ubi_close_volume(ubi);
- return simple_set_mnt(mnt, sb);
+ simple_set_mnt(mnt, sb);
+ return 0;
out_deact:
up_write(&sb->s_umount);
@@ -2044,15 +2064,6 @@ out_close:
static void ubifs_kill_sb(struct super_block *sb)
{
- struct ubifs_info *c = sb->s_fs_info;
-
- /*
- * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()'
- * in order to be outside BKL.
- */
- if (sb->s_root)
- commit_on_unmount(c);
- /* The un-mount routine is actually done in put_super() */
generic_shutdown_super(sb);
}
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index f7e36f54552..f249f7b0d65 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -443,6 +443,11 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr,
* This function performs that same function as ubifs_read_node except that
* it does not require that there is actually a node present and instead
* the return code indicates if a node was read.
+ *
+ * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc
+ * is true (it is controlled by corresponding mount option). However, if
+ * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always
+ * checked.
*/
static int try_read_node(const struct ubifs_info *c, void *buf, int type,
int len, int lnum, int offs)
@@ -470,9 +475,8 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
if (node_len != len)
return 0;
- if (type == UBIFS_DATA_NODE && !c->always_chk_crc)
- if (c->no_chk_data_crc)
- return 0;
+ if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc)
+ return 1;
crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
node_crc = le32_to_cpu(ch->crc);
@@ -1248,7 +1252,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
* splitting in the middle of the colliding sequence. Also, when
* removing the leftmost key, we would have to correct the key of the
* parent node, which would introduce additional complications. Namely,
- * if we changed the the leftmost key of the parent znode, the garbage
+ * if we changed the leftmost key of the parent znode, the garbage
* collector would be unable to find it (GC is doing this when GC'ing
* indexing LEBs). Although we already have an additional RB-tree where
* we save such changed znodes (see 'ins_clr_old_idx_znode()') until
@@ -1506,7 +1510,7 @@ out:
*
* Note, if the bulk-read buffer length (@bu->buf_len) is known, this function
* makes sure bulk-read nodes fit the buffer. Otherwise, this function prepares
- * maxumum possible amount of nodes for bulk-read.
+ * maximum possible amount of nodes for bulk-read.
*/
int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu)
{
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index b25fc36cf72..3eee07e0c49 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -36,9 +36,31 @@
/* UBIFS node magic number (must not have the padding byte first or last) */
#define UBIFS_NODE_MAGIC 0x06101831
-/* UBIFS on-flash format version */
+/*
+ * UBIFS on-flash format version. This version is increased when the on-flash
+ * format is changing. If this happens, UBIFS is will support older versions as
+ * well. But older UBIFS code will not support newer formats. Format changes
+ * will be rare and only when absolutely necessary, e.g. to fix a bug or to add
+ * a new feature.
+ *
+ * UBIFS went into mainline kernel with format version 4. The older formats
+ * were development formats.
+ */
#define UBIFS_FORMAT_VERSION 4
+/*
+ * Read-only compatibility version. If the UBIFS format is changed, older UBIFS
+ * implementations will not be able to mount newer formats in read-write mode.
+ * However, depending on the change, it may be possible to mount newer formats
+ * in R/O mode. This is indicated by the R/O compatibility version which is
+ * stored in the super-block.
+ *
+ * This is needed to support boot-loaders which only need R/O mounting. With
+ * this flag it is possible to do UBIFS format changes without a need to update
+ * boot-loaders.
+ */
+#define UBIFS_RO_COMPAT_VERSION 0
+
/* Minimum logical eraseblock size in bytes */
#define UBIFS_MIN_LEB_SZ (15*1024)
@@ -53,7 +75,7 @@
/*
* If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes
- * shorter than uncompressed data length, UBIFS preferes to leave this data
+ * shorter than uncompressed data length, UBIFS prefers to leave this data
* node uncompress, because it'll be read faster.
*/
#define UBIFS_MIN_COMPRESS_DIFF 64
@@ -586,6 +608,7 @@ struct ubifs_pad_node {
* @padding2: reserved for future, zeroes
* @time_gran: time granularity in nanoseconds
* @uuid: UUID generated when the file system image was created
+ * @ro_compat_version: UBIFS R/O compatibility version
*/
struct ubifs_sb_node {
struct ubifs_ch ch;
@@ -612,7 +635,8 @@ struct ubifs_sb_node {
__le64 rp_size;
__le32 time_gran;
__u8 uuid[16];
- __u8 padding2[3972];
+ __le32 ro_compat_version;
+ __u8 padding2[3968];
} __attribute__ ((packed));
/**
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index fc2a4cc66d0..0a8341e1408 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -426,9 +426,9 @@ struct ubifs_unclean_leb {
* LEB properties flags.
*
* LPROPS_UNCAT: not categorized
- * LPROPS_DIRTY: dirty > 0, not index
+ * LPROPS_DIRTY: dirty > free, dirty >= @c->dead_wm, not index
* LPROPS_DIRTY_IDX: dirty + free > @c->min_idx_node_sze and index
- * LPROPS_FREE: free > 0, not empty, not index
+ * LPROPS_FREE: free > 0, dirty < @c->dead_wm, not empty, not index
* LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs
* LPROPS_EMPTY: LEB is empty, not taken
* LPROPS_FREEABLE: free + dirty == leb_size, not index, not taken
@@ -934,6 +934,7 @@ struct ubifs_debug_info;
* by @commit_sem
* @cnt_lock: protects @highest_inum and @max_sqnum counters
* @fmt_version: UBIFS on-flash format version
+ * @ro_compat_version: R/O compatibility version
* @uuid: UUID from super block
*
* @lhead_lnum: log head logical eraseblock number
@@ -961,12 +962,12 @@ struct ubifs_debug_info;
* @cs_lock: commit state lock
* @cmt_wq: wait queue to sleep on if the log is full and a commit is running
*
- * @fast_unmount: do not run journal commit before un-mounting
* @big_lpt: flag that LPT is too big to write whole during commit
* @no_chk_data_crc: do not check CRCs when reading data nodes (except during
* recovery)
* @bulk_read: enable bulk-reads
* @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
+ * @rw_incompat: the media is not R/W compatible
*
* @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
* @calc_idx_sz
@@ -1016,6 +1017,8 @@ struct ubifs_debug_info;
* @min_io_shift: number of bits in @min_io_size minus one
* @leb_size: logical eraseblock size in bytes
* @half_leb_size: half LEB size
+ * @idx_leb_size: how many bytes of an LEB are effectively available when it is
+ * used to store indexing nodes (@leb_size - @max_idx_node_sz)
* @leb_cnt: count of logical eraseblocks
* @max_leb_cnt: maximum count of logical eraseblocks
* @old_leb_cnt: count of logical eraseblocks before re-size
@@ -1133,8 +1136,8 @@ struct ubifs_debug_info;
* previous commit start
* @uncat_list: list of un-categorized LEBs
* @empty_list: list of empty LEBs
- * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size)
- * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size)
+ * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size)
+ * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size)
* @freeable_cnt: number of freeable LEBs in @freeable_list
*
* @ltab_lnum: LEB number of LPT's own lprops table
@@ -1178,6 +1181,7 @@ struct ubifs_info {
unsigned long long cmt_no;
spinlock_t cnt_lock;
int fmt_version;
+ int ro_compat_version;
unsigned char uuid[16];
int lhead_lnum;
@@ -1202,11 +1206,11 @@ struct ubifs_info {
spinlock_t cs_lock;
wait_queue_head_t cmt_wq;
- unsigned int fast_unmount:1;
unsigned int big_lpt:1;
unsigned int no_chk_data_crc:1;
unsigned int bulk_read:1;
unsigned int default_compr:2;
+ unsigned int rw_incompat:1;
struct mutex tnc_mutex;
struct ubifs_zbranch zroot;
@@ -1255,6 +1259,7 @@ struct ubifs_info {
int min_io_shift;
int leb_size;
int half_leb_size;
+ int idx_leb_size;
int leb_cnt;
int max_leb_cnt;
int old_leb_cnt;
@@ -1405,13 +1410,13 @@ extern struct list_head ubifs_infos;
extern spinlock_t ubifs_infos_lock;
extern atomic_long_t ubifs_clean_zn_cnt;
extern struct kmem_cache *ubifs_inode_slab;
-extern struct super_operations ubifs_super_operations;
-extern struct address_space_operations ubifs_file_address_operations;
-extern struct file_operations ubifs_file_operations;
-extern struct inode_operations ubifs_file_inode_operations;
-extern struct file_operations ubifs_dir_operations;
-extern struct inode_operations ubifs_dir_inode_operations;
-extern struct inode_operations ubifs_symlink_inode_operations;
+extern const struct super_operations ubifs_super_operations;
+extern const struct address_space_operations ubifs_file_address_operations;
+extern const struct file_operations ubifs_file_operations;
+extern const struct inode_operations ubifs_file_inode_operations;
+extern const struct file_operations ubifs_dir_operations;
+extern const struct inode_operations ubifs_dir_inode_operations;
+extern const struct inode_operations ubifs_symlink_inode_operations;
extern struct backing_dev_info ubifs_backing_dev_info;
extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
@@ -1428,7 +1433,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum,
int offs, int dtype);
int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
- int offs, int quiet, int chk_crc);
+ int offs, int quiet, int must_chk_crc);
void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad);
void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last);
int ubifs_io_init(struct ubifs_info *c);
@@ -1495,13 +1500,14 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode,
void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode,
struct ubifs_budget_req *req);
long long ubifs_get_free_space(struct ubifs_info *c);
+long long ubifs_get_free_space_nolock(struct ubifs_info *c);
int ubifs_calc_min_idx_lebs(struct ubifs_info *c);
void ubifs_convert_page_budget(struct ubifs_info *c);
long long ubifs_reported_space(const struct ubifs_info *c, long long free);
long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
/* find.c */
-int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
+int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
int squeeze);
int ubifs_find_free_leb_for_idx(struct ubifs_info *c);
int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
@@ -1603,6 +1609,7 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum);
int ubifs_orphan_start_commit(struct ubifs_info *c);
int ubifs_orphan_end_commit(struct ubifs_info *c);
int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only);
+int ubifs_clear_orphans(struct ubifs_info *c);
/* lpt.c */
int ubifs_calc_lpt_geom(struct ubifs_info *c);
@@ -1646,7 +1653,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
const struct ubifs_lprops *lp,
int free, int dirty, int flags,
int idx_gc_cnt);
-void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats);
+void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst);
void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
int cat);
void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops,