diff options
Diffstat (limited to 'fs/ubifs')
-rw-r--r-- | fs/ubifs/Kconfig | 4 | ||||
-rw-r--r-- | fs/ubifs/budget.c | 72 | ||||
-rw-r--r-- | fs/ubifs/debug.c | 128 | ||||
-rw-r--r-- | fs/ubifs/debug.h | 36 | ||||
-rw-r--r-- | fs/ubifs/dir.c | 96 | ||||
-rw-r--r-- | fs/ubifs/file.c | 34 | ||||
-rw-r--r-- | fs/ubifs/find.c | 12 | ||||
-rw-r--r-- | fs/ubifs/gc.c | 454 | ||||
-rw-r--r-- | fs/ubifs/io.c | 22 | ||||
-rw-r--r-- | fs/ubifs/journal.c | 9 | ||||
-rw-r--r-- | fs/ubifs/key.h | 6 | ||||
-rw-r--r-- | fs/ubifs/log.c | 5 | ||||
-rw-r--r-- | fs/ubifs/lprops.c | 12 | ||||
-rw-r--r-- | fs/ubifs/lpt_commit.c | 78 | ||||
-rw-r--r-- | fs/ubifs/master.c | 2 | ||||
-rw-r--r-- | fs/ubifs/orphan.c | 38 | ||||
-rw-r--r-- | fs/ubifs/recovery.c | 70 | ||||
-rw-r--r-- | fs/ubifs/replay.c | 2 | ||||
-rw-r--r-- | fs/ubifs/sb.c | 36 | ||||
-rw-r--r-- | fs/ubifs/shrinker.c | 6 | ||||
-rw-r--r-- | fs/ubifs/super.c | 225 | ||||
-rw-r--r-- | fs/ubifs/tnc.c | 14 | ||||
-rw-r--r-- | fs/ubifs/ubifs-media.h | 30 | ||||
-rw-r--r-- | fs/ubifs/ubifs.h | 39 |
24 files changed, 879 insertions, 551 deletions
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index e35b54d5059..830e3f76f44 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig @@ -22,7 +22,7 @@ config UBIFS_FS_ADVANCED_COMPR depends on UBIFS_FS help This option allows to explicitly choose which compressions, if any, - are enabled in UBIFS. Removing compressors means inbility to read + are enabled in UBIFS. Removing compressors means inability to read existing file systems. If unsure, say 'N'. @@ -32,7 +32,7 @@ config UBIFS_FS_LZO depends on UBIFS_FS default y help - LZO compressor is generally faster then zlib but compresses worse. + LZO compressor is generally faster than zlib but compresses worse. Say 'Y' if unsure. config UBIFS_FS_ZLIB diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 175f9c590b7..af1914462f0 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -194,29 +194,26 @@ static int make_free_space(struct ubifs_info *c) } /** - * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index. + * ubifs_calc_min_idx_lebs - calculate amount of LEBs for the index. * @c: UBIFS file-system description object * - * This function calculates and returns the number of eraseblocks which should - * be kept for index usage. + * This function calculates and returns the number of LEBs which should be kept + * for index usage. */ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) { - int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz; + int idx_lebs; long long idx_size; idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; - /* And make sure we have thrice the index size of space reserved */ - idx_size = idx_size + (idx_size << 1); - + idx_size += idx_size << 1; /* * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' * pair, nor similarly the two variables for the new index size, so we * have to do this costly 64-bit division on fast-path. 
*/ - idx_size += eff_leb_size - 1; - idx_lebs = div_u64(idx_size, eff_leb_size); + idx_lebs = div_u64(idx_size + c->idx_leb_size - 1, c->idx_leb_size); /* * The index head is not available for the in-the-gaps method, so add an * extra LEB to compensate. @@ -310,23 +307,23 @@ static int can_use_rp(struct ubifs_info *c) * do_budget_space - reserve flash space for index and data growth. * @c: UBIFS file-system description object * - * This function makes sure UBIFS has enough free eraseblocks for index growth - * and data. + * This function makes sure UBIFS has enough free LEBs for index growth and + * data. * * When budgeting index space, UBIFS reserves thrice as many LEBs as the index * would take if it was consolidated and written to the flash. This guarantees * that the "in-the-gaps" commit method always succeeds and UBIFS will always * be able to commit dirty index. So this function basically adds amount of * budgeted index space to the size of the current index, multiplies this by 3, - * and makes sure this does not exceed the amount of free eraseblocks. + * and makes sure this does not exceed the amount of free LEBs. * * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might * be large, because UBIFS does not do any index consolidation as long as * there is free space. IOW, the index may take a lot of LEBs, but the LEBs * will contain a lot of dirt. - * o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be - * consolidated to take up to @c->min_idx_lebs LEBs. + * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW, + * the index may be consolidated to take up to @c->min_idx_lebs LEBs. * * This function returns zero in case of success, and %-ENOSPC in case of * failure. @@ -689,31 +686,29 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free) } /** - * ubifs_get_free_space - return amount of free space. 
+ * ubifs_get_free_space_nolock - return amount of free space. * @c: UBIFS file-system description object * * This function calculates amount of free space to report to user-space. * * Because UBIFS may introduce substantial overhead (the index, node headers, - * alignment, wastage at the end of eraseblocks, etc), it cannot report real - * amount of free flash space it has (well, because not all dirty space is - * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so, - * it would bread user expectations about what free space is. Users seem to - * accustomed to assume that if the file-system reports N bytes of free space, - * they would be able to fit a file of N bytes to the FS. This almost works for + * alignment, wastage at the end of LEBs, etc), it cannot report real amount of + * free flash space it has (well, because not all dirty space is reclaimable, + * UBIFS does not actually know the real amount). If UBIFS did so, it would + * break user expectations about what free space is. Users seem to be accustomed + * to assume that if the file-system reports N bytes of free space, they would + * be able to fit a file of N bytes to the FS. This almost works for * traditional file-systems, because they have way less overhead than UBIFS. * So, to keep users happy, UBIFS tries to take the overhead into account. 
*/ -long long ubifs_get_free_space(struct ubifs_info *c) +long long ubifs_get_free_space_nolock(struct ubifs_info *c) { - int min_idx_lebs, rsvd_idx_lebs, lebs; + int rsvd_idx_lebs, lebs; long long available, outstanding, free; - spin_lock(&c->space_lock); - min_idx_lebs = c->min_idx_lebs; - ubifs_assert(min_idx_lebs == ubifs_calc_min_idx_lebs(c)); + ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); outstanding = c->budg_data_growth + c->budg_dd_growth; - available = ubifs_calc_available(c, min_idx_lebs); + available = ubifs_calc_available(c, c->min_idx_lebs); /* * When reporting free space to user-space, UBIFS guarantees that it is @@ -726,15 +721,14 @@ long long ubifs_get_free_space(struct ubifs_info *c) * Note, the calculations below are similar to what we have in * 'do_budget_space()', so refer there for comments. */ - if (min_idx_lebs > c->lst.idx_lebs) - rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; + if (c->min_idx_lebs > c->lst.idx_lebs) + rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; else rsvd_idx_lebs = 0; lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - c->lst.taken_empty_lebs; lebs -= rsvd_idx_lebs; available += lebs * (c->dark_wm - c->leb_overhead); - spin_unlock(&c->space_lock); if (available > outstanding) free = ubifs_reported_space(c, available - outstanding); @@ -742,3 +736,21 @@ long long ubifs_get_free_space(struct ubifs_info *c) free = 0; return free; } + +/** + * ubifs_get_free_space - return amount of free space. + * @c: UBIFS file-system description object + * + * This function calculates and returns amount of free space to report to + * user-space. 
+ */ +long long ubifs_get_free_space(struct ubifs_info *c) +{ + long long free; + + spin_lock(&c->space_lock); + free = ubifs_get_free_space_nolock(c); + spin_unlock(&c->space_lock); + + return free; +} diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 792c5a16c18..ce2cd834361 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -479,9 +479,9 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) "bad or corrupted node)"); else { for (i = 0; i < nlen && dent->name[i]; i++) - printk("%c", dent->name[i]); + printk(KERN_CONT "%c", dent->name[i]); } - printk("\n"); + printk(KERN_CONT "\n"); break; } @@ -620,9 +620,11 @@ void dbg_dump_budg(struct ubifs_info *c) c->dark_wm, c->dead_wm, c->max_idx_node_sz); printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", c->gc_lnum, c->ihead_lnum); - for (i = 0; i < c->jhead_cnt; i++) - printk(KERN_DEBUG "\tjhead %d\t LEB %d\n", - c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum); + /* If we are in R/O mode, journal heads do not exist */ + if (c->jheads) + for (i = 0; i < c->jhead_cnt; i++) + printk(KERN_DEBUG "\tjhead %d\t LEB %d\n", + c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum); for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { bud = rb_entry(rb, struct ubifs_bud, rb); printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum); @@ -637,10 +639,7 @@ void dbg_dump_budg(struct ubifs_info *c) /* Print budgeting predictions */ available = ubifs_calc_available(c, c->min_idx_lebs); outstanding = c->budg_data_growth + c->budg_dd_growth; - if (available > outstanding) - free = ubifs_reported_space(c, available - outstanding); - else - free = 0; + free = ubifs_get_free_space_nolock(c); printk(KERN_DEBUG "Budgeting predictions:\n"); printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", available, outstanding, free); @@ -861,6 +860,65 @@ void dbg_dump_index(struct ubifs_info *c) } /** + * dbg_save_space_info - save information about flash space. 
+ * @c: UBIFS file-system description object + * + * This function saves information about UBIFS free space, dirty space, etc, in + * order to check it later. + */ +void dbg_save_space_info(struct ubifs_info *c) +{ + struct ubifs_debug_info *d = c->dbg; + + ubifs_get_lp_stats(c, &d->saved_lst); + + spin_lock(&c->space_lock); + d->saved_free = ubifs_get_free_space_nolock(c); + spin_unlock(&c->space_lock); +} + +/** + * dbg_check_space_info - check flash space information. + * @c: UBIFS file-system description object + * + * This function compares current flash space information with the information + * which was saved when the 'dbg_save_space_info()' function was called. + * Returns zero if the information has not changed, and %-EINVAL if it has + * changed. + */ +int dbg_check_space_info(struct ubifs_info *c) +{ + struct ubifs_debug_info *d = c->dbg; + struct ubifs_lp_stats lst; + long long avail, free; + + spin_lock(&c->space_lock); + avail = ubifs_calc_available(c, c->min_idx_lebs); + spin_unlock(&c->space_lock); + free = ubifs_get_free_space(c); + + if (free != d->saved_free) { + ubifs_err("free space changed from %lld to %lld", + d->saved_free, free); + goto out; + } + + return 0; + +out: + ubifs_msg("saved lprops statistics dump"); + dbg_dump_lstats(&d->saved_lst); + ubifs_get_lp_stats(c, &lst); + ubifs_msg("current lprops statistics dump"); + dbg_dump_lstats(&lst); + spin_lock(&c->space_lock); + dbg_dump_budg(c); + spin_unlock(&c->space_lock); + dump_stack(); + return -EINVAL; +} + +/** + * dbg_check_synced_i_size - check synchronized inode size. * @inode: inode to check * @@ -1156,7 +1214,7 @@ static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr) /* * Make sure the last key in our znode is less or - * equivalent than the the key in zbranch which goes + * equivalent than the key in the zbranch which goes * after our pointing zbranch. 
*/ cmp = keys_cmp(c, max, @@ -1349,7 +1407,7 @@ int dbg_check_tnc(struct ubifs_info *c, int extra) * @c: UBIFS file-system description object * @leaf_cb: called for each leaf node * @znode_cb: called for each indexing node - * @priv: private date which is passed to callbacks + * @priv: private data which is passed to callbacks * * This function walks the UBIFS index and calls the @leaf_cb for each leaf * node and @znode_cb for each indexing node. Returns zero in case of success @@ -2409,7 +2467,7 @@ void ubifs_debugging_exit(struct ubifs_info *c) * Root directory for UBIFS stuff in debugfs. Contains sub-directories which * contain the stuff specific to particular file-system mounts. */ -static struct dentry *debugfs_rootdir; +static struct dentry *dfs_rootdir; /** * dbg_debugfs_init - initialize debugfs file-system. @@ -2421,9 +2479,9 @@ static struct dentry *debugfs_rootdir; */ int dbg_debugfs_init(void) { - debugfs_rootdir = debugfs_create_dir("ubifs", NULL); - if (IS_ERR(debugfs_rootdir)) { - int err = PTR_ERR(debugfs_rootdir); + dfs_rootdir = debugfs_create_dir("ubifs", NULL); + if (IS_ERR(dfs_rootdir)) { + int err = PTR_ERR(dfs_rootdir); ubifs_err("cannot create \"ubifs\" debugfs directory, " "error %d\n", err); return err; @@ -2437,7 +2495,7 @@ int dbg_debugfs_init(void) */ void dbg_debugfs_exit(void) { - debugfs_remove(debugfs_rootdir); + debugfs_remove(dfs_rootdir); } static int open_debugfs_file(struct inode *inode, struct file *file) @@ -2452,13 +2510,13 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf, struct ubifs_info *c = file->private_data; struct ubifs_debug_info *d = c->dbg; - if (file->f_path.dentry == d->dump_lprops) + if (file->f_path.dentry == d->dfs_dump_lprops) dbg_dump_lprops(c); - else if (file->f_path.dentry == d->dump_budg) { + else if (file->f_path.dentry == d->dfs_dump_budg) { spin_lock(&c->space_lock); dbg_dump_budg(c); spin_unlock(&c->space_lock); - } else if (file->f_path.dentry == d->dump_tnc) { + } 
else if (file->f_path.dentry == d->dfs_dump_tnc) { mutex_lock(&c->tnc_mutex); dbg_dump_tnc(c); mutex_unlock(&c->tnc_mutex); @@ -2469,7 +2527,7 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf, return count; } -static const struct file_operations debugfs_fops = { +static const struct file_operations dfs_fops = { .open = open_debugfs_file, .write = write_debugfs_file, .owner = THIS_MODULE, @@ -2494,36 +2552,32 @@ int dbg_debugfs_init_fs(struct ubifs_info *c) struct dentry *dent; struct ubifs_debug_info *d = c->dbg; - sprintf(d->debugfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); - d->debugfs_dir = debugfs_create_dir(d->debugfs_dir_name, - debugfs_rootdir); - if (IS_ERR(d->debugfs_dir)) { - err = PTR_ERR(d->debugfs_dir); + sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); + d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir); + if (IS_ERR(d->dfs_dir)) { + err = PTR_ERR(d->dfs_dir); ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", - d->debugfs_dir_name, err); + d->dfs_dir_name, err); goto out; } fname = "dump_lprops"; - dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c, - &debugfs_fops); + dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); if (IS_ERR(dent)) goto out_remove; - d->dump_lprops = dent; + d->dfs_dump_lprops = dent; fname = "dump_budg"; - dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c, - &debugfs_fops); + dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); if (IS_ERR(dent)) goto out_remove; - d->dump_budg = dent; + d->dfs_dump_budg = dent; fname = "dump_tnc"; - dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c, - &debugfs_fops); + dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); if (IS_ERR(dent)) goto out_remove; - d->dump_tnc = dent; + d->dfs_dump_tnc = dent; return 0; @@ -2531,7 +2585,7 @@ out_remove: err = PTR_ERR(dent); ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", 
fname, err); - debugfs_remove_recursive(d->debugfs_dir); + debugfs_remove_recursive(d->dfs_dir); out: return err; } @@ -2542,7 +2596,7 @@ out: */ void dbg_debugfs_exit_fs(struct ubifs_info *c) { - debugfs_remove_recursive(c->dbg->debugfs_dir); + debugfs_remove_recursive(c->dbg->dfs_dir); } #endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 9820d6999f7..c1cd73b2e06 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -41,15 +41,17 @@ * @chk_lpt_wastage: used by LPT tree size checker * @chk_lpt_lebs: used by LPT tree size checker * @new_nhead_offs: used by LPT tree size checker - * @new_ihead_lnum: used by debugging to check ihead_lnum - * @new_ihead_offs: used by debugging to check ihead_offs + * @new_ihead_lnum: used by debugging to check @c->ihead_lnum + * @new_ihead_offs: used by debugging to check @c->ihead_offs * - * debugfs_dir_name: name of debugfs directory containing this file-system's - * files - * debugfs_dir: direntry object of the file-system debugfs directory - * dump_lprops: "dump lprops" debugfs knob - * dump_budg: "dump budgeting information" debugfs knob - * dump_tnc: "dump TNC" debugfs knob + * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') + * @saved_free: saved free space (used by 'dbg_save_space_info()') + * + * dfs_dir_name: name of debugfs directory containing this file-system's files + * dfs_dir: direntry object of the file-system debugfs directory + * dfs_dump_lprops: "dump lprops" debugfs knob + * dfs_dump_budg: "dump budgeting information" debugfs knob + * dfs_dump_tnc: "dump TNC" debugfs knob */ struct ubifs_debug_info { void *buf; @@ -69,11 +71,14 @@ struct ubifs_debug_info { int new_ihead_lnum; int new_ihead_offs; - char debugfs_dir_name[100]; - struct dentry *debugfs_dir; - struct dentry *dump_lprops; - struct dentry *dump_budg; - struct dentry *dump_tnc; + struct ubifs_lp_stats saved_lst; + long long saved_free; + + char dfs_dir_name[100]; + struct dentry *dfs_dir; + 
struct dentry *dfs_dump_lprops; + struct dentry *dfs_dump_budg; + struct dentry *dfs_dump_tnc; }; #define ubifs_assert(expr) do { \ @@ -297,7 +302,8 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, dbg_znode_callback znode_cb, void *priv); /* Checking functions */ - +void dbg_save_space_info(struct ubifs_info *c); +int dbg_check_space_info(struct ubifs_info *c); int dbg_check_lprops(struct ubifs_info *c); int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot); int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); @@ -439,6 +445,8 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); #define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 #define dbg_old_index_check_init(c, zroot) 0 +#define dbg_save_space_info(c) ({}) +#define dbg_check_space_info(c) 0 #define dbg_check_old_index(c, zroot) 0 #define dbg_check_cats(c) 0 #define dbg_check_ltab(c) 0 diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index f448ab1f9c3..f55d523c52b 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -482,30 +482,29 @@ static int ubifs_dir_release(struct inode *dir, struct file *file) } /** - * lock_2_inodes - lock two UBIFS inodes. + * lock_2_inodes - a wrapper for locking two UBIFS inodes. * @inode1: first inode * @inode2: second inode + * + * We do not implement any tricks to guarantee strict lock ordering, because + * VFS has already done it for us on the @i_mutex. So this is just a simple + * wrapper function. 
*/ static void lock_2_inodes(struct inode *inode1, struct inode *inode2) { - if (inode1->i_ino < inode2->i_ino) { - mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_2); - mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_3); - } else { - mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); - mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_3); - } + mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); + mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); } /** - * unlock_2_inodes - unlock two UBIFS inodes inodes. + * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes. * @inode1: first inode * @inode2: second inode */ static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) { - mutex_unlock(&ubifs_inode(inode1)->ui_mutex); mutex_unlock(&ubifs_inode(inode2)->ui_mutex); + mutex_unlock(&ubifs_inode(inode1)->ui_mutex); } static int ubifs_link(struct dentry *old_dentry, struct inode *dir, @@ -527,6 +526,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu", dentry->d_name.len, dentry->d_name.name, inode->i_ino, inode->i_nlink, dir->i_ino); + ubifs_assert(mutex_is_locked(&dir->i_mutex)); + ubifs_assert(mutex_is_locked(&inode->i_mutex)); err = dbg_check_synced_i_size(inode); if (err) return err; @@ -580,6 +581,8 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu", dentry->d_name.len, dentry->d_name.name, inode->i_ino, inode->i_nlink, dir->i_ino); + ubifs_assert(mutex_is_locked(&dir->i_mutex)); + ubifs_assert(mutex_is_locked(&inode->i_mutex)); err = dbg_check_synced_i_size(inode); if (err) return err; @@ -667,7 +670,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len, dentry->d_name.name, inode->i_ino, dir->i_ino); - + 
ubifs_assert(mutex_is_locked(&dir->i_mutex)); + ubifs_assert(mutex_is_locked(&inode->i_mutex)); err = check_dir_empty(c, dentry->d_inode); if (err) return err; @@ -922,59 +926,30 @@ out_budg: } /** - * lock_3_inodes - lock three UBIFS inodes for rename. + * lock_3_inodes - a wrapper for locking three UBIFS inodes. * @inode1: first inode * @inode2: second inode * @inode3: third inode * - * For 'ubifs_rename()', @inode1 may be the same as @inode2 whereas @inode3 may - * be null. + * This function is used for 'ubifs_rename()' and @inode1 may be the same as + * @inode2 whereas @inode3 may be %NULL. + * + * We do not implement any tricks to guarantee strict lock ordering, because + * VFS has already done it for us on the @i_mutex. So this is just a simple + * wrapper function. */ static void lock_3_inodes(struct inode *inode1, struct inode *inode2, struct inode *inode3) { - struct inode *i1, *i2, *i3; - - if (!inode3) { - if (inode1 != inode2) { - lock_2_inodes(inode1, inode2); - return; - } - mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); - return; - } - - if (inode1 == inode2) { - lock_2_inodes(inode1, inode3); - return; - } - - /* 3 different inodes */ - if (inode1 < inode2) { - i3 = inode2; - if (inode1 < inode3) { - i1 = inode1; - i2 = inode3; - } else { - i1 = inode3; - i2 = inode1; - } - } else { - i3 = inode1; - if (inode2 < inode3) { - i1 = inode2; - i2 = inode3; - } else { - i1 = inode3; - i2 = inode2; - } - } - mutex_lock_nested(&ubifs_inode(i1)->ui_mutex, WB_MUTEX_1); - lock_2_inodes(i2, i3); + mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); + if (inode2 != inode1) + mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); + if (inode3) + mutex_lock_nested(&ubifs_inode(inode3)->ui_mutex, WB_MUTEX_3); } /** - * unlock_3_inodes - unlock three UBIFS inodes for rename. + * unlock_3_inodes - a wrapper for unlocking three UBIFS inodes for rename. 
* @inode1: first inode * @inode2: second inode * @inode3: third inode @@ -982,11 +957,11 @@ static void lock_3_inodes(struct inode *inode1, struct inode *inode2, static void unlock_3_inodes(struct inode *inode1, struct inode *inode2, struct inode *inode3) { - mutex_unlock(&ubifs_inode(inode1)->ui_mutex); - if (inode1 != inode2) - mutex_unlock(&ubifs_inode(inode2)->ui_mutex); if (inode3) mutex_unlock(&ubifs_inode(inode3)->ui_mutex); + if (inode1 != inode2) + mutex_unlock(&ubifs_inode(inode2)->ui_mutex); + mutex_unlock(&ubifs_inode(inode1)->ui_mutex); } static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, @@ -1020,6 +995,11 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, "dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name, old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len, new_dentry->d_name.name, new_dir->i_ino); + ubifs_assert(mutex_is_locked(&old_dir->i_mutex)); + ubifs_assert(mutex_is_locked(&new_dir->i_mutex)); + if (unlink) + ubifs_assert(mutex_is_locked(&new_inode->i_mutex)); + if (unlink && is_dir) { err = check_dir_empty(c, new_inode); @@ -1199,7 +1179,7 @@ int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, return 0; } -struct inode_operations ubifs_dir_inode_operations = { +const struct inode_operations ubifs_dir_inode_operations = { .lookup = ubifs_lookup, .create = ubifs_create, .link = ubifs_link, @@ -1219,7 +1199,7 @@ struct inode_operations ubifs_dir_inode_operations = { #endif }; -struct file_operations ubifs_dir_operations = { +const struct file_operations ubifs_dir_operations = { .llseek = ubifs_dir_llseek, .release = ubifs_dir_release, .read = generic_read_dir, diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index bf37374567f..6d34dc7e33e 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -430,9 +430,9 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, struct ubifs_inode *ui = ubifs_inode(inode); pgoff_t index = pos >> PAGE_CACHE_SHIFT; 
int uninitialized_var(err), appending = !!(pos + len > inode->i_size); + int skipped_read = 0; struct page *page; - ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); if (unlikely(c->ro_media)) @@ -445,7 +445,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, if (!PageUptodate(page)) { /* The page is not loaded from the flash */ - if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) + if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) { /* * We change whole page so no need to load it. But we * have to set the @PG_checked flag to make the further @@ -454,7 +454,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, * the media. */ SetPageChecked(page); - else { + skipped_read = 1; + } else { err = do_readpage(page); if (err) { unlock_page(page); @@ -471,6 +472,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, if (unlikely(err)) { ubifs_assert(err == -ENOSPC); /* + * If we skipped reading the page because we were going to + * write all of it, then it is not up to date. + */ + if (skipped_read) { + ClearPageChecked(page); + ClearPageUptodate(page); + } + /* * Budgeting failed which means it would have to force * write-back but didn't, because we set the @fast flag in the * request. Write-back cannot be done now, while we have the @@ -950,7 +959,7 @@ static int do_writepage(struct page *page, int len) * whole index and correct all inode sizes, which is long an unacceptable. * * To prevent situations like this, UBIFS writes pages back only if they are - * within last synchronized inode size, i.e. the the size which has been + * within the last synchronized inode size, i.e. the size which has been * written to the flash media last time. Otherwise, UBIFS forces inode * write-back, thus making sure the on-flash inode contains current inode size, * and then keeps writing pages back. 
@@ -1435,8 +1444,9 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) * mmap()d file has taken write protection fault and is being made * writable. UBIFS must ensure page is budgeted for. */ -static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct ubifs_info *c = inode->i_sb->s_fs_info; struct timespec now = ubifs_current_time(inode); @@ -1448,7 +1458,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY)); if (unlikely(c->ro_media)) - return -EROFS; + return VM_FAULT_SIGBUS; /* -EROFS */ /* * We have not locked @page so far so we may budget for changing the @@ -1481,7 +1491,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) if (err == -ENOSPC) ubifs_warn("out of space for mmapped file " "(inode number %lu)", inode->i_ino); - return err; + return VM_FAULT_SIGBUS; } lock_page(page); @@ -1521,6 +1531,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) out_unlock: unlock_page(page); ubifs_release_budget(c, &req); + if (err) + err = VM_FAULT_SIGBUS; return err; } @@ -1541,7 +1553,7 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -struct address_space_operations ubifs_file_address_operations = { +const struct address_space_operations ubifs_file_address_operations = { .readpage = ubifs_readpage, .writepage = ubifs_writepage, .write_begin = ubifs_write_begin, @@ -1551,7 +1563,7 @@ struct address_space_operations ubifs_file_address_operations = { .releasepage = ubifs_releasepage, }; -struct inode_operations ubifs_file_inode_operations = { +const struct inode_operations ubifs_file_inode_operations = { .setattr = ubifs_setattr, .getattr = ubifs_getattr, #ifdef 
CONFIG_UBIFS_FS_XATTR @@ -1562,14 +1574,14 @@ struct inode_operations ubifs_file_inode_operations = { #endif }; -struct inode_operations ubifs_symlink_inode_operations = { +const struct inode_operations ubifs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = ubifs_follow_link, .setattr = ubifs_setattr, .getattr = ubifs_getattr, }; -struct file_operations ubifs_file_operations = { +const struct file_operations ubifs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 717d79c97c5..1d54383d126 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -478,7 +478,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, * ubifs_find_free_space - find a data LEB with free space. * @c: the UBIFS file-system description object * @min_space: minimum amount of required free space - * @free: contains amount of free space in the LEB on exit + * @offs: contains offset of where free space starts on exit * @squeeze: whether to try to find space in a non-empty LEB first * * This function looks for an LEB with at least @min_space bytes of free space. @@ -490,7 +490,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, * failed to find a LEB with @min_space bytes of free space and other a negative * error codes in case of failure. */ -int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, +int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, int squeeze) { const struct ubifs_lprops *lprops; @@ -558,10 +558,10 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, spin_unlock(&c->space_lock); } - *free = lprops->free; + *offs = c->leb_size - lprops->free; ubifs_release_lprops(c); - if (*free == c->leb_size) { + if (*offs == 0) { /* * Ensure that empty LEBs have been unmapped. They may not have * been, for example, because of an unclean unmount. 
Also @@ -573,8 +573,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, return err; } - dbg_find("found LEB %d, free %d", lnum, *free); - ubifs_assert(*free >= min_space); + dbg_find("found LEB %d, free %d", lnum, c->leb_size - *offs); + ubifs_assert(*offs <= c->leb_size - min_space); return lnum; out: diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 9832f9abe28..f0f5f15d384 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -31,20 +31,32 @@ * to be reused. Garbage collection will cause the number of dirty index nodes * to grow, however sufficient space is reserved for the index to ensure the * commit will never run out of space. + * + * Notes about dead watermark. At current UBIFS implementation we assume that + * LEBs which have less than @c->dead_wm bytes of free + dirty space are full + * and not worth garbage-collecting. The dead watermark is one min. I/O unit + * size, or min. UBIFS node size, depending on what is greater. Indeed, UBIFS + * Garbage Collector has to synchronize the GC head's write buffer before + * returning, so this is about wasting one min. I/O unit. However, UBIFS GC can + * actually reclaim even very small pieces of dirty space by garbage collecting + * enough dirty LEBs, but we do not bother doing this at this implementation. + * + * Notes about dark watermark. The results of GC work depend on how big are + * the UBIFS nodes GC deals with. Large nodes make GC waste more space. Indeed, + * if GC moves data from LEB A to LEB B and nodes in LEB A are large, GC would + * have to waste large pieces of free space at the end of LEB B, because nodes + * from LEB A would not fit. And the worst situation is when all nodes are of + * maximum size. So dark watermark is the amount of free + dirty space in LEB + * which are guaranteed to be reclaimable. If LEB has less space, the GC might + * be unable to reclaim it. So, LEBs with free + dirty greater than dark + * watermark are "good" LEBs from GC's point of view.
The other LEBs are not so + * good, and GC takes extra care when moving them. */ #include <linux/pagemap.h> #include "ubifs.h" /* - * GC tries to optimize the way it fit nodes to available space, and it sorts - * nodes a little. The below constants are watermarks which define "large", - * "medium", and "small" nodes. - */ -#define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4) -#define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ - -/* * GC may need to move more than one LEB to make progress. The below constants * define "soft" and "hard" limits on the number of LEBs the garbage collector * may move. @@ -96,83 +108,222 @@ static int switch_gc_head(struct ubifs_info *c) } /** - * joinup - bring data nodes for an inode together. - * @c: UBIFS file-system description object - * @sleb: describes scanned LEB - * @inum: inode number - * @blk: block number - * @data: list to which to add data nodes + * list_sort - sort a list. + * @priv: private data, passed to @cmp + * @head: the list to sort + * @cmp: the elements comparison function + * + * This function has been implemented by Mark J Roberts <mjr@znex.org>. It + * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted + * in ascending order. * - * This function looks at the first few nodes in the scanned LEB @sleb and adds - * them to @data if they are data nodes from @inum and have a larger block - * number than @blk. This function returns %0 on success and a negative error - * code on failure. + * The comparison function @cmp is supposed to return a negative value if @a is + * less than @b, and a positive value if @a is greater than @b. If @a and @b are + * equivalent, then it does not matter what this function returns.
*/ -static int joinup(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ino_t inum, - unsigned int blk, struct list_head *data) +static void list_sort(void *priv, struct list_head *head, + int (*cmp)(void *priv, struct list_head *a, + struct list_head *b)) { - int err, cnt = 6, lnum = sleb->lnum, offs; - struct ubifs_scan_node *snod, *tmp; - union ubifs_key *key; + struct list_head *p, *q, *e, *list, *tail, *oldhead; + int insize, nmerges, psize, qsize, i; + + if (list_empty(head)) + return; + + list = head->next; + list_del(head); + insize = 1; + for (;;) { + p = oldhead = list; + list = tail = NULL; + nmerges = 0; + + while (p) { + nmerges++; + q = p; + psize = 0; + for (i = 0; i < insize; i++) { + psize++; + q = q->next == oldhead ? NULL : q->next; + if (!q) + break; + } - list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { - key = &snod->key; - if (key_inum(c, key) == inum && - key_type(c, key) == UBIFS_DATA_KEY && - key_block(c, key) > blk) { - offs = snod->offs; - err = ubifs_tnc_has_node(c, key, 0, lnum, offs, 0); - if (err < 0) - return err; - list_del(&snod->list); - if (err) { - list_add_tail(&snod->list, data); - blk = key_block(c, key); - } else - kfree(snod); - cnt = 6; - } else if (--cnt == 0) + qsize = insize; + while (psize > 0 || (qsize > 0 && q)) { + if (!psize) { + e = q; + q = q->next; + qsize--; + if (q == oldhead) + q = NULL; + } else if (!qsize || !q) { + e = p; + p = p->next; + psize--; + if (p == oldhead) + p = NULL; + } else if (cmp(priv, p, q) <= 0) { + e = p; + p = p->next; + psize--; + if (p == oldhead) + p = NULL; + } else { + e = q; + q = q->next; + qsize--; + if (q == oldhead) + q = NULL; + } + if (tail) + tail->next = e; + else + list = e; + e->prev = tail; + tail = e; + } + p = q; + } + + tail->next = list; + list->prev = tail; + + if (nmerges <= 1) break; + + insize *= 2; } - return 0; + + head->next = list; + head->prev = list->prev; + list->prev->next = head; + list->prev = head; } /** - * move_nodes - move nodes. 
+ * data_nodes_cmp - compare 2 data nodes. + * @priv: UBIFS file-system description object + * @a: first data node + * @b: second data node + * + * This function compares data nodes @a and @b. Returns %1 if @a has greater + * inode or block number, and %-1 otherwise. + */ +int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + ino_t inuma, inumb; + struct ubifs_info *c = priv; + struct ubifs_scan_node *sa, *sb; + + cond_resched(); + sa = list_entry(a, struct ubifs_scan_node, list); + sb = list_entry(b, struct ubifs_scan_node, list); + ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY); + ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY); + + inuma = key_inum(c, &sa->key); + inumb = key_inum(c, &sb->key); + + if (inuma == inumb) { + unsigned int blka = key_block(c, &sa->key); + unsigned int blkb = key_block(c, &sb->key); + + if (blka <= blkb) + return -1; + } else if (inuma <= inumb) + return -1; + + return 1; +} + +/* + * nondata_nodes_cmp - compare 2 non-data nodes. + * @priv: UBIFS file-system description object + * @a: first node + * @b: second node + * + * This function compares nodes @a and @b. It makes sure that inode nodes go + * first and sorted by length in descending order. Directory entry nodes go + * after inode nodes and are sorted in ascending hash value order.
+ */ +int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + int typea, typeb; + ino_t inuma, inumb; + struct ubifs_info *c = priv; + struct ubifs_scan_node *sa, *sb; + + cond_resched(); + sa = list_entry(a, struct ubifs_scan_node, list); + sb = list_entry(b, struct ubifs_scan_node, list); + typea = key_type(c, &sa->key); + typeb = key_type(c, &sb->key); + ubifs_assert(typea != UBIFS_DATA_KEY && typeb != UBIFS_DATA_KEY); + + /* Inodes go before directory entries */ + if (typea == UBIFS_INO_KEY) { + if (typeb == UBIFS_INO_KEY) + return sb->len - sa->len; + return -1; + } + if (typeb == UBIFS_INO_KEY) + return 1; + + ubifs_assert(typea == UBIFS_DENT_KEY && typeb == UBIFS_DENT_KEY); + inuma = key_inum(c, &sa->key); + inumb = key_inum(c, &sb->key); + + if (inuma == inumb) { + uint32_t hasha = key_hash(c, &sa->key); + uint32_t hashb = key_hash(c, &sb->key); + + if (hasha <= hashb) + return -1; + } else if (inuma <= inumb) + return -1; + + return 1; +} + +/** + * sort_nodes - sort nodes for GC. * @c: UBIFS file-system description object - * @sleb: describes nodes to move + * @sleb: describes nodes to sort and contains the result on exit + * @nondata: contains non-data nodes on exit + * @min: minimum node size is returned here * - * This function moves valid nodes from data LEB described by @sleb to the GC - * journal head. The obsolete nodes are dropped. + * This function sorts the list of inodes to garbage collect. First of all, it + * kills obsolete nodes and separates data and non-data nodes to the + * @sleb->nodes and @nondata lists correspondingly. 
+ * + * Data nodes are then sorted in block number order - this is important for + * bulk-read; data nodes with lower inode number go before data nodes with + * higher inode number, and data nodes with lower block number go before data + * nodes with higher block number; * - * When moving nodes we have to deal with classical bin-packing problem: the - * space in the current GC journal head LEB and in @c->gc_lnum are the "bins", - * where the nodes in the @sleb->nodes list are the elements which should be - * fit optimally to the bins. This function uses the "first fit decreasing" - * strategy, although it does not really sort the nodes but just split them on - * 3 classes - large, medium, and small, so they are roughly sorted. + * Non-data nodes are sorted as follows. + * o First go inode nodes - they are sorted in descending length order. + * o Then go directory entry nodes - they are sorted in hash order, which + * should supposedly optimize 'readdir()'. Direntry nodes with lower parent + * inode number go before direntry nodes with higher parent inode number, + * and direntry nodes with lower name hash values go before direntry nodes + * with higher name hash values. * - * This function returns zero in case of success, %-EAGAIN if commit is - * required, and other negative error codes in case of other failures. + * This function returns zero in case of success and a negative error code in + * case of failure. 
*/ -static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) +static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb, + struct list_head *nondata, int *min) { struct ubifs_scan_node *snod, *tmp; - struct list_head data, large, medium, small; - struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; - int avail, err, min = INT_MAX; - unsigned int blk = 0; - ino_t inum = 0; - INIT_LIST_HEAD(&data); - INIT_LIST_HEAD(&large); - INIT_LIST_HEAD(&medium); - INIT_LIST_HEAD(&small); + *min = INT_MAX; - while (!list_empty(&sleb->nodes)) { - struct list_head *lst = sleb->nodes.next; - - snod = list_entry(lst, struct ubifs_scan_node, list); + /* Separate data nodes and non-data nodes */ + list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { + int err; ubifs_assert(snod->type != UBIFS_IDX_NODE); ubifs_assert(snod->type != UBIFS_REF_NODE); @@ -181,53 +332,72 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum, snod->offs, 0); if (err < 0) - goto out; + return err; - list_del(lst); if (!err) { /* The node is obsolete, remove it from the list */ + list_del(&snod->list); kfree(snod); continue; } - /* - * Sort the list of nodes so that data nodes go first, large - * nodes go second, and small nodes go last. - */ - if (key_type(c, &snod->key) == UBIFS_DATA_KEY) { - if (inum != key_inum(c, &snod->key)) { - if (inum) { - /* - * Try to move data nodes from the same - * inode together. 
- */ - err = joinup(c, sleb, inum, blk, &data); - if (err) - goto out; - } - inum = key_inum(c, &snod->key); - blk = key_block(c, &snod->key); - } - list_add_tail(lst, &data); - } else if (snod->len > MEDIUM_NODE_WM) - list_add_tail(lst, &large); - else if (snod->len > SMALL_NODE_WM) - list_add_tail(lst, &medium); - else - list_add_tail(lst, &small); - - /* And find the smallest node */ - if (snod->len < min) - min = snod->len; + if (snod->len < *min) + *min = snod->len; + + if (key_type(c, &snod->key) != UBIFS_DATA_KEY) + list_move_tail(&snod->list, nondata); } - /* - * Join the tree lists so that we'd have one roughly sorted list - * ('large' will be the head of the joined list). - */ - list_splice(&data, &large); - list_splice(&medium, large.prev); - list_splice(&small, large.prev); + /* Sort data and non-data nodes */ + list_sort(c, &sleb->nodes, &data_nodes_cmp); + list_sort(c, nondata, &nondata_nodes_cmp); + return 0; +} + +/** + * move_node - move a node. + * @c: UBIFS file-system description object + * @sleb: describes the LEB to move nodes from + * @snod: the node to move + * @wbuf: write-buffer to move node to + * + * This function moves node @snod to @wbuf, changes TNC correspondingly, and + * destroys @snod. Returns zero in case of success and a negative error code in + * case of failure. + */ +static int move_node(struct ubifs_info *c, struct ubifs_scan_leb *sleb, + struct ubifs_scan_node *snod, struct ubifs_wbuf *wbuf) +{ + int err, new_lnum = wbuf->lnum, new_offs = wbuf->offs + wbuf->used; + + cond_resched(); + err = ubifs_wbuf_write_nolock(wbuf, snod->node, snod->len); + if (err) + return err; + + err = ubifs_tnc_replace(c, &snod->key, sleb->lnum, + snod->offs, new_lnum, new_offs, + snod->len); + list_del(&snod->list); + kfree(snod); + return err; +} + +/** + * move_nodes - move nodes.
+ * @c: UBIFS file-system description object + * @sleb: describes the LEB to move nodes from + * + * This function moves valid nodes from data LEB described by @sleb to the GC + * journal head. This function returns zero in case of success, %-EAGAIN if + * commit is required, and other negative error codes in case of other + * failures. + */ +static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) +{ + int err, min; + LIST_HEAD(nondata); + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; if (wbuf->lnum == -1) { /* @@ -236,42 +406,59 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) */ err = switch_gc_head(c); if (err) - goto out; + return err; } + err = sort_nodes(c, sleb, &nondata, &min); + if (err) + goto out; + /* Write nodes to their new location. Use the first-fit strategy */ while (1) { - avail = c->leb_size - wbuf->offs - wbuf->used; - list_for_each_entry_safe(snod, tmp, &large, list) { - int new_lnum, new_offs; + int avail; + struct ubifs_scan_node *snod, *tmp; + + /* Move data nodes */ + list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { + avail = c->leb_size - wbuf->offs - wbuf->used; + if (snod->len > avail) + /* + * Do not skip data nodes in order to optimize + * bulk-read. + */ + break; + + err = move_node(c, sleb, snod, wbuf); + if (err) + goto out; + } + /* Move non-data nodes */ + list_for_each_entry_safe(snod, tmp, &nondata, list) { + avail = c->leb_size - wbuf->offs - wbuf->used; if (avail < min) break; - if (snod->len > avail) - /* This node does not fit */ + if (snod->len > avail) { + /* + * Keep going only if this is an inode with + * some data. Otherwise stop and switch the GC + * head. IOW, we assume that data-less inode + * nodes and direntry nodes are roughly of the + * same size. 
+ */ + if (key_type(c, &snod->key) == UBIFS_DENT_KEY || + snod->len == UBIFS_INO_NODE_SZ) + break; continue; + } - cond_resched(); - - new_lnum = wbuf->lnum; - new_offs = wbuf->offs + wbuf->used; - err = ubifs_wbuf_write_nolock(wbuf, snod->node, - snod->len); - if (err) - goto out; - err = ubifs_tnc_replace(c, &snod->key, sleb->lnum, - snod->offs, new_lnum, new_offs, - snod->len); + err = move_node(c, sleb, snod, wbuf); if (err) goto out; - - avail = c->leb_size - wbuf->offs - wbuf->used; - list_del(&snod->list); - kfree(snod); } - if (list_empty(&large)) + if (list_empty(&sleb->nodes) && list_empty(&nondata)) break; /* @@ -286,10 +473,7 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) return 0; out: - list_for_each_entry_safe(snod, tmp, &large, list) { - list_del(&snod->list); - kfree(snod); - } + list_splice_tail(&nondata, &sleb->nodes); return err; } @@ -381,7 +565,7 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) /* * Don't release the LEB until after the next commit, because - * it may contain date which is needed for recovery. So + * it may contain data which is needed for recovery. So * although we freed this LEB, it will become usable only after * the commit. */ @@ -810,8 +994,9 @@ out: * ubifs_destroy_idx_gc - destroy idx_gc list. * @c: UBIFS file-system description object * - * This function destroys the idx_gc list. It is called when unmounting or - * remounting read-only so locks are not needed. + * This function destroys the @c->idx_gc list. It is called when unmounting + * so locks are not needed. Returns zero in case of success and a negative + * error code in case of failure. 
*/ void ubifs_destroy_idx_gc(struct ubifs_info *c) { @@ -824,7 +1009,6 @@ void ubifs_destroy_idx_gc(struct ubifs_info *c) list_del(&idx_gc->list); kfree(idx_gc); } - } /** diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 01682713af6..e8e632a1dcd 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -29,7 +29,7 @@ * would have been wasted for padding to the nearest minimal I/O unit boundary. * Instead, data first goes to the write-buffer and is flushed when the * buffer is full or when it is not used for some time (by timer). This is - * similarto the mechanism is used by JFFS2. + * similar to the mechanism is used by JFFS2. * * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by * mutexes defined inside these objects. Since sometimes upper-level code @@ -75,7 +75,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) * @lnum: logical eraseblock number * @offs: offset within the logical eraseblock * @quiet: print no messages - * @chk_crc: indicates whether to always check the CRC + * @must_chk_crc: indicates whether to always check the CRC * * This function checks node magic number and CRC checksum. This function also * validates node length to prevent UBIFS from becoming crazy when an attacker @@ -83,11 +83,17 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) * node length in the common header could cause UBIFS to read memory outside of * allocated buffer when checking the CRC checksum. * - * This function returns zero in case of success %-EUCLEAN in case of bad CRC - * or magic. + * This function may skip data nodes CRC checking if @c->no_chk_data_crc is + * true, which is controlled by corresponding UBIFS mount option. However, if + * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is + * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is + * ignored and CRC is checked. + * + * This function returns zero in case of success and %-EUCLEAN in case of bad + * CRC or magic. 
*/ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, - int offs, int quiet, int chk_crc) + int offs, int quiet, int must_chk_crc) { int err = -EINVAL, type, node_len; uint32_t crc, node_crc, magic; @@ -123,9 +129,9 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, node_len > c->ranges[type].max_len) goto out_len; - if (!chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc) - if (c->no_chk_data_crc) - return 0; + if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc && + c->no_chk_data_crc) + return 0; crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); node_crc = le32_to_cpu(ch->crc); diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 9b7c54e0cd2..64b5f3a309f 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -114,7 +114,7 @@ static inline void zero_trun_node_unused(struct ubifs_trun_node *trun) */ static int reserve_space(struct ubifs_info *c, int jhead, int len) { - int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze; + int err = 0, err1, retries = 0, avail, lnum, offs, squeeze; struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; /* @@ -139,10 +139,9 @@ again: * Write buffer wasn't seek'ed or there is no enough space - look for an * LEB with some empty space. */ - lnum = ubifs_find_free_space(c, len, &free, squeeze); + lnum = ubifs_find_free_space(c, len, &offs, squeeze); if (lnum >= 0) { /* Found an LEB, add it to the journal head */ - offs = c->leb_size - free; err = ubifs_add_bud_to_log(c, jhead, lnum, offs); if (err) goto out_return; @@ -208,7 +207,7 @@ again: offs = 0; out: - err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, UBI_SHORTTERM); + err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype); if (err) goto out_unlock; @@ -1366,7 +1365,7 @@ out_ro: * @host: host inode * * This function writes the updated version of an extended attribute inode and - * the host inode tho the journal (to the base head). 
The host inode is written + * the host inode to the journal (to the base head). The host inode is written * after the extended attribute inode in order to guarantee that the extended * attribute will be flushed when the inode is synchronized by 'fsync()' and * consequently, the write-buffer is synchronized. This function returns zero diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index efb3430a258..5fa27ea031b 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h @@ -381,8 +381,8 @@ static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k) * @c: UBIFS file-system description object * @key: the key to get hash from */ -static inline int key_hash(const struct ubifs_info *c, - const union ubifs_key *key) +static inline uint32_t key_hash(const struct ubifs_info *c, + const union ubifs_key *key) { return key->u32[1] & UBIFS_S_KEY_HASH_MASK; } @@ -392,7 +392,7 @@ static inline int key_hash(const struct ubifs_info *c, * @c: UBIFS file-system description object * @k: the key to get hash from */ -static inline int key_hash_flash(const struct ubifs_info *c, const void *k) +static inline uint32_t key_hash_flash(const struct ubifs_info *c, const void *k) { const union ubifs_key *key = k; diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 3e0aa736755..56e33772a1e 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -239,7 +239,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) } /* - * Make sure the the amount of space in buds will not exceed + * Make sure the amount of space in buds will not exceed the * 'c->max_bud_bytes' limit, because we want to guarantee mount time * limits. 
* @@ -367,7 +367,6 @@ static void remove_buds(struct ubifs_info *c) bud->jhead, c->leb_size - bud->start, c->cmt_bud_bytes); rb_erase(p1, &c->buds); - list_del(&bud->list); /* * If the commit does not finish, the recovery will need * to replay the journal, in which case the old buds @@ -375,7 +374,7 @@ static void remove_buds(struct ubifs_info *c) * commit i.e. do not allow them to be garbage * collected. */ - list_add(&bud->list, &c->old_buds); + list_move(&bud->list, &c->old_buds); } } spin_unlock(&c->buds_lock); diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index dfd2bcece27..4cdd284dea5 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -635,10 +635,10 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, * @c: UBIFS file-system description object * @st: return statistics */ -void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *st) +void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst) { spin_lock(&c->space_lock); - memcpy(st, &c->lst, sizeof(struct ubifs_lp_stats)); + memcpy(lst, &c->lst, sizeof(struct ubifs_lp_stats)); spin_unlock(&c->space_lock); } @@ -678,6 +678,9 @@ int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, out: ubifs_release_lprops(c); + if (err) + ubifs_err("cannot change properties of LEB %d, error %d", + lnum, err); return err; } @@ -714,6 +717,9 @@ int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, out: ubifs_release_lprops(c); + if (err) + ubifs_err("cannot update properties of LEB %d, error %d", + lnum, err); return err; } @@ -737,6 +743,8 @@ int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp) lpp = ubifs_lpt_lookup(c, lnum); if (IS_ERR(lpp)) { err = PTR_ERR(lpp); + ubifs_err("cannot read properties of LEB %d, error %d", + lnum, err); goto out; } diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 96ca9570717..8cbfb824802 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ 
-229,7 +229,7 @@ static int layout_cnodes(struct ubifs_info *c) while (offs + len > c->leb_size) { alen = ALIGN(offs, c->min_io_size); upd_ltab(c, lnum, c->leb_size - alen, alen - offs); - dbg_chk_lpt_sz(c, 2, alen - offs); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); err = alloc_lpt_leb(c, &lnum); if (err) goto no_space; @@ -272,7 +272,7 @@ static int layout_cnodes(struct ubifs_info *c) if (offs + c->lsave_sz > c->leb_size) { alen = ALIGN(offs, c->min_io_size); upd_ltab(c, lnum, c->leb_size - alen, alen - offs); - dbg_chk_lpt_sz(c, 2, alen - offs); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); err = alloc_lpt_leb(c, &lnum); if (err) goto no_space; @@ -292,7 +292,7 @@ static int layout_cnodes(struct ubifs_info *c) if (offs + c->ltab_sz > c->leb_size) { alen = ALIGN(offs, c->min_io_size); upd_ltab(c, lnum, c->leb_size - alen, alen - offs); - dbg_chk_lpt_sz(c, 2, alen - offs); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); err = alloc_lpt_leb(c, &lnum); if (err) goto no_space; @@ -416,14 +416,12 @@ static int write_cnodes(struct ubifs_info *c) alen, UBI_SHORTTERM); if (err) return err; - dbg_chk_lpt_sz(c, 4, alen - wlen); } - dbg_chk_lpt_sz(c, 2, 0); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); err = realloc_lpt_leb(c, &lnum); if (err) goto no_space; - offs = 0; - from = 0; + offs = from = 0; ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); err = ubifs_leb_unmap(c, lnum); @@ -477,11 +475,11 @@ static int write_cnodes(struct ubifs_info *c) UBI_SHORTTERM); if (err) return err; - dbg_chk_lpt_sz(c, 2, alen - wlen); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); err = realloc_lpt_leb(c, &lnum); if (err) goto no_space; - offs = 0; + offs = from = 0; ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); err = ubifs_leb_unmap(c, lnum); @@ -504,11 +502,11 @@ static int write_cnodes(struct ubifs_info *c) UBI_SHORTTERM); if (err) return err; - dbg_chk_lpt_sz(c, 2, alen - wlen); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); err = realloc_lpt_leb(c, &lnum); if (err) goto 
no_space; - offs = 0; + offs = from = 0; ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); err = ubifs_leb_unmap(c, lnum); @@ -556,23 +554,23 @@ no_space: } /** - * next_pnode - find next pnode. + * next_pnode_to_dirty - find next pnode to dirty. * @c: UBIFS file-system description object * @pnode: pnode * - * This function returns the next pnode or %NULL if there are no more pnodes. + * This function returns the next pnode to dirty or %NULL if there are no more + * pnodes. Note that pnodes that have never been written (lnum == 0) are + * skipped. */ -static struct ubifs_pnode *next_pnode(struct ubifs_info *c, - struct ubifs_pnode *pnode) +static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c, + struct ubifs_pnode *pnode) { struct ubifs_nnode *nnode; int iip; /* Try to go right */ nnode = pnode->parent; - iip = pnode->iip + 1; - if (iip < UBIFS_LPT_FANOUT) { - /* We assume here that LEB zero is never an LPT LEB */ + for (iip = pnode->iip + 1; iip < UBIFS_LPT_FANOUT; iip++) { if (nnode->nbranch[iip].lnum) return ubifs_get_pnode(c, nnode, iip); } @@ -583,8 +581,11 @@ static struct ubifs_pnode *next_pnode(struct ubifs_info *c, nnode = nnode->parent; if (!nnode) return NULL; - /* We assume here that LEB zero is never an LPT LEB */ - } while (iip >= UBIFS_LPT_FANOUT || !nnode->nbranch[iip].lnum); + for (; iip < UBIFS_LPT_FANOUT; iip++) { + if (nnode->nbranch[iip].lnum) + break; + } + } while (iip >= UBIFS_LPT_FANOUT); /* Go right */ nnode = ubifs_get_nnode(c, nnode, iip); @@ -593,12 +594,29 @@ static struct ubifs_pnode *next_pnode(struct ubifs_info *c, /* Go down to level 1 */ while (nnode->level > 1) { - nnode = ubifs_get_nnode(c, nnode, 0); + for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++) { + if (nnode->nbranch[iip].lnum) + break; + } + if (iip >= UBIFS_LPT_FANOUT) { + /* + * Should not happen, but we need to keep going + * if it does. 
+ */ + iip = 0; + } + nnode = ubifs_get_nnode(c, nnode, iip); if (IS_ERR(nnode)) return (void *)nnode; } - return ubifs_get_pnode(c, nnode, 0); + for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++) + if (nnode->nbranch[iip].lnum) + break; + if (iip >= UBIFS_LPT_FANOUT) + /* Should not happen, but we need to keep going if it does */ + iip = 0; + return ubifs_get_pnode(c, nnode, iip); } /** @@ -688,7 +706,7 @@ static int make_tree_dirty(struct ubifs_info *c) pnode = pnode_lookup(c, 0); while (pnode) { do_make_pnode_dirty(c, pnode); - pnode = next_pnode(c, pnode); + pnode = next_pnode_to_dirty(c, pnode); if (IS_ERR(pnode)) return PTR_ERR(pnode); } @@ -1736,10 +1754,16 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) /** * dbg_chk_lpt_sz - check LPT does not write more than LPT size. * @c: the UBIFS file-system description object - * @action: action + * @action: what to do * @len: length written * * This function returns %0 on success and a negative error code on failure. + * The @action argument may be one of: + * o %0 - LPT debugging checking starts, initialize debugging variables; + * o %1 - wrote an LPT node, increase LPT size by @len bytes; + * o %2 - switched to a different LEB and wasted @len bytes; + * o %3 - check that we've written the right number of bytes. 
+ * o %4 - wasted @len bytes; */ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) { @@ -1897,12 +1921,12 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) lnum, offs); err = ubifs_unpack_nnode(c, buf, &nnode); for (i = 0; i < UBIFS_LPT_FANOUT; i++) { - printk("%d:%d", nnode.nbranch[i].lnum, + printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum, nnode.nbranch[i].offs); if (i != UBIFS_LPT_FANOUT - 1) - printk(", "); + printk(KERN_CONT ", "); } - printk("\n"); + printk(KERN_CONT "\n"); break; } case UBIFS_LPT_LTAB: diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index 71d5493bf56..a88f33801b9 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c @@ -354,7 +354,7 @@ int ubifs_write_master(struct ubifs_info *c) int err, lnum, offs, len; if (c->ro_media) - return -EINVAL; + return -EROFS; lnum = UBIFS_MST_LNUM; offs = c->mst_offs + c->mst_node_alsz; diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 9e6f403f170..152a7b34a14 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -46,7 +46,7 @@ * Orphans are accumulated in a rb-tree. When an inode's link count drops to * zero, the inode number is added to the rb-tree. It is removed from the tree * when the inode is deleted. Any new orphans that are in the orphan tree when - * the commit is run, are written to the orphan area in 1 or more orph nodes. + * the commit is run, are written to the orphan area in 1 or more orphan nodes. * If the orphan area is full, it is consolidated to make space. There is * always enough space because validation prevents the user from creating more * than the maximum number of orphans allowed. @@ -231,7 +231,7 @@ static int tot_avail_orphs(struct ubifs_info *c) } /** - * do_write_orph_node - write a node + * do_write_orph_node - write a node to the orphan head. 
* @c: UBIFS file-system description object * @len: length of node * @atomic: write atomically @@ -264,11 +264,11 @@ static int do_write_orph_node(struct ubifs_info *c, int len, int atomic) } /** - * write_orph_node - write an orph node + * write_orph_node - write an orphan node. * @c: UBIFS file-system description object * @atomic: write atomically * - * This function builds an orph node from the cnext list and writes it to the + * This function builds an orphan node from the cnext list and writes it to the * orphan head. On success, %0 is returned, otherwise a negative error code * is returned. */ @@ -326,11 +326,11 @@ static int write_orph_node(struct ubifs_info *c, int atomic) } /** - * write_orph_nodes - write orph nodes until there are no more to commit + * write_orph_nodes - write orphan nodes until there are no more to commit. * @c: UBIFS file-system description object * @atomic: write atomically * - * This function writes orph nodes for all the orphans to commit. On success, + * This function writes orphan nodes for all the orphans to commit. On success, * %0 is returned, otherwise a negative error code is returned. */ static int write_orph_nodes(struct ubifs_info *c, int atomic) @@ -478,14 +478,14 @@ int ubifs_orphan_end_commit(struct ubifs_info *c) } /** - * clear_orphans - erase all LEBs used for orphans. + * ubifs_clear_orphans - erase all LEBs used for orphans. * @c: UBIFS file-system description object * * If recovery is not required, then the orphans from the previous session * are not needed. This function locates the LEBs used to record * orphans, and un-maps them. */ -static int clear_orphans(struct ubifs_info *c) +int ubifs_clear_orphans(struct ubifs_info *c) { int lnum, err; @@ -547,9 +547,9 @@ static int insert_dead_orphan(struct ubifs_info *c, ino_t inum) * do_kill_orphans - remove orphan inodes from the index. 
* @c: UBIFS file-system description object * @sleb: scanned LEB - * @last_cmt_no: cmt_no of last orph node read is passed and returned here + * @last_cmt_no: cmt_no of last orphan node read is passed and returned here * @outofdate: whether the LEB is out of date is returned here - * @last_flagged: whether the end orph node is encountered + * @last_flagged: whether the end orphan node is encountered * * This function is a helper to the 'kill_orphans()' function. It goes through * every orphan node in a LEB and for every inode number recorded, removes @@ -580,8 +580,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, /* * The commit number on the master node may be less, because * of a failed commit. If there are several failed commits in a - * row, the commit number written on orph nodes will continue to - * increase (because the commit number is adjusted here) even + * row, the commit number written on orphan nodes will continue + * to increase (because the commit number is adjusted here) even * though the commit number on the master node stays the same * because the master node has not been re-written. */ @@ -589,9 +589,9 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, c->cmt_no = cmt_no; if (cmt_no < *last_cmt_no && *last_flagged) { /* - * The last orph node had a higher commit number and was - * flagged as the last written for that commit number. - * That makes this orph node, out of date. + * The last orphan node had a higher commit number and + * was flagged as the last written for that commit + * number. That makes this orphan node, out of date. */ if (!first) { ubifs_err("out of order commit number %llu in " @@ -658,10 +658,10 @@ static int kill_orphans(struct ubifs_info *c) /* * Orph nodes always start at c->orph_first and are written to each * successive LEB in turn. Generally unused LEBs will have been unmapped - * but may contain out of date orph nodes if the unmap didn't go - * through. 
In addition, the last orph node written for each commit is + * but may contain out of date orphan nodes if the unmap didn't go + * through. In addition, the last orphan node written for each commit is * marked (top bit of orph->cmt_no is set to 1). It is possible that - * there are orph nodes from the next commit (i.e. the commit did not + * there are orphan nodes from the next commit (i.e. the commit did not * complete successfully). In that case, no orphans will have been lost * due to the way that orphans are written, and any orphans added will * be valid orphans anyway and so can be deleted. @@ -718,7 +718,7 @@ int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only) if (unclean) err = kill_orphans(c); else if (!read_only) - err = clear_orphans(c); + err = ubifs_clear_orphans(c); return err; } diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 90acac603e6..10662975d2e 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -425,59 +425,35 @@ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, * @lnum: LEB number of the LEB from which @buf was read * @offs: offset from which @buf was read * - * This function scans @buf for more nodes and returns %0 is a node is found and - * %1 if no more nodes are found. + * This function ensures that the corrupted node at @offs is the last thing + * written to a LEB. This function returns %1 if more data is not found and + * %0 if more data is found. 
*/ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, int lnum, int offs) { - int skip, next_offs = 0; + struct ubifs_ch *ch = buf; + int skip, dlen = le32_to_cpu(ch->len); - if (len > UBIFS_DATA_NODE_SZ) { - struct ubifs_ch *ch = buf; - int dlen = le32_to_cpu(ch->len); - - if (ch->node_type == UBIFS_DATA_NODE && dlen >= UBIFS_CH_SZ && - dlen <= UBIFS_MAX_DATA_NODE_SZ) - /* The corrupt node looks like a data node */ - next_offs = ALIGN(offs + dlen, 8); - } - - if (c->min_io_size == 1) - skip = 8; - else - skip = ALIGN(offs + 1, c->min_io_size) - offs; - - offs += skip; - buf += skip; - len -= skip; - while (len > 8) { - struct ubifs_ch *ch = buf; - uint32_t magic = le32_to_cpu(ch->magic); - int ret; - - if (magic == UBIFS_NODE_MAGIC) { - ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); - if (ret == SCANNED_A_NODE || ret > 0) { - /* - * There is a small chance this is just data in - * a data node, so check that possibility. e.g. - * this is part of a file that itself contains - * a UBIFS image. - */ - if (next_offs && offs + le32_to_cpu(ch->len) <= - next_offs) - continue; - dbg_rcvry("unexpected node at %d:%d", lnum, - offs); - return 0; - } - } - offs += 8; - buf += 8; - len -= 8; + /* Check for empty space after the corrupt node's common header */ + skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs; + if (is_empty(buf + skip, len - skip)) + return 1; + /* + * The area after the common header size is not empty, so the common + * header must be intact. Check it. 
+ */ + if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) { + dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs); + return 0; } - return 1; + /* Now we know the corrupt node's length we can skip over it */ + skip = ALIGN(offs + dlen, c->min_io_size) - offs; + /* After which there should be empty space */ + if (is_empty(buf + skip, len - skip)) + return 1; + dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip); + return 0; } /** diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index ce42a7b0ca5..11cc80125a4 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -143,7 +143,7 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) dirty -= c->leb_size - lp->free; /* * If the replay order was perfect the dirty space would now be - * zero. The order is not perfect because the the journal heads + * zero. The order is not perfect because the journal heads * race with each other. This is not a problem but is does mean * that the dirty space may temporarily exceed c->leb_size * during the replay. diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index e070c643d1b..57085e43320 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -193,6 +193,7 @@ static int create_default_filesystem(struct ubifs_info *c) if (tmp64 > DEFAULT_MAX_RP_SIZE) tmp64 = DEFAULT_MAX_RP_SIZE; sup->rp_size = cpu_to_le64(tmp64); + sup->ro_compat_version = cpu_to_le32(UBIFS_RO_COMPAT_VERSION); err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM); kfree(sup); @@ -532,17 +533,39 @@ int ubifs_read_superblock(struct ubifs_info *c) if (IS_ERR(sup)) return PTR_ERR(sup); + c->fmt_version = le32_to_cpu(sup->fmt_version); + c->ro_compat_version = le32_to_cpu(sup->ro_compat_version); + /* * The software supports all previous versions but not future versions, * due to the unavailability of time-travelling equipment. 
*/ - c->fmt_version = le32_to_cpu(sup->fmt_version); if (c->fmt_version > UBIFS_FORMAT_VERSION) { - ubifs_err("on-flash format version is %d, but software only " - "supports up to version %d", c->fmt_version, - UBIFS_FORMAT_VERSION); - err = -EINVAL; - goto out; + struct super_block *sb = c->vfs_sb; + int mounting_ro = sb->s_flags & MS_RDONLY; + + ubifs_assert(!c->ro_media || mounting_ro); + if (!mounting_ro || + c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) { + ubifs_err("on-flash format version is w%d/r%d, but " + "software only supports up to version " + "w%d/r%d", c->fmt_version, + c->ro_compat_version, UBIFS_FORMAT_VERSION, + UBIFS_RO_COMPAT_VERSION); + if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) { + ubifs_msg("only R/O mounting is possible"); + err = -EROFS; + } else + err = -EINVAL; + goto out; + } + + /* + * The FS is mounted R/O, and the media format is + * R/O-compatible with the UBIFS implementation, so we can + * mount. + */ + c->rw_incompat = 1; } if (c->fmt_version < 3) { @@ -623,7 +646,6 @@ int ubifs_read_superblock(struct ubifs_info *c) c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS; c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs; c->main_first = c->leb_cnt - c->main_lebs; - c->report_rp_size = ubifs_reported_space(c, c->rp_size); err = validate_sb(c, sup); out: diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index e7bab52a141..02feb59cefc 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -206,8 +206,7 @@ static int shrink_tnc_trees(int nr, int age, int *contention) * Move this one to the end of the list to provide some * fairness. 
*/ - list_del(&c->infos_list); - list_add_tail(&c->infos_list, &ubifs_infos); + list_move_tail(&c->infos_list, &ubifs_infos); mutex_unlock(&c->umount_mutex); if (freed >= nr) break; @@ -263,8 +262,7 @@ static int kick_a_thread(void) } if (i == 1) { - list_del(&c->infos_list); - list_add_tail(&c->infos_list, &ubifs_infos); + list_move_tail(&c->infos_list, &ubifs_infos); spin_unlock(&ubifs_infos_lock); ubifs_request_bg_commit(c); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 89556ee7251..faa44f90608 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -397,6 +397,7 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_namelen = UBIFS_MAX_NLEN; buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]); buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]); + ubifs_assert(buf->f_bfree <= c->block_cnt); return 0; } @@ -420,8 +421,8 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt) seq_printf(s, ",no_chk_data_crc"); if (c->mount_opts.override_compr) { - seq_printf(s, ",compr="); - seq_printf(s, ubifs_compr_name(c->mount_opts.compr_type)); + seq_printf(s, ",compr=%s", + ubifs_compr_name(c->mount_opts.compr_type)); } return 0; @@ -432,33 +433,24 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) int i, err; struct ubifs_info *c = sb->s_fs_info; struct writeback_control wbc = { - .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, + .sync_mode = WB_SYNC_ALL, .range_start = 0, .range_end = LLONG_MAX, .nr_to_write = LONG_MAX, }; /* - * Note by akpm about WB_SYNC_NONE used above: zero @wait is just an - * advisory thing to help the file system shove lots of data into the - * queues. If some gets missed then it'll be picked up on the second + * Zero @wait is just an advisory thing to help the file system shove + * lots of data into the queues, and there will be the second * '->sync_fs()' call, with non-zero @wait. 
*/ + if (!wait) + return 0; if (sb->s_flags & MS_RDONLY) return 0; /* - * Synchronize write buffers, because 'ubifs_run_commit()' does not - * do this if it waits for an already running commit. - */ - for (i = 0; i < c->jhead_cnt; i++) { - err = ubifs_wbuf_sync(&c->jheads[i].wbuf); - if (err) - return err; - } - - /* * VFS calls '->sync_fs()' before synchronizing all dirty inodes and * pages, so synchronize them first, then commit the journal. Strictly * speaking, it is not necessary to commit the journal here, @@ -469,6 +461,16 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) */ generic_sync_sb_inodes(sb, &wbc); + /* + * Synchronize write buffers, because 'ubifs_run_commit()' does not + * do this if it waits for an already running commit. + */ + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + return err; + } + err = ubifs_run_commit(c); if (err) return err; @@ -572,15 +574,8 @@ static int init_constants_early(struct ubifs_info *c) c->ranges[UBIFS_IDX_NODE].max_len = INT_MAX; /* - * Initialize dead and dark LEB space watermarks. - * - * Dead space is the space which cannot be used. Its watermark is - * equivalent to min. I/O unit or minimum node size if it is greater - * then min. I/O unit. - * - * Dark space is the space which might be used, or might not, depending - * on which node should be written to the LEB. Its watermark is - * equivalent to maximum UBIFS node size. + * Initialize dead and dark LEB space watermarks. See gc.c for comments + * about these values. 
*/ c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); @@ -705,6 +700,8 @@ static int init_constants_sb(struct ubifs_info *c) if (err) return err; + /* Initialize effective LEB size used in budgeting calculations */ + c->idx_leb_size = c->leb_size - c->max_idx_node_sz; return 0; } @@ -721,6 +718,7 @@ static void init_constants_master(struct ubifs_info *c) long long tmp64; c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + c->report_rp_size = ubifs_reported_space(c, c->rp_size); /* * Calculate total amount of FS blocks. This number is not used @@ -741,12 +739,12 @@ static void init_constants_master(struct ubifs_info *c) * take_gc_lnum - reserve GC LEB. * @c: UBIFS file-system description object * - * This function ensures that the LEB reserved for garbage collection is - * unmapped and is marked as "taken" in lprops. We also have to set free space - * to LEB size and dirty space to zero, because lprops may contain out-of-date - * information if the file-system was un-mounted before it has been committed. - * This function returns zero in case of success and a negative error code in - * case of failure. + * This function ensures that the LEB reserved for garbage collection is marked + * as "taken" in lprops. We also have to set free space to LEB size and dirty + * space to zero, because lprops may contain out-of-date information if the + * file-system was un-mounted before it has been committed. This function + * returns zero in case of success and a negative error code in case of + * failure. 
*/ static int take_gc_lnum(struct ubifs_info *c) { @@ -757,10 +755,6 @@ static int take_gc_lnum(struct ubifs_info *c) return -EINVAL; } - err = ubifs_leb_unmap(c, c->gc_lnum); - if (err) - return err; - /* And we have to tell lprops that this LEB is taken */ err = ubifs_change_one_lp(c, c->gc_lnum, c->leb_size, 0, LPROPS_TAKEN, 0, 0); @@ -966,13 +960,16 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, token = match_token(p, tokens, args); switch (token) { + /* + * %Opt_fast_unmount and %Opt_norm_unmount options are ignored. + * We accept them in order to be backward-compatible. But this + * should be removed at some point. + */ case Opt_fast_unmount: c->mount_opts.unmount_mode = 2; - c->fast_unmount = 1; break; case Opt_norm_unmount: c->mount_opts.unmount_mode = 1; - c->fast_unmount = 0; break; case Opt_bulk_read: c->mount_opts.bulk_read = 2; @@ -1094,12 +1091,7 @@ static int check_free_space(struct ubifs_info *c) ubifs_err("insufficient free space to mount in read/write mode"); dbg_dump_budg(c); dbg_dump_lprops(c); - /* - * We return %-EINVAL instead of %-ENOSPC because it seems to - * be the closest error code mentioned in the mount function - * documentation. - */ - return -EINVAL; + return -ENOSPC; } return 0; } @@ -1212,7 +1204,7 @@ static int mount_ubifs(struct ubifs_info *c) goto out_cbuf; /* Create background thread */ - c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); + c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); if (IS_ERR(c->bgt)) { err = PTR_ERR(c->bgt); c->bgt = NULL; @@ -1286,10 +1278,19 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_orphans; err = ubifs_rcvry_gc_commit(c); - } else + } else { err = take_gc_lnum(c); - if (err) - goto out_orphans; + if (err) + goto out_orphans; + + /* + * GC LEB may contain garbage if there was an unclean + * reboot, and it should be un-mapped.
+ */ + err = ubifs_leb_unmap(c, c->gc_lnum); + if (err) + return err; + } err = dbg_check_lprops(c); if (err) @@ -1298,6 +1299,16 @@ static int mount_ubifs(struct ubifs_info *c) err = ubifs_recover_size(c); if (err) goto out_orphans; + } else { + /* + * Even if we mount read-only, we have to set space in GC LEB + * to proper value because this affects UBIFS free space + * reporting. We do not want to have a situation when + * re-mounting from R/O to R/W changes amount of free space. + */ + err = take_gc_lnum(c); + if (err) + goto out_orphans; } spin_lock(&ubifs_infos_lock); @@ -1310,14 +1321,21 @@ static int mount_ubifs(struct ubifs_info *c) else { c->need_recovery = 0; ubifs_msg("recovery completed"); + /* + * GC LEB has to be empty and taken at this point. But + * the journal head LEBs may also be accounted as + * "empty taken" if they are empty. + */ + ubifs_assert(c->lst.taken_empty_lebs > 0); } - } + } else + ubifs_assert(c->lst.taken_empty_lebs > 0); - err = dbg_debugfs_init_fs(c); + err = dbg_check_filesystem(c); if (err) goto out_infos; - err = dbg_check_filesystem(c); + err = dbg_debugfs_init_fs(c); if (err) goto out_infos; @@ -1333,8 +1351,9 @@ static int mount_ubifs(struct ubifs_info *c) x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d " "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); - ubifs_msg("media format: %d (latest is %d)", - c->fmt_version, UBIFS_FORMAT_VERSION); + ubifs_msg("media format: w%d/r%d (latest is w%d/r%d)", + c->fmt_version, c->ro_compat_version, + UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); ubifs_msg("reserved for root: %llu bytes (%llu KiB)", c->report_rp_size, c->report_rp_size >> 10); @@ -1351,7 +1370,6 @@ static int mount_ubifs(struct ubifs_info *c) c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7], c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11], c->uuid[12], c->uuid[13], 
c->uuid[14], c->uuid[15]); - dbg_msg("fast unmount: %d", c->fast_unmount); dbg_msg("big_lpt %d", c->big_lpt); dbg_msg("log LEBs: %d (%d - %d)", c->log_lebs, UBIFS_LOG_LNUM, c->log_last); @@ -1475,10 +1493,17 @@ static int ubifs_remount_rw(struct ubifs_info *c) { int err, lnum; - if (c->ro_media) - return -EINVAL; + if (c->rw_incompat) { + ubifs_err("the file-system is not R/W-compatible"); + ubifs_msg("on-flash format version is w%d/r%d, but software " + "only supports up to version w%d/r%d", c->fmt_version, + c->ro_compat_version, UBIFS_FORMAT_VERSION, + UBIFS_RO_COMPAT_VERSION); + return -EROFS; + } mutex_lock(&c->umount_mutex); + dbg_save_space_info(c); c->remounting_rw = 1; c->always_chk_crc = 1; @@ -1514,6 +1539,12 @@ static int ubifs_remount_rw(struct ubifs_info *c) err = ubifs_recover_inl_heads(c, c->sbuf); if (err) goto out; + } else { + /* A readonly mount is not allowed to have orphans */ + ubifs_assert(c->tot_orphans == 0); + err = ubifs_clear_orphans(c); + if (err) + goto out; } if (!(c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY))) { @@ -1540,7 +1571,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) ubifs_create_buds_lists(c); /* Create background thread */ - c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); + c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); if (IS_ERR(c->bgt)) { err = PTR_ERR(c->bgt); c->bgt = NULL; @@ -1569,7 +1600,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) if (c->need_recovery) err = ubifs_rcvry_gc_commit(c); else - err = take_gc_lnum(c); + err = ubifs_leb_unmap(c, c->gc_lnum); if (err) goto out; @@ -1582,8 +1613,9 @@ static int ubifs_remount_rw(struct ubifs_info *c) c->vfs_sb->s_flags &= ~MS_RDONLY; c->remounting_rw = 0; c->always_chk_crc = 0; + err = dbg_check_space_info(c); mutex_unlock(&c->umount_mutex); - return 0; + return err; out: vfree(c->orph_buf); @@ -1603,43 +1635,18 @@ out: } /** - * commit_on_unmount - commit the journal when un-mounting. 
- * @c: UBIFS file-system description object - * - * This function is called during un-mounting and re-mounting, and it commits - * the journal unless the "fast unmount" mode is enabled. - */ -static void commit_on_unmount(struct ubifs_info *c) -{ - struct super_block *sb = c->vfs_sb; - long long bud_bytes; - - /* - * This function is called before the background thread is stopped, so - * we may race with ongoing commit, which means we have to take - * @c->bud_lock to access @c->bud_bytes. - */ - spin_lock(&c->buds_lock); - bud_bytes = c->bud_bytes; - spin_unlock(&c->buds_lock); - - if (!c->fast_unmount && !(sb->s_flags & MS_RDONLY) && bud_bytes) - ubifs_run_commit(c); -} - -/** * ubifs_remount_ro - re-mount in read-only mode. * @c: UBIFS file-system description object * - * We rely on VFS to have stopped writing. Possibly the background thread could - * be running a commit, however kthread_stop will wait in that case. + * We assume VFS has stopped writing. Possibly the background thread could be + * running a commit, however kthread_stop will wait in that case. 
*/ static void ubifs_remount_ro(struct ubifs_info *c) { int i, err; ubifs_assert(!c->need_recovery); - commit_on_unmount(c); + ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); mutex_lock(&c->umount_mutex); if (c->bgt) { @@ -1647,27 +1654,29 @@ static void ubifs_remount_ro(struct ubifs_info *c) c->bgt = NULL; } + dbg_save_space_info(c); + for (i = 0; i < c->jhead_cnt; i++) { ubifs_wbuf_sync(&c->jheads[i].wbuf); del_timer_sync(&c->jheads[i].wbuf.timer); } - if (!c->ro_media) { - c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); - c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); - c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); - err = ubifs_write_master(c); - if (err) - ubifs_ro_mode(c, err); - } + c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); + c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); + err = ubifs_write_master(c); + if (err) + ubifs_ro_mode(c, err); - ubifs_destroy_idx_gc(c); free_wbufs(c); vfree(c->orph_buf); c->orph_buf = NULL; vfree(c->ileb_buf); c->ileb_buf = NULL; ubifs_lpt_free(c, 1); + err = dbg_check_space_info(c); + if (err) + ubifs_ro_mode(c, err); mutex_unlock(&c->umount_mutex); } @@ -1760,11 +1769,20 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) } if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { + if (c->ro_media) { + ubifs_msg("cannot re-mount due to prior errors"); + return -EROFS; + } err = ubifs_remount_rw(c); if (err) return err; - } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) + } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { + if (c->ro_media) { + ubifs_msg("cannot re-mount due to prior errors"); + return -EROFS; + } ubifs_remount_ro(c); + } if (c->bulk_read == 1) bu_init(c); @@ -1774,10 +1792,11 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) c->bu.buf = NULL; } + ubifs_assert(c->lst.taken_empty_lebs > 0); return 0; } -struct super_operations 
ubifs_super_operations = { +const struct super_operations ubifs_super_operations = { .alloc_inode = ubifs_alloc_inode, .destroy_inode = ubifs_destroy_inode, .put_super = ubifs_put_super, @@ -2032,7 +2051,8 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags, /* 'fill_super()' opens ubi again so we must close it here */ ubi_close_volume(ubi); - return simple_set_mnt(mnt, sb); + simple_set_mnt(mnt, sb); + return 0; out_deact: up_write(&sb->s_umount); @@ -2044,15 +2064,6 @@ out_close: static void ubifs_kill_sb(struct super_block *sb) { - struct ubifs_info *c = sb->s_fs_info; - - /* - * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()' - * in order to be outside BKL. - */ - if (sb->s_root) - commit_on_unmount(c); - /* The un-mount routine is actually done in put_super() */ generic_shutdown_super(sb); } diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index f7e36f54552..f249f7b0d65 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -443,6 +443,11 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, * This function performs that same function as ubifs_read_node except that * it does not require that there is actually a node present and instead * the return code indicates if a node was read. + * + * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc + * is true (it is controlled by corresponding mount option). However, if + * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always + * checked. 
*/ static int try_read_node(const struct ubifs_info *c, void *buf, int type, int len, int lnum, int offs) @@ -470,9 +475,8 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, if (node_len != len) return 0; - if (type == UBIFS_DATA_NODE && !c->always_chk_crc) - if (c->no_chk_data_crc) - return 0; + if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc) + return 1; crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); node_crc = le32_to_cpu(ch->crc); @@ -1248,7 +1252,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, * splitting in the middle of the colliding sequence. Also, when * removing the leftmost key, we would have to correct the key of the * parent node, which would introduce additional complications. Namely, - * if we changed the the leftmost key of the parent znode, the garbage + * if we changed the leftmost key of the parent znode, the garbage * collector would be unable to find it (GC is doing this when GC'ing * indexing LEBs). Although we already have an additional RB-tree where * we save such changed znodes (see 'ins_clr_old_idx_znode()') until @@ -1506,7 +1510,7 @@ out: * * Note, if the bulk-read buffer length (@bu->buf_len) is known, this function * makes sure bulk-read nodes fit the buffer. Otherwise, this function prepares - * maxumum possible amount of nodes for bulk-read. + * maximum possible amount of nodes for bulk-read. */ int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu) { diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index b25fc36cf72..3eee07e0c49 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h @@ -36,9 +36,31 @@ /* UBIFS node magic number (must not have the padding byte first or last) */ #define UBIFS_NODE_MAGIC 0x06101831 -/* UBIFS on-flash format version */ +/* + * UBIFS on-flash format version. This version is increased when the on-flash + * format is changing. 
If this happens, UBIFS will support older versions as + * well. But older UBIFS code will not support newer formats. Format changes + * will be rare and only when absolutely necessary, e.g. to fix a bug or to add + * a new feature. + * + * UBIFS went into mainline kernel with format version 4. The older formats + * were development formats. + */ #define UBIFS_FORMAT_VERSION 4 +/* + * Read-only compatibility version. If the UBIFS format is changed, older UBIFS + * implementations will not be able to mount newer formats in read-write mode. + * However, depending on the change, it may be possible to mount newer formats + * in R/O mode. This is indicated by the R/O compatibility version which is + * stored in the super-block. + * + * This is needed to support boot-loaders which only need R/O mounting. With + * this flag it is possible to do UBIFS format changes without a need to update + * boot-loaders. + */ +#define UBIFS_RO_COMPAT_VERSION 0 + /* Minimum logical eraseblock size in bytes */ #define UBIFS_MIN_LEB_SZ (15*1024) @@ -53,7 +75,7 @@ /* * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes - * shorter than uncompressed data length, UBIFS preferes to leave this data + * shorter than uncompressed data length, UBIFS prefers to leave this data * node uncompress, because it'll be read faster.
*/ #define UBIFS_MIN_COMPRESS_DIFF 64 @@ -586,6 +608,7 @@ struct ubifs_pad_node { * @padding2: reserved for future, zeroes * @time_gran: time granularity in nanoseconds * @uuid: UUID generated when the file system image was created + * @ro_compat_version: UBIFS R/O compatibility version */ struct ubifs_sb_node { struct ubifs_ch ch; @@ -612,7 +635,8 @@ struct ubifs_sb_node { __le64 rp_size; __le32 time_gran; __u8 uuid[16]; - __u8 padding2[3972]; + __le32 ro_compat_version; + __u8 padding2[3968]; } __attribute__ ((packed)); /** diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index fc2a4cc66d0..0a8341e1408 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -426,9 +426,9 @@ struct ubifs_unclean_leb { * LEB properties flags. * * LPROPS_UNCAT: not categorized - * LPROPS_DIRTY: dirty > 0, not index + * LPROPS_DIRTY: dirty > free, dirty >= @c->dead_wm, not index * LPROPS_DIRTY_IDX: dirty + free > @c->min_idx_node_sze and index - * LPROPS_FREE: free > 0, not empty, not index + * LPROPS_FREE: free > 0, dirty < @c->dead_wm, not empty, not index * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs * LPROPS_EMPTY: LEB is empty, not taken * LPROPS_FREEABLE: free + dirty == leb_size, not index, not taken @@ -934,6 +934,7 @@ struct ubifs_debug_info; * by @commit_sem * @cnt_lock: protects @highest_inum and @max_sqnum counters * @fmt_version: UBIFS on-flash format version + * @ro_compat_version: R/O compatibility version * @uuid: UUID from super block * * @lhead_lnum: log head logical eraseblock number @@ -961,12 +962,12 @@ struct ubifs_debug_info; * @cs_lock: commit state lock * @cmt_wq: wait queue to sleep on if the log is full and a commit is running * - * @fast_unmount: do not run journal commit before un-mounting * @big_lpt: flag that LPT is too big to write whole during commit * @no_chk_data_crc: do not check CRCs when reading data nodes (except during * recovery) * @bulk_read: enable bulk-reads * @default_compr: default compression algorithm 
(%UBIFS_COMPR_LZO, etc) + * @rw_incompat: the media is not R/W compatible * * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and * @calc_idx_sz @@ -1016,6 +1017,8 @@ struct ubifs_debug_info; * @min_io_shift: number of bits in @min_io_size minus one * @leb_size: logical eraseblock size in bytes * @half_leb_size: half LEB size + * @idx_leb_size: how many bytes of an LEB are effectively available when it is + * used to store indexing nodes (@leb_size - @max_idx_node_sz) * @leb_cnt: count of logical eraseblocks * @max_leb_cnt: maximum count of logical eraseblocks * @old_leb_cnt: count of logical eraseblocks before re-size @@ -1133,8 +1136,8 @@ struct ubifs_debug_info; * previous commit start * @uncat_list: list of un-categorized LEBs * @empty_list: list of empty LEBs - * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size) - * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size) + * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size) + * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size) * @freeable_cnt: number of freeable LEBs in @freeable_list * * @ltab_lnum: LEB number of LPT's own lprops table @@ -1178,6 +1181,7 @@ struct ubifs_info { unsigned long long cmt_no; spinlock_t cnt_lock; int fmt_version; + int ro_compat_version; unsigned char uuid[16]; int lhead_lnum; @@ -1202,11 +1206,11 @@ struct ubifs_info { spinlock_t cs_lock; wait_queue_head_t cmt_wq; - unsigned int fast_unmount:1; unsigned int big_lpt:1; unsigned int no_chk_data_crc:1; unsigned int bulk_read:1; unsigned int default_compr:2; + unsigned int rw_incompat:1; struct mutex tnc_mutex; struct ubifs_zbranch zroot; @@ -1255,6 +1259,7 @@ struct ubifs_info { int min_io_shift; int leb_size; int half_leb_size; + int idx_leb_size; int leb_cnt; int max_leb_cnt; int old_leb_cnt; @@ -1405,13 +1410,13 @@ extern struct list_head ubifs_infos; extern spinlock_t ubifs_infos_lock; extern atomic_long_t 
ubifs_clean_zn_cnt; extern struct kmem_cache *ubifs_inode_slab; -extern struct super_operations ubifs_super_operations; -extern struct address_space_operations ubifs_file_address_operations; -extern struct file_operations ubifs_file_operations; -extern struct inode_operations ubifs_file_inode_operations; -extern struct file_operations ubifs_dir_operations; -extern struct inode_operations ubifs_dir_inode_operations; -extern struct inode_operations ubifs_symlink_inode_operations; +extern const struct super_operations ubifs_super_operations; +extern const struct address_space_operations ubifs_file_address_operations; +extern const struct file_operations ubifs_file_operations; +extern const struct inode_operations ubifs_file_inode_operations; +extern const struct file_operations ubifs_dir_operations; +extern const struct inode_operations ubifs_dir_inode_operations; +extern const struct inode_operations ubifs_symlink_inode_operations; extern struct backing_dev_info ubifs_backing_dev_info; extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; @@ -1428,7 +1433,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, int offs, int dtype); int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, - int offs, int quiet, int chk_crc); + int offs, int quiet, int must_chk_crc); void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last); int ubifs_io_init(struct ubifs_info *c); @@ -1495,13 +1500,14 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, struct ubifs_budget_req *req); long long ubifs_get_free_space(struct ubifs_info *c); +long long ubifs_get_free_space_nolock(struct ubifs_info *c); int ubifs_calc_min_idx_lebs(struct ubifs_info *c); void 
ubifs_convert_page_budget(struct ubifs_info *c); long long ubifs_reported_space(const struct ubifs_info *c, long long free); long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); /* find.c */ -int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, +int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, int squeeze); int ubifs_find_free_leb_for_idx(struct ubifs_info *c); int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, @@ -1603,6 +1609,7 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum); int ubifs_orphan_start_commit(struct ubifs_info *c); int ubifs_orphan_end_commit(struct ubifs_info *c); int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only); +int ubifs_clear_orphans(struct ubifs_info *c); /* lpt.c */ int ubifs_calc_lpt_geom(struct ubifs_info *c); @@ -1646,7 +1653,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, const struct ubifs_lprops *lp, int free, int dirty, int flags, int idx_gc_cnt); -void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats); +void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst); void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, int cat); void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, |