diff options
Diffstat (limited to 'fs')
71 files changed, 1753 insertions, 937 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 522469a7eca..ff0e8198020 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -270,44 +270,7 @@ config OCFS2_COMPAT_JBD endif # BLOCK -config DNOTIFY - bool "Dnotify support" - default y - help - Dnotify is a directory-based per-fd file change notification system - that uses signals to communicate events to user-space. There exist - superior alternatives, but some applications may still rely on - dnotify. - - If unsure, say Y. - -config INOTIFY - bool "Inotify file change notification support" - default y - ---help--- - Say Y here to enable inotify support. Inotify is a file change - notification system and a replacement for dnotify. Inotify fixes - numerous shortcomings in dnotify and introduces several new features - including multiple file events, one-shot support, and unmount - notification. - - For more information, see <file:Documentation/filesystems/inotify.txt> - - If unsure, say Y. - -config INOTIFY_USER - bool "Inotify support for userspace" - depends on INOTIFY - default y - ---help--- - Say Y here to enable inotify support for userspace, including the - associated system calls. Inotify allows monitoring of both files and - directories via a single open fd. Events are read from the file - descriptor, which is also select()- and poll()-able. - - For more information, see <file:Documentation/filesystems/inotify.txt> - - If unsure, say Y. +source "fs/notify/Kconfig" config QUOTA bool "Quota support" diff --git a/fs/Makefile b/fs/Makefile index d9f8afe6f0c..e6f423d1d22 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -20,8 +20,7 @@ obj-y += no-block.o endif obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o -obj-$(CONFIG_INOTIFY) += inotify.o -obj-$(CONFIG_INOTIFY_USER) += inotify_user.o +obj-y += notify/ obj-$(CONFIG_EPOLL) += eventpoll.o obj-$(CONFIG_ANON_INODES) += anon_inodes.o obj-$(CONFIG_SIGNALFD) += signalfd.o @@ -57,8 +56,6 @@ obj-$(CONFIG_QFMT_V1) += quota_v1.o obj-$(CONFIG_QFMT_V2) += quota_v2.o obj-$(CONFIG_QUOTACTL) += quota.o -obj-$(CONFIG_DNOTIFY) += dnotify.o - obj-$(CONFIG_PROC_FS) += proc/ obj-y += partitions/ obj-$(CONFIG_SYSFS) += sysfs/ diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index c16d9be1b01..3bbdb9d0237 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -79,9 +79,12 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops, if (IS_ERR(anon_inode_inode)) return -ENODEV; + if (fops->owner && !try_module_get(fops->owner)) + return -ENOENT; + error = get_unused_fd_flags(flags); if (error < 0) - return error; + goto err_module; fd = error; /* @@ -128,6 +131,8 @@ err_dput: dput(dentry); err_put_unused_fd: put_unused_fd(fd); +err_module: + module_put(fops->owner); return error; } EXPORT_SYMBOL_GPL(anon_inode_getfd); diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 5f1538c03b1..a05287a23f6 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -132,11 +132,6 @@ static int bad_file_check_flags(int flags) return -EIO; } -static int bad_file_dir_notify(struct file *file, unsigned long arg) -{ - return -EIO; -} - static int bad_file_flock(struct file *filp, int cmd, struct file_lock *fl) { return -EIO; @@ -179,7 +174,6 @@ static const struct file_operations bad_file_ops = .sendpage = bad_file_sendpage, .get_unmapped_area = bad_file_get_unmapped_area, .check_flags = bad_file_check_flags, - .dir_notify = bad_file_dir_notify, .flock = bad_file_flock, .splice_write = bad_file_splice_write, .splice_read = bad_file_splice_read, diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index b6dfee37c7b..d06cb023ad0 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -378,7 +378,8 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) inode->i_size = 0; inode->i_blocks = befs_sb->block_size / VFS_BLOCK_SIZE; strncpy(befs_ino->i_data.symlink, raw_inode->data.symlink, - BEFS_SYMLINK_LEN); + BEFS_SYMLINK_LEN - 1); + befs_ino->i_data.symlink[BEFS_SYMLINK_LEN - 1] = '\0'; } else { int num_blks; @@ -477,6 +478,8 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd) kfree(link); befs_error(sb, "Failed to read entire long symlink"); link = ERR_PTR(-EIO); + } else { + link[len - 1] = '\0'; } } else { link = befs_ino->i_data.symlink; diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index f1f3f4192a6..b639dcf7c77 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -95,92 +95,55 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u int has_dumped = 0; unsigned long dump_start, dump_size; struct user dump; -#if defined(__alpha__) +#ifdef __alpha__ # define START_DATA(u) (u.start_data) -#elif defined(__arm__) +#else # define START_DATA(u) ((u.u_tsize << PAGE_SHIFT) + u.start_code) -#elif defined(__sparc__) -# define START_DATA(u) (u.u_tsize) -#elif defined(__i386__) || defined(__mc68000__) || defined(__arch_um__) -# define START_DATA(u) (u.u_tsize << PAGE_SHIFT) #endif -#ifdef __sparc__ -# define START_STACK(u) ((regs->u_regs[UREG_FP]) & ~(PAGE_SIZE - 1)) -#else # define START_STACK(u) (u.start_stack) -#endif fs = get_fs(); set_fs(KERNEL_DS); has_dumped = 1; current->flags |= PF_DUMPCORE; strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm)); -#ifndef __sparc__ dump.u_ar0 = offsetof(struct user, regs); -#endif dump.signal = signr; aout_dump_thread(regs, &dump); /* If the size of the dump file exceeds the rlimit, then see what would happen if we wrote the stack, but not the data area. */ -#ifdef __sparc__ - if ((dump.u_dsize + dump.u_ssize) > limit) - dump.u_dsize = 0; -#else if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit) dump.u_dsize = 0; -#endif /* Make sure we have enough room to write the stack and data areas. */ -#ifdef __sparc__ - if (dump.u_ssize > limit) - dump.u_ssize = 0; -#else if ((dump.u_ssize + 1) * PAGE_SIZE > limit) dump.u_ssize = 0; -#endif /* make sure we actually have a data and stack area to dump */ set_fs(USER_DS); -#ifdef __sparc__ - if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize)) - dump.u_dsize = 0; - if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize)) - dump.u_ssize = 0; -#else if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize << PAGE_SHIFT)) dump.u_dsize = 0; if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize << PAGE_SHIFT)) dump.u_ssize = 0; -#endif set_fs(KERNEL_DS); /* struct user */ DUMP_WRITE(&dump,sizeof(dump)); /* Now dump all of the user data. Include malloced stuff as well */ -#ifndef __sparc__ DUMP_SEEK(PAGE_SIZE); -#endif /* now we start writing out the user space info */ set_fs(USER_DS); /* Dump the data area */ if (dump.u_dsize != 0) { dump_start = START_DATA(dump); -#ifdef __sparc__ - dump_size = dump.u_dsize; -#else dump_size = dump.u_dsize << PAGE_SHIFT; -#endif DUMP_WRITE(dump_start,dump_size); } /* Now prepare to dump the stack area */ if (dump.u_ssize != 0) { dump_start = START_STACK(dump); -#ifdef __sparc__ - dump_size = dump.u_ssize; -#else dump_size = dump.u_ssize << PAGE_SHIFT; -#endif DUMP_WRITE(dump_start,dump_size); } /* Finally dump the task struct. Not be used by gdb, but could be useful */ @@ -205,29 +168,24 @@ static unsigned long __user *create_aout_tables(char __user *p, struct linux_bin int envc = bprm->envc; sp = (void __user *)((-(unsigned long)sizeof(char *)) & (unsigned long) p); -#ifdef __sparc__ - /* This imposes the proper stack alignment for a new process. */ - sp = (void __user *) (((unsigned long) sp) & ~7); - if ((envc+argc+3)&1) --sp; -#endif #ifdef __alpha__ /* whee.. test-programs are so much fun. */ put_user(0, --sp); put_user(0, --sp); if (bprm->loader) { put_user(0, --sp); - put_user(0x3eb, --sp); + put_user(1003, --sp); put_user(bprm->loader, --sp); - put_user(0x3ea, --sp); + put_user(1002, --sp); } put_user(bprm->exec, --sp); - put_user(0x3e9, --sp); + put_user(1001, --sp); #endif sp -= envc+1; envp = (char __user * __user *) sp; sp -= argc+1; argv = (char __user * __user *) sp; -#if defined(__i386__) || defined(__mc68000__) || defined(__arm__) || defined(__arch_um__) +#ifndef __alpha__ put_user((unsigned long) envp,--sp); put_user((unsigned long) argv,--sp); #endif @@ -300,13 +258,8 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) return retval; /* OK, This is the point of no return */ -#if defined(__alpha__) +#ifdef __alpha__ SET_AOUT_PERSONALITY(bprm, ex); -#elif defined(__sparc__) - set_personality(PER_SUNOS); -#if !defined(__sparc_v9__) - memcpy(¤t->thread.core_exec, &ex, sizeof(struct exec)); -#endif #else set_personality(PER_LINUX); #endif @@ -322,24 +275,6 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) install_exec_creds(bprm); current->flags &= ~PF_FORKNOEXEC; -#ifdef __sparc__ - if (N_MAGIC(ex) == NMAGIC) { - loff_t pos = fd_offset; - /* Fuck me plenty... */ - /* <AOL></AOL> */ - down_write(¤t->mm->mmap_sem); - error = do_brk(N_TXTADDR(ex), ex.a_text); - up_write(¤t->mm->mmap_sem); - bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex), - ex.a_text, &pos); - down_write(¤t->mm->mmap_sem); - error = do_brk(N_DATADDR(ex), ex.a_data); - up_write(¤t->mm->mmap_sem); - bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex), - ex.a_data, &pos); - goto beyond_if; - } -#endif if (N_MAGIC(ex) == OMAGIC) { unsigned long text_addr, map_size; @@ -347,7 +282,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) text_addr = N_TXTADDR(ex); -#if defined(__alpha__) || defined(__sparc__) +#ifdef __alpha__ pos = fd_offset; map_size = ex.a_text+ex.a_data + PAGE_SIZE - 1; #else diff --git a/fs/block_dev.c b/fs/block_dev.c index 99e0ae1a4c7..349a26c1000 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -326,12 +326,13 @@ static struct file_system_type bd_type = { .kill_sb = kill_anon_super, }; -static struct vfsmount *bd_mnt __read_mostly; -struct super_block *blockdev_superblock; +struct super_block *blockdev_superblock __read_mostly; void __init bdev_cache_init(void) { int err; + struct vfsmount *bd_mnt; + bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD|SLAB_PANIC), @@ -373,7 +374,7 @@ struct block_device *bdget(dev_t dev) struct block_device *bdev; struct inode *inode; - inode = iget5_locked(bd_mnt->mnt_sb, hash(dev), + inode = iget5_locked(blockdev_superblock, hash(dev), bdev_test, bdev_set, &dev); if (!inode) @@ -463,7 +464,7 @@ void bd_forget(struct inode *inode) spin_lock(&bdev_lock); if (inode->i_bdev) { - if (inode->i_sb != blockdev_superblock) + if (!sb_is_blkdev_sb(inode->i_sb)) bdev = inode->i_bdev; __bd_forget(inode); } diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 6ba43fb346f..9948c0030e8 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_CIFS) += cifs.o cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ - md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o \ + md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ readdir.o ioctl.o sess.o export.o cifsacl.o cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 0005a194a75..13ea53251dc 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -747,7 +747,6 @@ const struct file_operations cifs_file_ops = { #endif /* CONFIG_CIFS_POSIX */ #ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -768,7 +767,6 @@ const struct file_operations cifs_file_direct_ops = { #endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, #ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -789,7 +787,6 @@ const struct file_operations cifs_file_nobrl_ops = { #endif /* CONFIG_CIFS_POSIX */ #ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -809,7 +806,6 @@ const struct file_operations cifs_file_direct_nobrl_ops = { #endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, #ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -818,9 +814,6 @@ const struct file_operations cifs_dir_ops = { .readdir = cifs_readdir, .release = cifs_closedir, .read = generic_read_dir, -#ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, -#endif /* CONFIG_CIFS_EXPERIMENTAL */ .unlocked_ioctl = cifs_ioctl, .llseek = generic_file_llseek, }; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 2ce04c73d74..7ac481841f8 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -76,7 +76,6 @@ extern int cifs_file_mmap(struct file * , struct vm_area_struct *); extern const struct file_operations cifs_dir_ops; extern int cifs_dir_open(struct inode *inode, struct file *file); extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir); -extern int cifs_dir_notify(struct file *, unsigned long arg); /* Functions related to dir entries */ extern struct dentry_operations cifs_dentry_ops; diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c deleted file mode 100644 index 5a57581eb4b..00000000000 --- a/fs/cifs/fcntl.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * fs/cifs/fcntl.c - * - * vfs operations that deal with the file control API - * - * Copyright (C) International Business Machines Corp., 2003,2004 - * Author(s): Steve French (sfrench@us.ibm.com) - * - * This library is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#include <linux/fs.h> -#include <linux/stat.h> -#include <linux/fcntl.h> -#include "cifsglob.h" -#include "cifsproto.h" -#include "cifs_unicode.h" -#include "cifs_debug.h" -#include "cifsfs.h" - -static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags) -{ - __u32 cifs_ntfy_flags = 0; - - /* No way on Linux VFS to ask to monitor xattr - changes (and no stream support either */ - if (fcntl_notify_flags & DN_ACCESS) - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_ACCESS; - if (fcntl_notify_flags & DN_MODIFY) { - /* What does this mean on directories? */ - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE | - FILE_NOTIFY_CHANGE_SIZE; - } - if (fcntl_notify_flags & DN_CREATE) { - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION | - FILE_NOTIFY_CHANGE_LAST_WRITE; - } - if (fcntl_notify_flags & DN_DELETE) - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE; - if (fcntl_notify_flags & DN_RENAME) { - /* BB review this - checking various server behaviors */ - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME | - FILE_NOTIFY_CHANGE_FILE_NAME; - } - if (fcntl_notify_flags & DN_ATTRIB) { - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY | - FILE_NOTIFY_CHANGE_ATTRIBUTES; - } -/* if (fcntl_notify_flags & DN_MULTISHOT) { - cifs_ntfy_flags |= ; - } */ /* BB fixme - not sure how to handle this with CIFS yet */ - - return cifs_ntfy_flags; -} - -int cifs_dir_notify(struct file *file, unsigned long arg) -{ - int xid; - int rc = -EINVAL; - int oplock = 0; - struct cifs_sb_info *cifs_sb; - struct cifsTconInfo *pTcon; - char *full_path = NULL; - __u32 filter = FILE_NOTIFY_CHANGE_NAME | FILE_NOTIFY_CHANGE_ATTRIBUTES; - __u16 netfid; - - if (experimEnabled == 0) - return 0; - - xid = GetXid(); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - pTcon = cifs_sb->tcon; - - full_path = build_path_from_dentry(file->f_path.dentry); - - if (full_path == NULL) { - rc = -ENOMEM; - } else { - cFYI(1, ("dir notify on file %s Arg 0x%lx", full_path, arg)); - rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, - GENERIC_READ | SYNCHRONIZE, 0 /* create options */, - &netfid, &oplock, NULL, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - /* BB fixme - add this handle to a notify handle list */ - if (rc) { - cFYI(1, ("Could not open directory for notify")); - } else { - filter = convert_to_cifs_notify_flags(arg); - if (filter != 0) { - rc = CIFSSMBNotify(xid, pTcon, - 0 /* no subdirs */, netfid, - filter, file, arg & DN_MULTISHOT, - cifs_sb->local_nls); - } else { - rc = -EINVAL; - } - /* BB add code to close file eventually (at unmount - it would close automatically but may be a way - to do it easily when inode freed or when - notify info is cleared/changed */ - cFYI(1, ("notify rc %d", rc)); - } - } - - FreeXid(xid); - return rc; -} diff --git a/fs/dcache.c b/fs/dcache.c index a1d86c7f3e6..e88c23b85a3 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -34,7 +34,6 @@ #include <linux/bootmem.h> #include "internal.h" - int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); @@ -948,9 +947,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dentry->d_op = NULL; dentry->d_fsdata = NULL; dentry->d_mounted = 0; -#ifdef CONFIG_PROFILING - dentry->d_cookie = NULL; -#endif INIT_HLIST_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); @@ -1336,7 +1332,7 @@ err_out: * * Searches the children of the parent dentry for the name in question. If * the dentry is found its reference count is incremented and the dentry - * is returned. The caller must use d_put to free the entry when it has + * is returned. The caller must use dput to free the entry when it has * finished using it. %NULL is returned on failure. * * __d_lookup is dcache_lock free. The hash list is protected using RCU. @@ -1620,8 +1616,11 @@ static void switch_names(struct dentry *dentry, struct dentry *target) */ memcpy(dentry->d_iname, target->d_name.name, target->d_name.len + 1); + dentry->d_name.len = target->d_name.len; + return; } } + do_switch(dentry->d_name.len, target->d_name.len); } /* @@ -1681,7 +1680,6 @@ already_unhashed: /* Switch the names.. */ switch_names(dentry, target); - do_switch(dentry->d_name.len, target->d_name.len); do_switch(dentry->d_name.hash, target->d_name.hash); /* ... and switch the parents */ @@ -1791,7 +1789,6 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) struct dentry *dparent, *aparent; switch_names(dentry, anon); - do_switch(dentry->d_name.len, anon->d_name.len); do_switch(dentry->d_name.hash, anon->d_name.hash); dparent = dentry->d_parent; @@ -1911,7 +1908,8 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) * Convert a dentry into an ASCII path name. If the entry has been deleted * the string " (deleted)" is appended. Note that this is ambiguous. * - * Returns the buffer or an error code if the path was too long. + * Returns a pointer into the buffer or an error code if the + * path was too long. * * "buflen" should be positive. Caller holds the dcache_lock. * @@ -1987,7 +1985,10 @@ Elong: * Convert a dentry into an ASCII path name. If the entry has been deleted * the string " (deleted)" is appended. Note that this is ambiguous. * - * Returns the buffer or an error code if the path was too long. + * Returns a pointer into the buffer or an error code if the path was + * too long. Note: Callers should use the returned pointer, not the passed + * in buffer, to use the name! The implementation often starts at an offset + * into the buffer, and may leave 0 bytes at the start. * * "buflen" should be positive. */ @@ -2313,9 +2314,6 @@ static void __init dcache_init(void) /* SLAB cache for __getname() consumers */ struct kmem_cache *names_cachep __read_mostly; -/* SLAB cache for file structures */ -struct kmem_cache *filp_cachep __read_mostly; - EXPORT_SYMBOL(d_genocide); void __init vfs_caches_init_early(void) @@ -2337,9 +2335,6 @@ void __init vfs_caches_init(unsigned long mempages) names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); - filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); - dcache_init(); inode_init(); files_init(mempages); diff --git a/fs/dcookies.c b/fs/dcookies.c index 855d4b1d619..180e9fec4ad 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -93,10 +93,15 @@ static struct dcookie_struct *alloc_dcookie(struct path *path) { struct dcookie_struct *dcs = kmem_cache_alloc(dcookie_cache, GFP_KERNEL); + struct dentry *d; if (!dcs) return NULL; - path->dentry->d_cookie = dcs; + d = path->dentry; + spin_lock(&d->d_lock); + d->d_flags |= DCACHE_COOKIE; + spin_unlock(&d->d_lock); + dcs->path = *path; path_get(path); hash_dcookie(dcs); @@ -119,14 +124,14 @@ int get_dcookie(struct path *path, unsigned long *cookie) goto out; } - dcs = path->dentry->d_cookie; - - if (!dcs) + if (path->dentry->d_flags & DCACHE_COOKIE) { + dcs = find_dcookie((unsigned long)path->dentry); + } else { dcs = alloc_dcookie(path); - - if (!dcs) { - err = -ENOMEM; - goto out; + if (!dcs) { + err = -ENOMEM; + goto out; + } } *cookie = dcookie_value(dcs); @@ -251,7 +256,12 @@ out_kmem: static void free_dcookie(struct dcookie_struct * dcs) { - dcs->path.dentry->d_cookie = NULL; + struct dentry *d = dcs->path.dentry; + + spin_lock(&d->d_lock); + d->d_flags &= ~DCACHE_COOKIE; + spin_unlock(&d->d_lock); + path_put(&dcs->path); kmem_cache_free(dcookie_cache, dcs); } diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 5d61b7c06e1..fff96e152c0 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -27,25 +27,32 @@ #define DEVPTS_SUPER_MAGIC 0x1cd1 #define DEVPTS_DEFAULT_MODE 0600 +/* + * ptmx is a new node in /dev/pts and will be unused in legacy (single- + * instance) mode. To prevent surprises in user space, set permissions of + * ptmx to 0. Use 'chmod' or remount with '-o ptmxmode' to set meaningful + * permissions. + */ +#define DEVPTS_DEFAULT_PTMX_MODE 0000 #define PTMX_MINOR 2 extern int pty_limit; /* Config limit on Unix98 ptys */ -static DEFINE_IDA(allocated_ptys); static DEFINE_MUTEX(allocated_ptys_lock); static struct vfsmount *devpts_mnt; -static struct dentry *devpts_root; -static struct { +struct pts_mount_opts { int setuid; int setgid; uid_t uid; gid_t gid; umode_t mode; -} config = {.mode = DEVPTS_DEFAULT_MODE}; + umode_t ptmxmode; + int newinstance; +}; enum { - Opt_uid, Opt_gid, Opt_mode, + Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance, Opt_err }; @@ -53,18 +60,50 @@ static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_mode, "mode=%o"}, +#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES + {Opt_ptmxmode, "ptmxmode=%o"}, + {Opt_newinstance, "newinstance"}, +#endif {Opt_err, NULL} }; -static int devpts_remount(struct super_block *sb, int *flags, char *data) +struct pts_fs_info { + struct ida allocated_ptys; + struct pts_mount_opts mount_opts; + struct dentry *ptmx_dentry; +}; + +static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct super_block *pts_sb_from_inode(struct inode *inode) +{ +#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES + if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) + return inode->i_sb; +#endif + return devpts_mnt->mnt_sb; +} + +#define PARSE_MOUNT 0 +#define PARSE_REMOUNT 1 + +static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) { char *p; - config.setuid = 0; - config.setgid = 0; - config.uid = 0; - config.gid = 0; - config.mode = DEVPTS_DEFAULT_MODE; + opts->setuid = 0; + opts->setgid = 0; + opts->uid = 0; + opts->gid = 0; + opts->mode = DEVPTS_DEFAULT_MODE; + opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE; + + /* newinstance makes sense only on initial mount */ + if (op == PARSE_MOUNT) + opts->newinstance = 0; while ((p = strsep(&data, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; @@ -79,20 +118,32 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data) case Opt_uid: if (match_int(&args[0], &option)) return -EINVAL; - config.uid = option; - config.setuid = 1; + opts->uid = option; + opts->setuid = 1; break; case Opt_gid: if (match_int(&args[0], &option)) return -EINVAL; - config.gid = option; - config.setgid = 1; + opts->gid = option; + opts->setgid = 1; break; case Opt_mode: if (match_octal(&args[0], &option)) return -EINVAL; - config.mode = option & S_IALLUGO; + opts->mode = option & S_IALLUGO; + break; +#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES + case Opt_ptmxmode: + if (match_octal(&args[0], &option)) + return -EINVAL; + opts->ptmxmode = option & S_IALLUGO; + break; + case Opt_newinstance: + /* newinstance makes sense only on initial mount */ + if (op == PARSE_MOUNT) + opts->newinstance = 1; break; +#endif default: printk(KERN_ERR "devpts: called with bogus options\n"); return -EINVAL; @@ -102,13 +153,108 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data) return 0; } +#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES +static int mknod_ptmx(struct super_block *sb) +{ + int mode; + int rc = -ENOMEM; + struct dentry *dentry; + struct inode *inode; + struct dentry *root = sb->s_root; + struct pts_fs_info *fsi = DEVPTS_SB(sb); + struct pts_mount_opts *opts = &fsi->mount_opts; + + mutex_lock(&root->d_inode->i_mutex); + + /* If we have already created ptmx node, return */ + if (fsi->ptmx_dentry) { + rc = 0; + goto out; + } + + dentry = d_alloc_name(root, "ptmx"); + if (!dentry) { + printk(KERN_NOTICE "Unable to alloc dentry for ptmx node\n"); + goto out; + } + + /* + * Create a new 'ptmx' node in this mount of devpts. + */ + inode = new_inode(sb); + if (!inode) { + printk(KERN_ERR "Unable to alloc inode for ptmx node\n"); + dput(dentry); + goto out; + } + + inode->i_ino = 2; + inode->i_uid = inode->i_gid = 0; + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + + mode = S_IFCHR|opts->ptmxmode; + init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2)); + + d_add(dentry, inode); + + fsi->ptmx_dentry = dentry; + rc = 0; + + printk(KERN_DEBUG "Created ptmx node in devpts ino %lu\n", + inode->i_ino); +out: + mutex_unlock(&root->d_inode->i_mutex); + return rc; +} + +static void update_ptmx_mode(struct pts_fs_info *fsi) +{ + struct inode *inode; + if (fsi->ptmx_dentry) { + inode = fsi->ptmx_dentry->d_inode; + inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode; + } +} +#else +static inline void update_ptmx_mode(struct pts_fs_info *fsi) +{ + return; +} +#endif + +static int devpts_remount(struct super_block *sb, int *flags, char *data) +{ + int err; + struct pts_fs_info *fsi = DEVPTS_SB(sb); + struct pts_mount_opts *opts = &fsi->mount_opts; + + err = parse_mount_options(data, PARSE_REMOUNT, opts); + + /* + * parse_mount_options() restores options to default values + * before parsing and may have changed ptmxmode. So, update the + * mode in the inode too. Bogus options don't fail the remount, + * so do this even on error return. + */ + update_ptmx_mode(fsi); + + return err; +} + static int devpts_show_options(struct seq_file *seq, struct vfsmount *vfs) { - if (config.setuid) - seq_printf(seq, ",uid=%u", config.uid); - if (config.setgid) - seq_printf(seq, ",gid=%u", config.gid); - seq_printf(seq, ",mode=%03o", config.mode); + struct pts_fs_info *fsi = DEVPTS_SB(vfs->mnt_sb); + struct pts_mount_opts *opts = &fsi->mount_opts; + + if (opts->setuid) + seq_printf(seq, ",uid=%u", opts->uid); + if (opts->setgid) + seq_printf(seq, ",gid=%u", opts->gid); + seq_printf(seq, ",mode=%03o", opts->mode); +#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES + seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode); +#endif return 0; } @@ -119,10 +265,25 @@ static const struct super_operations devpts_sops = { .show_options = devpts_show_options, }; +static void *new_pts_fs_info(void) +{ + struct pts_fs_info *fsi; + + fsi = kzalloc(sizeof(struct pts_fs_info), GFP_KERNEL); + if (!fsi) + return NULL; + + ida_init(&fsi->allocated_ptys); + fsi->mount_opts.mode = DEVPTS_DEFAULT_MODE; + fsi->mount_opts.ptmxmode = DEVPTS_DEFAULT_PTMX_MODE; + + return fsi; +} + static int devpts_fill_super(struct super_block *s, void *data, int silent) { - struct inode * inode; + struct inode *inode; s->s_blocksize = 1024; s->s_blocksize_bits = 10; @@ -130,9 +291,13 @@ devpts_fill_super(struct super_block *s, void *data, int silent) s->s_op = &devpts_sops; s->s_time_gran = 1; + s->s_fs_info = new_pts_fs_info(); + if (!s->s_fs_info) + goto fail; + inode = new_inode(s); if (!inode) - goto fail; + goto free_fsi; inode->i_ino = 1; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_blocks = 0; @@ -142,27 +307,226 @@ devpts_fill_super(struct super_block *s, void *data, int silent) inode->i_fop = &simple_dir_operations; inode->i_nlink = 2; - devpts_root = s->s_root = d_alloc_root(inode); + s->s_root = d_alloc_root(inode); if (s->s_root) return 0; - - printk("devpts: get root dentry failed\n"); + + printk(KERN_ERR "devpts: get root dentry failed\n"); iput(inode); + +free_fsi: + kfree(s->s_fs_info); fail: return -ENOMEM; } +#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES +static int compare_init_pts_sb(struct super_block *s, void *p) +{ + if (devpts_mnt) + return devpts_mnt->mnt_sb == s; + return 0; +} + +/* + * Safely parse the mount options in @data and update @opts. + * + * devpts ends up parsing options two times during mount, due to the + * two modes of operation it supports. The first parse occurs in + * devpts_get_sb() when determining the mode (single-instance or + * multi-instance mode). The second parse happens in devpts_remount() + * or new_pts_mount() depending on the mode. + * + * Parsing of options modifies the @data making subsequent parsing + * incorrect. So make a local copy of @data and parse it. + * + * Return: 0 On success, -errno on error + */ +static int safe_parse_mount_options(void *data, struct pts_mount_opts *opts) +{ + int rc; + void *datacp; + + if (!data) + return 0; + + /* Use kstrdup() ? */ + datacp = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!datacp) + return -ENOMEM; + + memcpy(datacp, data, PAGE_SIZE); + rc = parse_mount_options((char *)datacp, PARSE_MOUNT, opts); + kfree(datacp); + + return rc; +} + +/* + * Mount a new (private) instance of devpts. PTYs created in this + * instance are independent of the PTYs in other devpts instances. + */ +static int new_pts_mount(struct file_system_type *fs_type, int flags, + void *data, struct vfsmount *mnt) +{ + int err; + struct pts_fs_info *fsi; + struct pts_mount_opts *opts; + + printk(KERN_NOTICE "devpts: newinstance mount\n"); + + err = get_sb_nodev(fs_type, flags, data, devpts_fill_super, mnt); + if (err) + return err; + + fsi = DEVPTS_SB(mnt->mnt_sb); + opts = &fsi->mount_opts; + + err = parse_mount_options(data, PARSE_MOUNT, opts); + if (err) + goto fail; + + err = mknod_ptmx(mnt->mnt_sb); + if (err) + goto fail; + + return 0; + +fail: + dput(mnt->mnt_sb->s_root); + deactivate_super(mnt->mnt_sb); + return err; +} + +/* + * Check if 'newinstance' mount option was specified in @data. + * + * Return: -errno on error (eg: invalid mount options specified) + * : 1 if 'newinstance' mount option was specified + * : 0 if 'newinstance' mount option was NOT specified + */ +static int is_new_instance_mount(void *data) +{ + int rc; + struct pts_mount_opts opts; + + if (!data) + return 0; + + rc = safe_parse_mount_options(data, &opts); + if (!rc) + rc = opts.newinstance; + + return rc; +} + +/* + * get_init_pts_sb() + * + * This interface is needed to support multiple namespace semantics in + * devpts while preserving backward compatibility of the current 'single- + * namespace' semantics. i.e all mounts of devpts without the 'newinstance' + * mount option should bind to the initial kernel mount, like + * get_sb_single(). + * + * Mounts with 'newinstance' option create a new private namespace. + * + * But for single-mount semantics, devpts cannot use get_sb_single(), + * because get_sb_single()/sget() find and use the super-block from + * the most recent mount of devpts. But that recent mount may be a + * 'newinstance' mount and get_sb_single() would pick the newinstance + * super-block instead of the initial super-block. + * + * This interface is identical to get_sb_single() except that it + * consistently selects the 'single-namespace' superblock even in the + * presence of the private namespace (i.e 'newinstance') super-blocks. + */ +static int get_init_pts_sb(struct file_system_type *fs_type, int flags, + void *data, struct vfsmount *mnt) +{ + struct super_block *s; + int error; + + s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL); + if (IS_ERR(s)) + return PTR_ERR(s); + + if (!s->s_root) { + s->s_flags = flags; + error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0); + if (error) { + up_write(&s->s_umount); + deactivate_super(s); + return error; + } + s->s_flags |= MS_ACTIVE; + } + do_remount_sb(s, flags, data, 0); + return simple_set_mnt(mnt, s); +} + +/* + * Mount or remount the initial kernel mount of devpts. This type of + * mount maintains the legacy, single-instance semantics, while the + * kernel still allows multiple-instances. + */ +static int init_pts_mount(struct file_system_type *fs_type, int flags, + void *data, struct vfsmount *mnt) +{ + int err; + + err = get_init_pts_sb(fs_type, flags, data, mnt); + if (err) + return err; + + err = mknod_ptmx(mnt->mnt_sb); + if (err) { + dput(mnt->mnt_sb->s_root); + deactivate_super(mnt->mnt_sb); + } + + return err; +} + static int devpts_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { + int new; + + new = is_new_instance_mount(data); + if (new < 0) + return new; + + if (new) + return new_pts_mount(fs_type, flags, data, mnt); + + return init_pts_mount(fs_type, flags, data, mnt); +} +#else +/* + * This supports only the legacy single-instance semantics (no + * multiple-instance semantics) + */ +static int devpts_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, struct vfsmount *mnt) +{ return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt); } +#endif + +static void devpts_kill_sb(struct super_block *sb) +{ + struct pts_fs_info *fsi = DEVPTS_SB(sb); + + kfree(fsi); + kill_litter_super(sb); +} static struct file_system_type devpts_fs_type = { .owner = THIS_MODULE, .name = "devpts", .get_sb = devpts_get_sb, - .kill_sb = kill_anon_super, + .kill_sb = devpts_kill_sb, }; /* @@ -172,16 +536,17 @@ static struct file_system_type devpts_fs_type = { int devpts_new_index(struct inode *ptmx_inode) { + struct super_block *sb = pts_sb_from_inode(ptmx_inode); + struct pts_fs_info *fsi = DEVPTS_SB(sb); int index; int ida_ret; retry: - if (!ida_pre_get(&allocated_ptys, GFP_KERNEL)) { + if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL)) return -ENOMEM; - } mutex_lock(&allocated_ptys_lock); - ida_ret = ida_get_new(&allocated_ptys, &index); + ida_ret = ida_get_new(&fsi->allocated_ptys, &index); if (ida_ret < 0) { mutex_unlock(&allocated_ptys_lock); if (ida_ret == -EAGAIN) @@ -190,7 +555,7 @@ retry: } if (index >= pty_limit) { - ida_remove(&allocated_ptys, index); + ida_remove(&fsi->allocated_ptys, index); mutex_unlock(&allocated_ptys_lock); return -EIO; } @@ -200,18 +565,26 @@ retry: void devpts_kill_index(struct inode *ptmx_inode, int idx) { + struct super_block *sb = pts_sb_from_inode(ptmx_inode); + struct pts_fs_info *fsi = DEVPTS_SB(sb); + mutex_lock(&allocated_ptys_lock); - ida_remove(&allocated_ptys, idx); + ida_remove(&fsi->allocated_ptys, idx); mutex_unlock(&allocated_ptys_lock); } int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty) { - int number = tty->index; /* tty layer puts index from devpts_new_index() in here */ + /* tty layer puts index from devpts_new_index() in here */ + int number = tty->index; struct tty_driver *driver = tty->driver; dev_t device = MKDEV(driver->major, driver->minor_start+number); struct dentry *dentry; - struct inode *inode = new_inode(devpts_mnt->mnt_sb); + struct super_block *sb = pts_sb_from_inode(ptmx_inode); + struct inode *inode = new_inode(sb); + struct dentry *root = sb->s_root; + struct pts_fs_info *fsi = DEVPTS_SB(sb); + struct pts_mount_opts *opts = &fsi->mount_opts; char s[12]; /* We're supposed to be given the slave end of a pty */ @@ -221,25 +594,25 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty) if (!inode) return -ENOMEM; - inode->i_ino = number+2; - inode->i_uid = config.setuid ? config.uid : current_fsuid(); - inode->i_gid = config.setgid ? config.gid : current_fsgid(); + inode->i_ino = number + 3; + inode->i_uid = opts->setuid ? opts->uid : current_fsuid(); + inode->i_gid = opts->setgid ? opts->gid : current_fsgid(); inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - init_special_inode(inode, S_IFCHR|config.mode, device); + init_special_inode(inode, S_IFCHR|opts->mode, device); inode->i_private = tty; tty->driver_data = inode; sprintf(s, "%d", number); - mutex_lock(&devpts_root->d_inode->i_mutex); + mutex_lock(&root->d_inode->i_mutex); - dentry = d_alloc_name(devpts_root, s); + dentry = d_alloc_name(root, s); if (!IS_ERR(dentry)) { d_add(dentry, inode); - fsnotify_create(devpts_root->d_inode, dentry); + fsnotify_create(root->d_inode, dentry); } - mutex_unlock(&devpts_root->d_inode->i_mutex); + mutex_unlock(&root->d_inode->i_mutex); return 0; } @@ -256,20 +629,27 @@ struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number) void devpts_pty_kill(struct tty_struct *tty) { struct inode *inode = tty->driver_data; + struct super_block *sb = pts_sb_from_inode(inode); + struct dentry *root = sb->s_root; struct dentry *dentry; BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR)); - mutex_lock(&devpts_root->d_inode->i_mutex); + mutex_lock(&root->d_inode->i_mutex); dentry = d_find_alias(inode); - if (dentry && !IS_ERR(dentry)) { + if (IS_ERR(dentry)) + goto out; + + if (dentry) { inode->i_nlink--; d_delete(dentry); - dput(dentry); + dput(dentry); /* d_alloc_name() in devpts_pty_new() */ } - mutex_unlock(&devpts_root->d_inode->i_mutex); + dput(dentry); /* d_find_alias above */ +out: + mutex_unlock(&root->d_inode->i_mutex); } static int __init init_devpts_fs(void) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 89209f00f9c..5e78fc17988 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -673,10 +673,11 @@ static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd) ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ " "dentry->d_name.name = [%s]\n", dentry->d_name.name); rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); - buf[rc] = '\0'; set_fs(old_fs); if (rc < 0) goto out_free; + else + buf[rc] = '\0'; rc = 0; nd_set_link(nd, buf); goto out; diff --git a/fs/exec.c b/fs/exec.c index 02d2e120542..3ef9cf9b187 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -57,11 +57,6 @@ #include <asm/tlb.h> #include "internal.h" -#ifdef __alpha__ -/* for /sbin/loader handling in search_binary_handler() */ -#include <linux/a.out.h> -#endif - int core_uses_pid; char core_pattern[CORENAME_MAX_SIZE] = "core"; int suid_dumpable = 0; @@ -127,7 +122,8 @@ asmlinkage long sys_uselib(const char __user * library) if (nd.path.mnt->mnt_flags & MNT_NOEXEC) goto exit; - error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN); + error = inode_permission(nd.path.dentry->d_inode, + MAY_READ | MAY_EXEC | MAY_OPEN); if (error) goto exit; @@ -680,7 +676,7 @@ struct file *open_exec(const char *name) if (nd.path.mnt->mnt_flags & MNT_NOEXEC) goto out_path_put; - err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN); + err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN); if (err) goto out_path_put; @@ -1171,41 +1167,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) unsigned int depth = bprm->recursion_depth; int try,retval; struct linux_binfmt *fmt; -#ifdef __alpha__ - /* handle /sbin/loader.. */ - { - struct exec * eh = (struct exec *) bprm->buf; - if (!bprm->loader && eh->fh.f_magic == 0x183 && - (eh->fh.f_flags & 0x3000) == 0x3000) - { - struct file * file; - unsigned long loader; - - allow_write_access(bprm->file); - fput(bprm->file); - bprm->file = NULL; - - loader = bprm->vma->vm_end - sizeof(void *); - - file = open_exec("/sbin/loader"); - retval = PTR_ERR(file); - if (IS_ERR(file)) - return retval; - - /* Remember if the application is TASO. */ - bprm->taso = eh->ah.entry < 0x100000000UL; - - bprm->file = file; - bprm->loader = loader; - retval = prepare_binprm(bprm); - if (retval<0) - return retval; - /* should call search_binary_handler recursively here, - but it does not matter */ - } - } -#endif retval = security_bprm_check(bprm); if (retval) return retval; diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 8d0add62587..c454d5db28a 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -585,7 +585,10 @@ got: spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); - insert_inode_hash(inode); + if (insert_inode_locked(inode) < 0) { + err = -EINVAL; + goto fail_drop; + } if (DQUOT_ALLOC_INODE(inode)) { err = -EDQUOT; @@ -612,6 +615,7 @@ fail_drop: DQUOT_DROP(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; + unlock_new_inode(inode); iput(inode); return ERR_PTR(err); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 7658b33e265..02b39a5deb7 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -32,6 +32,7 @@ #include <linux/buffer_head.h> #include <linux/mpage.h> #include <linux/fiemap.h> +#include <linux/namei.h> #include "ext2.h" #include "acl.h" #include "xip.h" @@ -1286,9 +1287,11 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) else inode->i_mapping->a_ops = &ext2_aops; } else if (S_ISLNK(inode->i_mode)) { - if (ext2_inode_is_fast_symlink(inode)) + if (ext2_inode_is_fast_symlink(inode)) { inode->i_op = &ext2_fast_symlink_inode_operations; - else { + nd_terminate_link(ei->i_data, inode->i_size, + sizeof(ei->i_data) - 1); + } else { inode->i_op = &ext2_symlink_inode_operations; if (test_opt(inode->i_sb, NOBH)) inode->i_mapping->a_ops = &ext2_nobh_aops; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 2a747252ec1..90ea17998a7 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -41,9 +41,11 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) int err = ext2_add_link(dentry, inode); if (!err) { d_instantiate(dentry, inode); + unlock_new_inode(inode); return 0; } inode_dec_link_count(inode); + unlock_new_inode(inode); iput(inode); return err; } @@ -170,6 +172,7 @@ out: out_fail: inode_dec_link_count(inode); + unlock_new_inode(inode); iput (inode); goto out; } @@ -178,6 +181,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, struct dentry *dentry) { struct inode *inode = old_dentry->d_inode; + int err; if (inode->i_nlink >= EXT2_LINK_MAX) return -EMLINK; @@ -186,7 +190,14 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, inode_inc_link_count(inode); atomic_inc(&inode->i_count); - return ext2_add_nondir(dentry, inode); + err = ext2_add_link(dentry, inode); + if (!err) { + d_instantiate(dentry, inode); + return 0; + } + inode_dec_link_count(inode); + iput(inode); + return err; } static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) @@ -222,12 +233,14 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) goto out_fail; d_instantiate(dentry, inode); + unlock_new_inode(inode); out: return err; out_fail: inode_dec_link_count(inode); inode_dec_link_count(inode); + unlock_new_inode(inode); iput(inode); out_dir: inode_dec_link_count(dir); diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 490bd0ed789..5655fbcbd11 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -579,7 +579,10 @@ got: ext3_set_inode_flags(inode); if (IS_DIRSYNC(inode)) handle->h_sync = 1; - insert_inode_hash(inode); + if (insert_inode_locked(inode) < 0) { + err = -EINVAL; + goto fail_drop; + } spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); @@ -627,6 +630,7 @@ fail_drop: DQUOT_DROP(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; + unlock_new_inode(inode); iput(inode); brelse(bitmap_bh); return ERR_PTR(err); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index f8424ad8997..c4bdccf976b 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -37,6 +37,7 @@ #include <linux/uio.h> #include <linux/bio.h> #include <linux/fiemap.h> +#include <linux/namei.h> #include "xattr.h" #include "acl.h" @@ -2817,9 +2818,11 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) inode->i_op = &ext3_dir_inode_operations; inode->i_fop = &ext3_dir_operations; } else if (S_ISLNK(inode->i_mode)) { - if (ext3_inode_is_fast_symlink(inode)) + if (ext3_inode_is_fast_symlink(inode)) { inode->i_op = &ext3_fast_symlink_inode_operations; - else { + nd_terminate_link(ei->i_data, inode->i_size, + sizeof(ei->i_data) - 1); + } else { inode->i_op = &ext3_symlink_inode_operations; ext3_set_aops(inode); } diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 3e5edc92aa0..297ea8dfac7 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1652,9 +1652,11 @@ static int ext3_add_nondir(handle_t *handle, if (!err) { ext3_mark_inode_dirty(handle, inode); d_instantiate(dentry, inode); + unlock_new_inode(inode); return 0; } drop_nlink(inode); + unlock_new_inode(inode); iput(inode); return err; } @@ -1765,6 +1767,7 @@ retry: dir_block = ext3_bread (handle, inode, 0, 1, &err); if (!dir_block) { drop_nlink(inode); /* is this nlink == 0? */ + unlock_new_inode(inode); ext3_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; @@ -1792,6 +1795,7 @@ retry: err = ext3_add_entry (handle, dentry, inode); if (err) { inode->i_nlink = 0; + unlock_new_inode(inode); ext3_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; @@ -1800,6 +1804,7 @@ retry: ext3_update_dx_flag(dir); ext3_mark_inode_dirty(handle, dir); d_instantiate(dentry, inode); + unlock_new_inode(inode); out_stop: ext3_journal_stop(handle); if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) @@ -2174,6 +2179,7 @@ retry: mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); if (err) { drop_nlink(inode); + unlock_new_inode(inode); ext3_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; @@ -2221,7 +2227,14 @@ retry: inc_nlink(inode); atomic_inc(&inode->i_count); - err = ext3_add_nondir(handle, dentry, inode); + err = ext3_add_entry(handle, dentry, inode); + if (!err) { + ext3_mark_inode_dirty(handle, inode); + d_instantiate(dentry, inode); + } else { + drop_nlink(inode); + iput(inode); + } ext3_journal_stop(handle); if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) goto retry; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 08cac9fcace..6e6052879aa 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -826,7 +826,10 @@ got: ext4_set_inode_flags(inode); if (IS_DIRSYNC(inode)) handle->h_sync = 1; - insert_inode_hash(inode); + if (insert_inode_locked(inode) < 0) { + err = -EINVAL; + goto fail_drop; + } spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); @@ -881,6 +884,7 @@ fail_drop: DQUOT_DROP(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; + unlock_new_inode(inode); iput(inode); brelse(bitmap_bh); return ERR_PTR(err); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index be21a5ae33c..7c3325e0b00 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -34,6 +34,7 @@ #include <linux/writeback.h> #include <linux/pagevec.h> #include <linux/mpage.h> +#include <linux/namei.h> #include <linux/uio.h> #include <linux/bio.h> #include "ext4_jbd2.h" @@ -4164,9 +4165,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) inode->i_op = &ext4_dir_inode_operations; inode->i_fop = &ext4_dir_operations; } else if (S_ISLNK(inode->i_mode)) { - if (ext4_inode_is_fast_symlink(inode)) + if (ext4_inode_is_fast_symlink(inode)) { inode->i_op = &ext4_fast_symlink_inode_operations; - else { + nd_terminate_link(ei->i_data, inode->i_size, + sizeof(ei->i_data) - 1); + } else { inode->i_op = &ext4_symlink_inode_operations; ext4_set_aops(inode); } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 63adcb79298..da98a9012fa 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1693,9 +1693,11 @@ static int ext4_add_nondir(handle_t *handle, if (!err) { ext4_mark_inode_dirty(handle, inode); d_instantiate(dentry, inode); + unlock_new_inode(inode); return 0; } drop_nlink(inode); + unlock_new_inode(inode); iput(inode); return err; } @@ -1830,6 +1832,7 @@ retry: if (err) { out_clear_inode: clear_nlink(inode); + unlock_new_inode(inode); ext4_mark_inode_dirty(handle, inode); iput(inode); goto out_stop; @@ -1838,6 +1841,7 @@ out_clear_inode: ext4_update_dx_flag(dir); ext4_mark_inode_dirty(handle, dir); d_instantiate(dentry, inode); + unlock_new_inode(inode); out_stop: ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) @@ -2212,6 +2216,7 @@ retry: mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); if (err) { clear_nlink(inode); + unlock_new_inode(inode); ext4_mark_inode_dirty(handle, inode); iput(inode); goto out_stop; @@ -2262,7 +2267,14 @@ retry: ext4_inc_count(handle, inode); atomic_inc(&inode->i_count); - err = ext4_add_nondir(handle, dentry, inode); + err = ext4_add_entry(handle, dentry, inode); + if (!err) { + ext4_mark_inode_dirty(handle, inode); + d_instantiate(dentry, inode); + } else { + drop_nlink(inode); + iput(inode); + } ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 67e05835709..3a7f603b698 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -841,7 +841,6 @@ const struct file_operations fat_dir_operations = { .compat_ioctl = fat_compat_dir_ioctl, #endif .fsync = file_fsync, - .llseek = generic_file_llseek, }; static int fat_get_short_entry(struct inode *dir, loff_t *pos, diff --git a/fs/fat/inode.c b/fs/fat/inode.c index d937aaf7737..6b74d09adbe 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -749,6 +749,8 @@ static struct dentry *fat_get_parent(struct dentry *child) brelse(bh); parent = d_obtain_alias(inode); + if (!IS_ERR(parent)) + parent->d_op = sb->s_root->d_op; out: unlock_super(sb); diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index bf326d4356a..8ae32e37673 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -78,7 +78,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) * for creation. */ if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) { - if (nd->flags & LOOKUP_CREATE) + if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) return 0; } diff --git a/fs/file_table.c b/fs/file_table.c index 0fbcacc3ea7..bbeeac6efa1 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -32,6 +32,9 @@ struct files_stat_struct files_stat = { /* public. Not pretty! */ __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +/* SLAB cache for file structures */ +static struct kmem_cache *filp_cachep __read_mostly; + static struct percpu_counter nr_files __cacheline_aligned_in_smp; static inline void file_free_rcu(struct rcu_head *head) @@ -397,7 +400,12 @@ too_bad: void __init files_init(unsigned long mempages) { int n; - /* One file with associated inode and dcache is very roughly 1K. + + filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, + SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); + + /* + * One file with associated inode and dcache is very roughly 1K. * Per default don't use more than 10% of our memory for files. */ diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 9f3f2ceb73f..03a6ea5e99f 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -325,8 +325,10 @@ vxfs_iget(struct super_block *sbp, ino_t ino) if (!VXFS_ISIMMED(vip)) { ip->i_op = &page_symlink_inode_operations; ip->i_mapping->a_ops = &vxfs_aops; - } else + } else { ip->i_op = &vxfs_immed_symlink_iops; + vip->vii_immed.vi_immed[ip->i_size] = '\0'; + } } else init_special_inode(ip, ip->i_mode, old_decode_dev(vip->vii_rdev)); diff --git a/fs/inode.c b/fs/inode.c index 098a2443196..7de1cda9248 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1032,6 +1032,65 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino) EXPORT_SYMBOL(iget_locked); +int insert_inode_locked(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + ino_t ino = inode->i_ino; + struct hlist_head *head = inode_hashtable + hash(sb, ino); + struct inode *old; + + inode->i_state |= I_LOCK|I_NEW; + while (1) { + spin_lock(&inode_lock); + old = find_inode_fast(sb, head, ino); + if (likely(!old)) { + hlist_add_head(&inode->i_hash, head); + spin_unlock(&inode_lock); + return 0; + } + __iget(old); + spin_unlock(&inode_lock); + wait_on_inode(old); + if (unlikely(!hlist_unhashed(&old->i_hash))) { + iput(old); + return -EBUSY; + } + iput(old); + } +} + +EXPORT_SYMBOL(insert_inode_locked); + +int insert_inode_locked4(struct inode *inode, unsigned long hashval, + int (*test)(struct inode *, void *), void *data) +{ + struct super_block *sb = inode->i_sb; + struct hlist_head *head = inode_hashtable + hash(sb, hashval); + struct inode *old; + + inode->i_state |= I_LOCK|I_NEW; + + while (1) { + spin_lock(&inode_lock); + old = find_inode(sb, head, test, data); + if (likely(!old)) { + hlist_add_head(&inode->i_hash, head); + spin_unlock(&inode_lock); + return 0; + } + __iget(old); + spin_unlock(&inode_lock); + wait_on_inode(old); + if (unlikely(!hlist_unhashed(&old->i_hash))) { + iput(old); + return -EBUSY; + } + iput(old); + } +} + +EXPORT_SYMBOL(insert_inode_locked4); + /** * __insert_inode_hash - hash an inode * @inode: unhashed inode diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 70022fd1c53..d4d142c2edd 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -79,7 +79,8 @@ struct inode *ialloc(struct inode *parent, umode_t mode) inode = new_inode(sb); if (!inode) { jfs_warn("ialloc: new_inode returned NULL!"); - return ERR_PTR(-ENOMEM); + rc = -ENOMEM; + goto fail; } jfs_inode = JFS_IP(inode); @@ -89,8 +90,12 @@ struct inode *ialloc(struct inode *parent, umode_t mode) jfs_warn("ialloc: diAlloc returned %d!", rc); if (rc == -EIO) make_bad_inode(inode); - iput(inode); - return ERR_PTR(rc); + goto fail_put; + } + + if (insert_inode_locked(inode) < 0) { + rc = -EINVAL; + goto fail_unlock; } inode->i_uid = current_fsuid(); @@ -112,11 +117,8 @@ struct inode *ialloc(struct inode *parent, umode_t mode) * Allocate inode to quota. */ if (DQUOT_ALLOC_INODE(inode)) { - DQUOT_DROP(inode); - inode->i_flags |= S_NOQUOTA; - inode->i_nlink = 0; - iput(inode); - return ERR_PTR(-EDQUOT); + rc = -EDQUOT; + goto fail_drop; } inode->i_mode = mode; @@ -158,4 +160,15 @@ struct inode *ialloc(struct inode *parent, umode_t mode) jfs_info("ialloc returns inode = 0x%p\n", inode); return inode; + +fail_drop: + DQUOT_DROP(inode); + inode->i_flags |= S_NOQUOTA; +fail_unlock: + inode->i_nlink = 0; + unlock_new_inode(inode); +fail_put: + iput(inode); +fail: + return ERR_PTR(rc); } diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index cc3cedffbfa..b4de56b851e 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -155,7 +155,6 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, ip->i_fop = &jfs_file_operations; ip->i_mapping->a_ops = &jfs_aops; - insert_inode_hash(ip); mark_inode_dirty(ip); dip->i_ctime = dip->i_mtime = CURRENT_TIME; @@ -171,9 +170,12 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; + unlock_new_inode(ip); iput(ip); - } else + } else { d_instantiate(dentry, ip); + unlock_new_inode(ip); + } out2: free_UCSname(&dname); @@ -289,7 +291,6 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) ip->i_op = &jfs_dir_inode_operations; ip->i_fop = &jfs_dir_operations; - insert_inode_hash(ip); mark_inode_dirty(ip); /* update parent directory inode */ @@ -306,9 +307,12 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; + unlock_new_inode(ip); iput(ip); - } else + } else { d_instantiate(dentry, ip); + unlock_new_inode(ip); + } out2: free_UCSname(&dname); @@ -1019,7 +1023,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, goto out3; } - insert_inode_hash(ip); mark_inode_dirty(ip); dip->i_ctime = dip->i_mtime = CURRENT_TIME; @@ -1039,9 +1042,12 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; + unlock_new_inode(ip); iput(ip); - } else + } else { d_instantiate(dentry, ip); + unlock_new_inode(ip); + } out2: free_UCSname(&dname); @@ -1399,7 +1405,6 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, jfs_ip->dev = new_encode_dev(rdev); init_special_inode(ip, ip->i_mode, rdev); - insert_inode_hash(ip); mark_inode_dirty(ip); dir->i_ctime = dir->i_mtime = CURRENT_TIME; @@ -1417,9 +1422,12 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; + unlock_new_inode(ip); iput(ip); - } else + } else { d_instantiate(dentry, ip); + unlock_new_inode(ip); + } out1: free_UCSname(&dname); diff --git a/fs/namei.c b/fs/namei.c index af3783fff1d..dd5c9f0bf82 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -226,6 +226,16 @@ int generic_permission(struct inode *inode, int mask, return -EACCES; } +/** + * inode_permission - check for access rights to a given inode + * @inode: inode to check permission on + * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) + * + * Used to check for read/write/execute permissions on an inode. + * We use "fsuid" for this, letting us set arbitrary permissions + * for filesystem access without changing the "normal" uids which + * are used for other things. + */ int inode_permission(struct inode *inode, int mask) { int retval; @@ -247,7 +257,6 @@ int inode_permission(struct inode *inode, int mask) return -EACCES; } - /* Ordinary permission routines do not understand MAY_APPEND. */ if (inode->i_op && inode->i_op->permission) retval = inode->i_op->permission(inode, mask); else @@ -265,21 +274,6 @@ int inode_permission(struct inode *inode, int mask) } /** - * vfs_permission - check for access rights to a given path - * @nd: lookup result that describes the path - * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) - * - * Used to check for read/write/execute permissions on a path. - * We use "fsuid" for this, letting us set arbitrary permissions - * for filesystem access without changing the "normal" uids which - * are used for other things. - */ -int vfs_permission(struct nameidata *nd, int mask) -{ - return inode_permission(nd->path.dentry->d_inode, mask); -} - -/** * file_permission - check for additional access rights to a given file * @file: file to check access rights for * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) @@ -289,7 +283,7 @@ int vfs_permission(struct nameidata *nd, int mask) * * Note: * Do not use this function in new code. All access checks should - * be done using vfs_permission(). + * be done using inode_permission(). */ int file_permission(struct file *file, int mask) { @@ -527,18 +521,6 @@ out_unlock: return result; } -/* SMP-safe */ -static __always_inline void -walk_init_root(const char *name, struct nameidata *nd) -{ - struct fs_struct *fs = current->fs; - - read_lock(&fs->lock); - nd->path = fs->root; - path_get(&fs->root); - read_unlock(&fs->lock); -} - /* * Wrapper to retry pathname resolution whenever the underlying * file system returns an ESTALE. @@ -576,9 +558,16 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l goto fail; if (*link == '/') { + struct fs_struct *fs = current->fs; + path_put(&nd->path); - walk_init_root(link, nd); + + read_lock(&fs->lock); + nd->path = fs->root; + path_get(&fs->root); + read_unlock(&fs->lock); } + res = link_path_walk(link, nd); if (nd->depth || res || nd->last_type!=LAST_NORM) return res; @@ -859,7 +848,8 @@ static int __link_path_walk(const char *name, struct nameidata *nd) nd->flags |= LOOKUP_CONTINUE; err = exec_permission_lite(inode); if (err == -EAGAIN) - err = vfs_permission(nd, MAY_EXEC); + err = inode_permission(nd->path.dentry->d_inode, + MAY_EXEC); if (err) break; @@ -1493,9 +1483,9 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode, return error; } -int may_open(struct nameidata *nd, int acc_mode, int flag) +int may_open(struct path *path, int acc_mode, int flag) { - struct dentry *dentry = nd->path.dentry; + struct dentry *dentry = path->dentry; struct inode *inode = dentry->d_inode; int error; @@ -1516,13 +1506,13 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { flag &= ~O_TRUNC; } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { - if (nd->path.mnt->mnt_flags & MNT_NODEV) + if (path->mnt->mnt_flags & MNT_NODEV) return -EACCES; flag &= ~O_TRUNC; } - error = vfs_permission(nd, acc_mode); + error = inode_permission(inode, acc_mode); if (error) return error; /* @@ -1556,6 +1546,9 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) * Refuse to truncate files with mandatory locks held on them. */ error = locks_verify_locked(inode); + if (!error) + error = security_path_truncate(path, 0, + ATTR_MTIME|ATTR_CTIME|ATTR_OPEN); if (!error) { DQUOT_INIT(inode); @@ -1586,14 +1579,18 @@ static int __open_namei_create(struct nameidata *nd, struct path *path, if (!IS_POSIXACL(dir->d_inode)) mode &= ~current->fs->umask; + error = security_path_mknod(&nd->path, path->dentry, mode, 0); + if (error) + goto out_unlock; error = vfs_create(dir->d_inode, path->dentry, mode, nd); +out_unlock: mutex_unlock(&dir->d_inode->i_mutex); dput(nd->path.dentry); nd->path.dentry = path->dentry; if (error) return error; /* Don't check for write permission, don't truncate */ - return may_open(nd, 0, flag & ~O_TRUNC); + return may_open(&nd->path, 0, flag & ~O_TRUNC); } /* @@ -1779,7 +1776,7 @@ ok: if (error) goto exit; } - error = may_open(&nd, acc_mode, flag); + error = may_open(&nd.path, acc_mode, flag); if (error) { if (will_write) mnt_drop_write(nd.path.mnt); @@ -1999,6 +1996,9 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; + error = security_path_mknod(&nd.path, dentry, mode, dev); + if (error) + goto out_drop_write; switch (mode & S_IFMT) { case 0: case S_IFREG: error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); @@ -2011,6 +2011,7 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0); break; } +out_drop_write: mnt_drop_write(nd.path.mnt); out_dput: dput(dentry); @@ -2070,7 +2071,11 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode) error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; + error = security_path_mkdir(&nd.path, dentry, mode); + if (error) + goto out_drop_write; error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); +out_drop_write: mnt_drop_write(nd.path.mnt); out_dput: dput(dentry); @@ -2180,7 +2185,11 @@ static long do_rmdir(int dfd, const char __user *pathname) error = mnt_want_write(nd.path.mnt); if (error) goto exit3; + error = security_path_rmdir(&nd.path, dentry); + if (error) + goto exit4; error = vfs_rmdir(nd.path.dentry->d_inode, dentry); +exit4: mnt_drop_write(nd.path.mnt); exit3: dput(dentry); @@ -2265,7 +2274,11 @@ static long do_unlinkat(int dfd, const char __user *pathname) error = mnt_want_write(nd.path.mnt); if (error) goto exit2; + error = security_path_unlink(&nd.path, dentry); + if (error) + goto exit3; error = vfs_unlink(nd.path.dentry->d_inode, dentry); +exit3: mnt_drop_write(nd.path.mnt); exit2: dput(dentry); @@ -2346,7 +2359,11 @@ asmlinkage long sys_symlinkat(const char __user *oldname, error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; + error = security_path_symlink(&nd.path, dentry, from); + if (error) + goto out_drop_write; error = vfs_symlink(nd.path.dentry->d_inode, dentry, from); +out_drop_write: mnt_drop_write(nd.path.mnt); out_dput: dput(dentry); @@ -2443,7 +2460,11 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname, error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; + error = security_path_link(old_path.dentry, &nd.path, new_dentry); + if (error) + goto out_drop_write; error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry); +out_drop_write: mnt_drop_write(nd.path.mnt); out_dput: dput(new_dentry); @@ -2679,8 +2700,13 @@ asmlinkage long sys_renameat(int olddfd, const char __user *oldname, error = mnt_want_write(oldnd.path.mnt); if (error) goto exit5; + error = security_path_rename(&oldnd.path, old_dentry, + &newnd.path, new_dentry); + if (error) + goto exit6; error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry); +exit6: mnt_drop_write(oldnd.path.mnt); exit5: dput(new_dentry); @@ -2750,13 +2776,16 @@ int vfs_follow_link(struct nameidata *nd, const char *link) /* get the link contents into pagecache */ static char *page_getlink(struct dentry * dentry, struct page **ppage) { - struct page * page; + char *kaddr; + struct page *page; struct address_space *mapping = dentry->d_inode->i_mapping; page = read_mapping_page(mapping, 0, NULL); if (IS_ERR(page)) return (char*)page; *ppage = page; - return kmap(page); + kaddr = kmap(page); + nd_terminate_link(kaddr, dentry->d_inode->i_size, PAGE_SIZE - 1); + return kaddr; } int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) @@ -2849,7 +2878,6 @@ EXPORT_SYMBOL(path_lookup); EXPORT_SYMBOL(kern_path); EXPORT_SYMBOL(vfs_path_lookup); EXPORT_SYMBOL(inode_permission); -EXPORT_SYMBOL(vfs_permission); EXPORT_SYMBOL(file_permission); EXPORT_SYMBOL(unlock_rename); EXPORT_SYMBOL(vfs_create); @@ -2865,3 +2893,10 @@ EXPORT_SYMBOL(vfs_symlink); EXPORT_SYMBOL(vfs_unlink); EXPORT_SYMBOL(dentry_unhash); EXPORT_SYMBOL(generic_readlink); + +/* to be mentioned only in INIT_TASK */ +struct fs_struct init_fs = { + .count = ATOMIC_INIT(1), + .lock = __RW_LOCK_UNLOCKED(init_fs.lock), + .umask = 0022, +}; diff --git a/fs/namespace.c b/fs/namespace.c index 1c09cab8f7c..a40685d800a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1990,7 +1990,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, if (!new_ns->root) { up_write(&namespace_sem); kfree(new_ns); - return ERR_PTR(-ENOMEM);; + return ERR_PTR(-ENOMEM); } spin_lock(&vfsmount_lock); list_add_tail(&new_ns->list, &new_ns->root->mnt_list); diff --git a/fs/nfsctl.c b/fs/nfsctl.c index b1acbd6ab6f..b27451909df 100644 --- a/fs/nfsctl.c +++ b/fs/nfsctl.c @@ -38,9 +38,10 @@ static struct file *do_open(char *name, int flags) return ERR_PTR(error); if (flags == O_RDWR) - error = may_open(&nd,MAY_READ|MAY_WRITE,FMODE_READ|FMODE_WRITE); + error = may_open(&nd.path, MAY_READ|MAY_WRITE, + FMODE_READ|FMODE_WRITE); else - error = may_open(&nd, MAY_WRITE, FMODE_WRITE); + error = may_open(&nd.path, MAY_WRITE, FMODE_WRITE); if (!error) return dentry_open(nd.path.dentry, nd.path.mnt, flags, diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig new file mode 100644 index 00000000000..50914d7303c --- /dev/null +++ b/fs/notify/Kconfig @@ -0,0 +1,2 @@ +source "fs/notify/dnotify/Kconfig" +source "fs/notify/inotify/Kconfig" diff --git a/fs/notify/Makefile b/fs/notify/Makefile new file mode 100644 index 00000000000..5a95b6010ce --- /dev/null +++ b/fs/notify/Makefile @@ -0,0 +1,2 @@ +obj-y += dnotify/ +obj-y += inotify/ diff --git a/fs/notify/dnotify/Kconfig b/fs/notify/dnotify/Kconfig new file mode 100644 index 00000000000..26adf5dfa64 --- /dev/null +++ b/fs/notify/dnotify/Kconfig @@ -0,0 +1,10 @@ +config DNOTIFY + bool "Dnotify support" + default y + help + Dnotify is a directory-based per-fd file change notification system + that uses signals to communicate events to user-space. There exist + superior alternatives, but some applications may still rely on + dnotify. + + If unsure, say Y. diff --git a/fs/notify/dnotify/Makefile b/fs/notify/dnotify/Makefile new file mode 100644 index 00000000000..f145251dcad --- /dev/null +++ b/fs/notify/dnotify/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_DNOTIFY) += dnotify.o diff --git a/fs/dnotify.c b/fs/notify/dnotify/dnotify.c index 676073b8dda..b0aa2cde80b 100644 --- a/fs/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -115,9 +115,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) dn->dn_next = inode->i_dnotify; inode->i_dnotify = dn; spin_unlock(&inode->i_lock); - - if (filp->f_op && filp->f_op->dir_notify) - return filp->f_op->dir_notify(filp, arg); return 0; out_free: diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig new file mode 100644 index 00000000000..44679284102 --- /dev/null +++ b/fs/notify/inotify/Kconfig @@ -0,0 +1,27 @@ +config INOTIFY + bool "Inotify file change notification support" + default y + ---help--- + Say Y here to enable inotify support. Inotify is a file change + notification system and a replacement for dnotify. Inotify fixes + numerous shortcomings in dnotify and introduces several new features + including multiple file events, one-shot support, and unmount + notification. + + For more information, see <file:Documentation/filesystems/inotify.txt> + + If unsure, say Y. + +config INOTIFY_USER + bool "Inotify support for userspace" + depends on INOTIFY + default y + ---help--- + Say Y here to enable inotify support for userspace, including the + associated system calls. Inotify allows monitoring of both files and + directories via a single open fd. Events are read from the file + descriptor, which is also select()- and poll()-able. + + For more information, see <file:Documentation/filesystems/inotify.txt> + + If unsure, say Y. diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile new file mode 100644 index 00000000000..e290f3bb9d8 --- /dev/null +++ b/fs/notify/inotify/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_INOTIFY) += inotify.o +obj-$(CONFIG_INOTIFY_USER) += inotify_user.o diff --git a/fs/inotify.c b/fs/notify/inotify/inotify.c index dae3f28f30d..dae3f28f30d 100644 --- a/fs/inotify.c +++ b/fs/notify/inotify/inotify.c diff --git a/fs/inotify_user.c b/fs/notify/inotify/inotify_user.c index e2425bbd871..400f8064a54 100644 --- a/fs/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -76,10 +76,10 @@ struct inotify_device { struct mutex ev_mutex; /* protects event queue */ struct mutex up_mutex; /* synchronizes watch updates */ struct list_head events; /* list of queued events */ - atomic_t count; /* reference count */ struct user_struct *user; /* user who opened this dev */ struct inotify_handle *ih; /* inotify handle */ struct fasync_struct *fa; /* async notification */ + atomic_t count; /* reference count */ unsigned int queue_size; /* size of the queue (bytes) */ unsigned int event_count; /* number of pending events */ unsigned int max_events; /* maximum number of events */ diff --git a/fs/open.c b/fs/open.c index c0a426d5766..1cd7d40e999 100644 --- a/fs/open.c +++ b/fs/open.c @@ -272,6 +272,8 @@ static long do_sys_truncate(const char __user *pathname, loff_t length) goto put_write_and_out; error = locks_verify_truncate(inode, NULL, length); + if (!error) + error = security_path_truncate(&path, length, 0); if (!error) { DQUOT_INIT(inode); error = do_truncate(path.dentry, length, 0, NULL); @@ -329,6 +331,9 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) error = locks_verify_truncate(inode, file, length); if (!error) + error = security_path_truncate(&file->f_path, length, + ATTR_MTIME|ATTR_CTIME); + if (!error) error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); out_putf: fput(file); diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 3bb1cf1e742..f75efa22df5 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -9,6 +9,7 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/time.h> +#include <linux/irqnr.h> #include <asm/cputime.h> #ifndef arch_irq_stat_cpu @@ -45,10 +46,6 @@ static int show_stat(struct seq_file *p, void *v) steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); for_each_irq_nr(j) { -#ifdef CONFIG_SPARSE_IRQ - if (!irq_to_desc(j)) - continue; -#endif sum += kstat_irqs_cpu(j, i); } sum += arch_irq_stat_cpu(i); @@ -95,12 +92,6 @@ static int show_stat(struct seq_file *p, void *v) /* sum again ? it could be updated? */ for_each_irq_nr(j) { per_irq_sum = 0; -#ifdef CONFIG_SPARSE_IRQ - if (!irq_to_desc(j)) { - seq_printf(p, " %u", per_irq_sum); - continue; - } -#endif for_each_possible_cpu(i) per_irq_sum += kstat_irqs_cpu(j, i); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 6c4c2c69449..145c2d3e5e0 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1753,6 +1753,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, struct inode *inode) { struct super_block *sb; + struct reiserfs_iget_args args; INITIALIZE_PATH(path_to_key); struct cpu_key key; struct item_head ih; @@ -1780,6 +1781,14 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, err = -ENOMEM; goto out_bad_inode; } + args.objectid = inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid); + memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE); + args.dirid = le32_to_cpu(ih.ih_key.k_dir_id); + if (insert_inode_locked4(inode, args.objectid, + reiserfs_find_actor, &args) < 0) { + err = -EINVAL; + goto out_bad_inode; + } if (old_format_only(sb)) /* not a perfect generation count, as object ids can be reused, but ** this is as good as reiserfs can do right now. @@ -1859,13 +1868,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, } else { inode2sd(&sd, inode, inode->i_size); } - // these do not go to on-disk stat data - inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid); - // store in in-core inode the key of stat data and version all // object items will have (directory items will have old offset // format, other new objects will consist of new items) - memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE); if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode)) set_inode_item_key_version(inode, KEY_FORMAT_3_5); else @@ -1929,7 +1934,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, reiserfs_mark_inode_private(inode); } - insert_inode_hash(inode); reiserfs_update_sd(th, inode); reiserfs_check_path(&path_to_key); @@ -1956,6 +1960,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, out_inserted_sd: inode->i_nlink = 0; th->t_trans_id = 0; /* so the caller can't use this handle later */ + unlock_new_inode(inode); /* OK to do even if we hadn't locked it */ /* If we were inheriting an ACL, we need to release the lock so that * iput doesn't deadlock in reiserfs_delete_xattrs. The locking diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 4f322e5ed84..738967f6c8e 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -646,6 +646,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode, err = journal_end(&th, dir->i_sb, jbegin_count); if (err) retval = err; + unlock_new_inode(inode); iput(inode); goto out_failed; } @@ -653,6 +654,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode, reiserfs_update_inode_transaction(dir); d_instantiate(dentry, inode); + unlock_new_inode(inode); retval = journal_end(&th, dir->i_sb, jbegin_count); out_failed: @@ -727,11 +729,13 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode, err = journal_end(&th, dir->i_sb, jbegin_count); if (err) retval = err; + unlock_new_inode(inode); iput(inode); goto out_failed; } d_instantiate(dentry, inode); + unlock_new_inode(inode); retval = journal_end(&th, dir->i_sb, jbegin_count); out_failed: @@ -812,6 +816,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) err = journal_end(&th, dir->i_sb, jbegin_count); if (err) retval = err; + unlock_new_inode(inode); iput(inode); goto out_failed; } @@ -819,6 +824,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) reiserfs_update_sd(&th, dir); d_instantiate(dentry, inode); + unlock_new_inode(inode); retval = journal_end(&th, dir->i_sb, jbegin_count); out_failed: if (locked) @@ -1096,11 +1102,13 @@ static int reiserfs_symlink(struct inode *parent_dir, err = journal_end(&th, parent_dir->i_sb, jbegin_count); if (err) retval = err; + unlock_new_inode(inode); iput(inode); goto out_failed; } d_instantiate(dentry, inode); + unlock_new_inode(inode); retval = journal_end(&th, parent_dir->i_sb, jbegin_count); out_failed: reiserfs_write_unlock(parent_dir->i_sb); diff --git a/fs/seq_file.c b/fs/seq_file.c index c99358a5217..b569ff1c4dc 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -389,8 +389,14 @@ char *mangle_path(char *s, char *p, char *esc) } EXPORT_SYMBOL(mangle_path); -/* - * return the absolute path of 'dentry' residing in mount 'mnt'. +/** + * seq_path - seq_file interface to print a pathname + * @m: the seq_file handle + * @path: the struct path to print + * @esc: set of characters to escape in the output + * + * return the absolute path of 'path', as represented by the + * dentry / mnt pair in the path parameter. */ int seq_path(struct seq_file *m, struct path *path, char *esc) { diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index df0d435baa4..3d81bf58dae 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -27,6 +27,7 @@ #include <linux/init.h> #include <linux/buffer_head.h> #include <linux/vfs.h> +#include <linux/namei.h> #include <asm/byteorder.h> #include "sysv.h" @@ -163,8 +164,11 @@ void sysv_set_inode(struct inode *inode, dev_t rdev) if (inode->i_blocks) { inode->i_op = &sysv_symlink_inode_operations; inode->i_mapping->a_ops = &sysv_aops; - } else + } else { inode->i_op = &sysv_fast_symlink_inode_operations; + nd_terminate_link(SYSV_I(inode)->i_data, inode->i_size, + sizeof(SYSV_I(inode)->i_data) - 1); + } } else init_special_inode(inode, inode->i_mode, rdev); } diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 4a18f084cc4..0e5e54d8292 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -32,18 +32,15 @@ #include "ubifs.h" #include <linux/writeback.h> -#include <asm/div64.h> +#include <linux/math64.h> /* * When pessimistic budget calculations say that there is no enough space, * UBIFS starts writing back dirty inodes and pages, doing garbage collection, - * or committing. The below constants define maximum number of times UBIFS + * or committing. The below constant defines maximum number of times UBIFS * repeats the operations. */ -#define MAX_SHRINK_RETRIES 8 -#define MAX_GC_RETRIES 4 -#define MAX_CMT_RETRIES 2 -#define MAX_NOSPC_RETRIES 1 +#define MAX_MKSPC_RETRIES 3 /* * The below constant defines amount of dirty pages which should be written @@ -52,30 +49,6 @@ #define NR_TO_WRITE 16 /** - * struct retries_info - information about re-tries while making free space. - * @prev_liability: previous liability - * @shrink_cnt: how many times the liability was shrinked - * @shrink_retries: count of liability shrink re-tries (increased when - * liability does not shrink) - * @try_gc: GC should be tried first - * @gc_retries: how many times GC was run - * @cmt_retries: how many times commit has been done - * @nospc_retries: how many times GC returned %-ENOSPC - * - * Since we consider budgeting to be the fast-path, and this structure has to - * be allocated on stack and zeroed out, we make it smaller using bit-fields. - */ -struct retries_info { - long long prev_liability; - unsigned int shrink_cnt; - unsigned int shrink_retries:5; - unsigned int try_gc:1; - unsigned int gc_retries:4; - unsigned int cmt_retries:3; - unsigned int nospc_retries:1; -}; - -/** * shrink_liability - write-back some dirty pages/inodes. * @c: UBIFS file-system description object * @nr_to_write: how many dirty pages to write-back @@ -147,9 +120,25 @@ static int run_gc(struct ubifs_info *c) } /** + * get_liability - calculate current liability. + * @c: UBIFS file-system description object + * + * This function calculates and returns current UBIFS liability, i.e. the + * amount of bytes UBIFS has "promised" to write to the media. + */ +static long long get_liability(struct ubifs_info *c) +{ + long long liab; + + spin_lock(&c->space_lock); + liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth; + spin_unlock(&c->space_lock); + return liab; +} + +/** * make_free_space - make more free space on the file-system. * @c: UBIFS file-system description object - * @ri: information about previous invocations of this function * * This function is called when an operation cannot be budgeted because there * is supposedly no free space. But in most cases there is some free space: @@ -165,87 +154,42 @@ static int run_gc(struct ubifs_info *c) * Returns %-ENOSPC if it couldn't do more free space, and other negative error * codes on failures. */ -static int make_free_space(struct ubifs_info *c, struct retries_info *ri) +static int make_free_space(struct ubifs_info *c) { - int err; - - /* - * If we have some dirty pages and inodes (liability), try to write - * them back unless this was tried too many times without effect - * already. - */ - if (ri->shrink_retries < MAX_SHRINK_RETRIES && !ri->try_gc) { - long long liability; - - spin_lock(&c->space_lock); - liability = c->budg_idx_growth + c->budg_data_growth + - c->budg_dd_growth; - spin_unlock(&c->space_lock); + int err, retries = 0; + long long liab1, liab2; - if (ri->prev_liability >= liability) { - /* Liability does not shrink, next time try GC then */ - ri->shrink_retries += 1; - if (ri->gc_retries < MAX_GC_RETRIES) - ri->try_gc = 1; - dbg_budg("liability did not shrink: retries %d of %d", - ri->shrink_retries, MAX_SHRINK_RETRIES); - } + do { + liab1 = get_liability(c); + /* + * We probably have some dirty pages or inodes (liability), try + * to write them back. + */ + dbg_budg("liability %lld, run write-back", liab1); + shrink_liability(c, NR_TO_WRITE); - dbg_budg("force write-back (count %d)", ri->shrink_cnt); - shrink_liability(c, NR_TO_WRITE + ri->shrink_cnt); + liab2 = get_liability(c); + if (liab2 < liab1) + return -EAGAIN; - ri->prev_liability = liability; - ri->shrink_cnt += 1; - return -EAGAIN; - } + dbg_budg("new liability %lld (not shrinked)", liab2); - /* - * Try to run garbage collector unless it was already tried too many - * times. - */ - if (ri->gc_retries < MAX_GC_RETRIES) { - ri->gc_retries += 1; - dbg_budg("run GC, retries %d of %d", - ri->gc_retries, MAX_GC_RETRIES); - - ri->try_gc = 0; + /* Liability did not shrink again, try GC */ + dbg_budg("Run GC"); err = run_gc(c); if (!err) return -EAGAIN; - if (err == -EAGAIN) { - dbg_budg("GC asked to commit"); - err = ubifs_run_commit(c); - if (err) - return err; - return -EAGAIN; - } - - if (err != -ENOSPC) - return err; - - /* - * GC could not make any progress. If this is the first time, - * then it makes sense to try to commit, because it might make - * some dirty space. - */ - dbg_budg("GC returned -ENOSPC, retries %d", - ri->nospc_retries); - if (ri->nospc_retries >= MAX_NOSPC_RETRIES) + if (err != -EAGAIN && err != -ENOSPC) + /* Some real error happened */ return err; - ri->nospc_retries += 1; - } - /* Neither GC nor write-back helped, try to commit */ - if (ri->cmt_retries < MAX_CMT_RETRIES) { - ri->cmt_retries += 1; - dbg_budg("run commit, retries %d of %d", - ri->cmt_retries, MAX_CMT_RETRIES); + dbg_budg("Run commit (retries %d)", retries); err = ubifs_run_commit(c); if (err) return err; - return -EAGAIN; - } + } while (retries++ < MAX_MKSPC_RETRIES); + return -ENOSPC; } @@ -258,8 +202,8 @@ static int make_free_space(struct ubifs_info *c, struct retries_info *ri) */ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) { - int ret; - uint64_t idx_size; + int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz; + long long idx_size; idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; @@ -271,23 +215,16 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) * pair, nor similarly the two variables for the new index size, so we * have to do this costly 64-bit division on fast-path. */ - if (do_div(idx_size, c->leb_size - c->max_idx_node_sz)) - ret = idx_size + 1; - else - ret = idx_size; + idx_size += eff_leb_size - 1; + idx_lebs = div_u64(idx_size, eff_leb_size); /* * The index head is not available for the in-the-gaps method, so add an * extra LEB to compensate. */ - ret += 1; - /* - * At present the index needs at least 2 LEBs: one for the index head - * and one for in-the-gaps method (which currently does not cater for - * the index head and so excludes it from consideration). - */ - if (ret < 2) - ret = 2; - return ret; + idx_lebs += 1; + if (idx_lebs < MIN_INDEX_LEBS) + idx_lebs = MIN_INDEX_LEBS; + return idx_lebs; } /** @@ -530,8 +467,7 @@ static int calc_dd_growth(const struct ubifs_info *c, int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) { int uninitialized_var(cmt_retries), uninitialized_var(wb_retries); - int err, idx_growth, data_growth, dd_growth; - struct retries_info ri; + int err, idx_growth, data_growth, dd_growth, retried = 0; ubifs_assert(req->new_page <= 1); ubifs_assert(req->dirtied_page <= 1); @@ -549,7 +485,6 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) if (!data_growth && !dd_growth) return 0; idx_growth = calc_idx_growth(c, req); - memset(&ri, 0, sizeof(struct retries_info)); again: spin_lock(&c->space_lock); @@ -587,12 +522,17 @@ again: return err; } - err = make_free_space(c, &ri); + err = make_free_space(c); + cond_resched(); if (err == -EAGAIN) { dbg_budg("try again"); - cond_resched(); goto again; } else if (err == -ENOSPC) { + if (!retried) { + retried = 1; + dbg_budg("-ENOSPC, but anyway try once again"); + goto again; + } dbg_budg("FS is full, -ENOSPC"); c->nospace = 1; if (can_use_rp(c) || c->rp_size == 0) @@ -712,9 +652,9 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, * user-space. User-space application tend to expect that if the file-system * (e.g., via the 'statfs()' call) reports that it has N bytes available, they * are able to write a file of size N. UBIFS attaches node headers to each data - * node and it has to write indexind nodes as well. This introduces additional - * overhead, and UBIFS it has to report sligtly less free space to meet the - * above expectetion. + * node and it has to write indexing nodes as well. This introduces additional + * overhead, and UBIFS has to report slightly less free space to meet the above + * expectations. * * This function assumes free space is made up of uncompressed data nodes and * full index nodes (one per data node, tripled because we always allow enough @@ -723,7 +663,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, * Note, the calculation is pessimistic, which means that most of the time * UBIFS reports less space than it actually has. */ -long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) +long long ubifs_reported_space(const struct ubifs_info *c, long long free) { int divisor, factor, f; @@ -737,7 +677,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) * of data nodes, f - fanout. Because effective UBIFS fanout is twice * as less than maximum fanout, we assume that each data node * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. - * Note, the multiplier 3 is because UBIFS reseves thrice as more space + * Note, the multiplier 3 is because UBIFS reserves thrice as more space * for the index. */ f = c->fanout > 3 ? c->fanout >> 1 : 2; @@ -745,8 +685,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) divisor = UBIFS_MAX_DATA_NODE_SZ; divisor += (c->max_idx_node_sz * 3) / (f - 1); free *= factor; - do_div(free, divisor); - return free; + return div_u64(free, divisor); } /** @@ -756,10 +695,10 @@ long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) * This function calculates amount of free space to report to user-space. * * Because UBIFS may introduce substantial overhead (the index, node headers, - * alighment, wastage at the end of eraseblocks, etc), it cannot report real + * alignment, wastage at the end of eraseblocks, etc), it cannot report real * amount of free flash space it has (well, because not all dirty space is - * reclamable, UBIFS does not actually know the real amount). If UBIFS did so, - * it would bread user expectetion about what free space is. Users seem to + * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so, + * it would bread user expectations about what free space is. Users seem to * accustomed to assume that if the file-system reports N bytes of free space, * they would be able to fit a file of N bytes to the FS. This almost works for * traditional file-systems, because they have way less overhead than UBIFS. @@ -771,18 +710,9 @@ long long ubifs_get_free_space(struct ubifs_info *c) long long available, outstanding, free; spin_lock(&c->space_lock); - min_idx_lebs = ubifs_calc_min_idx_lebs(c); + min_idx_lebs = c->min_idx_lebs; + ubifs_assert(min_idx_lebs == ubifs_calc_min_idx_lebs(c)); outstanding = c->budg_data_growth + c->budg_dd_growth; - - /* - * Force the amount available to the total size reported if the used - * space is zero. - */ - if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) { - spin_unlock(&c->space_lock); - return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT; - } - available = ubifs_calc_available(c, min_idx_lebs); /* diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index b49884c8c10..f3a7945527f 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -470,12 +470,12 @@ int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot) { struct ubifs_idx_node *idx; int lnum, offs, len, err = 0; + struct ubifs_debug_info *d = c->dbg; - c->old_zroot = *zroot; - - lnum = c->old_zroot.lnum; - offs = c->old_zroot.offs; - len = c->old_zroot.len; + d->old_zroot = *zroot; + lnum = d->old_zroot.lnum; + offs = d->old_zroot.offs; + len = d->old_zroot.len; idx = kmalloc(c->max_idx_node_sz, GFP_NOFS); if (!idx) @@ -485,8 +485,8 @@ int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot) if (err) goto out; - c->old_zroot_level = le16_to_cpu(idx->level); - c->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum); + d->old_zroot_level = le16_to_cpu(idx->level); + d->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum); out: kfree(idx); return err; @@ -509,6 +509,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) { int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt; int first = 1, iip; + struct ubifs_debug_info *d = c->dbg; union ubifs_key lower_key, upper_key, l_key, u_key; unsigned long long uninitialized_var(last_sqnum); struct ubifs_idx_node *idx; @@ -525,9 +526,9 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) UBIFS_IDX_NODE_SZ; /* Start at the old zroot */ - lnum = c->old_zroot.lnum; - offs = c->old_zroot.offs; - len = c->old_zroot.len; + lnum = d->old_zroot.lnum; + offs = d->old_zroot.offs; + len = d->old_zroot.len; iip = 0; /* @@ -560,11 +561,11 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) if (first) { first = 0; /* Check root level and sqnum */ - if (le16_to_cpu(idx->level) != c->old_zroot_level) { + if (le16_to_cpu(idx->level) != d->old_zroot_level) { err = 2; goto out_dump; } - if (le64_to_cpu(idx->ch.sqnum) != c->old_zroot_sqnum) { + if (le64_to_cpu(idx->ch.sqnum) != d->old_zroot_sqnum) { err = 3; goto out_dump; } diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c index a0ada596b17..11e4132f314 100644 --- a/fs/ubifs/compress.c +++ b/fs/ubifs/compress.c @@ -33,7 +33,7 @@ /* Fake description object for the "none" compressor */ static struct ubifs_compressor none_compr = { .compr_type = UBIFS_COMPR_NONE, - .name = "no compression", + .name = "none", .capi_name = "", }; @@ -43,13 +43,13 @@ static DEFINE_MUTEX(lzo_mutex); static struct ubifs_compressor lzo_compr = { .compr_type = UBIFS_COMPR_LZO, .comp_mutex = &lzo_mutex, - .name = "LZO", + .name = "lzo", .capi_name = "lzo", }; #else static struct ubifs_compressor lzo_compr = { .compr_type = UBIFS_COMPR_LZO, - .name = "LZO", + .name = "lzo", }; #endif @@ -108,7 +108,7 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, if (compr->comp_mutex) mutex_lock(compr->comp_mutex); err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf, - out_len); + (unsigned int *)out_len); if (compr->comp_mutex) mutex_unlock(compr->comp_mutex); if (unlikely(err)) { @@ -119,10 +119,10 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, } /* - * Presently, we just require that compression results in less data, - * rather than any defined minimum compression ratio or amount. + * If the data compressed only slightly, it is better to leave it + * uncompressed to improve read speed. */ - if (ALIGN(*out_len, 8) >= ALIGN(in_len, 8)) + if (in_len - *out_len < UBIFS_MIN_COMPRESS_DIFF) goto no_compr; return; @@ -172,7 +172,7 @@ int ubifs_decompress(const void *in_buf, int in_len, void *out_buf, if (compr->decomp_mutex) mutex_lock(compr->decomp_mutex); err = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf, - out_len); + (unsigned int *)out_len); if (compr->decomp_mutex) mutex_unlock(compr->decomp_mutex); if (err) @@ -244,7 +244,7 @@ out_lzo: /** * ubifs_compressors_exit - de-initialize UBIFS compressors. */ -void __exit ubifs_compressors_exit(void) +void ubifs_compressors_exit(void) { compr_exit(&lzo_compr); compr_exit(&zlib_compr); diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 510ffa0bbda..792c5a16c18 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -32,6 +32,8 @@ #include "ubifs.h" #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/debugfs.h> +#include <linux/math64.h> #ifdef CONFIG_UBIFS_FS_DEBUG @@ -596,7 +598,9 @@ void dbg_dump_budg(struct ubifs_info *c) struct rb_node *rb; struct ubifs_bud *bud; struct ubifs_gced_idx_leb *idx_gc; + long long available, outstanding, free; + ubifs_assert(spin_is_locked(&c->space_lock)); spin_lock(&dbg_lock); printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, @@ -629,6 +633,17 @@ void dbg_dump_budg(struct ubifs_info *c) printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n", idx_gc->lnum, idx_gc->unmap); printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); + + /* Print budgeting predictions */ + available = ubifs_calc_available(c, c->min_idx_lebs); + outstanding = c->budg_data_growth + c->budg_dd_growth; + if (available > outstanding) + free = ubifs_reported_space(c, available - outstanding); + else + free = 0; + printk(KERN_DEBUG "Budgeting predictions:\n"); + printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", + available, outstanding, free); spin_unlock(&dbg_lock); } @@ -645,7 +660,8 @@ void dbg_dump_lprops(struct ubifs_info *c) struct ubifs_lprops lp; struct ubifs_lp_stats lst; - printk(KERN_DEBUG "(pid %d) Dumping LEB properties\n", current->pid); + printk(KERN_DEBUG "(pid %d) start dumping LEB properties\n", + current->pid); ubifs_get_lp_stats(c, &lst); dbg_dump_lstats(&lst); @@ -656,6 +672,8 @@ void dbg_dump_lprops(struct ubifs_info *c) dbg_dump_lprop(c, &lp); } + printk(KERN_DEBUG "(pid %d) finish dumping LEB properties\n", + current->pid); } void dbg_dump_lpt_info(struct ubifs_info *c) @@ -663,6 +681,7 @@ void dbg_dump_lpt_info(struct ubifs_info *c) int i; spin_lock(&dbg_lock); + printk(KERN_DEBUG "(pid %d) dumping LPT information\n", current->pid); printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz); printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz); printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz); @@ -684,7 +703,8 @@ void dbg_dump_lpt_info(struct ubifs_info *c) printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); printk(KERN_DEBUG "\tLPT head is at %d:%d\n", c->nhead_lnum, c->nhead_offs); - printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs); + printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", + c->ltab_lnum, c->ltab_offs); if (c->big_lpt) printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n", c->lsave_lnum, c->lsave_offs); @@ -703,9 +723,9 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) if (dbg_failure_mode) return; - printk(KERN_DEBUG "(pid %d) Dumping LEB %d\n", current->pid, lnum); - - sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); + printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", + current->pid, lnum); + sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); if (IS_ERR(sleb)) { ubifs_err("scan error %d", (int)PTR_ERR(sleb)); return; @@ -721,6 +741,8 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) dbg_dump_node(c, snod->node); } + printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", + current->pid, lnum); ubifs_scan_destroy(sleb); return; } @@ -768,7 +790,7 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) { int i; - printk(KERN_DEBUG "(pid %d) Dumping heap cat %d (%d elements)\n", + printk(KERN_DEBUG "(pid %d) start dumping heap cat %d (%d elements)\n", current->pid, cat, heap->cnt); for (i = 0; i < heap->cnt; i++) { struct ubifs_lprops *lprops = heap->arr[i]; @@ -777,6 +799,7 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) "flags %d\n", i, lprops->lnum, lprops->hpos, lprops->free, lprops->dirty, lprops->flags); } + printk(KERN_DEBUG "(pid %d) finish dumping heap\n", current->pid); } void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, @@ -784,7 +807,7 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, { int i; - printk(KERN_DEBUG "(pid %d) Dumping pnode:\n", current->pid); + printk(KERN_DEBUG "(pid %d) dumping pnode:\n", current->pid); printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", @@ -803,7 +826,7 @@ void dbg_dump_tnc(struct ubifs_info *c) int level; printk(KERN_DEBUG "\n"); - printk(KERN_DEBUG "(pid %d) Dumping the TNC tree\n", current->pid); + printk(KERN_DEBUG "(pid %d) start dumping TNC tree\n", current->pid); znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); level = znode->level; printk(KERN_DEBUG "== Level %d ==\n", level); @@ -815,8 +838,7 @@ void dbg_dump_tnc(struct ubifs_info *c) dbg_dump_znode(c, znode); znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); } - - printk(KERN_DEBUG "\n"); + printk(KERN_DEBUG "(pid %d) finish dumping TNC tree\n", current->pid); } static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, @@ -992,8 +1014,8 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, zbr1->offs, DBGKEY(&key)); dbg_err("but it should have key %s according to tnc", DBGKEY(&zbr1->key)); - dbg_dump_node(c, dent1); - goto out_free; + dbg_dump_node(c, dent1); + goto out_free; } key_read(c, &dent2->key, &key); @@ -1002,8 +1024,8 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, zbr1->offs, DBGKEY(&key)); dbg_err("but it should have key %s according to tnc", DBGKEY(&zbr2->key)); - dbg_dump_node(c, dent2); - goto out_free; + dbg_dump_node(c, dent2); + goto out_free; } nlen1 = le16_to_cpu(dent1->nlen); @@ -1020,9 +1042,9 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, dbg_err("bad order of colliding key %s", DBGKEY(&key)); - dbg_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); + ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); dbg_dump_node(c, dent1); - dbg_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs); + ubifs_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs); dbg_dump_node(c, dent2); out_free: @@ -2097,13 +2119,13 @@ static int simple_rand(void) return (next >> 16) & 32767; } -void dbg_failure_mode_registration(struct ubifs_info *c) +static void failure_mode_init(struct ubifs_info *c) { struct failure_mode_info *fmi; fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS); if (!fmi) { - dbg_err("Failed to register failure mode - no memory"); + ubifs_err("Failed to register failure mode - no memory"); return; } fmi->c = c; @@ -2112,7 +2134,7 @@ void dbg_failure_mode_registration(struct ubifs_info *c) spin_unlock(&fmi_lock); } -void dbg_failure_mode_deregistration(struct ubifs_info *c) +static void failure_mode_exit(struct ubifs_info *c) { struct failure_mode_info *fmi, *tmp; @@ -2146,42 +2168,44 @@ static int in_failure_mode(struct ubi_volume_desc *desc) struct ubifs_info *c = dbg_find_info(desc); if (c && dbg_failure_mode) - return c->failure_mode; + return c->dbg->failure_mode; return 0; } static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) { struct ubifs_info *c = dbg_find_info(desc); + struct ubifs_debug_info *d; if (!c || !dbg_failure_mode) return 0; - if (c->failure_mode) + d = c->dbg; + if (d->failure_mode) return 1; - if (!c->fail_cnt) { + if (!d->fail_cnt) { /* First call - decide delay to failure */ if (chance(1, 2)) { unsigned int delay = 1 << (simple_rand() >> 11); if (chance(1, 2)) { - c->fail_delay = 1; - c->fail_timeout = jiffies + + d->fail_delay = 1; + d->fail_timeout = jiffies + msecs_to_jiffies(delay); dbg_rcvry("failing after %ums", delay); } else { - c->fail_delay = 2; - c->fail_cnt_max = delay; + d->fail_delay = 2; + d->fail_cnt_max = delay; dbg_rcvry("failing after %u calls", delay); } } - c->fail_cnt += 1; + d->fail_cnt += 1; } /* Determine if failure delay has expired */ - if (c->fail_delay == 1) { - if (time_before(jiffies, c->fail_timeout)) + if (d->fail_delay == 1) { + if (time_before(jiffies, d->fail_timeout)) return 0; - } else if (c->fail_delay == 2) - if (c->fail_cnt++ < c->fail_cnt_max) + } else if (d->fail_delay == 2) + if (d->fail_cnt++ < d->fail_cnt_max) return 0; if (lnum == UBIFS_SB_LNUM) { if (write) { @@ -2239,7 +2263,7 @@ static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) dbg_rcvry("failing in bud LEB %d commit not running", lnum); } ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum); - c->failure_mode = 1; + d->failure_mode = 1; dump_stack(); return 1; } @@ -2344,4 +2368,181 @@ int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) return 0; } +/** + * ubifs_debugging_init - initialize UBIFS debugging. + * @c: UBIFS file-system description object + * + * This function initializes debugging-related data for the file system. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_debugging_init(struct ubifs_info *c) +{ + c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); + if (!c->dbg) + return -ENOMEM; + + c->dbg->buf = vmalloc(c->leb_size); + if (!c->dbg->buf) + goto out; + + failure_mode_init(c); + return 0; + +out: + kfree(c->dbg); + return -ENOMEM; +} + +/** + * ubifs_debugging_exit - free debugging data. + * @c: UBIFS file-system description object + */ +void ubifs_debugging_exit(struct ubifs_info *c) +{ + failure_mode_exit(c); + vfree(c->dbg->buf); + kfree(c->dbg); +} + +/* + * Root directory for UBIFS stuff in debugfs. Contains sub-directories which + * contain the stuff specific to particular file-system mounts. + */ +static struct dentry *debugfs_rootdir; + +/** + * dbg_debugfs_init - initialize debugfs file-system. + * + * UBIFS uses debugfs file-system to expose various debugging knobs to + * user-space. This function creates "ubifs" directory in the debugfs + * file-system. Returns zero in case of success and a negative error code in + * case of failure. + */ +int dbg_debugfs_init(void) +{ + debugfs_rootdir = debugfs_create_dir("ubifs", NULL); + if (IS_ERR(debugfs_rootdir)) { + int err = PTR_ERR(debugfs_rootdir); + ubifs_err("cannot create \"ubifs\" debugfs directory, " + "error %d\n", err); + return err; + } + + return 0; +} + +/** + * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. + */ +void dbg_debugfs_exit(void) +{ + debugfs_remove(debugfs_rootdir); +} + +static int open_debugfs_file(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return 0; +} + +static ssize_t write_debugfs_file(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct ubifs_info *c = file->private_data; + struct ubifs_debug_info *d = c->dbg; + + if (file->f_path.dentry == d->dump_lprops) + dbg_dump_lprops(c); + else if (file->f_path.dentry == d->dump_budg) { + spin_lock(&c->space_lock); + dbg_dump_budg(c); + spin_unlock(&c->space_lock); + } else if (file->f_path.dentry == d->dump_tnc) { + mutex_lock(&c->tnc_mutex); + dbg_dump_tnc(c); + mutex_unlock(&c->tnc_mutex); + } else + return -EINVAL; + + *ppos += count; + return count; +} + +static const struct file_operations debugfs_fops = { + .open = open_debugfs_file, + .write = write_debugfs_file, + .owner = THIS_MODULE, +}; + +/** + * dbg_debugfs_init_fs - initialize debugfs for UBIFS instance. + * @c: UBIFS file-system description object + * + * This function creates all debugfs files for this instance of UBIFS. Returns + * zero in case of success and a negative error code in case of failure. + * + * Note, the only reason we have not merged this function with the + * 'ubifs_debugging_init()' function is because it is better to initialize + * debugfs interfaces at the very end of the mount process, and remove them at + * the very beginning of the mount process. + */ +int dbg_debugfs_init_fs(struct ubifs_info *c) +{ + int err; + const char *fname; + struct dentry *dent; + struct ubifs_debug_info *d = c->dbg; + + sprintf(d->debugfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); + d->debugfs_dir = debugfs_create_dir(d->debugfs_dir_name, + debugfs_rootdir); + if (IS_ERR(d->debugfs_dir)) { + err = PTR_ERR(d->debugfs_dir); + ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", + d->debugfs_dir_name, err); + goto out; + } + + fname = "dump_lprops"; + dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c, + &debugfs_fops); + if (IS_ERR(dent)) + goto out_remove; + d->dump_lprops = dent; + + fname = "dump_budg"; + dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c, + &debugfs_fops); + if (IS_ERR(dent)) + goto out_remove; + d->dump_budg = dent; + + fname = "dump_tnc"; + dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c, + &debugfs_fops); + if (IS_ERR(dent)) + goto out_remove; + d->dump_tnc = dent; + + return 0; + +out_remove: + err = PTR_ERR(dent); + ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", + fname, err); + debugfs_remove_recursive(d->debugfs_dir); +out: + return err; +} + +/** + * dbg_debugfs_exit_fs - remove all debugfs files. + * @c: UBIFS file-system description object + */ +void dbg_debugfs_exit_fs(struct ubifs_info *c) +{ + debugfs_remove_recursive(c->dbg->debugfs_dir); +} + #endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 33d6b95071e..9820d6999f7 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -25,7 +25,56 @@ #ifdef CONFIG_UBIFS_FS_DEBUG -#define UBIFS_DBG(op) op +/** + * ubifs_debug_info - per-FS debugging information. + * @buf: a buffer of LEB size, used for various purposes + * @old_zroot: old index root - used by 'dbg_check_old_index()' + * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' + * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' + * @failure_mode: failure mode for recovery testing + * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls + * @fail_timeout: time in jiffies when delay of failure mode expires + * @fail_cnt: current number of calls to failure mode I/O functions + * @fail_cnt_max: number of calls by which to delay failure mode + * @chk_lpt_sz: used by LPT tree size checker + * @chk_lpt_sz2: used by LPT tree size checker + * @chk_lpt_wastage: used by LPT tree size checker + * @chk_lpt_lebs: used by LPT tree size checker + * @new_nhead_offs: used by LPT tree size checker + * @new_ihead_lnum: used by debugging to check ihead_lnum + * @new_ihead_offs: used by debugging to check ihead_offs + * + * debugfs_dir_name: name of debugfs directory containing this file-system's + * files + * debugfs_dir: direntry object of the file-system debugfs directory + * dump_lprops: "dump lprops" debugfs knob + * dump_budg: "dump budgeting information" debugfs knob + * dump_tnc: "dump TNC" debugfs knob + */ +struct ubifs_debug_info { + void *buf; + struct ubifs_zbranch old_zroot; + int old_zroot_level; + unsigned long long old_zroot_sqnum; + int failure_mode; + int fail_delay; + unsigned long fail_timeout; + unsigned int fail_cnt; + unsigned int fail_cnt_max; + long long chk_lpt_sz; + long long chk_lpt_sz2; + long long chk_lpt_wastage; + int chk_lpt_lebs; + int new_nhead_offs; + int new_ihead_lnum; + int new_ihead_offs; + + char debugfs_dir_name[100]; + struct dentry *debugfs_dir; + struct dentry *dump_lprops; + struct dentry *dump_budg; + struct dentry *dump_tnc; +}; #define ubifs_assert(expr) do { \ if (unlikely(!(expr))) { \ @@ -211,14 +260,18 @@ extern unsigned int ubifs_msg_flags; extern unsigned int ubifs_chk_flags; extern unsigned int ubifs_tst_flags; -/* Dump functions */ +int ubifs_debugging_init(struct ubifs_info *c); +void ubifs_debugging_exit(struct ubifs_info *c); +/* Dump functions */ const char *dbg_ntype(int type); const char *dbg_cstate(int cmt_state); const char *dbg_get_key_dump(const struct ubifs_info *c, const union ubifs_key *key); void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); void dbg_dump_node(const struct ubifs_info *c, const void *node); +void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, + int offs); void dbg_dump_budget_req(const struct ubifs_budget_req *req); void dbg_dump_lstats(const struct ubifs_lp_stats *lst); void dbg_dump_budg(struct ubifs_info *c); @@ -233,9 +286,9 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, struct ubifs_nnode *parent, int iip); void dbg_dump_tnc(struct ubifs_info *c); void dbg_dump_index(struct ubifs_info *c); +void dbg_dump_lpt_lebs(const struct ubifs_info *c); /* Checking helper functions */ - typedef int (*dbg_leaf_callback)(struct ubifs_info *c, struct ubifs_zbranch *zbr, void *priv); typedef int (*dbg_znode_callback)(struct ubifs_info *c, @@ -274,9 +327,6 @@ int dbg_force_in_the_gaps(void); #define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) -void dbg_failure_mode_registration(struct ubifs_info *c); -void dbg_failure_mode_deregistration(struct ubifs_info *c); - #ifndef UBIFS_DBG_PRESERVE_UBI #define ubi_leb_read dbg_leb_read @@ -318,9 +368,13 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN); } -#else /* !CONFIG_UBIFS_FS_DEBUG */ +/* Debugfs-related stuff */ +int dbg_debugfs_init(void); +void dbg_debugfs_exit(void); +int dbg_debugfs_init_fs(struct ubifs_info *c); +void dbg_debugfs_exit_fs(struct ubifs_info *c); -#define UBIFS_DBG(op) +#else /* !CONFIG_UBIFS_FS_DEBUG */ /* Use "if (0)" to make compiler check arguments even if debugging is off */ #define ubifs_assert(expr) do { \ @@ -360,23 +414,28 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, #define DBGKEY(key) ((char *)(key)) #define DBGKEY1(key) ((char *)(key)) -#define dbg_ntype(type) "" -#define dbg_cstate(cmt_state) "" -#define dbg_get_key_dump(c, key) ({}) -#define dbg_dump_inode(c, inode) ({}) -#define dbg_dump_node(c, node) ({}) -#define dbg_dump_budget_req(req) ({}) -#define dbg_dump_lstats(lst) ({}) -#define dbg_dump_budg(c) ({}) -#define dbg_dump_lprop(c, lp) ({}) -#define dbg_dump_lprops(c) ({}) -#define dbg_dump_lpt_info(c) ({}) -#define dbg_dump_leb(c, lnum) ({}) -#define dbg_dump_znode(c, znode) ({}) -#define dbg_dump_heap(c, heap, cat) ({}) -#define dbg_dump_pnode(c, pnode, parent, iip) ({}) -#define dbg_dump_tnc(c) ({}) -#define dbg_dump_index(c) ({}) +#define ubifs_debugging_init(c) 0 +#define ubifs_debugging_exit(c) ({}) + +#define dbg_ntype(type) "" +#define dbg_cstate(cmt_state) "" +#define dbg_get_key_dump(c, key) ({}) +#define dbg_dump_inode(c, inode) ({}) +#define dbg_dump_node(c, node) ({}) +#define dbg_dump_lpt_node(c, node, lnum, offs) ({}) +#define dbg_dump_budget_req(req) ({}) +#define dbg_dump_lstats(lst) ({}) +#define dbg_dump_budg(c) ({}) +#define dbg_dump_lprop(c, lp) ({}) +#define dbg_dump_lprops(c) ({}) +#define dbg_dump_lpt_info(c) ({}) +#define dbg_dump_leb(c, lnum) ({}) +#define dbg_dump_znode(c, znode) ({}) +#define dbg_dump_heap(c, heap, cat) ({}) +#define dbg_dump_pnode(c, pnode, parent, iip) ({}) +#define dbg_dump_tnc(c) ({}) +#define dbg_dump_index(c) ({}) +#define dbg_dump_lpt_lebs(c) ({}) #define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 #define dbg_old_index_check_init(c, zroot) 0 @@ -396,9 +455,11 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, #define dbg_force_in_the_gaps_enabled 0 #define dbg_force_in_the_gaps() 0 #define dbg_failure_mode 0 -#define dbg_failure_mode_registration(c) ({}) -#define dbg_failure_mode_deregistration(c) ({}) -#endif /* !CONFIG_UBIFS_FS_DEBUG */ +#define dbg_debugfs_init() 0 +#define dbg_debugfs_exit() +#define dbg_debugfs_init_fs(c) 0 +#define dbg_debugfs_exit_fs(c) 0 +#endif /* !CONFIG_UBIFS_FS_DEBUG */ #endif /* !__UBIFS_DEBUG_H__ */ diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 2624411d975..fe82d2464d4 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -72,8 +72,8 @@ static int read_block(struct inode *inode, void *addr, unsigned int block, return err; } - ubifs_assert(le64_to_cpu(dn->ch.sqnum) > ubifs_inode(inode)->creat_sqnum); - + ubifs_assert(le64_to_cpu(dn->ch.sqnum) > + ubifs_inode(inode)->creat_sqnum); len = le32_to_cpu(dn->size); if (len <= 0 || len > UBIFS_BLOCK_SIZE) goto dump; @@ -254,7 +254,7 @@ static int write_begin_slow(struct address_space *mapping, } if (!PageUptodate(page)) { - if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) + if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) SetPageChecked(page); else { err = do_readpage(page); @@ -444,7 +444,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, if (!PageUptodate(page)) { /* The page is not loaded from the flash */ - if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) + if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) /* * We change whole page so no need to load it. But we * have to set the @PG_checked flag to make the further diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 5e82cffe969..6db7a6be6c9 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -154,6 +154,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_GETFLAGS: flags = ubifs2ioctl(ubifs_inode(inode)->flags); + dbg_gen("get flags: %#x, i_flags %#x", flags, inode->i_flags); return put_user(flags, (int __user *) arg); case FS_IOC_SETFLAGS: { @@ -176,6 +177,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) err = mnt_want_write(file->f_path.mnt); if (err) return err; + dbg_gen("set flags: %#x, i_flags %#x", flags, inode->i_flags); err = setflags(inode, flags); mnt_drop_write(file->f_path.mnt); return err; diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index f91b745908e..10ae25b7d1d 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -704,7 +704,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, data->size = cpu_to_le32(len); zero_data_node_unused(data); - if (!(ui->flags && UBIFS_COMPR_FL)) + if (!(ui->flags & UBIFS_COMPR_FL)) /* Compression is disabled for this inode */ compr_type = UBIFS_COMPR_NONE; else @@ -1220,7 +1220,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, data_key_init(c, &key, inum, blk); bit = old_size & (UBIFS_BLOCK_SIZE - 1); - blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0: 1); + blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0 : 1); data_key_init(c, &to_key, inum, blk); err = ubifs_tnc_remove_range(c, &key, &to_key); diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index 3f1f16bc25c..efb3430a258 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h @@ -38,6 +38,22 @@ #define __UBIFS_KEY_H__ /** + * key_mask_hash - mask a valid hash value. + * @val: value to be masked + * + * We use hash values as offset in directories, so values %0 and %1 are + * reserved for "." and "..". %2 is reserved for "end of readdir" marker. This + * function makes sure the reserved values are not used. + */ +static inline uint32_t key_mask_hash(uint32_t hash) +{ + hash &= UBIFS_S_KEY_HASH_MASK; + if (unlikely(hash <= 2)) + hash += 3; + return hash; +} + +/** * key_r5_hash - R5 hash function (borrowed from reiserfs). * @s: direntry name * @len: name length @@ -54,16 +70,7 @@ static inline uint32_t key_r5_hash(const char *s, int len) str++; } - a &= UBIFS_S_KEY_HASH_MASK; - - /* - * We use hash values as offset in directories, so values %0 and %1 are - * reserved for "." and "..". %2 is reserved for "end of readdir" - * marker. - */ - if (unlikely(a >= 0 && a <= 2)) - a += 3; - return a; + return key_mask_hash(a); } /** @@ -77,10 +84,7 @@ static inline uint32_t key_test_hash(const char *str, int len) len = min_t(uint32_t, len, 4); memcpy(&a, str, len); - a &= UBIFS_S_KEY_HASH_MASK; - if (unlikely(a >= 0 && a <= 2)) - a += 3; - return a; + return key_mask_hash(a); } /** diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index f27176e9b70..dfd2bcece27 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -520,13 +520,13 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) * @flags: new flags * @idx_gc_cnt: change to the count of idx_gc list * - * This function changes LEB properties. This function does not change a LEB - * property (@free, @dirty or @flag) if the value passed is %LPROPS_NC. + * This function changes LEB properties (@free, @dirty or @flag). However, the + * property which has the %LPROPS_NC value is not changed. Returns a pointer to + * the updated LEB properties on success and a negative error code on failure. * - * This function returns a pointer to the updated LEB properties on success - * and a negative error code on failure. N.B. the LEB properties may have had to - * be copied (due to COW) and consequently the pointer returned may not be the - * same as the pointer passed. + * Note, the LEB properties may have had to be copied (due to COW) and + * consequently the pointer returned may not be the same as the pointer + * passed. */ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, const struct ubifs_lprops *lp, @@ -1088,7 +1088,7 @@ static int scan_check_cb(struct ubifs_info *c, } } - sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); + sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); if (IS_ERR(sleb)) { /* * After an unclean unmount, empty and freeable LEBs diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index db8bd0e518b..b2792e84d24 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -36,15 +36,16 @@ * can be written into a single eraseblock. In that case, garbage collection * consists of just writing the whole table, which therefore makes all other * eraseblocks reusable. In the case of the big model, dirty eraseblocks are - * selected for garbage collection, which consists are marking the nodes in + * selected for garbage collection, which consists of marking the clean nodes in * that LEB as dirty, and then only the dirty nodes are written out. Also, in * the case of the big model, a table of LEB numbers is saved so that the entire * LPT does not to be scanned looking for empty eraseblocks when UBIFS is first * mounted. */ -#include <linux/crc16.h> #include "ubifs.h" +#include <linux/crc16.h> +#include <linux/math64.h> /** * do_calc_lpt_geom - calculate sizes for the LPT area. @@ -135,15 +136,13 @@ static void do_calc_lpt_geom(struct ubifs_info *c) int ubifs_calc_lpt_geom(struct ubifs_info *c) { int lebs_needed; - uint64_t sz; + long long sz; do_calc_lpt_geom(c); /* Verify that lpt_lebs is big enough */ sz = c->lpt_sz * 2; /* Must have at least 2 times the size */ - sz += c->leb_size - 1; - do_div(sz, c->leb_size); - lebs_needed = sz; + lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size); if (lebs_needed > c->lpt_lebs) { ubifs_err("too few LPT LEBs"); return -EINVAL; @@ -156,7 +155,6 @@ int ubifs_calc_lpt_geom(struct ubifs_info *c) } c->check_lpt_free = c->big_lpt; - return 0; } @@ -176,7 +174,7 @@ static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs, int *big_lpt) { int i, lebs_needed; - uint64_t sz; + long long sz; /* Start by assuming the minimum number of LPT LEBs */ c->lpt_lebs = UBIFS_MIN_LPT_LEBS; @@ -203,9 +201,7 @@ static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs, /* Now check there are enough LPT LEBs */ for (i = 0; i < 64 ; i++) { sz = c->lpt_sz * 4; /* Allow 4 times the size */ - sz += c->leb_size - 1; - do_div(sz, c->leb_size); - lebs_needed = sz; + lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size); if (lebs_needed > c->lpt_lebs) { /* Not enough LPT LEBs so try again with more */ c->lpt_lebs = lebs_needed; @@ -558,7 +554,7 @@ static int calc_nnode_num(int row, int col) * This function calculates and returns the nnode number based on the parent's * nnode number and the index in parent. */ -static int calc_nnode_num_from_parent(struct ubifs_info *c, +static int calc_nnode_num_from_parent(const struct ubifs_info *c, struct ubifs_nnode *parent, int iip) { int num, shft; @@ -583,7 +579,7 @@ static int calc_nnode_num_from_parent(struct ubifs_info *c, * This function calculates and returns the pnode number based on the parent's * nnode number and the index in parent. */ -static int calc_pnode_num_from_parent(struct ubifs_info *c, +static int calc_pnode_num_from_parent(const struct ubifs_info *c, struct ubifs_nnode *parent, int iip) { int i, n = c->lpt_hght - 1, pnum = parent->num, num = 0; @@ -966,7 +962,7 @@ static int check_lpt_type(uint8_t **addr, int *pos, int type) * * This function returns %0 on success and a negative error code on failure. */ -static int unpack_pnode(struct ubifs_info *c, void *buf, +static int unpack_pnode(const struct ubifs_info *c, void *buf, struct ubifs_pnode *pnode) { uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; @@ -996,15 +992,15 @@ static int unpack_pnode(struct ubifs_info *c, void *buf, } /** - * unpack_nnode - unpack a nnode. + * ubifs_unpack_nnode - unpack a nnode. * @c: UBIFS file-system description object * @buf: buffer containing packed nnode to unpack * @nnode: nnode structure to fill * * This function returns %0 on success and a negative error code on failure. */ -static int unpack_nnode(struct ubifs_info *c, void *buf, - struct ubifs_nnode *nnode) +int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf, + struct ubifs_nnode *nnode) { uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; int i, pos = 0, err; @@ -1036,7 +1032,7 @@ static int unpack_nnode(struct ubifs_info *c, void *buf, * * This function returns %0 on success and a negative error code on failure. */ -static int unpack_ltab(struct ubifs_info *c, void *buf) +static int unpack_ltab(const struct ubifs_info *c, void *buf) { uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; int i, pos = 0, err; @@ -1068,7 +1064,7 @@ static int unpack_ltab(struct ubifs_info *c, void *buf) * * This function returns %0 on success and a negative error code on failure. */ -static int unpack_lsave(struct ubifs_info *c, void *buf) +static int unpack_lsave(const struct ubifs_info *c, void *buf) { uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; int i, pos = 0, err; @@ -1096,7 +1092,7 @@ static int unpack_lsave(struct ubifs_info *c, void *buf) * * This function returns %0 on success and a negative error code on failure. */ -static int validate_nnode(struct ubifs_info *c, struct ubifs_nnode *nnode, +static int validate_nnode(const struct ubifs_info *c, struct ubifs_nnode *nnode, struct ubifs_nnode *parent, int iip) { int i, lvl, max_offs; @@ -1140,7 +1136,7 @@ static int validate_nnode(struct ubifs_info *c, struct ubifs_nnode *nnode, * * This function returns %0 on success and a negative error code on failure. */ -static int validate_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, +static int validate_pnode(const struct ubifs_info *c, struct ubifs_pnode *pnode, struct ubifs_nnode *parent, int iip) { int i; @@ -1174,7 +1170,8 @@ static int validate_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, * This function calculates the LEB numbers for the LEB properties it contains * based on the pnode number. */ -static void set_pnode_lnum(struct ubifs_info *c, struct ubifs_pnode *pnode) +static void set_pnode_lnum(const struct ubifs_info *c, + struct ubifs_pnode *pnode) { int i, lnum; @@ -1227,7 +1224,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); if (err) goto out; - err = unpack_nnode(c, buf, nnode); + err = ubifs_unpack_nnode(c, buf, nnode); if (err) goto out; } @@ -1816,7 +1813,7 @@ static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c, c->nnode_sz); if (err) return ERR_PTR(err); - err = unpack_nnode(c, buf, nnode); + err = ubifs_unpack_nnode(c, buf, nnode); if (err) return ERR_PTR(err); } diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index a41434b4278..96ca9570717 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -320,6 +320,8 @@ no_space: dbg_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, " "done_lsave %d", lnum, offs, len, done_ltab, done_lsave); dbg_dump_lpt_info(c); + dbg_dump_lpt_lebs(c); + dump_stack(); return err; } @@ -546,8 +548,10 @@ static int write_cnodes(struct ubifs_info *c) no_space: ubifs_err("LPT out of space mismatch"); dbg_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab " - "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave); + "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave); dbg_dump_lpt_info(c); + dbg_dump_lpt_lebs(c); + dump_stack(); return err; } @@ -749,7 +753,7 @@ static void lpt_tgc_start(struct ubifs_info *c) * LPT trivial garbage collection is where a LPT LEB contains only dirty and * free space and so may be reused as soon as the next commit is completed. * This function is called after the commit is completed (master node has been - * written) and unmaps LPT LEBs that were marked for trivial GC. + * written) and un-maps LPT LEBs that were marked for trivial GC. */ static int lpt_tgc_end(struct ubifs_info *c) { @@ -1025,7 +1029,7 @@ static int make_node_dirty(struct ubifs_info *c, int node_type, int node_num, * @c: UBIFS file-system description object * @node_type: LPT node type */ -static int get_lpt_node_len(struct ubifs_info *c, int node_type) +static int get_lpt_node_len(const struct ubifs_info *c, int node_type) { switch (node_type) { case UBIFS_LPT_NNODE: @@ -1046,7 +1050,7 @@ static int get_lpt_node_len(struct ubifs_info *c, int node_type) * @buf: buffer * @len: length of buffer */ -static int get_pad_len(struct ubifs_info *c, uint8_t *buf, int len) +static int get_pad_len(const struct ubifs_info *c, uint8_t *buf, int len) { int offs, pad_len; @@ -1063,7 +1067,8 @@ static int get_pad_len(struct ubifs_info *c, uint8_t *buf, int len) * @buf: buffer * @node_num: node number is returned here */ -static int get_lpt_node_type(struct ubifs_info *c, uint8_t *buf, int *node_num) +static int get_lpt_node_type(const struct ubifs_info *c, uint8_t *buf, + int *node_num) { uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; int pos = 0, node_type; @@ -1081,7 +1086,7 @@ static int get_lpt_node_type(struct ubifs_info *c, uint8_t *buf, int *node_num) * * This function returns %1 if the buffer contains a node or %0 if it does not. */ -static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len) +static int is_a_node(const struct ubifs_info *c, uint8_t *buf, int len) { uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; int pos = 0, node_type, node_len; @@ -1105,7 +1110,6 @@ static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len) return 1; } - /** * lpt_gc_lnum - garbage collect a LPT LEB. * @c: UBIFS file-system description object @@ -1463,7 +1467,7 @@ void ubifs_lpt_free(struct ubifs_info *c, int wr_only) #ifdef CONFIG_UBIFS_FS_DEBUG /** - * dbg_is_all_ff - determine if a buffer contains only 0xff bytes. + * dbg_is_all_ff - determine if a buffer contains only 0xFF bytes. * @buf: buffer * @len: buffer length */ @@ -1488,7 +1492,7 @@ static int dbg_is_nnode_dirty(struct ubifs_info *c, int lnum, int offs) struct ubifs_nnode *nnode; int hght; - /* Entire tree is in memory so first_nnode / next_nnode are ok */ + /* Entire tree is in memory so first_nnode / next_nnode are OK */ nnode = first_nnode(c, &hght); for (; nnode; nnode = next_nnode(c, nnode, &hght)) { struct ubifs_nbranch *branch; @@ -1602,7 +1606,10 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) { int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len; int ret; - void *buf = c->dbg_buf; + void *buf = c->dbg->buf; + + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + return 0; dbg_lp("LEB %d", lnum); err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); @@ -1704,6 +1711,9 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) long long free = 0; int i; + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + return 0; + for (i = 0; i < c->lpt_lebs; i++) { if (c->ltab[i].tgc || c->ltab[i].cmt) continue; @@ -1716,6 +1726,8 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) dbg_err("LPT space error: free %lld lpt_sz %lld", free, c->lpt_sz); dbg_dump_lpt_info(c); + dbg_dump_lpt_lebs(c); + dump_stack(); return -EINVAL; } return 0; @@ -1731,15 +1743,19 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) */ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) { + struct ubifs_debug_info *d = c->dbg; long long chk_lpt_sz, lpt_sz; int err = 0; + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + return 0; + switch (action) { case 0: - c->chk_lpt_sz = 0; - c->chk_lpt_sz2 = 0; - c->chk_lpt_lebs = 0; - c->chk_lpt_wastage = 0; + d->chk_lpt_sz = 0; + d->chk_lpt_sz2 = 0; + d->chk_lpt_lebs = 0; + d->chk_lpt_wastage = 0; if (c->dirty_pn_cnt > c->pnode_cnt) { dbg_err("dirty pnodes %d exceed max %d", c->dirty_pn_cnt, c->pnode_cnt); @@ -1752,35 +1768,35 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) } return err; case 1: - c->chk_lpt_sz += len; + d->chk_lpt_sz += len; return 0; case 2: - c->chk_lpt_sz += len; - c->chk_lpt_wastage += len; - c->chk_lpt_lebs += 1; + d->chk_lpt_sz += len; + d->chk_lpt_wastage += len; + d->chk_lpt_lebs += 1; return 0; case 3: chk_lpt_sz = c->leb_size; - chk_lpt_sz *= c->chk_lpt_lebs; + chk_lpt_sz *= d->chk_lpt_lebs; chk_lpt_sz += len - c->nhead_offs; - if (c->chk_lpt_sz != chk_lpt_sz) { + if (d->chk_lpt_sz != chk_lpt_sz) { dbg_err("LPT wrote %lld but space used was %lld", - c->chk_lpt_sz, chk_lpt_sz); + d->chk_lpt_sz, chk_lpt_sz); err = -EINVAL; } - if (c->chk_lpt_sz > c->lpt_sz) { + if (d->chk_lpt_sz > c->lpt_sz) { dbg_err("LPT wrote %lld but lpt_sz is %lld", - c->chk_lpt_sz, c->lpt_sz); + d->chk_lpt_sz, c->lpt_sz); err = -EINVAL; } - if (c->chk_lpt_sz2 && c->chk_lpt_sz != c->chk_lpt_sz2) { + if (d->chk_lpt_sz2 && d->chk_lpt_sz != d->chk_lpt_sz2) { dbg_err("LPT layout size %lld but wrote %lld", - c->chk_lpt_sz, c->chk_lpt_sz2); + d->chk_lpt_sz, d->chk_lpt_sz2); err = -EINVAL; } - if (c->chk_lpt_sz2 && c->new_nhead_offs != len) { + if (d->chk_lpt_sz2 && d->new_nhead_offs != len) { dbg_err("LPT new nhead offs: expected %d was %d", - c->new_nhead_offs, len); + d->new_nhead_offs, len); err = -EINVAL; } lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; @@ -1788,26 +1804,146 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) lpt_sz += c->ltab_sz; if (c->big_lpt) lpt_sz += c->lsave_sz; - if (c->chk_lpt_sz - c->chk_lpt_wastage > lpt_sz) { + if (d->chk_lpt_sz - d->chk_lpt_wastage > lpt_sz) { dbg_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld", - c->chk_lpt_sz, c->chk_lpt_wastage, lpt_sz); + d->chk_lpt_sz, d->chk_lpt_wastage, lpt_sz); err = -EINVAL; } - if (err) + if (err) { dbg_dump_lpt_info(c); - c->chk_lpt_sz2 = c->chk_lpt_sz; - c->chk_lpt_sz = 0; - c->chk_lpt_wastage = 0; - c->chk_lpt_lebs = 0; - c->new_nhead_offs = len; + dbg_dump_lpt_lebs(c); + dump_stack(); + } + d->chk_lpt_sz2 = d->chk_lpt_sz; + d->chk_lpt_sz = 0; + d->chk_lpt_wastage = 0; + d->chk_lpt_lebs = 0; + d->new_nhead_offs = len; return err; case 4: - c->chk_lpt_sz += len; - c->chk_lpt_wastage += len; + d->chk_lpt_sz += len; + d->chk_lpt_wastage += len; return 0; default: return -EINVAL; } } +/** + * dbg_dump_lpt_leb - dump an LPT LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number to dump + * + * This function dumps an LEB from LPT area. Nodes in this area are very + * different to nodes in the main area (e.g., they do not have common headers, + * they do not have 8-byte alignments, etc), so we have a separate function to + * dump LPT area LEBs. Note, LPT has to be locked by the caller. + */ +static void dump_lpt_leb(const struct ubifs_info *c, int lnum) +{ + int err, len = c->leb_size, node_type, node_num, node_len, offs; + void *buf = c->dbg->buf; + + printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", + current->pid, lnum); + err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); + if (err) { + ubifs_err("cannot read LEB %d, error %d", lnum, err); + return; + } + while (1) { + offs = c->leb_size - len; + if (!is_a_node(c, buf, len)) { + int pad_len; + + pad_len = get_pad_len(c, buf, len); + if (pad_len) { + printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n", + lnum, offs, pad_len); + buf += pad_len; + len -= pad_len; + continue; + } + if (len) + printk(KERN_DEBUG "LEB %d:%d, free %d bytes\n", + lnum, offs, len); + break; + } + + node_type = get_lpt_node_type(c, buf, &node_num); + switch (node_type) { + case UBIFS_LPT_PNODE: + { + node_len = c->pnode_sz; + if (c->big_lpt) + printk(KERN_DEBUG "LEB %d:%d, pnode num %d\n", + lnum, offs, node_num); + else + printk(KERN_DEBUG "LEB %d:%d, pnode\n", + lnum, offs); + break; + } + case UBIFS_LPT_NNODE: + { + int i; + struct ubifs_nnode nnode; + + node_len = c->nnode_sz; + if (c->big_lpt) + printk(KERN_DEBUG "LEB %d:%d, nnode num %d, ", + lnum, offs, node_num); + else + printk(KERN_DEBUG "LEB %d:%d, nnode, ", + lnum, offs); + err = ubifs_unpack_nnode(c, buf, &nnode); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + printk("%d:%d", nnode.nbranch[i].lnum, + nnode.nbranch[i].offs); + if (i != UBIFS_LPT_FANOUT - 1) + printk(", "); + } + printk("\n"); + break; + } + case UBIFS_LPT_LTAB: + node_len = c->ltab_sz; + printk(KERN_DEBUG "LEB %d:%d, ltab\n", + lnum, offs); + break; + case UBIFS_LPT_LSAVE: + node_len = c->lsave_sz; + printk(KERN_DEBUG "LEB %d:%d, lsave len\n", lnum, offs); + break; + default: + ubifs_err("LPT node type %d not recognized", node_type); + return; + } + + buf += node_len; + len -= node_len; + } + + printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", + current->pid, lnum); +} + +/** + * dbg_dump_lpt_lebs - dump LPT lebs. + * @c: UBIFS file-system description object + * + * This function dumps all LPT LEBs. The caller has to make sure the LPT is + * locked. + */ +void dbg_dump_lpt_lebs(const struct ubifs_info *c) +{ + int i; + + printk(KERN_DEBUG "(pid %d) start dumping all LPT LEBs\n", + current->pid); + for (i = 0; i < c->lpt_lebs; i++) + dump_lpt_leb(c, i + c->lpt_first); + printk(KERN_DEBUG "(pid %d) finish dumping all LPT LEBs\n", + current->pid); +} + #endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 9bd5a43d452..9e6f403f170 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -899,7 +899,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { struct ubifs_scan_leb *sleb; - sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); + sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); if (IS_ERR(sleb)) { err = PTR_ERR(sleb); break; diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 21f7d047c30..ce42a7b0ca5 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -144,7 +144,7 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) /* * If the replay order was perfect the dirty space would now be * zero. The order is not perfect because the the journal heads - * race with eachother. This is not a problem but is does mean + * race with each other. This is not a problem but is does mean * that the dirty space may temporarily exceed c->leb_size * during the replay. */ @@ -656,7 +656,7 @@ out_dump: * @dirty: amount of dirty space from padding and deletion nodes * * This function inserts a reference node to the replay tree and returns zero - * in case of success ort a negative error code in case of failure. + * in case of success or a negative error code in case of failure. */ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, unsigned long long sqnum, int free, int dirty) @@ -883,7 +883,7 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) * This means that we reached end of log and now * look to the older log data, which was already * committed but the eraseblock was not erased (UBIFS - * only unmaps it). So this basically means we have to + * only un-maps it). So this basically means we have to * exit with "end of log" code. */ err = 1; @@ -1062,6 +1062,15 @@ int ubifs_replay_journal(struct ubifs_info *c) if (err) goto out; + /* + * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable + * to roughly estimate index growth. Things like @c->min_idx_lebs + * depend on it. This means we have to initialize it to make sure + * budgeting works properly. + */ + c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); + c->budg_uncommitted_idx *= c->max_idx_node_sz; + ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 0f392351dc5..e070c643d1b 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -28,6 +28,7 @@ #include "ubifs.h" #include <linux/random.h> +#include <linux/math64.h> /* * Default journal size in logical eraseblocks as a percent of total @@ -80,7 +81,7 @@ static int create_default_filesystem(struct ubifs_info *c) int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first; int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0; int min_leb_cnt = UBIFS_MIN_LEB_CNT; - uint64_t tmp64, main_bytes; + long long tmp64, main_bytes; __le64 tmp_le64; /* Some functions called from here depend on the @c->key_len filed */ @@ -160,7 +161,7 @@ static int create_default_filesystem(struct ubifs_info *c) if (!sup) return -ENOMEM; - tmp64 = (uint64_t)max_buds * c->leb_size; + tmp64 = (long long)max_buds * c->leb_size; if (big_lpt) sup_flags |= UBIFS_FLG_BIGLPT; @@ -179,14 +180,16 @@ static int create_default_filesystem(struct ubifs_info *c) sup->fanout = cpu_to_le32(DEFAULT_FANOUT); sup->lsave_cnt = cpu_to_le32(c->lsave_cnt); sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION); - sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO); sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN); + if (c->mount_opts.override_compr) + sup->default_compr = cpu_to_le16(c->mount_opts.compr_type); + else + sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO); generate_random_uuid(sup->uuid); - main_bytes = (uint64_t)main_lebs * c->leb_size; - tmp64 = main_bytes * DEFAULT_RP_PERCENT; - do_div(tmp64, 100); + main_bytes = (long long)main_lebs * c->leb_size; + tmp64 = div_u64(main_bytes * DEFAULT_RP_PERCENT, 100); if (tmp64 > DEFAULT_MAX_RP_SIZE) tmp64 = DEFAULT_MAX_RP_SIZE; sup->rp_size = cpu_to_le64(tmp64); @@ -582,16 +585,15 @@ int ubifs_read_superblock(struct ubifs_info *c) c->jhead_cnt = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT; c->fanout = le32_to_cpu(sup->fanout); c->lsave_cnt = le32_to_cpu(sup->lsave_cnt); - c->default_compr = le16_to_cpu(sup->default_compr); c->rp_size = le64_to_cpu(sup->rp_size); c->rp_uid = le32_to_cpu(sup->rp_uid); c->rp_gid = le32_to_cpu(sup->rp_gid); sup_flags = le32_to_cpu(sup->flags); + if (!c->mount_opts.override_compr) + c->default_compr = le16_to_cpu(sup->default_compr); c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); - memcpy(&c->uuid, &sup->uuid, 16); - c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); /* Automatically increase file system size to the maximum size */ diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index d80b2aef42b..0d7564b95f8 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -34,6 +34,8 @@ #include <linux/parser.h> #include <linux/seq_file.h> #include <linux/mount.h> +#include <linux/math64.h> +#include <linux/writeback.h> #include "ubifs.h" /* @@ -417,39 +419,54 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt) else if (c->mount_opts.chk_data_crc == 1) seq_printf(s, ",no_chk_data_crc"); + if (c->mount_opts.override_compr) { + seq_printf(s, ",compr="); + seq_printf(s, ubifs_compr_name(c->mount_opts.compr_type)); + } + return 0; } static int ubifs_sync_fs(struct super_block *sb, int wait) { + int i, err; struct ubifs_info *c = sb->s_fs_info; - int i, ret = 0, err; - long long bud_bytes; - - if (c->jheads) { - for (i = 0; i < c->jhead_cnt; i++) { - err = ubifs_wbuf_sync(&c->jheads[i].wbuf); - if (err && !ret) - ret = err; - } + struct writeback_control wbc = { + .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_HOLD, + .range_start = 0, + .range_end = LLONG_MAX, + .nr_to_write = LONG_MAX, + }; + + if (sb->s_flags & MS_RDONLY) + return 0; - /* Commit the journal unless it has too little data */ - spin_lock(&c->buds_lock); - bud_bytes = c->bud_bytes; - spin_unlock(&c->buds_lock); - if (bud_bytes > c->leb_size) { - err = ubifs_run_commit(c); - if (err) - return err; - } + /* + * Synchronize write buffers, because 'ubifs_run_commit()' does not + * do this if it waits for an already running commit. + */ + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + return err; } /* - * We ought to call sync for c->ubi but it does not have one. If it had - * it would in turn call mtd->sync, however mtd operations are - * synchronous anyway, so we don't lose any sleep here. + * VFS calls '->sync_fs()' before synchronizing all dirty inodes and + * pages, so synchronize them first, then commit the journal. Strictly + * speaking, it is not necessary to commit the journal here, + * synchronizing write-buffers would be enough. But committing makes + * UBIFS free space predictions much more accurate, so we want to let + * the user be able to get more accurate results of 'statfs()' after + * they synchronize the file system. */ - return ret; + generic_sync_sb_inodes(sb, &wbc); + + err = ubifs_run_commit(c); + if (err) + return err; + + return ubi_sync(c->vi.ubi_num); } /** @@ -596,7 +613,7 @@ static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad) } /* - * init_constants_late - initialize UBIFS constants. + * init_constants_sb - initialize UBIFS constants. * @c: UBIFS file-system description object * * This is a helper function which initializes various UBIFS constants after @@ -604,10 +621,10 @@ static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad) * makes sure they are all right. Returns zero in case of success and a * negative error code in case of failure. */ -static int init_constants_late(struct ubifs_info *c) +static int init_constants_sb(struct ubifs_info *c) { int tmp, err; - uint64_t tmp64; + long long tmp64; c->main_bytes = (long long)c->main_lebs * c->leb_size; c->max_znode_sz = sizeof(struct ubifs_znode) + @@ -634,9 +651,8 @@ static int init_constants_late(struct ubifs_info *c) * Make sure that the log is large enough to fit reference nodes for * all buds plus one reserved LEB. */ - tmp64 = c->max_bud_bytes; - tmp = do_div(tmp64, c->leb_size); - c->max_bud_cnt = tmp64 + !!tmp; + tmp64 = c->max_bud_bytes + c->leb_size - 1; + c->max_bud_cnt = div_u64(tmp64, c->leb_size); tmp = (c->ref_node_alsz * c->max_bud_cnt + c->leb_size - 1); tmp /= c->leb_size; tmp += 1; @@ -672,7 +688,7 @@ static int init_constants_late(struct ubifs_info *c) * Consequently, if the journal is too small, UBIFS will treat it as * always full. */ - tmp64 = (uint64_t)(c->jhead_cnt + 1) * c->leb_size + 1; + tmp64 = (long long)(c->jhead_cnt + 1) * c->leb_size + 1; if (c->bg_bud_bytes < tmp64) c->bg_bud_bytes = tmp64; if (c->max_bud_bytes < tmp64 + c->leb_size) @@ -682,6 +698,21 @@ static int init_constants_late(struct ubifs_info *c) if (err) return err; + return 0; +} + +/* + * init_constants_master - initialize UBIFS constants. + * @c: UBIFS file-system description object + * + * This is a helper function which initializes various UBIFS constants after + * the master node has been read. It also checks various UBIFS parameters and + * makes sure they are all right. + */ +static void init_constants_master(struct ubifs_info *c) +{ + long long tmp64; + c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); /* @@ -690,14 +721,13 @@ static int init_constants_late(struct ubifs_info *c) * necessary to report something for the 'statfs()' call. * * Subtract the LEB reserved for GC, the LEB which is reserved for - * deletions, and assume only one journal head is available. + * deletions, minimum LEBs for the index, and assume only one journal + * head is available. */ - tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1; - tmp64 *= (uint64_t)c->leb_size - c->leb_overhead; + tmp64 = c->main_lebs - 1 - 1 - MIN_INDEX_LEBS - c->jhead_cnt + 1; + tmp64 *= (long long)c->leb_size - c->leb_overhead; tmp64 = ubifs_reported_space(c, tmp64); c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; - - return 0; } /** @@ -878,6 +908,7 @@ static int check_volume_empty(struct ubifs_info *c) * Opt_no_bulk_read: disable bulk-reads * Opt_chk_data_crc: check CRCs when reading data nodes * Opt_no_chk_data_crc: do not check CRCs when reading data nodes + * Opt_override_compr: override default compressor * Opt_err: just end of array marker */ enum { @@ -887,6 +918,7 @@ enum { Opt_no_bulk_read, Opt_chk_data_crc, Opt_no_chk_data_crc, + Opt_override_compr, Opt_err, }; @@ -897,6 +929,7 @@ static const match_table_t tokens = { {Opt_no_bulk_read, "no_bulk_read"}, {Opt_chk_data_crc, "chk_data_crc"}, {Opt_no_chk_data_crc, "no_chk_data_crc"}, + {Opt_override_compr, "compr=%s"}, {Opt_err, NULL}, }; @@ -950,6 +983,28 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, c->mount_opts.chk_data_crc = 1; c->no_chk_data_crc = 1; break; + case Opt_override_compr: + { + char *name = match_strdup(&args[0]); + + if (!name) + return -ENOMEM; + if (!strcmp(name, "none")) + c->mount_opts.compr_type = UBIFS_COMPR_NONE; + else if (!strcmp(name, "lzo")) + c->mount_opts.compr_type = UBIFS_COMPR_LZO; + else if (!strcmp(name, "zlib")) + c->mount_opts.compr_type = UBIFS_COMPR_ZLIB; + else { + ubifs_err("unknown compressor \"%s\"", name); + kfree(name); + return -EINVAL; + } + kfree(name); + c->mount_opts.override_compr = 1; + c->default_compr = c->mount_opts.compr_type; + break; + } default: ubifs_err("unrecognized mount option \"%s\" " "or missing value", p); @@ -1019,6 +1074,30 @@ again: } /** + * check_free_space - check if there is enough free space to mount. + * @c: UBIFS file-system description object + * + * This function makes sure UBIFS has enough free space to be mounted in + * read/write mode. UBIFS must always have some free space to allow deletions. + */ +static int check_free_space(struct ubifs_info *c) +{ + ubifs_assert(c->dark_wm > 0); + if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { + ubifs_err("insufficient free space to mount in read/write mode"); + dbg_dump_budg(c); + dbg_dump_lprops(c); + /* + * We return %-EINVAL instead of %-ENOSPC because it seems to + * be the closest error code mentioned in the mount function + * documentation. + */ + return -EINVAL; + } + return 0; +} + +/** * mount_ubifs - mount UBIFS file-system. * @c: UBIFS file-system description object * @@ -1039,11 +1118,9 @@ static int mount_ubifs(struct ubifs_info *c) if (err) return err; -#ifdef CONFIG_UBIFS_FS_DEBUG - c->dbg_buf = vmalloc(c->leb_size); - if (!c->dbg_buf) - return -ENOMEM; -#endif + err = ubifs_debugging_init(c); + if (err) + return err; err = check_volume_empty(c); if (err) @@ -1100,27 +1177,25 @@ static int mount_ubifs(struct ubifs_info *c) goto out_free; /* - * Make sure the compressor which is set as the default on in the - * superblock was actually compiled in. + * Make sure the compressor which is set as default in the superblock + * or overridden by mount options is actually compiled in. */ if (!ubifs_compr_present(c->default_compr)) { - ubifs_warn("'%s' compressor is set by superblock, but not " - "compiled in", ubifs_compr_name(c->default_compr)); - c->default_compr = UBIFS_COMPR_NONE; + ubifs_err("'compressor \"%s\" is not compiled in", + ubifs_compr_name(c->default_compr)); + goto out_free; } - dbg_failure_mode_registration(c); - - err = init_constants_late(c); + err = init_constants_sb(c); if (err) - goto out_dereg; + goto out_free; sz = ALIGN(c->max_idx_node_sz, c->min_io_size); sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size); c->cbuf = kmalloc(sz, GFP_NOFS); if (!c->cbuf) { err = -ENOMEM; - goto out_dereg; + goto out_free; } sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); @@ -1145,6 +1220,8 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_master; + init_constants_master(c); + if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { ubifs_msg("recovery needed"); c->need_recovery = 1; @@ -1183,12 +1260,9 @@ static int mount_ubifs(struct ubifs_info *c) if (!mounted_read_only) { int lnum; - /* Check for enough free space */ - if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { - ubifs_err("insufficient available space"); - err = -EINVAL; + err = check_free_space(c); + if (err) goto out_orphans; - } /* Check for enough log space */ lnum = c->lhead_lnum + 1; @@ -1232,6 +1306,10 @@ static int mount_ubifs(struct ubifs_info *c) } } + err = dbg_debugfs_init_fs(c); + if (err) + goto out_infos; + err = dbg_check_filesystem(c); if (err) goto out_infos; @@ -1283,8 +1361,20 @@ static int mount_ubifs(struct ubifs_info *c) dbg_msg("tree fanout: %d", c->fanout); dbg_msg("reserved GC LEB: %d", c->gc_lnum); dbg_msg("first main LEB: %d", c->main_first); + dbg_msg("max. znode size %d", c->max_znode_sz); + dbg_msg("max. index node size %d", c->max_idx_node_sz); + dbg_msg("node sizes: data %zu, inode %zu, dentry %zu", + UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ); + dbg_msg("node sizes: trun %zu, sb %zu, master %zu", + UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); + dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", + UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); + dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu", + UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, + UBIFS_MAX_DENT_NODE_SZ); dbg_msg("dead watermark: %d", c->dead_wm); dbg_msg("dark watermark: %d", c->dark_wm); + dbg_msg("LEB overhead: %d", c->leb_overhead); x = (long long)c->main_lebs * c->dark_wm; dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)", x, x >> 10, x >> 20); @@ -1320,14 +1410,12 @@ out_wbufs: free_wbufs(c); out_cbuf: kfree(c->cbuf); -out_dereg: - dbg_failure_mode_deregistration(c); out_free: kfree(c->bu.buf); vfree(c->ileb_buf); vfree(c->sbuf); kfree(c->bottom_up_buf); - UBIFS_DBG(vfree(c->dbg_buf)); + ubifs_debugging_exit(c); return err; } @@ -1345,6 +1433,7 @@ static void ubifs_umount(struct ubifs_info *c) dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num, c->vi.vol_id); + dbg_debugfs_exit_fs(c); spin_lock(&ubifs_infos_lock); list_del(&c->infos_list); spin_unlock(&ubifs_infos_lock); @@ -1364,8 +1453,7 @@ static void ubifs_umount(struct ubifs_info *c) vfree(c->ileb_buf); vfree(c->sbuf); kfree(c->bottom_up_buf); - UBIFS_DBG(vfree(c->dbg_buf)); - dbg_failure_mode_deregistration(c); + ubifs_debugging_exit(c); } /** @@ -1387,12 +1475,9 @@ static int ubifs_remount_rw(struct ubifs_info *c) c->remounting_rw = 1; c->always_chk_crc = 1; - /* Check for enough free space */ - if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { - ubifs_err("insufficient available space"); - err = -EINVAL; + err = check_free_space(c); + if (err) goto out; - } if (c->old_leb_cnt != c->leb_cnt) { struct ubifs_sb_node *sup; @@ -1515,20 +1600,24 @@ out: * @c: UBIFS file-system description object * * This function is called during un-mounting and re-mounting, and it commits - * the journal unless the "fast unmount" mode is enabled. It also avoids - * committing the journal if it contains too few data. + * the journal unless the "fast unmount" mode is enabled. */ static void commit_on_unmount(struct ubifs_info *c) { - if (!c->fast_unmount) { - long long bud_bytes; + struct super_block *sb = c->vfs_sb; + long long bud_bytes; - spin_lock(&c->buds_lock); - bud_bytes = c->bud_bytes; - spin_unlock(&c->buds_lock); - if (bud_bytes > c->leb_size) - ubifs_run_commit(c); - } + /* + * This function is called before the background thread is stopped, so + * we may race with ongoing commit, which means we have to take + * @c->bud_lock to access @c->bud_bytes. + */ + spin_lock(&c->buds_lock); + bud_bytes = c->bud_bytes; + spin_unlock(&c->buds_lock); + + if (!c->fast_unmount && !(sb->s_flags & MS_RDONLY) && bud_bytes) + ubifs_run_commit(c); } /** @@ -1849,7 +1938,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) goto out_iput; mutex_unlock(&c->umount_mutex); - return 0; out_iput: @@ -1955,7 +2043,7 @@ static void ubifs_kill_sb(struct super_block *sb) * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()' * in order to be outside BKL. */ - if (sb->s_root && !(sb->s_flags & MS_RDONLY)) + if (sb->s_root) commit_on_unmount(c); /* The un-mount routine is actually done in put_super() */ generic_shutdown_super(sb); @@ -2021,6 +2109,14 @@ static int __init ubifs_init(void) BUILD_BUG_ON(UBIFS_REF_NODE_SZ != 64); /* + * We use 2 bit wide bit-fields to store compression type, which should + * be amended if more compressors are added. The bit-fields are: + * @compr_type in 'struct ubifs_inode', @default_compr in + * 'struct ubifs_info' and @compr_type in 'struct ubifs_mount_opts'. + */ + BUILD_BUG_ON(UBIFS_COMPR_TYPES_CNT > 4); + + /* * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. */ @@ -2049,11 +2145,17 @@ static int __init ubifs_init(void) err = ubifs_compressors_init(); if (err) + goto out_shrinker; + + err = dbg_debugfs_init(); + if (err) goto out_compr; return 0; out_compr: + ubifs_compressors_exit(); +out_shrinker: unregister_shrinker(&ubifs_shrinker_info); kmem_cache_destroy(ubifs_inode_slab); out_reg: @@ -2068,6 +2170,7 @@ static void __exit ubifs_exit(void) ubifs_assert(list_empty(&ubifs_infos)); ubifs_assert(atomic_long_read(&ubifs_clean_zn_cnt) == 0); + dbg_debugfs_exit(); ubifs_compressors_exit(); unregister_shrinker(&ubifs_shrinker_info); kmem_cache_destroy(ubifs_inode_slab); diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 6eef5344a14..f7e36f54552 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -2245,12 +2245,11 @@ int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, if (found) { /* Ensure the znode is dirtied */ if (znode->cnext || !ubifs_zn_dirty(znode)) { - znode = dirty_cow_bottom_up(c, - znode); - if (IS_ERR(znode)) { - err = PTR_ERR(znode); - goto out_unlock; - } + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } } zbr = &znode->zbranch[n]; lnc_free(zbr); @@ -2317,11 +2316,11 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, /* Ensure the znode is dirtied */ if (znode->cnext || !ubifs_zn_dirty(znode)) { - znode = dirty_cow_bottom_up(c, znode); - if (IS_ERR(znode)) { - err = PTR_ERR(znode); - goto out_unlock; - } + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } } if (found == 1) { @@ -2627,11 +2626,11 @@ int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, /* Ensure the znode is dirtied */ if (znode->cnext || !ubifs_zn_dirty(znode)) { - znode = dirty_cow_bottom_up(c, znode); - if (IS_ERR(znode)) { - err = PTR_ERR(znode); - goto out_unlock; - } + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } } /* Remove all keys in range except the first */ diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 8ac76b1c2d5..fde8d127c76 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -553,8 +553,8 @@ static int layout_in_empty_space(struct ubifs_info *c) } #ifdef CONFIG_UBIFS_FS_DEBUG - c->new_ihead_lnum = lnum; - c->new_ihead_offs = buf_offs; + c->dbg->new_ihead_lnum = lnum; + c->dbg->new_ihead_offs = buf_offs; #endif return 0; @@ -802,8 +802,10 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot) * budgeting subsystem to assume the index is already committed, * even though it is not. */ + ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); c->old_idx_sz = c->calc_idx_sz; c->budg_uncommitted_idx = 0; + c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); spin_unlock(&c->space_lock); mutex_unlock(&c->tnc_mutex); @@ -1002,7 +1004,8 @@ static int write_index(struct ubifs_info *c) } #ifdef CONFIG_UBIFS_FS_DEBUG - if (lnum != c->new_ihead_lnum || buf_offs != c->new_ihead_offs) { + if (lnum != c->dbg->new_ihead_lnum || + buf_offs != c->dbg->new_ihead_offs) { ubifs_err("inconsistent ihead"); return -EINVAL; } diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index 0b378042a3a..b25fc36cf72 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h @@ -51,6 +51,13 @@ */ #define UBIFS_MIN_COMPR_LEN 128 +/* + * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes + * shorter than uncompressed data length, UBIFS preferes to leave this data + * node uncompress, because it'll be read faster. + */ +#define UBIFS_MIN_COMPRESS_DIFF 64 + /* Root inode number */ #define UBIFS_ROOT_INO 1 diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 46b172560a0..fc2a4cc66d0 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -63,6 +63,14 @@ #define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL #define SQNUM_WATERMARK 0xFFFFFFFFFF000000ULL +/* + * Minimum amount of LEBs reserved for the index. At present the index needs at + * least 2 LEBs: one for the index head and one for in-the-gaps method (which + * currently does not cater for the index head and so excludes it from + * consideration). + */ +#define MIN_INDEX_LEBS 2 + /* Minimum amount of data UBIFS writes to the flash */ #define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8) @@ -386,12 +394,12 @@ struct ubifs_inode { unsigned int dirty:1; unsigned int xattr:1; unsigned int bulk_read:1; + unsigned int compr_type:2; struct mutex ui_mutex; spinlock_t ui_lock; loff_t synced_i_size; loff_t ui_size; int flags; - int compr_type; pgoff_t last_page_read; pgoff_t read_in_a_row; int data_len; @@ -419,7 +427,7 @@ struct ubifs_unclean_leb { * * LPROPS_UNCAT: not categorized * LPROPS_DIRTY: dirty > 0, not index - * LPROPS_DIRTY_IDX: dirty + free > UBIFS_CH_SZ and index + * LPROPS_DIRTY_IDX: dirty + free > @c->min_idx_node_sze and index * LPROPS_FREE: free > 0, not empty, not index * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs * LPROPS_EMPTY: LEB is empty, not taken @@ -473,8 +481,8 @@ struct ubifs_lprops { struct ubifs_lpt_lprops { int free; int dirty; - unsigned tgc : 1; - unsigned cmt : 1; + unsigned tgc:1; + unsigned cmt:1; }; /** @@ -482,24 +490,26 @@ struct ubifs_lpt_lprops { * @empty_lebs: number of empty LEBs * @taken_empty_lebs: number of taken LEBs * @idx_lebs: number of indexing LEBs - * @total_free: total free space in bytes - * @total_dirty: total dirty space in bytes - * @total_used: total used space in bytes (includes only data LEBs) - * @total_dead: total dead space in bytes (includes only data LEBs) - * @total_dark: total dark space in bytes (includes only data LEBs) + * @total_free: total free space in bytes (includes all LEBs) + * @total_dirty: total dirty space in bytes (includes all LEBs) + * @total_used: total used space in bytes (does not include index LEBs) + * @total_dead: total dead space in bytes (does not include index LEBs) + * @total_dark: total dark space in bytes (does not include index LEBs) + * + * The @taken_empty_lebs field counts the LEBs that are in the transient state + * of having been "taken" for use but not yet written to. @taken_empty_lebs is + * needed to account correctly for @gc_lnum, otherwise @empty_lebs could be + * used by itself (in which case 'unused_lebs' would be a better name). In the + * case of @gc_lnum, it is "taken" at mount time or whenever a LEB is retained + * by GC, but unlike other empty LEBs that are "taken", it may not be written + * straight away (i.e. before the next commit start or unmount), so either + * @gc_lnum must be specially accounted for, or the current approach followed + * i.e. count it under @taken_empty_lebs. * - * N.B. total_dirty and total_used are different to other total_* fields, - * because they account _all_ LEBs, not just data LEBs. + * @empty_lebs includes @taken_empty_lebs. * - * 'taken_empty_lebs' counts the LEBs that are in the transient state of having - * been 'taken' for use but not yet written to. 'taken_empty_lebs' is needed - * to account correctly for gc_lnum, otherwise 'empty_lebs' could be used - * by itself (in which case 'unused_lebs' would be a better name). In the case - * of gc_lnum, it is 'taken' at mount time or whenever a LEB is retained by GC, - * but unlike other empty LEBs that are 'taken', it may not be written straight - * away (i.e. before the next commit start or unmount), so either gc_lnum must - * be specially accounted for, or the current approach followed i.e. count it - * under 'taken_empty_lebs'. + * @total_used, @total_dead and @total_dark fields do not account indexing + * LEBs. */ struct ubifs_lp_stats { int empty_lebs; @@ -893,15 +903,25 @@ struct ubifs_orphan { /** * struct ubifs_mount_opts - UBIFS-specific mount options information. * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) - * @bulk_read: enable bulk-reads - * @chk_data_crc: check CRCs when reading data nodes + * @bulk_read: enable/disable bulk-reads (%0 default, %1 disabe, %2 enable) + * @chk_data_crc: enable/disable CRC data checking when reading data nodes + * (%0 default, %1 disabe, %2 enable) + * @override_compr: override default compressor (%0 - do not override and use + * superblock compressor, %1 - override and use compressor + * specified in @compr_type) + * @compr_type: compressor type to override the superblock compressor with + * (%UBIFS_COMPR_NONE, etc) */ struct ubifs_mount_opts { unsigned int unmount_mode:2; unsigned int bulk_read:2; unsigned int chk_data_crc:2; + unsigned int override_compr:1; + unsigned int compr_type:2; }; +struct ubifs_debug_info; + /** * struct ubifs_info - UBIFS file-system description data structure * (per-superblock). @@ -946,6 +966,7 @@ struct ubifs_mount_opts { * @no_chk_data_crc: do not check CRCs when reading data nodes (except during * recovery) * @bulk_read: enable bulk-reads + * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) * * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and * @calc_idx_sz @@ -963,8 +984,6 @@ struct ubifs_mount_opts { * @ileb_nxt: next pre-allocated index LEBs * @old_idx: tree of index nodes obsoleted since the last commit start * @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c - * @new_ihead_lnum: used by debugging to check ihead_lnum - * @new_ihead_offs: used by debugging to check ihead_offs * * @mst_node: master node * @mst_offs: offset of valid master node @@ -986,7 +1005,6 @@ struct ubifs_mount_opts { * @main_lebs: count of LEBs in the main area * @main_first: first LEB of the main area * @main_bytes: main area size in bytes - * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) * * @key_hash_type: type of the key hash * @key_hash: direntry key hash function @@ -1149,15 +1167,7 @@ struct ubifs_mount_opts { * @always_chk_crc: always check CRCs (while mounting and remounting rw) * @mount_opts: UBIFS-specific mount options * - * @dbg_buf: a buffer of LEB size used for debugging purposes - * @old_zroot: old index root - used by 'dbg_check_old_index()' - * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' - * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' - * @failure_mode: failure mode for recovery testing - * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls - * @fail_timeout: time in jiffies when delay of failure mode expires - * @fail_cnt: current number of calls to failure mode I/O functions - * @fail_cnt_max: number of calls by which to delay failure mode + * @dbg: debugging-related information */ struct ubifs_info { struct super_block *vfs_sb; @@ -1196,6 +1206,7 @@ struct ubifs_info { unsigned int big_lpt:1; unsigned int no_chk_data_crc:1; unsigned int bulk_read:1; + unsigned int default_compr:2; struct mutex tnc_mutex; struct ubifs_zbranch zroot; @@ -1212,10 +1223,6 @@ struct ubifs_info { int ileb_nxt; struct rb_root old_idx; int *bottom_up_buf; -#ifdef CONFIG_UBIFS_FS_DEBUG - int new_ihead_lnum; - int new_ihead_offs; -#endif struct ubifs_mst_node *mst_node; int mst_offs; @@ -1237,7 +1244,6 @@ struct ubifs_info { int main_lebs; int main_first; long long main_bytes; - int default_compr; uint8_t key_hash_type; uint32_t (*key_hash)(const char *str, int len); @@ -1315,8 +1321,8 @@ struct ubifs_info { void *sbuf; struct list_head idx_gc; int idx_gc_cnt; - volatile int gc_seq; - volatile int gced_lnum; + int gc_seq; + int gced_lnum; struct list_head infos_list; struct mutex umount_mutex; @@ -1391,21 +1397,7 @@ struct ubifs_info { struct ubifs_mount_opts mount_opts; #ifdef CONFIG_UBIFS_FS_DEBUG - void *dbg_buf; - struct ubifs_zbranch old_zroot; - int old_zroot_level; - unsigned long long old_zroot_sqnum; - int failure_mode; - int fail_delay; - unsigned long fail_timeout; - unsigned int fail_cnt; - unsigned int fail_cnt_max; - long long chk_lpt_sz; - long long chk_lpt_sz2; - long long chk_lpt_wastage; - int chk_lpt_lebs; - int new_nhead_lnum; - int new_nhead_offs; + struct ubifs_debug_info *dbg; #endif }; @@ -1505,7 +1497,7 @@ void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, long long ubifs_get_free_space(struct ubifs_info *c); int ubifs_calc_min_idx_lebs(struct ubifs_info *c); void ubifs_convert_page_budget(struct ubifs_info *c); -long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free); +long long ubifs_reported_space(const struct ubifs_info *c, long long free); long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); /* find.c */ @@ -1639,6 +1631,9 @@ void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty); void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode); uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits); struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght); +/* Needed only in debugging code in lpt_commit.c */ +int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf, + struct ubifs_nnode *nnode); /* lpt_commit.c */ int ubifs_lpt_start_commit(struct ubifs_info *c); @@ -1714,7 +1709,7 @@ long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); /* compressor.c */ int __init ubifs_compressors_init(void); -void __exit ubifs_compressors_exit(void); +void ubifs_compressors_exit(void); void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, int *compr_type); int ubifs_decompress(const void *buf, int len, void *out, int *out_len, |