diff options
Diffstat (limited to 'fs')
246 files changed, 7307 insertions, 2866 deletions
diff --git a/fs/9p/fid.h b/fs/9p/fid.h index 26e07df783b..c3bbd6af996 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -22,6 +22,21 @@ #include <linux/list.h> +/** + * struct v9fs_dentry - 9p private data stored in dentry d_fsdata + * @lock: protects the fidlist + * @fidlist: list of FIDs currently associated with this dentry + * + * This structure defines the 9p private data associated with + * a particular dentry. In particular, this private data is used + * to lookup which 9P FID handle should be used for a particular VFS + * operation. FID handles are associated with dentries instead of + * inodes in order to more closely map functionality to the Plan 9 + * expected behavior for FID reclaimation and tracking. + * + * See Also: Mapping FIDs to Linux VFS model in + * Design and Implementation of the Linux 9P File System documentation + */ struct v9fs_dentry { spinlock_t lock; /* protect fidlist */ struct list_head fidlist; diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 9b0f0222e8b..047c791427a 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -71,19 +71,19 @@ static match_table_t tokens = { /** * v9fs_parse_options - parse mount options into session structure - * @options: options string passed from mount * @v9ses: existing v9fs session information * + * Return 0 upon success, -ERRNO upon failure. */ -static void v9fs_parse_options(struct v9fs_session_info *v9ses) +static int v9fs_parse_options(struct v9fs_session_info *v9ses) { char *options; substring_t args[MAX_OPT_ARGS]; char *p; int option = 0; char *s, *e; - int ret; + int ret = 0; /* setup defaults */ v9ses->afid = ~0; @@ -91,19 +91,26 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) v9ses->cache = 0; if (!v9ses->options) - return; + return 0; options = kstrdup(v9ses->options, GFP_KERNEL); + if (!options) { + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); + return -ENOMEM; + } + while ((p = strsep(&options, ",")) != NULL) { int token; if (!*p) continue; token = match_token(p, tokens, args); if (token < Opt_uname) { - ret = match_int(&args[0], &option); - if (ret < 0) { + int r = match_int(&args[0], &option); + if (r < 0) { P9_DPRINTK(P9_DEBUG_ERROR, "integer field, but no integer?\n"); + ret = r; continue; } } @@ -125,10 +132,10 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) v9ses->afid = option; break; case Opt_uname: - match_strcpy(v9ses->uname, &args[0]); + match_strlcpy(v9ses->uname, &args[0], PATH_MAX); break; case Opt_remotename: - match_strcpy(v9ses->aname, &args[0]); + match_strlcpy(v9ses->aname, &args[0], PATH_MAX); break; case Opt_nodevmap: v9ses->nodev = 1; @@ -139,6 +146,13 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) case Opt_access: s = match_strdup(&args[0]); + if (!s) { + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy" + " of option argument\n"); + ret = -ENOMEM; + break; + } v9ses->flags &= ~V9FS_ACCESS_MASK; if (strcmp(s, "user") == 0) v9ses->flags |= V9FS_ACCESS_USER; @@ -158,6 +172,7 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) } } kfree(options); + return ret; } /** @@ -173,6 +188,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, { int retval = -EINVAL; struct p9_fid *fid; + int rc; v9ses->uname = __getname(); if (!v9ses->uname) @@ -190,8 +206,21 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, v9ses->uid = ~0; v9ses->dfltuid = V9FS_DEFUID; v9ses->dfltgid = V9FS_DEFGID; - v9ses->options = kstrdup(data, GFP_KERNEL); - v9fs_parse_options(v9ses); + if (data) { + v9ses->options = kstrdup(data, GFP_KERNEL); + if (!v9ses->options) { + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); + retval = -ENOMEM; + goto error; + } + } + + rc = v9fs_parse_options(v9ses); + if (rc < 0) { + retval = rc; + goto error; + } v9ses->clnt = p9_client_create(dev_name, v9ses->options); @@ -233,7 +262,6 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, return fid; error: - v9fs_session_close(v9ses); return ERR_PTR(retval); } @@ -256,9 +284,12 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) } /** - * v9fs_session_cancel - mark transport as disconnected - * and cancel all pending requests. + * v9fs_session_cancel - terminate a session + * @v9ses: session to terminate + * + * mark transport as disconnected and cancel all pending requests. */ + void v9fs_session_cancel(struct v9fs_session_info *v9ses) { P9_DPRINTK(P9_DEBUG_ERROR, "cancel session %p\n", v9ses); p9_client_disconnect(v9ses->clnt); diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 7d3a1018db5..a7d56719299 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -21,18 +21,69 @@ * */ -/* - * Session structure provides information for an opened session - * - */ +/** + * enum p9_session_flags - option flags for each 9P session + * @V9FS_EXTENDED: whether or not to use 9P2000.u extensions + * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy + * @V9FS_ACCESS_USER: a new attach will be issued for every user (default) + * @V9FS_ACCESS_ANY: use a single attach for all users + * @V9FS_ACCESS_MASK: bit mask of different ACCESS options + * + * Session flags reflect options selected by users at mount time + */ +enum p9_session_flags { + V9FS_EXTENDED = 0x01, + V9FS_ACCESS_SINGLE = 0x02, + V9FS_ACCESS_USER = 0x04, + V9FS_ACCESS_ANY = 0x06, + V9FS_ACCESS_MASK = 0x06, +}; + +/* possible values of ->cache */ +/** + * enum p9_cache_modes - user specified cache preferences + * @CACHE_NONE: do not cache data, dentries, or directory contents (default) + * @CACHE_LOOSE: cache data, dentries, and directory contents w/no consistency + * + * eventually support loose, tight, time, session, default always none + */ + +enum p9_cache_modes { + CACHE_NONE, + CACHE_LOOSE, +}; + +/** + * struct v9fs_session_info - per-instance session information + * @flags: session options of type &p9_session_flags + * @nodev: set to 1 to disable device mapping + * @debug: debug level + * @afid: authentication handle + * @cache: cache mode of type &p9_cache_modes + * @options: copy of options string given by user + * @uname: string user name to mount hierarchy as + * @aname: mount specifier for remote hierarchy + * @maxdata: maximum data to be sent/recvd per protocol message + * @dfltuid: default numeric userid to mount hierarchy as + * @dfltgid: default numeric groupid to mount hierarchy as + * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy + * @clnt: reference to 9P network client instantiated for this session + * @debugfs_dir: reference to debugfs_dir which can be used for add'l debug + * + * This structure holds state for each session instance established during + * a sys_mount() . + * + * Bugs: there seems to be a lot of state which could be condensed and/or + * removed. + */ struct v9fs_session_info { /* options */ - unsigned char flags; /* session flags */ - unsigned char nodev; /* set to 1 if no disable device mapping */ - unsigned short debug; /* debug level */ - unsigned int afid; /* authentication fid */ - unsigned int cache; /* cache mode */ + unsigned char flags; + unsigned char nodev; + unsigned short debug; + unsigned int afid; + unsigned int cache; char *options; /* copy of mount options */ char *uname; /* user name to mount as */ @@ -45,22 +96,6 @@ struct v9fs_session_info { struct dentry *debugfs_dir; }; -/* session flags */ -enum { - V9FS_EXTENDED = 0x01, /* 9P2000.u */ - V9FS_ACCESS_MASK = 0x06, /* access mask */ - V9FS_ACCESS_SINGLE = 0x02, /* only one user can access the files */ - V9FS_ACCESS_USER = 0x04, /* attache per user */ - V9FS_ACCESS_ANY = 0x06, /* use the same attach for all users */ -}; - -/* possible values of ->cache */ -/* eventually support loose, tight, time, session, default always none */ -enum { - CACHE_NONE, /* default */ - CACHE_LOOSE, /* no consistency */ -}; - extern struct dentry *v9fs_debugfs_root; struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 6248f0e727a..97d3aed5798 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -43,7 +43,7 @@ /** * v9fs_vfs_readpage - read an entire page in from 9P * - * @file: file being read + * @filp: file being read * @page: structure to page * */ diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 0924d4477da..88e3787c6ea 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -60,7 +60,7 @@ static inline int dt_type(struct p9_stat *mistat) /** * v9fs_dir_readdir - read a directory - * @filep: opened file structure + * @filp: opened file structure * @dirent: directory structure ??? * @filldir: function to populate directory structure ??? * diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index a616fff8906..0d55affe37d 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -90,10 +90,11 @@ int v9fs_file_open(struct inode *inode, struct file *file) /** * v9fs_file_lock - lock a file (or directory) - * @inode: inode to be opened - * @file: file being opened + * @filp: file to be locked + * @cmd: lock command + * @fl: file lock structure * - * XXX - this looks like a local only lock, we should extend into 9P + * Bugs: this looks like a local only lock, we should extend into 9P * by using open exclusive */ @@ -118,7 +119,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) /** * v9fs_file_read - read from a file - * @filep: file pointer to read + * @filp: file pointer to read * @data: data buffer to read data into * @count: size of buffer * @offset: offset at which to read data @@ -142,7 +143,7 @@ v9fs_file_read(struct file *filp, char __user * data, size_t count, /** * v9fs_file_write - write to a file - * @filep: file pointer to write + * @filp: file pointer to write * @data: data buffer to write data from * @count: size of buffer * @offset: offset at which to write data diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 6a28842052e..40fa807bd92 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -129,6 +129,12 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) return res; } +/** + * v9fs_uflags2omode- convert posix open flags to plan 9 mode bits + * @uflags: flags to convert + * + */ + int v9fs_uflags2omode(int uflags) { int ret; @@ -312,6 +318,14 @@ error: } */ +/** + * v9fs_inode_from_fid - populate an inode by issuing a attribute request + * @v9ses: session information + * @fid: fid to issue attribute request for + * @sb: superblock on which to create inode + * + */ + static struct inode * v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb) @@ -384,9 +398,12 @@ v9fs_open_created(struct inode *inode, struct file *file) /** * v9fs_create - Create a file + * @v9ses: session information + * @dir: directory that dentry is being created in * @dentry: dentry that is being created * @perm: create permissions * @mode: open mode + * @extension: 9p2000.u extension string to support devices, etc. * */ static struct p9_fid * @@ -461,7 +478,7 @@ error: /** * v9fs_vfs_create - VFS hook to create files - * @inode: directory inode that is being created + * @dir: directory inode that is being created * @dentry: dentry that is being deleted * @mode: create permissions * @nd: path information @@ -519,7 +536,7 @@ error: /** * v9fs_vfs_mkdir - VFS mkdir hook to create a directory - * @inode: inode that is being unlinked + * @dir: inode that is being unlinked * @dentry: dentry that is being unlinked * @mode: mode for new directory * @@ -703,9 +720,9 @@ done: /** * v9fs_vfs_getattr - retrieve file metadata - * @mnt - mount information - * @dentry - file to get attributes on - * @stat - metadata structure to populate + * @mnt: mount information + * @dentry: file to get attributes on + * @stat: metadata structure to populate * */ @@ -928,7 +945,7 @@ done: /** * v9fs_vfs_readlink - read a symlink's location * @dentry: dentry for symlink - * @buf: buffer to load symlink location into + * @buffer: buffer to load symlink location into * @buflen: length of buffer * */ @@ -996,10 +1013,12 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd) * v9fs_vfs_put_link - release a symlink path * @dentry: dentry for symlink * @nd: nameidata + * @p: unused * */ -static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) +static void +v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) { char *s = nd_get_link(nd); @@ -1008,6 +1027,15 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void __putname(s); } +/** + * v9fs_vfs_mkspecial - create a special file + * @dir: inode to create special file in + * @dentry: dentry to create + * @mode: mode to create special file + * @extension: 9p2000.u format extension string representing special file + * + */ + static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, int mode, const char *extension) { @@ -1037,7 +1065,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, * @dentry: dentry for symlink * @symname: symlink data * - * See 9P2000.u RFC for more information + * See Also: 9P2000.u RFC for more information * */ @@ -1058,10 +1086,6 @@ v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) * */ -/* XXX - lots of code dup'd from symlink and creates, - * figure out a better reuse strategy - */ - static int v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) @@ -1098,7 +1122,7 @@ clunk_fid: * @dir: inode destination for new link * @dentry: dentry for file * @mode: mode for creation - * @dev_t: device associated with special file + * @rdev: device associated with special file * */ diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index a452ac67fc9..bf59c396049 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -75,6 +75,7 @@ static int v9fs_set_super(struct super_block *s, void *data) * v9fs_fill_super - populate superblock with info * @sb: superblock * @v9ses: session information + * @flags: flags propagated from v9fs_get_sb() * */ @@ -127,29 +128,26 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, fid = v9fs_session_init(v9ses, dev_name, data); if (IS_ERR(fid)) { retval = PTR_ERR(fid); - fid = NULL; - kfree(v9ses); - v9ses = NULL; - goto error; + goto close_session; } st = p9_client_stat(fid); if (IS_ERR(st)) { retval = PTR_ERR(st); - goto error; + goto clunk_fid; } sb = sget(fs_type, NULL, v9fs_set_super, v9ses); if (IS_ERR(sb)) { retval = PTR_ERR(sb); - goto error; + goto free_stat; } v9fs_fill_super(sb, v9ses, flags); inode = v9fs_get_inode(sb, S_IFDIR | mode); if (IS_ERR(inode)) { retval = PTR_ERR(inode); - goto error; + goto release_sb; } inode->i_uid = uid; @@ -158,7 +156,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, root = d_alloc_root(inode); if (!root) { retval = -ENOMEM; - goto error; + goto release_sb; } sb->s_root = root; @@ -169,21 +167,22 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, return simple_set_mnt(mnt, sb); -error: - kfree(st); - if (fid) - p9_client_clunk(fid); - - if (v9ses) { - v9fs_session_close(v9ses); - kfree(v9ses); - } - +release_sb: if (sb) { up_write(&sb->s_umount); deactivate_super(sb); } +free_stat: + kfree(st); + +clunk_fid: + p9_client_clunk(fid); + +close_session: + v9fs_session_close(v9ses); + kfree(v9ses); + return retval; } diff --git a/fs/Kconfig b/fs/Kconfig index 2e43d46f65d..cf12c403b8c 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1005,7 +1005,8 @@ config TMPFS_POSIX_ACL config HUGETLBFS bool "HugeTLB file system support" - depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || BROKEN + depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \ + (S390 && 64BIT) || BROKEN help hugetlbfs is a filesystem backing for HugeTLB pages, based on ramfs. For architectures that support it, say Y here and read diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 853845abcca..55e8ee1900a 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -41,7 +41,7 @@ config BINFMT_ELF_FDPIC It is also possible to run FDPIC ELF binaries on MMU linux also. config BINFMT_FLAT - tristate "Kernel support for flat binaries" + bool "Kernel support for flat binaries" depends on !MMU help Support uClinux FLAT format binaries. diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index 936f2af39c4..831157502d5 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -75,7 +75,7 @@ extern unsigned int adfs_map_free(struct super_block *sb); /* Misc */ void __adfs_error(struct super_block *sb, const char *function, const char *fmt, ...); -#define adfs_error(sb, fmt...) __adfs_error(sb, __FUNCTION__, fmt) +#define adfs_error(sb, fmt...) __adfs_error(sb, __func__, fmt) /* super.c */ diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c index b9b2b27b68c..ea7df214692 100644 --- a/fs/adfs/dir_f.c +++ b/fs/adfs/dir_f.c @@ -122,9 +122,9 @@ adfs_dir_checkbyte(const struct adfs_dir *dir) ptr.ptr8 = bufoff(bh, i); end.ptr8 = ptr.ptr8 + last - i; - do + do { dircheck = *ptr.ptr8++ ^ ror13(dircheck); - while (ptr.ptr8 < end.ptr8); + } while (ptr.ptr8 < end.ptr8); } /* diff --git a/fs/affs/affs.h b/fs/affs/affs.h index d5bd497ab9c..223b1917093 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -48,7 +48,7 @@ struct affs_ext_key { * affs fs inode data in memory */ struct affs_inode_info { - u32 i_opencnt; + atomic_t i_opencnt; struct semaphore i_link_lock; /* Protects internal inode access. */ struct semaphore i_ext_lock; /* Protects internal inode access. */ #define i_hash_lock i_ext_lock @@ -170,8 +170,6 @@ extern int affs_rename(struct inode *old_dir, struct dentry *old_dentry, extern unsigned long affs_parent_ino(struct inode *dir); extern struct inode *affs_new_inode(struct inode *dir); extern int affs_notify_change(struct dentry *dentry, struct iattr *attr); -extern void affs_put_inode(struct inode *inode); -extern void affs_drop_inode(struct inode *inode); extern void affs_delete_inode(struct inode *inode); extern void affs_clear_inode(struct inode *inode); extern struct inode *affs_iget(struct super_block *sb, diff --git a/fs/affs/file.c b/fs/affs/file.c index 6e0c9399200..6eac7bdeec9 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -48,8 +48,9 @@ affs_file_open(struct inode *inode, struct file *filp) { if (atomic_read(&filp->f_count) != 1) return 0; - pr_debug("AFFS: open(%d)\n", AFFS_I(inode)->i_opencnt); - AFFS_I(inode)->i_opencnt++; + pr_debug("AFFS: open(%lu,%d)\n", + inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt)); + atomic_inc(&AFFS_I(inode)->i_opencnt); return 0; } @@ -58,10 +59,16 @@ affs_file_release(struct inode *inode, struct file *filp) { if (atomic_read(&filp->f_count) != 0) return 0; - pr_debug("AFFS: release(%d)\n", AFFS_I(inode)->i_opencnt); - AFFS_I(inode)->i_opencnt--; - if (!AFFS_I(inode)->i_opencnt) + pr_debug("AFFS: release(%lu, %d)\n", + inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt)); + + if (atomic_dec_and_test(&AFFS_I(inode)->i_opencnt)) { + mutex_lock(&inode->i_mutex); + if (inode->i_size != AFFS_I(inode)->mmu_private) + affs_truncate(inode); affs_free_prealloc(inode); + mutex_unlock(&inode->i_mutex); + } return 0; } @@ -180,7 +187,7 @@ affs_get_extblock(struct inode *inode, u32 ext) /* inline the simplest case: same extended block as last time */ struct buffer_head *bh = AFFS_I(inode)->i_ext_bh; if (ext == AFFS_I(inode)->i_ext_last) - atomic_inc(&bh->b_count); + get_bh(bh); else /* we have to do more (not inlined) */ bh = affs_get_extblock_slow(inode, ext); @@ -306,7 +313,7 @@ store_ext: affs_brelse(AFFS_I(inode)->i_ext_bh); AFFS_I(inode)->i_ext_last = ext; AFFS_I(inode)->i_ext_bh = bh; - atomic_inc(&bh->b_count); + get_bh(bh); return bh; @@ -324,9 +331,7 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block); - - if (block > (sector_t)0x7fffffffUL) - BUG(); + BUG_ON(block > (sector_t)0x7fffffffUL); if (block >= AFFS_I(inode)->i_blkcnt) { if (block > AFFS_I(inode)->i_blkcnt || !create) @@ -493,8 +498,7 @@ affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsign u32 tmp; pr_debug("AFFS: read_page(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to); - if (from > to || to > PAGE_CACHE_SIZE) - BUG(); + BUG_ON(from > to || to > PAGE_CACHE_SIZE); kmap(page); data = page_address(page); bsize = AFFS_SB(sb)->s_data_blksize; @@ -507,8 +511,7 @@ affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsign if (IS_ERR(bh)) return PTR_ERR(bh); tmp = min(bsize - boff, to - from); - if (from + tmp > to || tmp > bsize) - BUG(); + BUG_ON(from + tmp > to || tmp > bsize); memcpy(data + from, AFFS_DATA(bh) + boff, tmp); affs_brelse(bh); bidx++; @@ -540,10 +543,9 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize) if (IS_ERR(bh)) return PTR_ERR(bh); tmp = min(bsize - boff, newsize - size); - if (boff + tmp > bsize || tmp > bsize) - BUG(); + BUG_ON(boff + tmp > bsize || tmp > bsize); memset(AFFS_DATA(bh) + boff, 0, tmp); - AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp); + be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp); affs_fix_checksum(sb, bh); mark_buffer_dirty_inode(bh, inode); size += tmp; @@ -560,8 +562,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize) if (IS_ERR(bh)) goto out; tmp = min(bsize, newsize - size); - if (tmp > bsize) - BUG(); + BUG_ON(tmp > bsize); AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino); AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx); @@ -683,10 +684,9 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, if (IS_ERR(bh)) return PTR_ERR(bh); tmp = min(bsize - boff, to - from); - if (boff + tmp > bsize || tmp > bsize) - BUG(); + BUG_ON(boff + tmp > bsize || tmp > bsize); memcpy(AFFS_DATA(bh) + boff, data + from, tmp); - AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp); + be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp); affs_fix_checksum(sb, bh); mark_buffer_dirty_inode(bh, inode); written += tmp; @@ -732,8 +732,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, if (IS_ERR(bh)) goto out; tmp = min(bsize, to - from); - if (tmp > bsize) - BUG(); + BUG_ON(tmp > bsize); memcpy(AFFS_DATA(bh), data + from, tmp); if (buffer_new(bh)) { AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); @@ -834,6 +833,8 @@ affs_truncate(struct inode *inode) res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata); if (!res) res = mapping->a_ops->write_end(NULL, mapping, size, 0, 0, page, fsdata); + else + inode->i_size = AFFS_I(inode)->mmu_private; mark_inode_dirty(inode); return; } else if (inode->i_size == AFFS_I(inode)->mmu_private) @@ -869,6 +870,7 @@ affs_truncate(struct inode *inode) blk++; } else AFFS_HEAD(ext_bh)->first_data = 0; + AFFS_HEAD(ext_bh)->block_count = cpu_to_be32(i); size = AFFS_SB(sb)->s_hashsize; if (size > blkcnt - blk + i) size = blkcnt - blk + i; diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 27fe6cbe43a..a13b334a391 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -58,7 +58,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino) AFFS_I(inode)->i_extcnt = 1; AFFS_I(inode)->i_ext_last = ~1; AFFS_I(inode)->i_protect = prot; - AFFS_I(inode)->i_opencnt = 0; + atomic_set(&AFFS_I(inode)->i_opencnt, 0); AFFS_I(inode)->i_blkcnt = 0; AFFS_I(inode)->i_lc = NULL; AFFS_I(inode)->i_lc_size = 0; @@ -108,8 +108,6 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino) inode->i_mode |= S_IFDIR; } else inode->i_mode = S_IRUGO | S_IXUGO | S_IWUSR | S_IFDIR; - if (tail->link_chain) - inode->i_nlink = 2; /* Maybe it should be controlled by mount parameter? */ //inode->i_mode |= S_ISVTX; inode->i_op = &affs_dir_inode_operations; @@ -245,31 +243,12 @@ out: } void -affs_put_inode(struct inode *inode) -{ - pr_debug("AFFS: put_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); - affs_free_prealloc(inode); -} - -void -affs_drop_inode(struct inode *inode) -{ - mutex_lock(&inode->i_mutex); - if (inode->i_size != AFFS_I(inode)->mmu_private) - affs_truncate(inode); - mutex_unlock(&inode->i_mutex); - - generic_drop_inode(inode); -} - -void affs_delete_inode(struct inode *inode) { pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); truncate_inode_pages(&inode->i_data, 0); inode->i_size = 0; - if (S_ISREG(inode->i_mode)) - affs_truncate(inode); + affs_truncate(inode); clear_inode(inode); affs_free_block(inode->i_sb, inode->i_ino); } @@ -277,9 +256,12 @@ affs_delete_inode(struct inode *inode) void affs_clear_inode(struct inode *inode) { - unsigned long cache_page = (unsigned long) AFFS_I(inode)->i_lc; + unsigned long cache_page; pr_debug("AFFS: clear_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); + + affs_free_prealloc(inode); + cache_page = (unsigned long)AFFS_I(inode)->i_lc; if (cache_page) { pr_debug("AFFS: freeing ext cache\n"); AFFS_I(inode)->i_lc = NULL; @@ -316,7 +298,7 @@ affs_new_inode(struct inode *dir) inode->i_ino = block; inode->i_nlink = 1; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; - AFFS_I(inode)->i_opencnt = 0; + atomic_set(&AFFS_I(inode)->i_opencnt, 0); AFFS_I(inode)->i_blkcnt = 0; AFFS_I(inode)->i_lc = NULL; AFFS_I(inode)->i_lc_size = 0; @@ -369,12 +351,12 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3 switch (type) { case ST_LINKFILE: case ST_LINKDIR: - inode_bh = bh; retval = -ENOSPC; block = affs_alloc_block(dir, dir->i_ino); if (!block) goto err; retval = -EIO; + inode_bh = bh; bh = affs_getzeroblk(sb, block); if (!bh) goto err; diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 2218f1ee71c..cfcf1b6cf82 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -234,7 +234,8 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) int affs_unlink(struct inode *dir, struct dentry *dentry) { - pr_debug("AFFS: unlink(dir=%d, \"%.*s\")\n", (u32)dir->i_ino, + pr_debug("AFFS: unlink(dir=%d, %lu \"%.*s\")\n", (u32)dir->i_ino, + dentry->d_inode->i_ino, (int)dentry->d_name.len, dentry->d_name.name); return affs_remove_header(dentry); @@ -302,7 +303,8 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, int mode) int affs_rmdir(struct inode *dir, struct dentry *dentry) { - pr_debug("AFFS: rmdir(dir=%u, \"%.*s\")\n", (u32)dir->i_ino, + pr_debug("AFFS: rmdir(dir=%u, %lu \"%.*s\")\n", (u32)dir->i_ino, + dentry->d_inode->i_ino, (int)dentry->d_name.len, dentry->d_name.name); return affs_remove_header(dentry); diff --git a/fs/affs/super.c b/fs/affs/super.c index d2dc047cb47..d214837d5e4 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -71,12 +71,18 @@ static struct kmem_cache * affs_inode_cachep; static struct inode *affs_alloc_inode(struct super_block *sb) { - struct affs_inode_info *ei; - ei = (struct affs_inode_info *)kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL); - if (!ei) + struct affs_inode_info *i; + + i = kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL); + if (!i) return NULL; - ei->vfs_inode.i_version = 1; - return &ei->vfs_inode; + + i->vfs_inode.i_version = 1; + i->i_lc = NULL; + i->i_ext_bh = NULL; + i->i_pa_cnt = 0; + + return &i->vfs_inode; } static void affs_destroy_inode(struct inode *inode) @@ -114,8 +120,6 @@ static const struct super_operations affs_sops = { .alloc_inode = affs_alloc_inode, .destroy_inode = affs_destroy_inode, .write_inode = affs_write_inode, - .put_inode = affs_put_inode, - .drop_inode = affs_drop_inode, .delete_inode = affs_delete_inode, .clear_inode = affs_clear_inode, .put_super = affs_put_super, @@ -199,7 +203,6 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s case Opt_prefix: /* Free any previous prefix */ kfree(*prefix); - *prefix = NULL; *prefix = match_strdup(&args[0]); if (!*prefix) return 0; @@ -233,6 +236,8 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s break; case Opt_volume: { char *vol = match_strdup(&args[0]); + if (!vol) + return 0; strlcpy(volume, vol, 32); kfree(vol); break; diff --git a/fs/afs/afs_cm.h b/fs/afs/afs_cm.h index 7b4d4fab4c8..255f5dd6040 100644 --- a/fs/afs/afs_cm.h +++ b/fs/afs/afs_cm.h @@ -24,7 +24,8 @@ enum AFS_CM_Operations { CBGetXStatsVersion = 209, /* get version of extended statistics */ CBGetXStats = 210, /* get contents of extended statistics data */ CBInitCallBackState3 = 213, /* initialise callback state, version 3 */ - CBGetCapabilities = 65538, /* get client capabilities */ + CBProbeUuid = 214, /* check the client hasn't rebooted */ + CBTellMeAboutYourself = 65538, /* get client capabilities */ }; #define AFS_CAP_ERROR_TRANSLATION 0x1 diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 584bb0f9c36..5e1df14e16b 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -20,7 +20,7 @@ DECLARE_RWSEM(afs_proc_cells_sem); LIST_HEAD(afs_proc_cells); -static struct list_head afs_cells = LIST_HEAD_INIT(afs_cells); +static LIST_HEAD(afs_cells); static DEFINE_RWLOCK(afs_cells_lock); static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */ static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq); diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 47b71c8947f..eb765489164 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -26,8 +26,9 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *, struct sk_buff *, bool); static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool); static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool); -static int afs_deliver_cb_get_capabilities(struct afs_call *, struct sk_buff *, - bool); +static int afs_deliver_cb_probe_uuid(struct afs_call *, struct sk_buff *, bool); +static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *, + struct sk_buff *, bool); static void afs_cm_destructor(struct afs_call *); /* @@ -71,11 +72,21 @@ static const struct afs_call_type afs_SRXCBProbe = { }; /* - * CB.GetCapabilities operation type + * CB.ProbeUuid operation type */ -static const struct afs_call_type afs_SRXCBGetCapabilites = { - .name = "CB.GetCapabilities", - .deliver = afs_deliver_cb_get_capabilities, +static const struct afs_call_type afs_SRXCBProbeUuid = { + .name = "CB.ProbeUuid", + .deliver = afs_deliver_cb_probe_uuid, + .abort_to_error = afs_abort_to_error, + .destructor = afs_cm_destructor, +}; + +/* + * CB.TellMeAboutYourself operation type + */ +static const struct afs_call_type afs_SRXCBTellMeAboutYourself = { + .name = "CB.TellMeAboutYourself", + .deliver = afs_deliver_cb_tell_me_about_yourself, .abort_to_error = afs_abort_to_error, .destructor = afs_cm_destructor, }; @@ -103,8 +114,8 @@ bool afs_cm_incoming_call(struct afs_call *call) case CBProbe: call->type = &afs_SRXCBProbe; return true; - case CBGetCapabilities: - call->type = &afs_SRXCBGetCapabilites; + case CBTellMeAboutYourself: + call->type = &afs_SRXCBTellMeAboutYourself; return true; default: return false; @@ -393,9 +404,105 @@ static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb, } /* + * allow the fileserver to quickly find out if the fileserver has been rebooted + */ +static void SRXAFSCB_ProbeUuid(struct work_struct *work) +{ + struct afs_call *call = container_of(work, struct afs_call, work); + struct afs_uuid *r = call->request; + + struct { + __be32 match; + } reply; + + _enter(""); + + + if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0) + reply.match = htonl(0); + else + reply.match = htonl(1); + + afs_send_simple_reply(call, &reply, sizeof(reply)); + _leave(""); +} + +/* + * deliver request data to a CB.ProbeUuid call + */ +static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, + bool last) +{ + struct afs_uuid *r; + unsigned loop; + __be32 *b; + int ret; + + _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + + if (skb->len > 0) + return -EBADMSG; + if (!last) + return 0; + + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->buffer = kmalloc(11 * sizeof(__be32), GFP_KERNEL); + if (!call->buffer) + return -ENOMEM; + call->unmarshall++; + + case 1: + _debug("extract UUID"); + ret = afs_extract_data(call, skb, last, call->buffer, + 11 * sizeof(__be32)); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + + _debug("unmarshall UUID"); + call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL); + if (!call->request) + return -ENOMEM; + + b = call->buffer; + r = call->request; + r->time_low = ntohl(b[0]); + r->time_mid = ntohl(b[1]); + r->time_hi_and_version = ntohl(b[2]); + r->clock_seq_hi_and_reserved = ntohl(b[3]); + r->clock_seq_low = ntohl(b[4]); + + for (loop = 0; loop < 6; loop++) + r->node[loop] = ntohl(b[loop + 5]); + + call->offset = 0; + call->unmarshall++; + + case 2: + _debug("trailer"); + if (skb->len != 0) + return -EBADMSG; + break; + } + + if (!last) + return 0; + + call->state = AFS_CALL_REPLYING; + + INIT_WORK(&call->work, SRXAFSCB_ProbeUuid); + schedule_work(&call->work); + return 0; +} + +/* * allow the fileserver to ask about the cache manager's capabilities */ -static void SRXAFSCB_GetCapabilities(struct work_struct *work) +static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) { struct afs_interface *ifs; struct afs_call *call = container_of(work, struct afs_call, work); @@ -456,10 +563,10 @@ static void SRXAFSCB_GetCapabilities(struct work_struct *work) } /* - * deliver request data to a CB.GetCapabilities call + * deliver request data to a CB.TellMeAboutYourself call */ -static int afs_deliver_cb_get_capabilities(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call, + struct sk_buff *skb, bool last) { _enter(",{%u},%d", skb->len, last); @@ -471,7 +578,7 @@ static int afs_deliver_cb_get_capabilities(struct afs_call *call, /* no unmarshalling required */ call->state = AFS_CALL_REPLYING; - INIT_WORK(&call->work, SRXAFSCB_GetCapabilities); + INIT_WORK(&call->work, SRXAFSCB_TellMeAboutYourself); schedule_work(&call->work); return 0; } diff --git a/fs/afs/dir.c b/fs/afs/dir.c index b58af8f18bc..dfda03d4397 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -140,7 +140,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page) if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) { printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n", - __FUNCTION__, dir->i_ino, qty, + __func__, dir->i_ino, qty, ntohs(dbuf->blocks[0].pagehdr.npages)); goto error; } @@ -159,7 +159,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page) for (tmp = 0; tmp < qty; tmp++) { if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) { printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n", - __FUNCTION__, dir->i_ino, tmp, qty, + __func__, dir->i_ino, tmp, qty, ntohs(dbuf->blocks[tmp].pagehdr.magic)); goto error; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index eec41c76de7..7102824ba84 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -757,8 +757,8 @@ void _dbprintk(const char *fmt, ...) { } -#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) -#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) +#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) +#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) #define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) @@ -791,8 +791,8 @@ do { \ } while (0) #else -#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) -#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) +#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) +#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) #define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__) #endif diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 846c7615ac9..9f7d1ae7026 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -41,6 +41,7 @@ static const struct file_operations afs_proc_cells_fops = { .write = afs_proc_cells_write, .llseek = seq_lseek, .release = seq_release, + .owner = THIS_MODULE, }; static int afs_proc_rootcell_open(struct inode *inode, struct file *file); @@ -56,7 +57,8 @@ static const struct file_operations afs_proc_rootcell_fops = { .read = afs_proc_rootcell_read, .write = afs_proc_rootcell_write, .llseek = no_llseek, - .release = afs_proc_rootcell_release + .release = afs_proc_rootcell_release, + .owner = THIS_MODULE, }; static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file); @@ -80,6 +82,7 @@ static const struct file_operations afs_proc_cell_volumes_fops = { .read = seq_read, .llseek = seq_lseek, .release = afs_proc_cell_volumes_release, + .owner = THIS_MODULE, }; static int afs_proc_cell_vlservers_open(struct inode *inode, @@ -104,6 +107,7 @@ static const struct file_operations afs_proc_cell_vlservers_fops = { .read = seq_read, .llseek = seq_lseek, .release = afs_proc_cell_vlservers_release, + .owner = THIS_MODULE, }; static int afs_proc_cell_servers_open(struct inode *inode, struct file *file); @@ -127,6 +131,7 @@ static const struct file_operations afs_proc_cell_servers_fops = { .read = seq_read, .llseek = seq_lseek, .release = afs_proc_cell_servers_release, + .owner = THIS_MODULE, }; /* @@ -143,17 +148,13 @@ int afs_proc_init(void) goto error_dir; proc_afs->owner = THIS_MODULE; - p = create_proc_entry("cells", 0, proc_afs); + p = proc_create("cells", 0, proc_afs, &afs_proc_cells_fops); if (!p) goto error_cells; - p->proc_fops = &afs_proc_cells_fops; - p->owner = THIS_MODULE; - p = create_proc_entry("rootcell", 0, proc_afs); + p = proc_create("rootcell", 0, proc_afs, &afs_proc_rootcell_fops); if (!p) goto error_rootcell; - p->proc_fops = &afs_proc_rootcell_fops; - p->owner = THIS_MODULE; _leave(" = 0"); return 0; @@ -395,26 +396,20 @@ int afs_proc_cell_setup(struct afs_cell *cell) if (!cell->proc_dir) goto error_dir; - p = create_proc_entry("servers", 0, cell->proc_dir); + p = proc_create_data("servers", 0, cell->proc_dir, + &afs_proc_cell_servers_fops, cell); if (!p) goto error_servers; - p->proc_fops = &afs_proc_cell_servers_fops; - p->owner = THIS_MODULE; - p->data = cell; - p = create_proc_entry("vlservers", 0, cell->proc_dir); + p = proc_create_data("vlservers", 0, cell->proc_dir, + &afs_proc_cell_vlservers_fops, cell); if (!p) goto error_vlservers; - p->proc_fops = &afs_proc_cell_vlservers_fops; - p->owner = THIS_MODULE; - p->data = cell; - p = create_proc_entry("volumes", 0, cell->proc_dir); + p = proc_create_data("volumes", 0, cell->proc_dir, + &afs_proc_cell_volumes_fops, cell); if (!p) goto error_volumes; - p->proc_fops = &afs_proc_cell_volumes_fops; - p->owner = THIS_MODULE; - p->data = cell; _leave(" = 0"); return 0; @@ -191,6 +191,43 @@ static int aio_setup_ring(struct kioctx *ctx) kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ } while(0) + +/* __put_ioctx + * Called when the last user of an aio context has gone away, + * and the struct needs to be freed. + */ +static void __put_ioctx(struct kioctx *ctx) +{ + unsigned nr_events = ctx->max_reqs; + + BUG_ON(ctx->reqs_active); + + cancel_delayed_work(&ctx->wq); + cancel_work_sync(&ctx->wq.work); + aio_free_ring(ctx); + mmdrop(ctx->mm); + ctx->mm = NULL; + pr_debug("__put_ioctx: freeing %p\n", ctx); + kmem_cache_free(kioctx_cachep, ctx); + + if (nr_events) { + spin_lock(&aio_nr_lock); + BUG_ON(aio_nr - nr_events > aio_nr); + aio_nr -= nr_events; + spin_unlock(&aio_nr_lock); + } +} + +#define get_ioctx(kioctx) do { \ + BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ + atomic_inc(&(kioctx)->users); \ +} while (0) +#define put_ioctx(kioctx) do { \ + BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ + if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \ + __put_ioctx(kioctx); \ +} while (0) + /* ioctx_alloc * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. */ @@ -240,7 +277,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) if (ctx->max_reqs == 0) goto out_cleanup; - /* now link into global list. kludge. FIXME */ + /* now link into global list. */ write_lock(&mm->ioctx_list_lock); ctx->next = mm->ioctx_list; mm->ioctx_list = ctx; @@ -361,32 +398,6 @@ void exit_aio(struct mm_struct *mm) } } -/* __put_ioctx - * Called when the last user of an aio context has gone away, - * and the struct needs to be freed. - */ -void __put_ioctx(struct kioctx *ctx) -{ - unsigned nr_events = ctx->max_reqs; - - BUG_ON(ctx->reqs_active); - - cancel_delayed_work(&ctx->wq); - cancel_work_sync(&ctx->wq.work); - aio_free_ring(ctx); - mmdrop(ctx->mm); - ctx->mm = NULL; - pr_debug("__put_ioctx: freeing %p\n", ctx); - kmem_cache_free(kioctx_cachep, ctx); - - if (nr_events) { - spin_lock(&aio_nr_lock); - BUG_ON(aio_nr - nr_events > aio_nr); - aio_nr -= nr_events; - spin_unlock(&aio_nr_lock); - } -} - /* aio_get_req * Allocate a slot for an aio request. Increments the users count * of the kioctx so that the kioctx stays around until all requests are @@ -542,10 +553,7 @@ int aio_put_req(struct kiocb *req) return ret; } -/* Lookup an ioctx id. ioctx_list is lockless for reads. - * FIXME: this is O(n) and is only suitable for development. - */ -struct kioctx *lookup_ioctx(unsigned long ctx_id) +static struct kioctx *lookup_ioctx(unsigned long ctx_id) { struct kioctx *ioctx; struct mm_struct *mm; @@ -1070,9 +1078,7 @@ static void timeout_func(unsigned long data) static inline void init_timeout(struct aio_timeout *to) { - init_timer(&to->timer); - to->timer.data = (unsigned long)to; - to->timer.function = timeout_func; + setup_timer_on_stack(&to->timer, timeout_func, (unsigned long) to); to->timed_out = 0; to->p = current; } @@ -1205,6 +1211,7 @@ retry: if (timeout) clear_timeout(&to); out: + destroy_timer_on_stack(&to.timer); return i ? i : ret; } @@ -1552,7 +1559,7 @@ static int aio_wake_function(wait_queue_t *wait, unsigned mode, return 1; } -int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, +static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, struct iocb *iocb) { struct kiocb *req; @@ -1593,7 +1600,7 @@ int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, * event using the eventfd_signal() function. */ req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd); - if (unlikely(IS_ERR(req->ki_eventfd))) { + if (IS_ERR(req->ki_eventfd)) { ret = PTR_ERR(req->ki_eventfd); goto out_put_req; } diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index f42be069e08..977ef208c05 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -57,9 +57,6 @@ static struct dentry_operations anon_inodefs_dentry_operations = { * anonymous inode, and a dentry that describe the "class" * of the file * - * @pfd: [out] pointer to the file descriptor - * @dpinode: [out] pointer to the inode - * @pfile: [out] pointer to the file struct * @name: [in] name of the "class" of the new file * @fops [in] file operations for the new file * @priv [in] private data for the new file (will be file's private_data) @@ -68,10 +65,9 @@ static struct dentry_operations anon_inodefs_dentry_operations = { * that do not need to have a full-fledged inode in order to operate correctly. * All the files created with anon_inode_getfd() will share a single inode, * hence saving memory and avoiding code duplication for the file/inode/dentry - * setup. + * setup. Returns new descriptor or -error. */ -int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile, - const char *name, const struct file_operations *fops, +int anon_inode_getfd(const char *name, const struct file_operations *fops, void *priv) { struct qstr this; @@ -125,10 +121,7 @@ int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile, fd_install(fd, file); - *pfd = fd; - *pinode = anon_inode_inode; - *pfile = file; - return 0; + return fd; err_dput: dput(dentry); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 2d4ae40718d..c3d352d7fa9 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -35,7 +35,7 @@ /* #define DEBUG */ #ifdef DEBUG -#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __FUNCTION__ , ##args); } while(0) +#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __func__ , ##args); } while(0) #else #define DPRINTK(fmt,args...) do {} while(0) #endif diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index d96e5c14a9c..894fee54d4d 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -73,8 +73,8 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) status = 0; done: DPRINTK("returning = %d", status); - mntput(mnt); dput(dentry); + mntput(mnt); return status; } @@ -333,7 +333,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb, /* Can we expire this guy */ if (autofs4_can_expire(dentry, timeout, do_now)) { expired = dentry; - break; + goto found; } goto next; } @@ -352,7 +352,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb, inf->flags |= AUTOFS_INF_EXPIRING; spin_unlock(&sbi->fs_lock); expired = dentry; - break; + goto found; } spin_unlock(&sbi->fs_lock); /* @@ -363,7 +363,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb, expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); if (expired) { dput(dentry); - break; + goto found; } } next: @@ -371,18 +371,16 @@ next: spin_lock(&dcache_lock); next = next->next; } - - if (expired) { - DPRINTK("returning %p %.*s", - expired, (int)expired->d_name.len, expired->d_name.name); - spin_lock(&dcache_lock); - list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); - spin_unlock(&dcache_lock); - return expired; - } spin_unlock(&dcache_lock); - return NULL; + +found: + DPRINTK("returning %p %.*s", + expired, (int)expired->d_name.len, expired->d_name.name); + spin_lock(&dcache_lock); + list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); + spin_unlock(&dcache_lock); + return expired; } /* Perform an expiry operation */ diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index a54a946a50a..edf5b6bddb5 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -146,17 +146,17 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) if (d_mountpoint(dentry)) { struct file *fp = NULL; - struct vfsmount *fp_mnt = mntget(mnt); - struct dentry *fp_dentry = dget(dentry); + struct path fp_path = { .dentry = dentry, .mnt = mnt }; - if (!autofs4_follow_mount(&fp_mnt, &fp_dentry)) { - dput(fp_dentry); - mntput(fp_mnt); + path_get(&fp_path); + + if (!autofs4_follow_mount(&fp_path.mnt, &fp_path.dentry)) { + path_put(&fp_path); dcache_dir_close(inode, file); goto out; } - fp = dentry_open(fp_dentry, fp_mnt, file->f_flags); + fp = dentry_open(fp_path.dentry, fp_path.mnt, file->f_flags); status = PTR_ERR(fp); if (IS_ERR(fp)) { dcache_dir_close(inode, file); @@ -242,7 +242,8 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); - int status = 0; + struct dentry *new; + int status; /* Block on any pending expiry here; invalidate the dentry when expiration is done to trigger mount request with a new @@ -318,7 +319,28 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; spin_unlock(&dentry->d_lock); - return status; + + /* + * The dentry that is passed in from lookup may not be the one + * we end up using, as mkdir can create a new one. If this + * happens, and another process tries the lookup at the same time, + * it will set the PENDING flag on this new dentry, but add itself + * to our waitq. Then, if after the lookup succeeds, the first + * process that requested the mount performs another lookup of the + * same directory, it will show up as still pending! So, we need + * to redo the lookup here and clear pending on that dentry. + */ + if (d_unhashed(dentry)) { + new = d_lookup(dentry->d_parent, &dentry->d_name); + if (new) { + spin_lock(&new->d_lock); + new->d_flags &= ~DCACHE_AUTOFS_PENDING; + spin_unlock(&new->d_lock); + dput(new); + } + } + + return 0; } /* For autofs direct mounts the follow link triggers the mount */ @@ -533,9 +555,9 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct goto next; if (d_unhashed(dentry)) { - struct autofs_info *ino = autofs4_dentry_ino(dentry); struct inode *inode = dentry->d_inode; + ino = autofs4_dentry_ino(dentry); list_del_init(&ino->rehash); dget(dentry); /* diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 1fe28e4754c..75e5955c3f6 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -171,7 +171,7 @@ static int autofs4_getpath(struct autofs_sb_info *sbi, for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) len += tmp->d_name.len + 1; - if (--len > NAME_MAX) { + if (!len || --len > NAME_MAX) { spin_unlock(&dcache_lock); return 0; } diff --git a/fs/befs/endian.h b/fs/befs/endian.h index e254a20869f..6cb84d896d0 100644 --- a/fs/befs/endian.h +++ b/fs/befs/endian.h @@ -9,7 +9,7 @@ #ifndef LINUX_BEFS_ENDIAN #define LINUX_BEFS_ENDIAN -#include <linux/byteorder/generic.h> +#include <asm/byteorder.h> static inline u64 fs64_to_cpu(const struct super_block *sb, fs64 n) diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 82123ff3e1d..e8717de3bab 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -489,9 +489,9 @@ static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) { befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { - char *p = nd_get_link(nd); - if (!IS_ERR(p)) - kfree(p); + char *link = nd_get_link(nd); + if (!IS_ERR(link)) + kfree(link); } } diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h index 71faf4d2390..70f5d3a8eed 100644 --- a/fs/bfs/bfs.h +++ b/fs/bfs/bfs.h @@ -42,7 +42,7 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode) #define printf(format, args...) \ - printk(KERN_ERR "BFS-fs: %s(): " format, __FUNCTION__, ## args) + printk(KERN_ERR "BFS-fs: %s(): " format, __func__, ## args) /* inode.c */ extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino); diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index a1bb2244cac..ba4cddb92f1 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -372,21 +372,17 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data); } else { - static unsigned long error_time, error_time2; if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && - (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) + (N_MAGIC(ex) != NMAGIC) && printk_ratelimit()) { printk(KERN_NOTICE "executable not page aligned\n"); - error_time2 = jiffies; } - if ((fd_offset & ~PAGE_MASK) != 0 && - (jiffies-error_time) > 5*HZ) + if ((fd_offset & ~PAGE_MASK) != 0 && printk_ratelimit()) { printk(KERN_WARNING "fd_offset is not page aligned. Please convert program: %s\n", bprm->file->f_path.dentry->d_name.name); - error_time = jiffies; } if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { @@ -495,15 +491,13 @@ static int load_aout_library(struct file *file) start_addr = ex.a_entry & 0xfffff000; if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { - static unsigned long error_time; loff_t pos = N_TXTOFF(ex); - if ((jiffies-error_time) > 5*HZ) + if (printk_ratelimit()) { printk(KERN_WARNING "N_TXTOFF is not page aligned. Please convert library: %s\n", file->f_path.dentry->d_name.name); - error_time = jiffies; } down_write(¤t->mm->mmap_sem); do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 9924581df6f..b25707fee2c 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1255,26 +1255,23 @@ static int writenote(struct memelfnote *men, struct file *file, static void fill_elf_header(struct elfhdr *elf, int segs, u16 machine, u32 flags, u8 osabi) { + memset(elf, 0, sizeof(*elf)); + memcpy(elf->e_ident, ELFMAG, SELFMAG); elf->e_ident[EI_CLASS] = ELF_CLASS; elf->e_ident[EI_DATA] = ELF_DATA; elf->e_ident[EI_VERSION] = EV_CURRENT; elf->e_ident[EI_OSABI] = ELF_OSABI; - memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); elf->e_type = ET_CORE; elf->e_machine = machine; elf->e_version = EV_CURRENT; - elf->e_entry = 0; elf->e_phoff = sizeof(struct elfhdr); - elf->e_shoff = 0; elf->e_flags = flags; elf->e_ehsize = sizeof(struct elfhdr); elf->e_phentsize = sizeof(struct elf_phdr); elf->e_phnum = segs; - elf->e_shentsize = 0; - elf->e_shnum = 0; - elf->e_shstrndx = 0; + return; } @@ -1725,26 +1722,25 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, info->thread_status_size = 0; if (signr) { - struct elf_thread_status *tmp; + struct elf_thread_status *ets; rcu_read_lock(); do_each_thread(g, p) if (current->mm == p->mm && current != p) { - tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); - if (!tmp) { + ets = kzalloc(sizeof(*ets), GFP_ATOMIC); + if (!ets) { rcu_read_unlock(); return 0; } - tmp->thread = p; - list_add(&tmp->list, &info->thread_list); + ets->thread = p; + list_add(&ets->list, &info->thread_list); } while_each_thread(g, p); rcu_read_unlock(); list_for_each(t, &info->thread_list) { - struct elf_thread_status *tmp; int sz; - tmp = list_entry(t, struct elf_thread_status, list); - sz = elf_dump_thread_status(signr, tmp); + ets = list_entry(t, struct elf_thread_status, list); + sz = elf_dump_thread_status(signr, ets); info->thread_status_size += sz; } } @@ -2000,10 +1996,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) { struct page *page; - struct vm_area_struct *vma; + struct vm_area_struct *tmp_vma; if (get_user_pages(current, current->mm, addr, 1, 0, 1, - &page, &vma) <= 0) { + &page, &tmp_vma) <= 0) { DUMP_SEEK(PAGE_SIZE); } else { if (page == ZERO_PAGE(0)) { @@ -2013,7 +2009,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un } } else { void *kaddr; - flush_cache_page(vma, addr, + flush_cache_page(tmp_vma, addr, page_to_pfn(page)); kaddr = kmap(page); if ((size += PAGE_SIZE) > limit || diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 32649f2a165..ddd35d87339 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -136,8 +136,8 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, retval = kernel_read(file, params->hdr.e_phoff, (char *) params->phdrs, size); - if (retval < 0) - return retval; + if (unlikely(retval != size)) + return retval < 0 ? retval : -ENOEXEC; /* determine stack size for this binary */ phdr = params->phdrs; @@ -218,8 +218,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, phdr->p_offset, interpreter_name, phdr->p_filesz); - if (retval < 0) + if (unlikely(retval != phdr->p_filesz)) { + if (retval >= 0) + retval = -ENOEXEC; goto error; + } retval = -ENOENT; if (interpreter_name[phdr->p_filesz - 1] != '\0') @@ -245,8 +248,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE); - if (retval < 0) + if (unlikely(retval != BINPRM_BUF_SIZE)) { + if (retval >= 0) + retval = -ENOEXEC; goto error; + } interp_params.hdr = *((struct elfhdr *) bprm->buf); break; diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index f95ae9789c9..f9c88d0c8ce 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -43,7 +43,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) return -ENOEXEC; } - bprm->sh_bang++; /* Well, the bang-shell is implicit... */ + bprm->sh_bang = 1; /* Well, the bang-shell is implicit... */ allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 0498b181dd5..3b40d45a3a1 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -531,7 +531,8 @@ static int load_flat_file(struct linux_binprm * bprm, DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n"); down_write(¤t->mm->mmap_sem); - textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_PRIVATE, 0); + textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, + MAP_PRIVATE|MAP_EXECUTABLE, 0); up_write(¤t->mm->mmap_sem); if (!textpos || textpos >= (unsigned long) -4096) { if (!textpos) @@ -932,14 +933,8 @@ static int __init init_flat_binfmt(void) return register_binfmt(&flat_format); } -static void __exit exit_flat_binfmt(void) -{ - unregister_binfmt(&flat_format); -} - /****************************************************************************/ core_initcall(init_flat_binfmt); -module_exit(exit_flat_binfmt); /****************************************************************************/ diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index dbf0ac0523d..7191306367c 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -115,6 +115,12 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!enabled) goto _ret; + retval = -ENOEXEC; + if (bprm->misc_bang) + goto _ret; + + bprm->misc_bang = 1; + /* to keep locking time low, we copy the interpreter string */ read_lock(&entries_lock); fmt = check_file(bprm); diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index ab33939b12a..9e3963f7ebf 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -29,7 +29,7 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) * Sorta complicated, but hopefully it will work. -TYT */ - bprm->sh_bang++; + bprm->sh_bang = 1; allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; @@ -158,7 +158,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) bio_init(bio); if (likely(nr_iovecs)) { - unsigned long idx = 0; /* shut up gcc */ + unsigned long uninitialized_var(idx); bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); if (unlikely(!bvl)) { @@ -937,6 +937,96 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, return ERR_PTR(-EINVAL); } +static void bio_copy_kern_endio(struct bio *bio, int err) +{ + struct bio_vec *bvec; + const int read = bio_data_dir(bio) == READ; + char *p = bio->bi_private; + int i; + + __bio_for_each_segment(bvec, bio, i, 0) { + char *addr = page_address(bvec->bv_page); + + if (read && !err) + memcpy(p, addr, bvec->bv_len); + + __free_page(bvec->bv_page); + p += bvec->bv_len; + } + + bio_put(bio); +} + +/** + * bio_copy_kern - copy kernel address into bio + * @q: the struct request_queue for the bio + * @data: pointer to buffer to copy + * @len: length in bytes + * @gfp_mask: allocation flags for bio and page allocation + * @reading: data direction is READ + * + * copy the kernel address into a bio suitable for io to a block + * device. Returns an error pointer in case of error. + */ +struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, + gfp_t gfp_mask, int reading) +{ + unsigned long kaddr = (unsigned long)data; + unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long start = kaddr >> PAGE_SHIFT; + const int nr_pages = end - start; + struct bio *bio; + struct bio_vec *bvec; + int i, ret; + + bio = bio_alloc(gfp_mask, nr_pages); + if (!bio) + return ERR_PTR(-ENOMEM); + + while (len) { + struct page *page; + unsigned int bytes = PAGE_SIZE; + + if (bytes > len) + bytes = len; + + page = alloc_page(q->bounce_gfp | gfp_mask); + if (!page) { + ret = -ENOMEM; + goto cleanup; + } + + if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) { + ret = -EINVAL; + goto cleanup; + } + + len -= bytes; + } + + if (!reading) { + void *p = data; + + bio_for_each_segment(bvec, bio, i) { + char *addr = page_address(bvec->bv_page); + + memcpy(addr, p, bvec->bv_len); + p += bvec->bv_len; + } + } + + bio->bi_private = data; + bio->bi_end_io = bio_copy_kern_endio; + return bio; +cleanup: + bio_for_each_segment(bvec, bio, i) + __free_page(bvec->bv_page); + + bio_put(bio); + + return ERR_PTR(ret); +} + /* * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions * for performing direct-IO in BIOs. @@ -1273,6 +1363,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs); EXPORT_SYMBOL(bio_map_user); EXPORT_SYMBOL(bio_unmap_user); EXPORT_SYMBOL(bio_map_kern); +EXPORT_SYMBOL(bio_copy_kern); EXPORT_SYMBOL(bio_pair_release); EXPORT_SYMBOL(bio_split); EXPORT_SYMBOL(bio_split_pool); diff --git a/fs/buffer.c b/fs/buffer.c index 3db4a26adc4..a073f3f4f01 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1101,7 +1101,7 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) printk(KERN_ERR "%s: requested out-of-range block %llu for " "device %s\n", - __FUNCTION__, (unsigned long long)block, + __func__, (unsigned long long)block, bdevname(bdev, b)); return -EIO; } @@ -2211,8 +2211,8 @@ out: return err; } -int cont_expand_zero(struct file *file, struct address_space *mapping, - loff_t pos, loff_t *bytes) +static int cont_expand_zero(struct file *file, struct address_space *mapping, + loff_t pos, loff_t *bytes) { struct inode *inode = mapping->host; unsigned blocksize = 1 << inode->i_blkbits; @@ -2328,23 +2328,6 @@ int block_commit_write(struct page *page, unsigned from, unsigned to) return 0; } -int generic_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - struct inode *inode = page->mapping->host; - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - __block_commit_write(inode,page,from,to); - /* - * No need to use i_size_read() here, the i_size - * cannot change under us because we hold i_mutex. - */ - if (pos > inode->i_size) { - i_size_write(inode, pos); - mark_inode_dirty(inode); - } - return 0; -} - /* * block_page_mkwrite() is not allowed to change the file size as it gets * called from a page fault handler when a page is first dirtied. Hence we must @@ -3315,7 +3298,6 @@ EXPORT_SYMBOL(end_buffer_write_sync); EXPORT_SYMBOL(file_fsync); EXPORT_SYMBOL(fsync_bdev); EXPORT_SYMBOL(generic_block_bmap); -EXPORT_SYMBOL(generic_commit_write); EXPORT_SYMBOL(generic_cont_expand_simple); EXPORT_SYMBOL(init_buffer); EXPORT_SYMBOL(invalidate_bdev); diff --git a/fs/char_dev.c b/fs/char_dev.c index 038674aa88a..68e510b8845 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -55,7 +55,6 @@ static struct char_device_struct { unsigned int baseminor; int minorct; char name[64]; - struct file_operations *fops; struct cdev *cdev; /* will die */ } *chrdevs[CHRDEV_MAJOR_HASH_SIZE]; diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 05c9da6181c..8355e918fdd 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,3 +1,6 @@ +Version 1.53 +------------ + Version 1.52 ------------ Fix oops on second mount to server when null auth is used. diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index bcda2c6b6a0..cb52cbbe45f 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -460,8 +460,8 @@ decode_negTokenInit(unsigned char *security_blob, int length, unsigned char *sequence_end; unsigned long *oid = NULL; unsigned int cls, con, tag, oidlen, rc; - int use_ntlmssp = FALSE; - int use_kerberos = FALSE; + bool use_ntlmssp = false; + bool use_kerberos = false; *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ @@ -561,15 +561,15 @@ decode_negTokenInit(unsigned char *security_blob, int length, if (compare_oid(oid, oidlen, MSKRB5_OID, MSKRB5_OID_LEN)) - use_kerberos = TRUE; + use_kerberos = true; else if (compare_oid(oid, oidlen, KRB5_OID, KRB5_OID_LEN)) - use_kerberos = TRUE; + use_kerberos = true; else if (compare_oid(oid, oidlen, NTLMSSP_OID, NTLMSSP_OID_LEN)) - use_ntlmssp = TRUE; + use_ntlmssp = true; kfree(oid); } diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 0228ed06069..cc950f69e51 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -468,7 +468,7 @@ cifs_proc_init(void) { struct proc_dir_entry *pde; - proc_fs_cifs = proc_mkdir("cifs", proc_root_fs); + proc_fs_cifs = proc_mkdir("fs/cifs", NULL); if (proc_fs_cifs == NULL) return; @@ -559,7 +559,7 @@ cifs_proc_clean(void) remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); remove_proc_entry("Experimental", proc_fs_cifs); remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); - remove_proc_entry("cifs", proc_root_fs); + remove_proc_entry("fs/cifs", NULL); } static int diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 95024c066d8..f6fdecf6598 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -93,15 +93,11 @@ static char *cifs_get_share_name(const char *node_name) /* find sharename end */ pSep++; pSep = memchr(UNC+(pSep-UNC), '\\', len-(pSep-UNC)); - if (!pSep) { - cERROR(1, ("%s:2 cant find share name in node name: %s", - __func__, node_name)); - kfree(UNC); - return NULL; + if (pSep) { + /* trim path up to sharename end + * now we have share name in UNC */ + *pSep = 0; } - /* trim path up to sharename end - * * now we have share name in UNC */ - *pSep = 0; return UNC; } @@ -188,7 +184,7 @@ static char *compose_mount_options(const char *sb_mountdata, tkn_e = strchr(tkn_e+1, '\\'); if (tkn_e) { strcat(mountdata, ",prefixpath="); - strcat(mountdata, tkn_e); + strcat(mountdata, tkn_e+1); } } @@ -244,7 +240,8 @@ static char *build_full_dfs_path_from_dentry(struct dentry *dentry) return NULL; if (cifs_sb->tcon->Flags & SMB_SHARE_IS_IN_DFS) { - /* we should use full path name to correct working with DFS */ + int i; + /* we should use full path name for correct working with DFS */ l_max_len = strnlen(cifs_sb->tcon->treeName, MAX_TREE_SIZE+1) + strnlen(search_path, MAX_PATHCONF) + 1; tmp_path = kmalloc(l_max_len, GFP_KERNEL); @@ -253,8 +250,14 @@ static char *build_full_dfs_path_from_dentry(struct dentry *dentry) return NULL; } strncpy(tmp_path, cifs_sb->tcon->treeName, l_max_len); - strcat(tmp_path, search_path); tmp_path[l_max_len-1] = 0; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) + for (i = 0; i < l_max_len; i++) { + if (tmp_path[i] == '\\') + tmp_path[i] = '/'; + } + strncat(tmp_path, search_path, l_max_len - strlen(tmp_path)); + full_path = tmp_path; kfree(search_path); } else { diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index e99d4faf5f0..34902cff540 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -559,7 +559,7 @@ static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode, const char *path, const __u16 *pfid) { struct cifsFileInfo *open_file = NULL; - int unlock_file = FALSE; + bool unlock_file = false; int xid; int rc = -EIO; __u16 fid; @@ -586,10 +586,10 @@ static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode, cifs_sb = CIFS_SB(sb); if (open_file) { - unlock_file = TRUE; + unlock_file = true; fid = open_file->netfid; } else if (pfid == NULL) { - int oplock = FALSE; + int oplock = 0; /* open file */ rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, READ_CONTROL, 0, &fid, &oplock, NULL, @@ -604,7 +604,7 @@ static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode, rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen); cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen)); - if (unlock_file == TRUE) /* find_readable_file increments ref count */ + if (unlock_file == true) /* find_readable_file increments ref count */ atomic_dec(&open_file->wrtPending); else if (pfid == NULL) /* if opened above we have to close the handle */ CIFSSMBClose(xid, cifs_sb->tcon, fid); @@ -619,7 +619,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, struct inode *inode, const char *path) { struct cifsFileInfo *open_file; - int unlock_file = FALSE; + bool unlock_file = false; int xid; int rc = -EIO; __u16 fid; @@ -640,10 +640,10 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, open_file = find_readable_file(CIFS_I(inode)); if (open_file) { - unlock_file = TRUE; + unlock_file = true; fid = open_file->netfid; } else { - int oplock = FALSE; + int oplock = 0; /* open file */ rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, WRITE_DAC, 0, &fid, &oplock, NULL, @@ -658,7 +658,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen); cFYI(DBG2, ("SetCIFSACL rc = %d", rc)); - if (unlock_file == TRUE) + if (unlock_file) atomic_dec(&open_file->wrtPending); else CIFSSMBClose(xid, cifs_sb->tcon, fid); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 39c2cbdface..427a7c69589 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -222,50 +222,50 @@ static int cifs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; - int xid; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifsTconInfo *tcon = cifs_sb->tcon; int rc = -EOPNOTSUPP; - struct cifs_sb_info *cifs_sb; - struct cifsTconInfo *pTcon; + int xid; xid = GetXid(); - cifs_sb = CIFS_SB(sb); - pTcon = cifs_sb->tcon; - buf->f_type = CIFS_MAGIC_NUMBER; - /* instead could get the real value via SMB_QUERY_FS_ATTRIBUTE_INFO */ - buf->f_namelen = PATH_MAX; /* PATH_MAX may be too long - it would - presumably be total path, but note - that some servers (includinng Samba 3) - have a shorter maximum path */ + /* + * PATH_MAX may be too long - it would presumably be total path, + * but note that some servers (includinng Samba 3) have a shorter + * maximum path. + * + * Instead could get the real value via SMB_QUERY_FS_ATTRIBUTE_INFO. + */ + buf->f_namelen = PATH_MAX; buf->f_files = 0; /* undefined */ buf->f_ffree = 0; /* unlimited */ -/* BB we could add a second check for a QFS Unix capability bit */ -/* BB FIXME check CIFS_POSIX_EXTENSIONS Unix cap first FIXME BB */ - if ((pTcon->ses->capabilities & CAP_UNIX) && (CIFS_POSIX_EXTENSIONS & - le64_to_cpu(pTcon->fsUnixInfo.Capability))) - rc = CIFSSMBQFSPosixInfo(xid, pTcon, buf); - - /* Only need to call the old QFSInfo if failed - on newer one */ - if (rc) - if (pTcon->ses->capabilities & CAP_NT_SMBS) - rc = CIFSSMBQFSInfo(xid, pTcon, buf); /* not supported by OS2 */ - - /* Some old Windows servers also do not support level 103, retry with - older level one if old server failed the previous call or we - bypassed it because we detected that this was an older LANMAN sess */ + /* + * We could add a second check for a QFS Unix capability bit + */ + if ((tcon->ses->capabilities & CAP_UNIX) && + (CIFS_POSIX_EXTENSIONS & le64_to_cpu(tcon->fsUnixInfo.Capability))) + rc = CIFSSMBQFSPosixInfo(xid, tcon, buf); + + /* + * Only need to call the old QFSInfo if failed on newer one, + * e.g. by OS/2. + **/ + if (rc && (tcon->ses->capabilities & CAP_NT_SMBS)) + rc = CIFSSMBQFSInfo(xid, tcon, buf); + + /* + * Some old Windows servers also do not support level 103, retry with + * older level one if old server failed the previous call or we + * bypassed it because we detected that this was an older LANMAN sess + */ if (rc) - rc = SMBOldQFSInfo(xid, pTcon, buf); - /* int f_type; - __fsid_t f_fsid; - int f_namelen; */ - /* BB get from info in tcon struct at mount time call to QFSAttrInfo */ + rc = SMBOldQFSInfo(xid, tcon, buf); + FreeXid(xid); - return 0; /* always return success? what if volume is no - longer available? */ + return 0; } static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd) @@ -306,8 +306,8 @@ cifs_alloc_inode(struct super_block *sb) /* Until the file is open and we have gotten oplock info back from the server, can not assume caching of file data or metadata */ - cifs_inode->clientCanCacheRead = FALSE; - cifs_inode->clientCanCacheAll = FALSE; + cifs_inode->clientCanCacheRead = false; + cifs_inode->clientCanCacheAll = false; cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ /* Can not set i_flags here - they get immediately overwritten @@ -940,7 +940,7 @@ static int cifs_oplock_thread(void *dummyarg) rc = CIFSSMBLock(0, pTcon, netfid, 0 /* len */ , 0 /* offset */, 0, 0, LOCKING_ANDX_OPLOCK_RELEASE, - 0 /* wait flag */); + false /* wait flag */); cFYI(1, ("Oplock release rc = %d", rc)); } } else diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index e1dd9f32e1d..cd1301a09b3 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -24,14 +24,6 @@ #define ROOT_I 2 -#ifndef FALSE -#define FALSE 0 -#endif - -#ifndef TRUE -#define TRUE 1 -#endif - extern struct file_system_type cifs_fs_type; extern const struct address_space_operations cifs_addr_ops; extern const struct address_space_operations cifs_addr_ops_smallbuf; @@ -110,5 +102,5 @@ extern int cifs_ioctl(struct inode *inode, struct file *filep, extern const struct export_operations cifs_export_ops; #endif /* EXPERIMENTAL */ -#define CIFS_VERSION "1.52" +#define CIFS_VERSION "1.53" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 69a2e194254..b7d9f698e63 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -57,14 +57,6 @@ #include "cifspdu.h" -#ifndef FALSE -#define FALSE 0 -#endif - -#ifndef TRUE -#define TRUE 1 -#endif - #ifndef XATTR_DOS_ATTRIB #define XATTR_DOS_ATTRIB "user.DOSATTRIB" #endif @@ -147,7 +139,7 @@ struct TCP_Server_Info { enum protocolEnum protocolType; char versionMajor; char versionMinor; - unsigned svlocal:1; /* local server or remote */ + bool svlocal:1; /* local server or remote */ atomic_t socketUseCount; /* number of open cifs sessions on socket */ atomic_t inFlight; /* number of requests on the wire to server */ #ifdef CONFIG_CIFS_STATS2 @@ -286,10 +278,10 @@ struct cifsTconInfo { FILE_SYSTEM_DEVICE_INFO fsDevInfo; FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */ FILE_SYSTEM_UNIX_INFO fsUnixInfo; - unsigned ipc:1; /* set if connection to IPC$ eg for RPC/PIPES */ - unsigned retry:1; - unsigned nocase:1; - unsigned unix_ext:1; /* if off disable Linux extensions to CIFS protocol + bool ipc:1; /* set if connection to IPC$ eg for RPC/PIPES */ + bool retry:1; + bool nocase:1; + bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol for this mount even if server would support */ /* BB add field for back pointer to sb struct(s)? */ }; @@ -317,10 +309,10 @@ struct cifs_search_info { char *srch_entries_start; char *presume_name; unsigned int resume_name_len; - unsigned endOfSearch:1; - unsigned emptyDir:1; - unsigned unicode:1; - unsigned smallBuf:1; /* so we know which buf_release function to call */ + bool endOfSearch:1; + bool emptyDir:1; + bool unicode:1; + bool smallBuf:1; /* so we know which buf_release function to call */ }; struct cifsFileInfo { @@ -335,9 +327,9 @@ struct cifsFileInfo { struct inode *pInode; /* needed for oplock break */ struct mutex lock_mutex; struct list_head llist; /* list of byte range locks we have. */ - unsigned closePend:1; /* file is marked to close */ - unsigned invalidHandle:1; /* file closed via session abend */ - unsigned messageMode:1; /* for pipes: message vs byte mode */ + bool closePend:1; /* file is marked to close */ + bool invalidHandle:1; /* file closed via session abend */ + bool messageMode:1; /* for pipes: message vs byte mode */ atomic_t wrtPending; /* handle in use - defer close */ struct semaphore fh_sem; /* prevents reopen race after dead ses*/ char *search_resume_name; /* BB removeme BB */ @@ -356,9 +348,9 @@ struct cifsInodeInfo { __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ atomic_t inUse; /* num concurrent users (local openers cifs) of file*/ unsigned long time; /* jiffies of last update/check of inode */ - unsigned clientCanCacheRead:1; /* read oplock */ - unsigned clientCanCacheAll:1; /* read and writebehind oplock */ - unsigned oplockPending:1; + bool clientCanCacheRead:1; /* read oplock */ + bool clientCanCacheAll:1; /* read and writebehind oplock */ + bool oplockPending:1; struct inode vfs_inode; }; @@ -426,9 +418,9 @@ struct mid_q_entry { struct smb_hdr *resp_buf; /* response buffer */ int midState; /* wish this were enum but can not pass to wait_event */ __u8 command; /* smb command code */ - unsigned largeBuf:1; /* if valid response, is pointer to large buf */ - unsigned multiRsp:1; /* multiple trans2 responses for one request */ - unsigned multiEnd:1; /* both received */ + bool largeBuf:1; /* if valid response, is pointer to large buf */ + bool multiRsp:1; /* multiple trans2 responses for one request */ + bool multiEnd:1; /* both received */ }; struct oplock_q_entry { diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 9f49c2f3582..c43bf4b7a55 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -340,6 +340,7 @@ #define OPEN_NO_RECALL 0x00400000 #define OPEN_FREE_SPACE_QUERY 0x00800000 /* should be zero */ #define CREATE_OPTIONS_MASK 0x007FFFFF +#define CREATE_OPTION_READONLY 0x10000000 #define CREATE_OPTION_SPECIAL 0x20000000 /* system. NB not sent over wire */ /* ImpersonationLevel flags */ @@ -2050,7 +2051,7 @@ typedef struct { to 0xFFFF00 */ #define CIFS_UNIX_LARGE_WRITE_CAP 0x00000080 #define CIFS_UNIX_TRANSPORT_ENCRYPTION_CAP 0x00000100 /* can do SPNEGO crypt */ -#define CIFS_UNIX_TRANPSORT_ENCRYPTION_MANDATORY_CAP 0x00000200 /* must do */ +#define CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP 0x00000200 /* must do */ #define CIFS_UNIX_PROXY_CAP 0x00000400 /* Proxy cap: 0xACE ioctl and QFS PROXY call */ #ifdef CONFIG_CIFS_POSIX diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 50f9fdae19b..d481f6c5a2b 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -59,8 +59,9 @@ extern int SendReceiveBlockingLock(const unsigned int xid, struct smb_hdr *out_buf, int *bytes_returned); extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length); -extern int is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *); -extern int is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); +extern bool is_valid_oplock_break(struct smb_hdr *smb, + struct TCP_Server_Info *); +extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *); #ifdef CONFIG_CIFS_EXPERIMENTAL extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *); @@ -69,7 +70,7 @@ extern unsigned int smbCalcSize(struct smb_hdr *ptr); extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); extern int decode_negTokenInit(unsigned char *security_blob, int length, enum securityEnum *secType); -extern int cifs_inet_pton(int, char *source, void *dst); +extern int cifs_inet_pton(const int, const char *source, void *dst); extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); extern void header_assemble(struct smb_hdr *, char /* command */ , const struct cifsTconInfo *, int /* length of @@ -187,12 +188,12 @@ extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, #endif /* possibly unneeded function */ extern int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, const char *fileName, __u64 size, - int setAllocationSizeFlag, + bool setAllocationSizeFlag, const struct nls_table *nls_codepage, int remap_special_chars); extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, __u16 fileHandle, __u32 opener_pid, - int AllocSizeFlag); + bool AllocSizeFlag); extern int CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *pTcon, char *full_path, __u64 mode, __u64 uid, __u64 gid, dev_t dev, @@ -291,11 +292,11 @@ extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, const __u16 netfid, const __u64 len, const __u64 offset, const __u32 numUnlock, const __u32 numLock, const __u8 lockType, - const int waitFlag); + const bool waitFlag); extern int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, const __u16 smb_file_id, const int get_flag, const __u64 len, struct file_lock *, - const __u16 lock_type, const int waitFlag); + const __u16 lock_type, const bool waitFlag); extern int CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon); extern int CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 4728fa982a4..95fbba4ea7d 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -95,7 +95,7 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon) list_for_each_safe(tmp, tmp1, &pTcon->openFileList) { open_file = list_entry(tmp, struct cifsFileInfo, tlist); if (open_file) - open_file->invalidHandle = TRUE; + open_file->invalidHandle = true; } write_unlock(&GlobalSMBSeslock); /* BB Add call to invalidate_inodes(sb) for all superblocks mounted @@ -141,7 +141,7 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, if (tcon->ses->server->tcpStatus == CifsNeedReconnect) { /* on "soft" mounts we wait once */ - if ((tcon->retry == FALSE) || + if (!tcon->retry || (tcon->ses->status == CifsExiting)) { cFYI(1, ("gave up waiting on " "reconnect in smb_init")); @@ -289,7 +289,7 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, if (tcon->ses->server->tcpStatus == CifsNeedReconnect) { /* on "soft" mounts we wait once */ - if ((tcon->retry == FALSE) || + if (!tcon->retry || (tcon->ses->status == CifsExiting)) { cFYI(1, ("gave up waiting on " "reconnect in smb_init")); @@ -1224,11 +1224,8 @@ OldOpenRetry: else /* BB FIXME BB */ pSMB->FileAttributes = cpu_to_le16(0/*ATTR_NORMAL*/); - /* if ((omode & S_IWUGO) == 0) - pSMB->FileAttributes |= cpu_to_le32(ATTR_READONLY);*/ - /* Above line causes problems due to vfs splitting create into two - pieces - need to set mode after file created not while it is - being created */ + if (create_options & CREATE_OPTION_READONLY) + pSMB->FileAttributes |= cpu_to_le16(ATTR_READONLY); /* BB FIXME BB */ /* pSMB->CreateOptions = cpu_to_le32(create_options & @@ -1331,17 +1328,16 @@ openRetry: pSMB->FileAttributes = cpu_to_le32(ATTR_SYSTEM); else pSMB->FileAttributes = cpu_to_le32(ATTR_NORMAL); + /* XP does not handle ATTR_POSIX_SEMANTICS */ /* but it helps speed up case sensitive checks for other servers such as Samba */ if (tcon->ses->capabilities & CAP_UNIX) pSMB->FileAttributes |= cpu_to_le32(ATTR_POSIX_SEMANTICS); - /* if ((omode & S_IWUGO) == 0) - pSMB->FileAttributes |= cpu_to_le32(ATTR_READONLY);*/ - /* Above line causes problems due to vfs splitting create into two - pieces - need to set mode after file created not while it is - being created */ + if (create_options & CREATE_OPTION_READONLY) + pSMB->FileAttributes |= cpu_to_le32(ATTR_READONLY); + pSMB->ShareAccess = cpu_to_le32(FILE_SHARE_ALL); pSMB->CreateDisposition = cpu_to_le32(openDisposition); pSMB->CreateOptions = cpu_to_le32(create_options & CREATE_OPTIONS_MASK); @@ -1686,7 +1682,7 @@ int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, const __u16 smb_file_id, const __u64 len, const __u64 offset, const __u32 numUnlock, - const __u32 numLock, const __u8 lockType, const int waitFlag) + const __u32 numLock, const __u8 lockType, const bool waitFlag) { int rc = 0; LOCK_REQ *pSMB = NULL; @@ -1695,7 +1691,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, int timeout = 0; __u16 count; - cFYI(1, ("CIFSSMBLock timeout %d numLock %d", waitFlag, numLock)); + cFYI(1, ("CIFSSMBLock timeout %d numLock %d", (int)waitFlag, numLock)); rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB); if (rc) @@ -1706,7 +1702,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, if (lockType == LOCKING_ANDX_OPLOCK_RELEASE) { timeout = CIFS_ASYNC_OP; /* no response expected */ pSMB->Timeout = 0; - } else if (waitFlag == TRUE) { + } else if (waitFlag) { timeout = CIFS_BLOCKING_OP; /* blocking operation, no timeout */ pSMB->Timeout = cpu_to_le32(-1);/* blocking - do not time out */ } else { @@ -1756,7 +1752,7 @@ int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, const __u16 smb_file_id, const int get_flag, const __u64 len, struct file_lock *pLockData, const __u16 lock_type, - const int waitFlag) + const bool waitFlag) { struct smb_com_transaction2_sfi_req *pSMB = NULL; struct smb_com_transaction2_sfi_rsp *pSMBr = NULL; @@ -3581,9 +3577,9 @@ findFirstRetry: rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc == 0) { if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) - psrch_inf->unicode = TRUE; + psrch_inf->unicode = true; else - psrch_inf->unicode = FALSE; + psrch_inf->unicode = false; psrch_inf->ntwrk_buf_start = (char *)pSMBr; psrch_inf->smallBuf = 0; @@ -3594,9 +3590,9 @@ findFirstRetry: le16_to_cpu(pSMBr->t2.ParameterOffset)); if (parms->EndofSearch) - psrch_inf->endOfSearch = TRUE; + psrch_inf->endOfSearch = true; else - psrch_inf->endOfSearch = FALSE; + psrch_inf->endOfSearch = false; psrch_inf->entries_in_buffer = le16_to_cpu(parms->SearchCount); @@ -3624,7 +3620,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, cFYI(1, ("In FindNext")); - if (psrch_inf->endOfSearch == TRUE) + if (psrch_inf->endOfSearch) return -ENOENT; rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, @@ -3682,7 +3678,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, cifs_stats_inc(&tcon->num_fnext); if (rc) { if (rc == -EBADF) { - psrch_inf->endOfSearch = TRUE; + psrch_inf->endOfSearch = true; rc = 0; /* search probably was closed at end of search*/ } else cFYI(1, ("FindNext returned = %d", rc)); @@ -3692,9 +3688,9 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, if (rc == 0) { /* BB fixme add lock for file (srch_info) struct here */ if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) - psrch_inf->unicode = TRUE; + psrch_inf->unicode = true; else - psrch_inf->unicode = FALSE; + psrch_inf->unicode = false; response_data = (char *) &pSMBr->hdr.Protocol + le16_to_cpu(pSMBr->t2.ParameterOffset); parms = (T2_FNEXT_RSP_PARMS *)response_data; @@ -3709,9 +3705,9 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, psrch_inf->ntwrk_buf_start = (char *)pSMB; psrch_inf->smallBuf = 0; if (parms->EndofSearch) - psrch_inf->endOfSearch = TRUE; + psrch_inf->endOfSearch = true; else - psrch_inf->endOfSearch = FALSE; + psrch_inf->endOfSearch = false; psrch_inf->entries_in_buffer = le16_to_cpu(parms->SearchCount); psrch_inf->index_of_last_entry += @@ -4586,7 +4582,7 @@ QFSPosixRetry: int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, const char *fileName, - __u64 size, int SetAllocation, + __u64 size, bool SetAllocation, const struct nls_table *nls_codepage, int remap) { struct smb_com_transaction2_spi_req *pSMB = NULL; @@ -4675,7 +4671,7 @@ SetEOFRetry: int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, - __u16 fid, __u32 pid_of_opener, int SetAllocation) + __u16 fid, __u32 pid_of_opener, bool SetAllocation) { struct smb_com_transaction2_sfi_req *pSMB = NULL; char *data_offset; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e1710673016..f428bf3bf1a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -49,8 +49,6 @@ #define CIFS_PORT 445 #define RFC1001_PORT 139 -static DECLARE_COMPLETION(cifsd_complete); - extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24); @@ -71,23 +69,23 @@ struct smb_vol { mode_t file_mode; mode_t dir_mode; unsigned secFlg; - unsigned rw:1; - unsigned retry:1; - unsigned intr:1; - unsigned setuids:1; - unsigned override_uid:1; - unsigned override_gid:1; - unsigned noperm:1; - unsigned no_psx_acl:1; /* set if posix acl support should be disabled */ - unsigned cifs_acl:1; - unsigned no_xattr:1; /* set if xattr (EA) support should be disabled*/ - unsigned server_ino:1; /* use inode numbers from server ie UniqueId */ - unsigned direct_io:1; - unsigned remap:1; /* set to remap seven reserved chars in filenames */ - unsigned posix_paths:1; /* unset to not ask for posix pathnames. */ - unsigned no_linux_ext:1; - unsigned sfu_emul:1; - unsigned nullauth:1; /* attempt to authenticate with null user */ + bool rw:1; + bool retry:1; + bool intr:1; + bool setuids:1; + bool override_uid:1; + bool override_gid:1; + bool noperm:1; + bool no_psx_acl:1; /* set if posix acl support should be disabled */ + bool cifs_acl:1; + bool no_xattr:1; /* set if xattr (EA) support should be disabled*/ + bool server_ino:1; /* use inode numbers from server ie UniqueId */ + bool direct_io:1; + bool remap:1; /* set to remap seven reserved chars in filenames */ + bool posix_paths:1; /* unset to not ask for posix pathnames. */ + bool no_linux_ext:1; + bool sfu_emul:1; + bool nullauth:1; /* attempt to authenticate with null user */ unsigned nocase; /* request case insensitive filenames */ unsigned nobrl; /* disable sending byte range locks to srv */ unsigned int rsize; @@ -345,18 +343,16 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) struct task_struct *task_to_wake = NULL; struct mid_q_entry *mid_entry; char temp; - int isLargeBuf = FALSE; - int isMultiRsp; + bool isLargeBuf = false; + bool isMultiRsp; int reconnect; current->flags |= PF_MEMALLOC; - server->tsk = current; /* save process info to wake at shutdown */ cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current))); write_lock(&GlobalSMBSeslock); atomic_inc(&tcpSesAllocCount); length = tcpSesAllocCount.counter; write_unlock(&GlobalSMBSeslock); - complete(&cifsd_complete); if (length > 1) mempool_resize(cifs_req_poolp, length + cifs_min_rcv, GFP_KERNEL); @@ -390,8 +386,8 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) } else /* if existing small buf clear beginning */ memset(smallbuf, 0, sizeof(struct smb_hdr)); - isLargeBuf = FALSE; - isMultiRsp = FALSE; + isLargeBuf = false; + isMultiRsp = false; smb_buffer = smallbuf; iov.iov_base = smb_buffer; iov.iov_len = 4; @@ -517,7 +513,7 @@ incomplete_rcv: reconnect = 0; if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) { - isLargeBuf = TRUE; + isLargeBuf = true; memcpy(bigbuf, smallbuf, 4); smb_buffer = bigbuf; } @@ -582,16 +578,18 @@ incomplete_rcv: (mid_entry->command == smb_buffer->Command)) { if (check2ndT2(smb_buffer,server->maxBuf) > 0) { /* We have a multipart transact2 resp */ - isMultiRsp = TRUE; + isMultiRsp = true; if (mid_entry->resp_buf) { /* merge response - fix up 1st*/ if (coalesce_t2(smb_buffer, mid_entry->resp_buf)) { - mid_entry->multiRsp = 1; + mid_entry->multiRsp = + true; break; } else { /* all parts received */ - mid_entry->multiEnd = 1; + mid_entry->multiEnd = + true; goto multi_t2_fnd; } } else { @@ -603,17 +601,15 @@ incomplete_rcv: /* Have first buffer */ mid_entry->resp_buf = smb_buffer; - mid_entry->largeBuf = 1; + mid_entry->largeBuf = + true; bigbuf = NULL; } } break; } mid_entry->resp_buf = smb_buffer; - if (isLargeBuf) - mid_entry->largeBuf = 1; - else - mid_entry->largeBuf = 0; + mid_entry->largeBuf = isLargeBuf; multi_t2_fnd: task_to_wake = mid_entry->tsk; mid_entry->midState = MID_RESPONSE_RECEIVED; @@ -638,8 +634,8 @@ multi_t2_fnd: smallbuf = NULL; } wake_up_process(task_to_wake); - } else if ((is_valid_oplock_break(smb_buffer, server) == FALSE) - && (isMultiRsp == FALSE)) { + } else if (!is_valid_oplock_break(smb_buffer, server) && + !isMultiRsp) { cERROR(1, ("No task to wake, unknown frame received! " "NumMids %d", midCount.counter)); cifs_dump_mem("Received Data is: ", (char *)smb_buffer, @@ -654,10 +650,20 @@ multi_t2_fnd: spin_lock(&GlobalMid_Lock); server->tcpStatus = CifsExiting; - server->tsk = NULL; + spin_unlock(&GlobalMid_Lock); + + /* don't exit until kthread_stop is called */ + set_current_state(TASK_UNINTERRUPTIBLE); + while (!kthread_should_stop()) { + schedule(); + set_current_state(TASK_UNINTERRUPTIBLE); + } + set_current_state(TASK_RUNNING); + /* check if we have blocked requests that need to free */ /* Note that cifs_max_pending is normally 50, but can be set at module install time to as little as two */ + spin_lock(&GlobalMid_Lock); if (atomic_read(&server->inFlight) >= cifs_max_pending) atomic_set(&server->inFlight, cifs_max_pending - 1); /* We do not want to set the max_pending too low or we @@ -825,7 +831,7 @@ cifs_parse_mount_options(char *options, const char *devname, vol->file_mode = (S_IRWXUGO | S_ISGID) & (~S_IXGRP); /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */ - vol->rw = TRUE; + vol->rw = true; /* default is always to request posix paths. */ vol->posix_paths = 1; @@ -1181,7 +1187,7 @@ cifs_parse_mount_options(char *options, const char *devname, } else if (strnicmp(data, "guest", 5) == 0) { /* ignore */ } else if (strnicmp(data, "rw", 2) == 0) { - vol->rw = TRUE; + vol->rw = true; } else if ((strnicmp(data, "suid", 4) == 0) || (strnicmp(data, "nosuid", 6) == 0) || (strnicmp(data, "exec", 4) == 0) || @@ -1197,7 +1203,7 @@ cifs_parse_mount_options(char *options, const char *devname, is ok to just ignore them */ continue; } else if (strnicmp(data, "ro", 2) == 0) { - vol->rw = FALSE; + vol->rw = false; } else if (strnicmp(data, "hard", 4) == 0) { vol->retry = 1; } else if (strnicmp(data, "soft", 4) == 0) { @@ -1305,6 +1311,9 @@ cifs_parse_mount_options(char *options, const char *devname, "begin with // or \\\\ \n"); return 1; } + value = strpbrk(vol->UNC+2, "/\\"); + if (value) + *value = '\\'; } else { printk(KERN_WARNING "CIFS: UNC name too long\n"); return 1; @@ -1318,42 +1327,43 @@ cifs_parse_mount_options(char *options, const char *devname, static struct cifsSesInfo * cifs_find_tcp_session(struct in_addr *target_ip_addr, - struct in6_addr *target_ip6_addr, - char *userName, struct TCP_Server_Info **psrvTcp) + struct in6_addr *target_ip6_addr, + char *userName, struct TCP_Server_Info **psrvTcp) { struct list_head *tmp; struct cifsSesInfo *ses; + *psrvTcp = NULL; - read_lock(&GlobalSMBSeslock); + read_lock(&GlobalSMBSeslock); list_for_each(tmp, &GlobalSMBSessionList) { ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); - if (ses->server) { - if ((target_ip_addr && - (ses->server->addr.sockAddr.sin_addr.s_addr - == target_ip_addr->s_addr)) || (target_ip6_addr - && memcmp(&ses->server->addr.sockAddr6.sin6_addr, - target_ip6_addr, sizeof(*target_ip6_addr)))) { - /* BB lock server and tcp session and increment - use count here?? */ - - /* found a match on the TCP session */ - *psrvTcp = ses->server; - - /* BB check if reconnection needed */ - if (strncmp - (ses->userName, userName, - MAX_USERNAME_SIZE) == 0){ - read_unlock(&GlobalSMBSeslock); - /* Found exact match on both TCP and - SMB sessions */ - return ses; - } - } + if (!ses->server) + continue; + + if (target_ip_addr && + ses->server->addr.sockAddr.sin_addr.s_addr != target_ip_addr->s_addr) + continue; + else if (target_ip6_addr && + memcmp(&ses->server->addr.sockAddr6.sin6_addr, + target_ip6_addr, sizeof(*target_ip6_addr))) + continue; + /* BB lock server and tcp session; increment use count here?? */ + + /* found a match on the TCP session */ + *psrvTcp = ses->server; + + /* BB check if reconnection needed */ + if (strncmp(ses->userName, userName, MAX_USERNAME_SIZE) == 0) { + read_unlock(&GlobalSMBSeslock); + /* Found exact match on both TCP and + SMB sessions */ + return ses; } /* else tcp and smb sessions need reconnection */ } read_unlock(&GlobalSMBSeslock); + return NULL; } @@ -1362,45 +1372,43 @@ find_unc(__be32 new_target_ip_addr, char *uncName, char *userName) { struct list_head *tmp; struct cifsTconInfo *tcon; + __be32 old_ip; read_lock(&GlobalSMBSeslock); + list_for_each(tmp, &GlobalTreeConnectionList) { cFYI(1, ("Next tcon")); tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); - if (tcon->ses) { - if (tcon->ses->server) { - cFYI(1, - ("old ip addr: %x == new ip %x ?", - tcon->ses->server->addr.sockAddr.sin_addr. - s_addr, new_target_ip_addr)); - if (tcon->ses->server->addr.sockAddr.sin_addr. - s_addr == new_target_ip_addr) { - /* BB lock tcon, server and tcp session and increment use count here? */ - /* found a match on the TCP session */ - /* BB check if reconnection needed */ - cFYI(1, - ("IP match, old UNC: %s new: %s", - tcon->treeName, uncName)); - if (strncmp - (tcon->treeName, uncName, - MAX_TREE_SIZE) == 0) { - cFYI(1, - ("and old usr: %s new: %s", - tcon->treeName, uncName)); - if (strncmp - (tcon->ses->userName, - userName, - MAX_USERNAME_SIZE) == 0) { - read_unlock(&GlobalSMBSeslock); - /* matched smb session - (user name */ - return tcon; - } - } - } - } - } + if (!tcon->ses || !tcon->ses->server) + continue; + + old_ip = tcon->ses->server->addr.sockAddr.sin_addr.s_addr; + cFYI(1, ("old ip addr: %x == new ip %x ?", + old_ip, new_target_ip_addr)); + + if (old_ip != new_target_ip_addr) + continue; + + /* BB lock tcon, server, tcp session and increment use count? */ + /* found a match on the TCP session */ + /* BB check if reconnection needed */ + cFYI(1, ("IP match, old UNC: %s new: %s", + tcon->treeName, uncName)); + + if (strncmp(tcon->treeName, uncName, MAX_TREE_SIZE)) + continue; + + cFYI(1, ("and old usr: %s new: %s", + tcon->treeName, uncName)); + + if (strncmp(tcon->ses->userName, userName, MAX_USERNAME_SIZE)) + continue; + + /* matched smb session (user name) */ + read_unlock(&GlobalSMBSeslock); + return tcon; } + read_unlock(&GlobalSMBSeslock); return NULL; } @@ -1982,7 +1990,6 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, kfree(srvTcp->hostname); goto out; } - wait_for_completion(&cifsd_complete); rc = 0; memcpy(srvTcp->workstation_RFC1001_name, volume_info.source_rfc1001_name, 16); @@ -2189,15 +2196,12 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, srvTcp->tcpStatus = CifsExiting; spin_unlock(&GlobalMid_Lock); if (srvTcp->tsk) { - struct task_struct *tsk; /* If we could verify that kthread_stop would always wake up processes blocked in tcp in recv_mesg then we could remove the send_sig call */ force_sig(SIGKILL, srvTcp->tsk); - tsk = srvTcp->tsk; - if (tsk) - kthread_stop(tsk); + kthread_stop(srvTcp->tsk); } } /* If find_unc succeeded then rc == 0 so we can not end */ @@ -2213,23 +2217,17 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, if ((temp_rc == -ESHUTDOWN) && (pSesInfo->server) && (pSesInfo->server->tsk)) { - struct task_struct *tsk; force_sig(SIGKILL, pSesInfo->server->tsk); - tsk = pSesInfo->server->tsk; - if (tsk) - kthread_stop(tsk); + kthread_stop(pSesInfo->server->tsk); } } else { cFYI(1, ("No session or bad tcon")); if ((pSesInfo->server) && (pSesInfo->server->tsk)) { - struct task_struct *tsk; force_sig(SIGKILL, pSesInfo->server->tsk); - tsk = pSesInfo->server->tsk; - if (tsk) - kthread_stop(tsk); + kthread_stop(pSesInfo->server->tsk); } } sesInfoFree(pSesInfo); @@ -2602,7 +2600,7 @@ sesssetup_nomem: /* do not return an error on nomem for the info strings, static int CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, - struct cifsSesInfo *ses, int *pNTLMv2_flag, + struct cifsSesInfo *ses, bool *pNTLMv2_flag, const struct nls_table *nls_codepage) { struct smb_hdr *smb_buffer; @@ -2625,7 +2623,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, if (ses == NULL) return -EINVAL; domain = ses->domainName; - *pNTLMv2_flag = FALSE; + *pNTLMv2_flag = false; smb_buffer = cifs_buf_get(); if (smb_buffer == NULL) { return -ENOMEM; @@ -2778,7 +2776,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, CIFS_CRYPTO_KEY_SIZE); if (SecurityBlob2->NegotiateFlags & cpu_to_le32(NTLMSSP_NEGOTIATE_NTLMV2)) - *pNTLMv2_flag = TRUE; + *pNTLMv2_flag = true; if ((SecurityBlob2->NegotiateFlags & cpu_to_le32(NTLMSSP_NEGOTIATE_ALWAYS_SIGN)) @@ -2939,7 +2937,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, } static int CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, - char *ntlm_session_key, int ntlmv2_flag, + char *ntlm_session_key, bool ntlmv2_flag, const struct nls_table *nls_codepage) { struct smb_hdr *smb_buffer; @@ -3556,8 +3554,6 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) cifs_sb->prepath = NULL; kfree(tmp); if (ses) - schedule_timeout_interruptible(msecs_to_jiffies(500)); - if (ses) sesInfoFree(ses); FreeXid(xid); @@ -3569,7 +3565,7 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, { int rc = 0; char ntlm_session_key[CIFS_SESS_KEY_SIZE]; - int ntlmv2_flag = FALSE; + bool ntlmv2_flag = false; int first_time = 0; /* what if server changes its buffer size after dropping the session? */ diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 0f5c62ba403..e4e0078a052 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -119,6 +119,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, { int rc = -ENOENT; int xid; + int create_options = CREATE_NOT_DIR; int oplock = 0; int desiredAccess = GENERIC_READ | GENERIC_WRITE; __u16 fileHandle; @@ -130,7 +131,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, struct cifsFileInfo *pCifsFile = NULL; struct cifsInodeInfo *pCifsInode; int disposition = FILE_OVERWRITE_IF; - int write_only = FALSE; + bool write_only = false; xid = GetXid(); @@ -152,7 +153,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, if (oflags & FMODE_WRITE) { desiredAccess |= GENERIC_WRITE; if (!(oflags & FMODE_READ)) - write_only = TRUE; + write_only = true; } if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) @@ -176,9 +177,19 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, FreeXid(xid); return -ENOMEM; } + + mode &= ~current->fs->umask; + + /* + * if we're not using unix extensions, see if we need to set + * ATTR_READONLY on the create call + */ + if (!pTcon->unix_ext && (mode & S_IWUGO) == 0) + create_options |= CREATE_OPTION_READONLY; + if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS) rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, - desiredAccess, CREATE_NOT_DIR, + desiredAccess, create_options, &fileHandle, &oplock, buf, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); else @@ -187,7 +198,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, if (rc == -EIO) { /* old server, retry the open legacy style */ rc = SMBLegacyOpen(xid, pTcon, full_path, disposition, - desiredAccess, CREATE_NOT_DIR, + desiredAccess, create_options, &fileHandle, &oplock, buf, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } @@ -197,7 +208,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, /* If Open reported that we actually created a file then we now have to set the mode if possible */ if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) { - mode &= ~current->fs->umask; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, (__u64)current->fsuid, @@ -254,7 +264,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, d_instantiate(direntry, newinode); } if ((nd == NULL /* nfsd case - nfs srv does not set nd */) || - ((nd->flags & LOOKUP_OPEN) == FALSE)) { + (!(nd->flags & LOOKUP_OPEN))) { /* mknod case - do not leave file open */ CIFSSMBClose(xid, pTcon, fileHandle); } else if (newinode) { @@ -266,8 +276,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, pCifsFile->netfid = fileHandle; pCifsFile->pid = current->tgid; pCifsFile->pInode = newinode; - pCifsFile->invalidHandle = FALSE; - pCifsFile->closePend = FALSE; + pCifsFile->invalidHandle = false; + pCifsFile->closePend = false; init_MUTEX(&pCifsFile->fh_sem); mutex_init(&pCifsFile->lock_mutex); INIT_LIST_HEAD(&pCifsFile->llist); @@ -280,7 +290,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, pCifsInode = CIFS_I(newinode); if (pCifsInode) { /* if readable file instance put first in list*/ - if (write_only == TRUE) { + if (write_only) { list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList); } else { @@ -288,12 +298,12 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, &pCifsInode->openFileList); } if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { - pCifsInode->clientCanCacheAll = TRUE; - pCifsInode->clientCanCacheRead = TRUE; + pCifsInode->clientCanCacheAll = true; + pCifsInode->clientCanCacheRead = true; cFYI(1, ("Exclusive Oplock inode %p", newinode)); } else if ((oplock & 0xF) == OPLOCK_READ) - pCifsInode->clientCanCacheRead = TRUE; + pCifsInode->clientCanCacheRead = true; } write_unlock(&GlobalSMBSeslock); } diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 7cc86c41818..939e256f849 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -55,6 +55,32 @@ struct key_type key_type_dns_resolver = { .match = user_match, }; +/* Checks if supplied name is IP address + * returns: + * 1 - name is IP + * 0 - name is not IP + */ +static int is_ip(const char *name) +{ + int rc; + struct sockaddr_in sin_server; + struct sockaddr_in6 sin_server6; + + rc = cifs_inet_pton(AF_INET, name, + &sin_server.sin_addr.s_addr); + + if (rc <= 0) { + /* not ipv4 address, try ipv6 */ + rc = cifs_inet_pton(AF_INET6, name, + &sin_server6.sin6_addr.in6_u); + if (rc > 0) + return 1; + } else { + return 1; + } + /* we failed translating address */ + return 0; +} /* Resolves server name to ip address. * input: @@ -67,8 +93,9 @@ int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) { int rc = -EAGAIN; - struct key *rkey; + struct key *rkey = ERR_PTR(-EAGAIN); char *name; + char *data = NULL; int len; if (!ip_addr || !unc) @@ -97,26 +124,41 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) memcpy(name, unc+2, len); name[len] = 0; + if (is_ip(name)) { + cFYI(1, ("%s: it is IP, skipping dns upcall: %s", + __func__, name)); + data = name; + goto skip_upcall; + } + rkey = request_key(&key_type_dns_resolver, name, ""); if (!IS_ERR(rkey)) { - len = strlen(rkey->payload.data); - *ip_addr = kmalloc(len+1, GFP_KERNEL); - if (*ip_addr) { - memcpy(*ip_addr, rkey->payload.data, len); - (*ip_addr)[len] = '\0'; - cFYI(1, ("%s: resolved: %s to %s", __func__, + data = rkey->payload.data; + cFYI(1, ("%s: resolved: %s to %s", __func__, rkey->description, *ip_addr )); + } else { + cERROR(1, ("%s: unable to resolve: %s", __func__, name)); + goto out; + } + +skip_upcall: + if (data) { + len = strlen(data); + *ip_addr = kmalloc(len+1, GFP_KERNEL); + if (*ip_addr) { + memcpy(*ip_addr, data, len); + (*ip_addr)[len] = '\0'; rc = 0; } else { rc = -ENOMEM; } - key_put(rkey); - } else { - cERROR(1, ("%s: unable to resolve: %s", __func__, name)); + if (!IS_ERR(rkey)) + key_put(rkey); } +out: kfree(name); return rc; } diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c index 7d1d5aa4c43..5a57581eb4b 100644 --- a/fs/cifs/fcntl.c +++ b/fs/cifs/fcntl.c @@ -68,7 +68,7 @@ int cifs_dir_notify(struct file *file, unsigned long arg) { int xid; int rc = -EINVAL; - int oplock = FALSE; + int oplock = 0; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; char *full_path = NULL; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 40b690073fc..31a0a33b9d9 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -51,8 +51,8 @@ static inline struct cifsFileInfo *cifs_init_private( INIT_LIST_HEAD(&private_data->llist); private_data->pfile = file; /* needed for writepage */ private_data->pInode = inode; - private_data->invalidHandle = FALSE; - private_data->closePend = FALSE; + private_data->invalidHandle = false; + private_data->closePend = false; /* we have to track num writers to the inode, since writepages does not tell us which handle the write is for so there can be a close (overlapping with write) of the filehandle that @@ -148,12 +148,12 @@ client_can_cache: full_path, buf, inode->i_sb, xid, NULL); if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) { - pCifsInode->clientCanCacheAll = TRUE; - pCifsInode->clientCanCacheRead = TRUE; + pCifsInode->clientCanCacheAll = true; + pCifsInode->clientCanCacheRead = true; cFYI(1, ("Exclusive Oplock granted on inode %p", file->f_path.dentry->d_inode)); } else if ((*oplock & 0xF) == OPLOCK_READ) - pCifsInode->clientCanCacheRead = TRUE; + pCifsInode->clientCanCacheRead = true; return rc; } @@ -247,7 +247,7 @@ int cifs_open(struct inode *inode, struct file *file) if (oplockEnabled) oplock = REQ_OPLOCK; else - oplock = FALSE; + oplock = 0; /* BB pass O_SYNC flag through on file attributes .. BB */ @@ -339,7 +339,7 @@ static int cifs_relock_file(struct cifsFileInfo *cifsFile) return rc; } -static int cifs_reopen_file(struct file *file, int can_flush) +static int cifs_reopen_file(struct file *file, bool can_flush) { int rc = -EACCES; int xid, oplock; @@ -360,7 +360,7 @@ static int cifs_reopen_file(struct file *file, int can_flush) xid = GetXid(); down(&pCifsFile->fh_sem); - if (pCifsFile->invalidHandle == FALSE) { + if (!pCifsFile->invalidHandle) { up(&pCifsFile->fh_sem); FreeXid(xid); return 0; @@ -404,7 +404,7 @@ reopen_error_exit: if (oplockEnabled) oplock = REQ_OPLOCK; else - oplock = FALSE; + oplock = 0; /* Can not refresh inode by passing in file_info buf to be returned by SMBOpen and then calling get_inode_info with returned buf @@ -422,7 +422,7 @@ reopen_error_exit: cFYI(1, ("oplock: %d", oplock)); } else { pCifsFile->netfid = netfid; - pCifsFile->invalidHandle = FALSE; + pCifsFile->invalidHandle = false; up(&pCifsFile->fh_sem); pCifsInode = CIFS_I(inode); if (pCifsInode) { @@ -432,8 +432,8 @@ reopen_error_exit: CIFS_I(inode)->write_behind_rc = rc; /* temporarily disable caching while we go to server to get inode info */ - pCifsInode->clientCanCacheAll = FALSE; - pCifsInode->clientCanCacheRead = FALSE; + pCifsInode->clientCanCacheAll = false; + pCifsInode->clientCanCacheRead = false; if (pTcon->unix_ext) rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb, xid); @@ -448,16 +448,16 @@ reopen_error_exit: we can not go to the server to get the new inod info */ if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { - pCifsInode->clientCanCacheAll = TRUE; - pCifsInode->clientCanCacheRead = TRUE; + pCifsInode->clientCanCacheAll = true; + pCifsInode->clientCanCacheRead = true; cFYI(1, ("Exclusive Oplock granted on inode %p", file->f_path.dentry->d_inode)); } else if ((oplock & 0xF) == OPLOCK_READ) { - pCifsInode->clientCanCacheRead = TRUE; - pCifsInode->clientCanCacheAll = FALSE; + pCifsInode->clientCanCacheRead = true; + pCifsInode->clientCanCacheAll = false; } else { - pCifsInode->clientCanCacheRead = FALSE; - pCifsInode->clientCanCacheAll = FALSE; + pCifsInode->clientCanCacheRead = false; + pCifsInode->clientCanCacheAll = false; } cifs_relock_file(pCifsFile); } @@ -484,7 +484,7 @@ int cifs_close(struct inode *inode, struct file *file) if (pSMBFile) { struct cifsLockInfo *li, *tmp; - pSMBFile->closePend = TRUE; + pSMBFile->closePend = true; if (pTcon) { /* no sense reconnecting to close a file that is already closed */ @@ -553,8 +553,8 @@ int cifs_close(struct inode *inode, struct file *file) cFYI(1, ("closing last open instance for inode %p", inode)); /* if the file is not open we do not know if we can cache info on this inode, much less write behind and read ahead */ - CIFS_I(inode)->clientCanCacheRead = FALSE; - CIFS_I(inode)->clientCanCacheAll = FALSE; + CIFS_I(inode)->clientCanCacheRead = false; + CIFS_I(inode)->clientCanCacheAll = false; } read_unlock(&GlobalSMBSeslock); if ((rc == 0) && CIFS_I(inode)->write_behind_rc) @@ -583,9 +583,9 @@ int cifs_closedir(struct inode *inode, struct file *file) pTcon = cifs_sb->tcon; cFYI(1, ("Freeing private data in close dir")); - if ((pCFileStruct->srch_inf.endOfSearch == FALSE) && - (pCFileStruct->invalidHandle == FALSE)) { - pCFileStruct->invalidHandle = TRUE; + if (!pCFileStruct->srch_inf.endOfSearch && + !pCFileStruct->invalidHandle) { + pCFileStruct->invalidHandle = true; rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid); cFYI(1, ("Closing uncompleted readdir with rc %d", rc)); @@ -637,12 +637,12 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) __u32 numLock = 0; __u32 numUnlock = 0; __u64 length; - int wait_flag = FALSE; + bool wait_flag = false; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; __u16 netfid; __u8 lockType = LOCKING_ANDX_LARGE_FILES; - int posix_locking; + bool posix_locking; length = 1 + pfLock->fl_end - pfLock->fl_start; rc = -EACCES; @@ -659,7 +659,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) cFYI(1, ("Flock")); if (pfLock->fl_flags & FL_SLEEP) { cFYI(1, ("Blocking lock")); - wait_flag = TRUE; + wait_flag = true; } if (pfLock->fl_flags & FL_ACCESS) cFYI(1, ("Process suspended by mandatory locking - " @@ -794,7 +794,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) stored_rc = CIFSSMBLock(xid, pTcon, netfid, li->length, li->offset, - 1, 0, li->type, FALSE); + 1, 0, li->type, false); if (stored_rc) rc = stored_rc; @@ -866,7 +866,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, filemap_fdatawait from here so tell reopen_file not to flush data to server now */ - rc = cifs_reopen_file(file, FALSE); + rc = cifs_reopen_file(file, false); if (rc != 0) break; } @@ -966,7 +966,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data, filemap_fdatawait from here so tell reopen_file not to flush data to server now */ - rc = cifs_reopen_file(file, FALSE); + rc = cifs_reopen_file(file, false); if (rc != 0) break; } @@ -1093,7 +1093,7 @@ refind_writable: read_unlock(&GlobalSMBSeslock); /* Had to unlock since following call can block */ - rc = cifs_reopen_file(open_file->pfile, FALSE); + rc = cifs_reopen_file(open_file->pfile, false); if (!rc) { if (!open_file->closePend) return open_file; @@ -1608,7 +1608,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data, int buf_type = CIFS_NO_BUFFER; if ((open_file->invalidHandle) && (!open_file->closePend)) { - rc = cifs_reopen_file(file, TRUE); + rc = cifs_reopen_file(file, true); if (rc != 0) break; } @@ -1693,7 +1693,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, while (rc == -EAGAIN) { if ((open_file->invalidHandle) && (!open_file->closePend)) { - rc = cifs_reopen_file(file, TRUE); + rc = cifs_reopen_file(file, true); if (rc != 0) break; } @@ -1850,7 +1850,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, while (rc == -EAGAIN) { if ((open_file->invalidHandle) && (!open_file->closePend)) { - rc = cifs_reopen_file(file, TRUE); + rc = cifs_reopen_file(file, true); if (rc != 0) break; } @@ -2009,10 +2009,10 @@ static int is_inode_writable(struct cifsInodeInfo *cifs_inode) refreshing the inode only on increases in the file size but this is tricky to do without racing with writebehind page caching in the current Linux kernel design */ -int is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file) +bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file) { if (!cifsInode) - return 1; + return true; if (is_inode_writable(cifsInode)) { /* This inode is open for write at least once */ @@ -2022,15 +2022,15 @@ int is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file) if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { /* since no page cache to corrupt on directio we can change size safely */ - return 1; + return true; } if (i_size_read(&cifsInode->vfs_inode) < end_of_file) - return 1; + return true; - return 0; + return false; } else - return 1; + return true; } static int cifs_prepare_write(struct file *file, struct page *page, diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index e1031b9e2c5..fcbdbb6ad7b 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -281,7 +281,7 @@ static int decode_sfu_inode(struct inode *inode, __u64 size, struct cifs_sb_info *cifs_sb, int xid) { int rc; - int oplock = FALSE; + int oplock = 0; __u16 netfid; struct cifsTconInfo *pTcon = cifs_sb->tcon; char buf[24]; @@ -389,7 +389,7 @@ int cifs_get_inode_info(struct inode **pinode, struct cifs_sb_info *cifs_sb = CIFS_SB(sb); const unsigned char *full_path = NULL; char *buf = NULL; - int adjustTZ = FALSE; + bool adjustTZ = false; bool is_dfs_referral = false; pTcon = cifs_sb->tcon; @@ -425,7 +425,7 @@ try_again_CIFSSMBQPathInfo: pfindData, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - adjustTZ = TRUE; + adjustTZ = true; } } /* dump_mem("\nQPathInfo return data",&findData, sizeof(findData)); */ @@ -703,7 +703,7 @@ psx_del_no_retry: } else if (rc == -ENOENT) { d_drop(direntry); } else if (rc == -ETXTBSY) { - int oplock = FALSE; + int oplock = 0; __u16 netfid; rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, DELETE, @@ -736,7 +736,7 @@ psx_del_no_retry: rc = -EOPNOTSUPP; if (rc == -EOPNOTSUPP) { - int oplock = FALSE; + int oplock = 0; __u16 netfid; /* rc = CIFSSMBSetAttrLegacy(xid, pTcon, full_path, @@ -774,7 +774,7 @@ psx_del_no_retry: if (direntry->d_inode) drop_nlink(direntry->d_inode); } else if (rc == -ETXTBSY) { - int oplock = FALSE; + int oplock = 0; __u16 netfid; rc = CIFSSMBOpen(xid, pTcon, full_path, @@ -974,8 +974,8 @@ mkdir_get_info: * failed to get it from the server or was set bogus */ if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) direntry->d_inode->i_nlink = 2; + mode &= ~current->fs->umask; if (pTcon->unix_ext) { - mode &= ~current->fs->umask; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, @@ -994,9 +994,16 @@ mkdir_get_info: CIFS_MOUNT_MAP_SPECIAL_CHR); } } else { - /* BB to be implemented via Windows secrty descriptors - eg CIFSSMBWinSetPerms(xid, pTcon, full_path, mode, - -1, -1, local_nls); */ + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && + (mode & S_IWUGO) == 0) { + FILE_BASIC_INFO pInfo; + memset(&pInfo, 0, sizeof(pInfo)); + pInfo.Attributes = cpu_to_le32(ATTR_READONLY); + CIFSSMBSetTimes(xid, pTcon, full_path, + &pInfo, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + } if (direntry->d_inode) { direntry->d_inode->i_mode = mode; direntry->d_inode->i_mode |= S_IFDIR; @@ -1149,7 +1156,7 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, cFYI(1, ("rename rc %d", rc)); if ((rc == -EIO) || (rc == -EEXIST)) { - int oplock = FALSE; + int oplock = 0; __u16 netfid; /* BB FIXME Is Generic Read correct for rename? */ @@ -1186,7 +1193,7 @@ int cifs_revalidate(struct dentry *direntry) struct cifsInodeInfo *cifsInode; loff_t local_size; struct timespec local_mtime; - int invalidate_inode = FALSE; + bool invalidate_inode = false; if (direntry->d_inode == NULL) return -ENOENT; @@ -1268,7 +1275,7 @@ int cifs_revalidate(struct dentry *direntry) only ones who could have modified the file and the server copy is staler than ours */ } else { - invalidate_inode = TRUE; + invalidate_inode = true; } } @@ -1402,24 +1409,25 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) int rc = -EACCES; struct cifsFileInfo *open_file = NULL; FILE_BASIC_INFO time_buf; - int set_time = FALSE; - int set_dosattr = FALSE; + bool set_time = false; + bool set_dosattr = false; __u64 mode = 0xFFFFFFFFFFFFFFFFULL; __u64 uid = 0xFFFFFFFFFFFFFFFFULL; __u64 gid = 0xFFFFFFFFFFFFFFFFULL; struct cifsInodeInfo *cifsInode; + struct inode *inode = direntry->d_inode; xid = GetXid(); cFYI(1, ("setattr on file %s attrs->iavalid 0x%x", direntry->d_name.name, attrs->ia_valid)); - cifs_sb = CIFS_SB(direntry->d_inode->i_sb); + cifs_sb = CIFS_SB(inode->i_sb); pTcon = cifs_sb->tcon; if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { /* check if we have permission to change attrs */ - rc = inode_change_ok(direntry->d_inode, attrs); + rc = inode_change_ok(inode, attrs); if (rc < 0) { FreeXid(xid); return rc; @@ -1432,7 +1440,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) FreeXid(xid); return -ENOMEM; } - cifsInode = CIFS_I(direntry->d_inode); + cifsInode = CIFS_I(inode); if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) { /* @@ -1443,9 +1451,9 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) will be truncated anyway? Also, should we error out here if the flush returns error? */ - rc = filemap_write_and_wait(direntry->d_inode->i_mapping); + rc = filemap_write_and_wait(inode->i_mapping); if (rc != 0) { - CIFS_I(direntry->d_inode)->write_behind_rc = rc; + cifsInode->write_behind_rc = rc; rc = 0; } } @@ -1464,7 +1472,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) __u16 nfid = open_file->netfid; __u32 npid = open_file->pid; rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, - nfid, npid, FALSE); + nfid, npid, false); atomic_dec(&open_file->wrtPending); cFYI(1, ("SetFSize for attrs rc = %d", rc)); if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { @@ -1484,14 +1492,14 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) it was found or because there was an error setting it by handle */ rc = CIFSSMBSetEOF(xid, pTcon, full_path, - attrs->ia_size, FALSE, + attrs->ia_size, false, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc)); if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { __u16 netfid; - int oplock = FALSE; + int oplock = 0; rc = SMBLegacyOpen(xid, pTcon, full_path, FILE_OPEN, @@ -1516,14 +1524,13 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) /* Server is ok setting allocation size implicitly - no need to call: - CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, TRUE, + CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, true, cifs_sb->local_nls); */ if (rc == 0) { - rc = cifs_vmtruncate(direntry->d_inode, attrs->ia_size); - cifs_truncate_page(direntry->d_inode->i_mapping, - direntry->d_inode->i_size); + rc = cifs_vmtruncate(inode, attrs->ia_size); + cifs_truncate_page(inode->i_mapping, inode->i_size); } else goto cifs_setattr_exit; } @@ -1557,14 +1564,14 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) rc = 0; #ifdef CONFIG_CIFS_EXPERIMENTAL if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) - rc = mode_to_acl(direntry->d_inode, full_path, mode); + rc = mode_to_acl(inode, full_path, mode); else if ((mode & S_IWUGO) == 0) { #else if ((mode & S_IWUGO) == 0) { #endif /* not writeable */ if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) { - set_dosattr = TRUE; + set_dosattr = true; time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs | ATTR_READONLY); @@ -1574,28 +1581,24 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) not be able to write to it - so if any write bit is enabled for user or group or other we need to at least try to remove r/o dos attr */ - set_dosattr = TRUE; + set_dosattr = true; time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs & (~ATTR_READONLY)); /* Windows ignores set to zero */ if (time_buf.Attributes == 0) time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL); } -#ifdef CONFIG_CIFS_EXPERIMENTAL - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) - mode_to_acl(direntry->d_inode, full_path, mode); -#endif } if (attrs->ia_valid & ATTR_ATIME) { - set_time = TRUE; + set_time = true; time_buf.LastAccessTime = cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); } else time_buf.LastAccessTime = 0; if (attrs->ia_valid & ATTR_MTIME) { - set_time = TRUE; + set_time = true; time_buf.LastWriteTime = cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime)); } else @@ -1606,7 +1609,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) server times */ if (set_time && (attrs->ia_valid & ATTR_CTIME)) { - set_time = TRUE; + set_time = true; /* Although Samba throws this field away it may be useful to Windows - but we do not want to set ctime unless some other @@ -1630,7 +1633,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) rc = -EOPNOTSUPP; if (rc == -EOPNOTSUPP) { - int oplock = FALSE; + int oplock = 0; __u16 netfid; cFYI(1, ("calling SetFileInfo since SetPathInfo for " @@ -1669,7 +1672,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) /* do not need local check to inode_check_ok since the server does that */ if (!rc) - rc = inode_setattr(direntry->d_inode, attrs); + rc = inode_setattr(inode, attrs); cifs_setattr_exit: kfree(full_path); FreeXid(xid); diff --git a/fs/cifs/link.c b/fs/cifs/link.c index d4e7ec93285..1c2c3ce5020 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -230,7 +230,7 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen) struct inode *inode = direntry->d_inode; int rc = -EACCES; int xid; - int oplock = FALSE; + int oplock = 0; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; char *full_path = NULL; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 2a42d9fedbb..1d69b8014e0 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -496,7 +496,8 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) } return 0; } -int + +bool is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) { struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf; @@ -522,17 +523,17 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) pnotify->Action)); /* BB removeme BB */ /* cifs_dump_mem("Rcvd notify Data: ",buf, sizeof(struct smb_hdr)+60); */ - return TRUE; + return true; } if (pSMBr->hdr.Status.CifsError) { cFYI(1, ("notify err 0x%d", pSMBr->hdr.Status.CifsError)); - return TRUE; + return true; } - return FALSE; + return false; } if (pSMB->hdr.Command != SMB_COM_LOCKING_ANDX) - return FALSE; + return false; if (pSMB->hdr.Flags & SMBFLG_RESPONSE) { /* no sense logging error on invalid handle on oplock break - harmless race between close request and oplock @@ -541,21 +542,21 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) if ((NT_STATUS_INVALID_HANDLE) == le32_to_cpu(pSMB->hdr.Status.CifsError)) { cFYI(1, ("invalid handle on oplock break")); - return TRUE; + return true; } else if (ERRbadfid == le16_to_cpu(pSMB->hdr.Status.DosError.Error)) { - return TRUE; + return true; } else { - return FALSE; /* on valid oplock brk we get "request" */ + return false; /* on valid oplock brk we get "request" */ } } if (pSMB->hdr.WordCount != 8) - return FALSE; + return false; cFYI(1, ("oplock type 0x%d level 0x%d", pSMB->LockType, pSMB->OplockLevel)); if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE)) - return FALSE; + return false; /* look up tcon based on tid & uid */ read_lock(&GlobalSMBSeslock); @@ -573,11 +574,11 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) ("file id match, oplock break")); pCifsInode = CIFS_I(netfile->pInode); - pCifsInode->clientCanCacheAll = FALSE; + pCifsInode->clientCanCacheAll = false; if (pSMB->OplockLevel == 0) pCifsInode->clientCanCacheRead - = FALSE; - pCifsInode->oplockPending = TRUE; + = false; + pCifsInode->oplockPending = true; AllocOplockQEntry(netfile->pInode, netfile->netfid, tcon); @@ -585,17 +586,17 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) ("about to wake up oplock thread")); if (oplockThread) wake_up_process(oplockThread); - return TRUE; + return true; } } read_unlock(&GlobalSMBSeslock); cFYI(1, ("No matching file for oplock break")); - return TRUE; + return true; } } read_unlock(&GlobalSMBSeslock); cFYI(1, ("Can not process oplock break for non-existent connection")); - return TRUE; + return true; } void diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 3b5a5ce882b..00f4cff400b 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -132,47 +132,17 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = { {0, 0} }; - -/* if the mount helper is missing we need to reverse the 1st slash - from '/' to backslash in order to format the UNC properly for - ip address parsing and for tree connect (unless the user - remembered to put the UNC name in properly). Fortunately we do - not have to call this twice (we check for IPv4 addresses - first, so it is already converted by the time we - try IPv6 addresses */ -static int canonicalize_unc(char *cp) -{ - int i; - - for (i = 0; i <= 46 /* INET6_ADDRSTRLEN */ ; i++) { - if (cp[i] == 0) - break; - if (cp[i] == '\\') - break; - if (cp[i] == '/') { - cFYI(DBG2, ("change slash to \\ in malformed UNC")); - cp[i] = '\\'; - return 1; - } - } - return 0; -} - /* Convert string containing dotted ip address to binary form */ /* returns 0 if invalid address */ int -cifs_inet_pton(int address_family, char *cp, void *dst) +cifs_inet_pton(const int address_family, const char *cp, void *dst) { int ret = 0; /* calculate length by finding first slash or NULL */ if (address_family == AF_INET) { ret = in4_pton(cp, -1 /* len */, dst, '\\', NULL); - if (ret == 0) { - if (canonicalize_unc(cp)) - ret = in4_pton(cp, -1, dst, '\\', NULL); - } } else if (address_family == AF_INET6) { ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL); } diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 32b445edc88..34ec32100c7 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -447,8 +447,8 @@ static int initiate_cifs_search(const int xid, struct file *file) if (file->private_data == NULL) return -ENOMEM; cifsFile = file->private_data; - cifsFile->invalidHandle = TRUE; - cifsFile->srch_inf.endOfSearch = FALSE; + cifsFile->invalidHandle = true; + cifsFile->srch_inf.endOfSearch = false; cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); if (cifs_sb == NULL) @@ -485,7 +485,7 @@ ffirst_retry: cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); if (rc == 0) - cifsFile->invalidHandle = FALSE; + cifsFile->invalidHandle = false; if ((rc == -EOPNOTSUPP) && (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; @@ -670,7 +670,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, (index_to_find < first_entry_in_buffer)) { /* close and restart search */ cFYI(1, ("search backing up - close and restart search")); - cifsFile->invalidHandle = TRUE; + cifsFile->invalidHandle = true; CIFSFindClose(xid, pTcon, cifsFile->netfid); kfree(cifsFile->search_resume_name); cifsFile->search_resume_name = NULL; @@ -692,7 +692,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, } while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && - (rc == 0) && (cifsFile->srch_inf.endOfSearch == FALSE)) { + (rc == 0) && !cifsFile->srch_inf.endOfSearch) { cFYI(1, ("calling findnext2")); rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, &cifsFile->srch_inf); @@ -1038,7 +1038,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) break; } } /* else { - cifsFile->invalidHandle = TRUE; + cifsFile->invalidHandle = true; CIFSFindClose(xid, pTcon, cifsFile->netfid); } kfree(cifsFile->search_resume_name); diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 58bbfd992cc..ff3232fa101 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -35,11 +35,11 @@ #include "cifs_debug.h" #include "cifsencrypt.h" -#ifndef FALSE -#define FALSE 0 +#ifndef false +#define false 0 #endif -#ifndef TRUE -#define TRUE 1 +#ifndef true +#define true 1 #endif /* following came from the other byteorder.h to avoid include conflicts */ diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 8cd6a445b01..e9527eedc63 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -264,7 +264,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, #ifdef CONFIG_CIFS_EXPERIMENTAL else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { __u16 fid; - int oplock = FALSE; + int oplock = 0; struct cifs_ntsd *pacl = NULL; __u32 buflen = 0; if (experimEnabled) diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c index 95a54253c04..e1c854890f9 100644 --- a/fs/coda/coda_linux.c +++ b/fs/coda/coda_linux.c @@ -134,7 +134,7 @@ void coda_iattr_to_vattr(struct iattr *iattr, struct coda_vattr *vattr) unsigned int valid; /* clean out */ - vattr->va_mode = (umode_t) -1; + vattr->va_mode = -1; vattr->va_uid = (vuid_t) -1; vattr->va_gid = (vgid_t) -1; vattr->va_size = (off_t) -1; diff --git a/fs/coda/dir.c b/fs/coda/dir.c index f89ff083079..3d2580e00a3 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -345,7 +345,7 @@ static int coda_symlink(struct inode *dir_inode, struct dentry *de, } /* destruction routines: unlink, rmdir */ -int coda_unlink(struct inode *dir, struct dentry *de) +static int coda_unlink(struct inode *dir, struct dentry *de) { int error; const char *name = de->d_name.name; @@ -365,7 +365,7 @@ int coda_unlink(struct inode *dir, struct dentry *de) return 0; } -int coda_rmdir(struct inode *dir, struct dentry *de) +static int coda_rmdir(struct inode *dir, struct dentry *de) { const char *name = de->d_name.name; int len = de->d_name.len; @@ -424,7 +424,7 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, /* file operations for directories */ -int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) +static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) { struct coda_file_info *cfi; struct file *host_file; diff --git a/fs/compat.c b/fs/compat.c index 2ce4456aad3..332a869d2c5 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -24,6 +24,7 @@ #include <linux/fcntl.h> #include <linux/namei.h> #include <linux/file.h> +#include <linux/fdtable.h> #include <linux/vfs.h> #include <linux/ioctl.h> #include <linux/init.h> @@ -1634,7 +1635,7 @@ sticky: return ret; } -#ifdef TIF_RESTORE_SIGMASK +#ifdef HAVE_SET_RESTORE_SIGMASK asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask, @@ -1720,7 +1721,7 @@ sticky: if (sigmask) { memcpy(¤t->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } } else if (sigmask) sigprocmask(SIG_SETMASK, &sigsaved, NULL); @@ -1791,7 +1792,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, if (sigmask) { memcpy(¤t->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } ret = -ERESTARTNOHAND; } else if (sigmask) @@ -1825,7 +1826,7 @@ sticky: return ret; } -#endif /* TIF_RESTORE_SIGMASK */ +#endif /* HAVE_SET_RESTORE_SIGMASK */ #if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) /* Stuff for NFS server syscalls... */ @@ -2080,7 +2081,7 @@ long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2) #ifdef CONFIG_EPOLL -#ifdef TIF_RESTORE_SIGMASK +#ifdef HAVE_SET_RESTORE_SIGMASK asmlinkage long compat_sys_epoll_pwait(int epfd, struct compat_epoll_event __user *events, int maxevents, int timeout, @@ -2117,14 +2118,14 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, if (err == -EINTR) { memcpy(¤t->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } else sigprocmask(SIG_SETMASK, &sigsaved, NULL); } return err; } -#endif /* TIF_RESTORE_SIGMASK */ +#endif /* HAVE_SET_RESTORE_SIGMASK */ #endif /* CONFIG_EPOLL */ diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c6e72aebd16..97dba0d9234 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1046,14 +1046,14 @@ static int vt_check(struct file *file) struct inode *inode = file->f_path.dentry->d_inode; struct vc_data *vc; - if (file->f_op->ioctl != tty_ioctl) + if (file->f_op->unlocked_ioctl != tty_ioctl) return -EINVAL; tty = (struct tty_struct *)file->private_data; if (tty_paranoia_check(tty, inode, "tty_ioctl")) return -EINVAL; - if (tty->driver->ioctl != vt_ioctl) + if (tty->ops->ioctl != vt_ioctl) return -EINVAL; vc = (struct vc_data *)tty->driver_data; diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 397cb503a18..2b6cb23dd14 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -115,7 +115,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp goto out; } pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", - __FUNCTION__, count, *ppos, buffer->page); + __func__, count, *ppos, buffer->page); retval = simple_read_from_buffer(buf, count, ppos, buffer->page, buffer->count); out: diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 4c1ebff778e..b9a1d810346 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -47,7 +47,7 @@ static const struct address_space_operations configfs_aops = { static struct backing_dev_info configfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, }; static const struct inode_operations configfs_inode_operations ={ diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index de3b31d0a37..8421cea7d8c 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -92,7 +92,7 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent) root = d_alloc_root(inode); if (!root) { - pr_debug("%s: could not get root dentry!\n",__FUNCTION__); + pr_debug("%s: could not get root dentry!\n",__func__); iput(inode); return -ENOMEM; } diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 78929ea84ff..2a731ef5f30 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -210,13 +210,13 @@ static int configfs_get_target_path(struct config_item * item, struct config_ite if (size > PATH_MAX) return -ENAMETOOLONG; - pr_debug("%s: depth = %d, size = %d\n", __FUNCTION__, depth, size); + pr_debug("%s: depth = %d, size = %d\n", __func__, depth, size); for (s = path; depth--; s += 3) strcpy(s,"../"); fill_item_path(target, path, size); - pr_debug("%s: path = '%s'\n", __FUNCTION__, path); + pr_debug("%s: path = '%s'\n", __func__, path); return 0; } diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index fddffe4851f..159a5efd6a8 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -9,7 +9,7 @@ * 2 as published by the Free Software Foundation. * * debugfs is for people to use instead of /proc or /sys. - * See Documentation/DocBook/kernel-api for more details. + * See Documentation/DocBook/filesystems for more details. * */ diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index f120e120787..285b64a8b06 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -17,6 +17,8 @@ #include <linux/namei.h> #include <linux/mount.h> #include <linux/tty.h> +#include <linux/mutex.h> +#include <linux/idr.h> #include <linux/devpts_fs.h> #include <linux/parser.h> #include <linux/fsnotify.h> @@ -26,6 +28,10 @@ #define DEVPTS_DEFAULT_MODE 0600 +extern int pty_limit; /* Config limit on Unix98 ptys */ +static DEFINE_IDR(allocated_ptys); +static DEFINE_MUTEX(allocated_ptys_lock); + static struct vfsmount *devpts_mnt; static struct dentry *devpts_root; @@ -171,9 +177,44 @@ static struct dentry *get_node(int num) return lookup_one_len(s, root, sprintf(s, "%d", num)); } +int devpts_new_index(void) +{ + int index; + int idr_ret; + +retry: + if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) { + return -ENOMEM; + } + + mutex_lock(&allocated_ptys_lock); + idr_ret = idr_get_new(&allocated_ptys, NULL, &index); + if (idr_ret < 0) { + mutex_unlock(&allocated_ptys_lock); + if (idr_ret == -EAGAIN) + goto retry; + return -EIO; + } + + if (index >= pty_limit) { + idr_remove(&allocated_ptys, index); + mutex_unlock(&allocated_ptys_lock); + return -EIO; + } + mutex_unlock(&allocated_ptys_lock); + return index; +} + +void devpts_kill_index(int idx) +{ + mutex_lock(&allocated_ptys_lock); + idr_remove(&allocated_ptys, idx); + mutex_unlock(&allocated_ptys_lock); +} + int devpts_pty_new(struct tty_struct *tty) { - int number = tty->index; + int number = tty->index; /* tty layer puts index from devpts_new_index() in here */ struct tty_driver *driver = tty->driver; dev_t device = MKDEV(driver->major, driver->minor_start+number); struct dentry *dentry; diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index b64e55e0515..499e16759e9 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -200,7 +200,7 @@ int __init dlm_lockspace_init(void) dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj); if (!dlm_kset) { - printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__); + printk(KERN_WARNING "%s: can not create kset\n", __func__); return -ENOMEM; } return 0; diff --git a/fs/dnotify.c b/fs/dnotify.c index 28d01ed66de..676073b8dda 100644 --- a/fs/dnotify.c +++ b/fs/dnotify.c @@ -20,6 +20,7 @@ #include <linux/init.h> #include <linux/spinlock.h> #include <linux/slab.h> +#include <linux/fdtable.h> int dir_notify_enable __read_mostly = 1; @@ -66,6 +67,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) struct dnotify_struct **prev; struct inode *inode; fl_owner_t id = current->files; + struct file *f; int error = 0; if ((arg & ~DN_MULTISHOT) == 0) { @@ -92,6 +94,15 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) prev = &odn->dn_next; } + rcu_read_lock(); + f = fcheck(fd); + rcu_read_unlock(); + /* we'd lost the race with close(), sod off silently */ + /* note that inode->i_lock prevents reordering problems + * between accesses to descriptor table and ->i_dnotify */ + if (f != filp) + goto out_free; + error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); if (error) goto out_free; diff --git a/fs/dquot.c b/fs/dquot.c index dfba1623ccc..5ac77da1995 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -1491,6 +1491,16 @@ int vfs_quota_off(struct super_block *sb, int type, int remount) /* We need to serialize quota_off() for device */ mutex_lock(&dqopt->dqonoff_mutex); + + /* + * Skip everything if there's nothing to do. We have to do this because + * sometimes we are called when fill_super() failed and calling + * sync_fs() in such cases does no good. + */ + if (!sb_any_quota_enabled(sb) && !sb_any_quota_suspended(sb)) { + mutex_unlock(&dqopt->dqonoff_mutex); + return 0; + } for (cnt = 0; cnt < MAXQUOTAS; cnt++) { toputinode[cnt] = NULL; if (type != -1 && cnt != type) diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 59375efcf39..3e5637fc377 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -14,18 +14,26 @@ int sysctl_drop_caches; static void drop_pagecache_sb(struct super_block *sb) { - struct inode *inode; + struct inode *inode, *toput_inode = NULL; spin_lock(&inode_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { if (inode->i_state & (I_FREEING|I_WILL_FREE)) continue; + if (inode->i_mapping->nrpages == 0) + continue; + __iget(inode); + spin_unlock(&inode_lock); __invalidate_mapping_pages(inode->i_mapping, 0, -1, true); + iput(toput_inode); + toput_inode = inode; + spin_lock(&inode_lock); } spin_unlock(&inode_lock); + iput(toput_inode); } -void drop_pagecache(void) +static void drop_pagecache(void) { struct super_block *sb; @@ -45,7 +53,7 @@ restart: spin_unlock(&sb_lock); } -void drop_slab(void) +static void drop_slab(void) { int nr_objects; diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile index 76885701551..1e34a7fd488 100644 --- a/fs/ecryptfs/Makefile +++ b/fs/ecryptfs/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o -ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o debug.o +ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o debug.o diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index a066e109ad9..cd62d75b2cc 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -119,21 +119,21 @@ static int ecryptfs_calculate_md5(char *dst, if (rc) { printk(KERN_ERR "%s: Error initializing crypto hash; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); goto out; } rc = crypto_hash_update(&desc, &sg, len); if (rc) { printk(KERN_ERR "%s: Error updating crypto hash; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); goto out; } rc = crypto_hash_final(&desc, dst); if (rc) { printk(KERN_ERR "%s: Error finalizing crypto hash; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); goto out; } out: @@ -437,7 +437,7 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page, if (rc < 0) { printk(KERN_ERR "%s: Error attempting to encrypt page with " "page->index = [%ld], extent_offset = [%ld]; " - "rc = [%d]\n", __FUNCTION__, page->index, extent_offset, + "rc = [%d]\n", __func__, page->index, extent_offset, rc); goto out; } @@ -487,7 +487,7 @@ int ecryptfs_encrypt_page(struct page *page) 0, PAGE_CACHE_SIZE); if (rc) printk(KERN_ERR "%s: Error attempting to copy " - "page at index [%ld]\n", __FUNCTION__, + "page at index [%ld]\n", __func__, page->index); goto out; } @@ -508,7 +508,7 @@ int ecryptfs_encrypt_page(struct page *page) extent_offset); if (rc) { printk(KERN_ERR "%s: Error encrypting extent; " - "rc = [%d]\n", __FUNCTION__, rc); + "rc = [%d]\n", __func__, rc); goto out; } ecryptfs_lower_offset_for_extent( @@ -569,7 +569,7 @@ static int ecryptfs_decrypt_extent(struct page *page, if (rc < 0) { printk(KERN_ERR "%s: Error attempting to decrypt to page with " "page->index = [%ld], extent_offset = [%ld]; " - "rc = [%d]\n", __FUNCTION__, page->index, extent_offset, + "rc = [%d]\n", __func__, page->index, extent_offset, rc); goto out; } @@ -622,7 +622,7 @@ int ecryptfs_decrypt_page(struct page *page) ecryptfs_inode); if (rc) printk(KERN_ERR "%s: Error attempting to copy " - "page at index [%ld]\n", __FUNCTION__, + "page at index [%ld]\n", __func__, page->index); goto out; } @@ -656,7 +656,7 @@ int ecryptfs_decrypt_page(struct page *page) extent_offset); if (rc) { printk(KERN_ERR "%s: Error encrypting extent; " - "rc = [%d]\n", __FUNCTION__, rc); + "rc = [%d]\n", __func__, rc); goto out; } } @@ -1215,7 +1215,7 @@ int ecryptfs_read_and_validate_header_region(char *data, ecryptfs_inode); if (rc) { printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); goto out; } if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) { @@ -1246,7 +1246,6 @@ ecryptfs_write_header_metadata(char *virt, (*written) = 6; } -struct kmem_cache *ecryptfs_header_cache_0; struct kmem_cache *ecryptfs_header_cache_1; struct kmem_cache *ecryptfs_header_cache_2; @@ -1320,7 +1319,7 @@ ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat, 0, crypt_stat->num_header_bytes_at_front); if (rc) printk(KERN_ERR "%s: Error attempting to write header " - "information to lower file; rc = [%d]\n", __FUNCTION__, + "information to lower file; rc = [%d]\n", __func__, rc); return rc; } @@ -1365,14 +1364,14 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) } } else { printk(KERN_WARNING "%s: Encrypted flag not set\n", - __FUNCTION__); + __func__); rc = -EINVAL; goto out; } /* Released in this function */ virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL); if (!virt) { - printk(KERN_ERR "%s: Out of memory\n", __FUNCTION__); + printk(KERN_ERR "%s: Out of memory\n", __func__); rc = -ENOMEM; goto out; } @@ -1380,7 +1379,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) ecryptfs_dentry); if (unlikely(rc)) { printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); goto out_free; } if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) @@ -1391,7 +1390,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) ecryptfs_dentry, virt); if (rc) { printk(KERN_ERR "%s: Error writing metadata out to lower file; " - "rc = [%d]\n", __FUNCTION__, rc); + "rc = [%d]\n", __func__, rc); goto out_free; } out_free: @@ -1585,7 +1584,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) if (!page_virt) { rc = -ENOMEM; printk(KERN_ERR "%s: Unable to allocate page_virt\n", - __FUNCTION__); + __func__); goto out; } rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size, diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 5007f788da0..951ee33a022 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -4,7 +4,7 @@ * * Copyright (C) 1997-2003 Erez Zadok * Copyright (C) 2001-2003 Stony Brook University - * Copyright (C) 2004-2007 International Business Machines Corp. + * Copyright (C) 2004-2008 International Business Machines Corp. * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> * Trevor S. Highland <trevor.highland@gmail.com> * Tyler Hicks <tyhicks@ou.edu> @@ -34,6 +34,7 @@ #include <linux/namei.h> #include <linux/scatterlist.h> #include <linux/hash.h> +#include <linux/nsproxy.h> /* Version verification for shared data structures w/ userspace */ #define ECRYPTFS_VERSION_MAJOR 0x00 @@ -49,11 +50,13 @@ #define ECRYPTFS_VERSIONING_POLICY 0x00000008 #define ECRYPTFS_VERSIONING_XATTR 0x00000010 #define ECRYPTFS_VERSIONING_MULTKEY 0x00000020 +#define ECRYPTFS_VERSIONING_DEVMISC 0x00000040 #define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ | ECRYPTFS_VERSIONING_PUBKEY \ | ECRYPTFS_VERSIONING_XATTR \ - | ECRYPTFS_VERSIONING_MULTKEY) + | ECRYPTFS_VERSIONING_MULTKEY \ + | ECRYPTFS_VERSIONING_DEVMISC) #define ECRYPTFS_MAX_PASSWORD_LENGTH 64 #define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH #define ECRYPTFS_SALT_SIZE 8 @@ -73,17 +76,14 @@ #define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32 #define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ #define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3) -#define ECRYPTFS_NLMSG_HELO 100 -#define ECRYPTFS_NLMSG_QUIT 101 -#define ECRYPTFS_NLMSG_REQUEST 102 -#define ECRYPTFS_NLMSG_RESPONSE 103 #define ECRYPTFS_MAX_PKI_NAME_BYTES 16 #define ECRYPTFS_DEFAULT_NUM_USERS 4 #define ECRYPTFS_MAX_NUM_USERS 32768 #define ECRYPTFS_TRANSPORT_NETLINK 0 #define ECRYPTFS_TRANSPORT_CONNECTOR 1 #define ECRYPTFS_TRANSPORT_RELAYFS 2 -#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_NETLINK +#define ECRYPTFS_TRANSPORT_MISCDEV 3 +#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_MISCDEV #define ECRYPTFS_XATTR_NAME "user.ecryptfs" #define RFC2440_CIPHER_DES3_EDE 0x02 @@ -366,32 +366,63 @@ struct ecryptfs_auth_tok_list_item { }; struct ecryptfs_message { + /* Can never be greater than ecryptfs_message_buf_len */ + /* Used to find the parent msg_ctx */ + /* Inherits from msg_ctx->index */ u32 index; u32 data_len; u8 data[]; }; struct ecryptfs_msg_ctx { -#define ECRYPTFS_MSG_CTX_STATE_FREE 0x0001 -#define ECRYPTFS_MSG_CTX_STATE_PENDING 0x0002 -#define ECRYPTFS_MSG_CTX_STATE_DONE 0x0003 - u32 state; - unsigned int index; - unsigned int counter; +#define ECRYPTFS_MSG_CTX_STATE_FREE 0x01 +#define ECRYPTFS_MSG_CTX_STATE_PENDING 0x02 +#define ECRYPTFS_MSG_CTX_STATE_DONE 0x03 +#define ECRYPTFS_MSG_CTX_STATE_NO_REPLY 0x04 + u8 state; +#define ECRYPTFS_MSG_HELO 100 +#define ECRYPTFS_MSG_QUIT 101 +#define ECRYPTFS_MSG_REQUEST 102 +#define ECRYPTFS_MSG_RESPONSE 103 + u8 type; + u32 index; + /* Counter converts to a sequence number. Each message sent + * out for which we expect a response has an associated + * sequence number. The response must have the same sequence + * number as the counter for the msg_stc for the message to be + * valid. */ + u32 counter; + size_t msg_size; struct ecryptfs_message *msg; struct task_struct *task; struct list_head node; + struct list_head daemon_out_list; struct mutex mux; }; extern unsigned int ecryptfs_transport; -struct ecryptfs_daemon_id { - pid_t pid; - uid_t uid; - struct hlist_node id_chain; +struct ecryptfs_daemon; + +struct ecryptfs_daemon { +#define ECRYPTFS_DAEMON_IN_READ 0x00000001 +#define ECRYPTFS_DAEMON_IN_POLL 0x00000002 +#define ECRYPTFS_DAEMON_ZOMBIE 0x00000004 +#define ECRYPTFS_DAEMON_MISCDEV_OPEN 0x00000008 + u32 flags; + u32 num_queued_msg_ctx; + struct pid *pid; + uid_t euid; + struct user_namespace *user_ns; + struct task_struct *task; + struct mutex mux; + struct list_head msg_ctx_out_queue; + wait_queue_head_t wait; + struct hlist_node euid_chain; }; +extern struct mutex ecryptfs_daemon_hash_mux; + static inline struct ecryptfs_file_info * ecryptfs_file_to_private(struct file *file) { @@ -500,7 +531,7 @@ ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt) } #define ecryptfs_printk(type, fmt, arg...) \ - __ecryptfs_printk(type "%s: " fmt, __FUNCTION__, ## arg); + __ecryptfs_printk(type "%s: " fmt, __func__, ## arg); void __ecryptfs_printk(const char *fmt, ...); extern const struct file_operations ecryptfs_main_fops; @@ -581,10 +612,13 @@ int ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); -int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid); -int ecryptfs_process_quit(uid_t uid, pid_t pid); -int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid, - pid_t pid, u32 seq); +int ecryptfs_process_helo(unsigned int transport, uid_t euid, + struct user_namespace *user_ns, struct pid *pid); +int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns, + struct pid *pid); +int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, + struct user_namespace *user_ns, struct pid *pid, + u32 seq); int ecryptfs_send_message(unsigned int transport, char *data, int data_len, struct ecryptfs_msg_ctx **msg_ctx); int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, @@ -593,14 +627,14 @@ int ecryptfs_init_messaging(unsigned int transport); void ecryptfs_release_messaging(unsigned int transport); int ecryptfs_send_netlink(char *data, int data_len, - struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, - u16 msg_flags, pid_t daemon_pid); + struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, + u16 msg_flags, struct pid *daemon_pid); int ecryptfs_init_netlink(void); void ecryptfs_release_netlink(void); int ecryptfs_send_connector(char *data, int data_len, - struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, - u16 msg_flags, pid_t daemon_pid); + struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, + u16 msg_flags, struct pid *daemon_pid); int ecryptfs_init_connector(void); void ecryptfs_release_connector(void); void @@ -642,5 +676,21 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, size_t offset_in_page, size_t size, struct inode *ecryptfs_inode); struct page *ecryptfs_get_locked_page(struct file *file, loff_t index); +int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon); +int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid, + struct user_namespace *user_ns); +int ecryptfs_parse_packet_length(unsigned char *data, size_t *size, + size_t *length_size); +int ecryptfs_write_packet_length(char *dest, size_t size, + size_t *packet_size_length); +int ecryptfs_init_ecryptfs_miscdev(void); +void ecryptfs_destroy_ecryptfs_miscdev(void); +int ecryptfs_send_miscdev(char *data, size_t data_size, + struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, + u16 msg_flags, struct ecryptfs_daemon *daemon); +void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx); +int +ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, + struct user_namespace *user_ns, struct pid *pid); #endif /* #ifndef ECRYPTFS_KERNEL_H */ diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 2b8f5ed4ade..2258b8f654a 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -195,7 +195,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file) file, ecryptfs_inode_to_private(inode)->lower_file); if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); + mutex_lock(&crypt_stat->cs_mutex); crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); + mutex_unlock(&crypt_stat->cs_mutex); rc = 0; goto out; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index e2386115210..c92cc1c00aa 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -37,17 +37,11 @@ static struct dentry *lock_parent(struct dentry *dentry) { struct dentry *dir; - dir = dget(dentry->d_parent); + dir = dget_parent(dentry); mutex_lock_nested(&(dir->d_inode->i_mutex), I_MUTEX_PARENT); return dir; } -static void unlock_parent(struct dentry *dentry) -{ - mutex_unlock(&(dentry->d_parent->d_inode->i_mutex)); - dput(dentry->d_parent); -} - static void unlock_dir(struct dentry *dir) { mutex_unlock(&dir->d_inode->i_mutex); @@ -111,7 +105,7 @@ ecryptfs_do_create(struct inode *directory_inode, lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); lower_dir_dentry = lock_parent(lower_dentry); - if (unlikely(IS_ERR(lower_dir_dentry))) { + if (IS_ERR(lower_dir_dentry)) { ecryptfs_printk(KERN_ERR, "Error locking directory of " "dentry\n"); rc = PTR_ERR(lower_dir_dentry); @@ -121,7 +115,7 @@ ecryptfs_do_create(struct inode *directory_inode, ecryptfs_dentry, mode, nd); if (rc) { printk(KERN_ERR "%s: Failure to create dentry in lower fs; " - "rc = [%d]\n", __FUNCTION__, rc); + "rc = [%d]\n", __func__, rc); goto out_lock; } rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, @@ -426,8 +420,9 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) int rc = 0; struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); + struct dentry *lower_dir_dentry; - lock_parent(lower_dentry); + lower_dir_dentry = lock_parent(lower_dentry); rc = vfs_unlink(lower_dir_inode, lower_dentry); if (rc) { printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); @@ -439,7 +434,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_ctime = dir->i_ctime; d_drop(dentry); out_unlock: - unlock_parent(lower_dentry); + unlock_dir(lower_dir_dentry); return rc; } @@ -908,7 +903,9 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) ia->ia_valid &= ~ATTR_MODE; + mutex_lock(&lower_dentry->d_inode->i_mutex); rc = notify_change(lower_dentry, ia); + mutex_unlock(&lower_dentry->d_inode->i_mutex); out: fsstack_copy_attr_all(inode, lower_inode, NULL); return rc; diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 682b1b2482c..e82b457180b 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -65,7 +65,7 @@ static int process_request_key_err(long err_code) } /** - * parse_packet_length + * ecryptfs_parse_packet_length * @data: Pointer to memory containing length at offset * @size: This function writes the decoded size to this memory * address; zero on error @@ -73,8 +73,8 @@ static int process_request_key_err(long err_code) * * Returns zero on success; non-zero on error */ -static int parse_packet_length(unsigned char *data, size_t *size, - size_t *length_size) +int ecryptfs_parse_packet_length(unsigned char *data, size_t *size, + size_t *length_size) { int rc = 0; @@ -105,7 +105,7 @@ out: } /** - * write_packet_length + * ecryptfs_write_packet_length * @dest: The byte array target into which to write the length. Must * have at least 5 bytes allocated. * @size: The length to write. @@ -114,8 +114,8 @@ out: * * Returns zero on success; non-zero on error. */ -static int write_packet_length(char *dest, size_t size, - size_t *packet_size_length) +int ecryptfs_write_packet_length(char *dest, size_t size, + size_t *packet_size_length) { int rc = 0; @@ -162,8 +162,8 @@ write_tag_64_packet(char *signature, struct ecryptfs_session_key *session_key, goto out; } message[i++] = ECRYPTFS_TAG_64_PACKET_TYPE; - rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, - &packet_size_len); + rc = ecryptfs_write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, + &packet_size_len); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet " "header; cannot generate packet length\n"); @@ -172,8 +172,9 @@ write_tag_64_packet(char *signature, struct ecryptfs_session_key *session_key, i += packet_size_len; memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); i += ECRYPTFS_SIG_SIZE_HEX; - rc = write_packet_length(&message[i], session_key->encrypted_key_size, - &packet_size_len); + rc = ecryptfs_write_packet_length(&message[i], + session_key->encrypted_key_size, + &packet_size_len); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet " "header; cannot generate packet length\n"); @@ -225,7 +226,7 @@ parse_tag_65_packet(struct ecryptfs_session_key *session_key, u8 *cipher_code, rc = -EIO; goto out; } - rc = parse_packet_length(&data[i], &m_size, &data_len); + rc = ecryptfs_parse_packet_length(&data[i], &m_size, &data_len); if (rc) { ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " "rc = [%d]\n", rc); @@ -304,8 +305,8 @@ write_tag_66_packet(char *signature, u8 cipher_code, goto out; } message[i++] = ECRYPTFS_TAG_66_PACKET_TYPE; - rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, - &packet_size_len); + rc = ecryptfs_write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, + &packet_size_len); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " "header; cannot generate packet length\n"); @@ -315,8 +316,8 @@ write_tag_66_packet(char *signature, u8 cipher_code, memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); i += ECRYPTFS_SIG_SIZE_HEX; /* The encrypted key includes 1 byte cipher code and 2 byte checksum */ - rc = write_packet_length(&message[i], crypt_stat->key_size + 3, - &packet_size_len); + rc = ecryptfs_write_packet_length(&message[i], crypt_stat->key_size + 3, + &packet_size_len); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " "header; cannot generate packet length\n"); @@ -357,20 +358,25 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec, /* verify that everything through the encrypted FEK size is present */ if (message_len < 4) { rc = -EIO; + printk(KERN_ERR "%s: message_len is [%Zd]; minimum acceptable " + "message length is [%d]\n", __func__, message_len, 4); goto out; } if (data[i++] != ECRYPTFS_TAG_67_PACKET_TYPE) { - ecryptfs_printk(KERN_ERR, "Type should be ECRYPTFS_TAG_67\n"); rc = -EIO; + printk(KERN_ERR "%s: Type should be ECRYPTFS_TAG_67\n", + __func__); goto out; } if (data[i++]) { - ecryptfs_printk(KERN_ERR, "Status indicator has non zero value" - " [%d]\n", data[i-1]); rc = -EIO; + printk(KERN_ERR "%s: Status indicator has non zero " + "value [%d]\n", __func__, data[i-1]); + goto out; } - rc = parse_packet_length(&data[i], &key_rec->enc_key_size, &data_len); + rc = ecryptfs_parse_packet_length(&data[i], &key_rec->enc_key_size, + &data_len); if (rc) { ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " "rc = [%d]\n", rc); @@ -378,17 +384,17 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec, } i += data_len; if (message_len < (i + key_rec->enc_key_size)) { - ecryptfs_printk(KERN_ERR, "message_len [%d]; max len is [%d]\n", - message_len, (i + key_rec->enc_key_size)); rc = -EIO; + printk(KERN_ERR "%s: message_len [%Zd]; max len is [%Zd]\n", + __func__, message_len, (i + key_rec->enc_key_size)); goto out; } if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) { - ecryptfs_printk(KERN_ERR, "Encrypted key_size [%d] larger than " - "the maximum key size [%d]\n", - key_rec->enc_key_size, - ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES); rc = -EIO; + printk(KERN_ERR "%s: Encrypted key_size [%Zd] larger than " + "the maximum key size [%d]\n", __func__, + key_rec->enc_key_size, + ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES); goto out; } memcpy(key_rec->enc_key, &data[i], key_rec->enc_key_size); @@ -445,7 +451,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key), &netlink_message, &netlink_message_length); if (rc) { - ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet"); + ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet\n"); goto out; } rc = ecryptfs_send_message(ecryptfs_transport, netlink_message, @@ -570,8 +576,8 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat, goto out; } (*new_auth_tok) = &auth_tok_list_item->auth_tok; - rc = parse_packet_length(&data[(*packet_size)], &body_size, - &length_size); + rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size, + &length_size); if (rc) { printk(KERN_WARNING "Error parsing packet length; " "rc = [%d]\n", rc); @@ -704,8 +710,8 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, goto out; } (*new_auth_tok) = &auth_tok_list_item->auth_tok; - rc = parse_packet_length(&data[(*packet_size)], &body_size, - &length_size); + rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size, + &length_size); if (rc) { printk(KERN_WARNING "Error parsing packet length; rc = [%d]\n", rc); @@ -852,8 +858,8 @@ parse_tag_11_packet(unsigned char *data, unsigned char *contents, rc = -EINVAL; goto out; } - rc = parse_packet_length(&data[(*packet_size)], &body_size, - &length_size); + rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size, + &length_size); if (rc) { printk(KERN_WARNING "Invalid tag 11 packet format\n"); goto out; @@ -1405,8 +1411,8 @@ write_tag_1_packet(char *dest, size_t *remaining_bytes, auth_tok->token.private_key.key_size; rc = pki_encrypt_session_key(auth_tok, crypt_stat, key_rec); if (rc) { - ecryptfs_printk(KERN_ERR, "Failed to encrypt session key " - "via a pki"); + printk(KERN_ERR "Failed to encrypt session key via a key " + "module; rc = [%d]\n", rc); goto out; } if (ecryptfs_verbosity > 0) { @@ -1430,8 +1436,9 @@ encrypted_session_key_set: goto out; } dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE; - rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4), - &packet_size_length); + rc = ecryptfs_write_packet_length(&dest[(*packet_size)], + (max_packet_size - 4), + &packet_size_length); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet " "header; cannot generate packet length\n"); @@ -1489,8 +1496,9 @@ write_tag_11_packet(char *dest, size_t *remaining_bytes, char *contents, goto out; } dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE; - rc = write_packet_length(&dest[(*packet_length)], - (max_packet_size - 4), &packet_size_length); + rc = ecryptfs_write_packet_length(&dest[(*packet_length)], + (max_packet_size - 4), + &packet_size_length); if (rc) { printk(KERN_ERR "Error generating tag 11 packet header; cannot " "generate packet length. rc = [%d]\n", rc); @@ -1682,8 +1690,9 @@ encrypted_session_key_set: dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE; /* Chop off the Tag 3 identifier(1) and Tag 3 packet size(3) * to get the number of octets in the actual Tag 3 packet */ - rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4), - &packet_size_length); + rc = ecryptfs_write_packet_length(&dest[(*packet_size)], + (max_packet_size - 4), + &packet_size_length); if (rc) { printk(KERN_ERR "Error generating tag 3 packet header; cannot " "generate packet length. rc = [%d]\n", rc); diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index d25ac9500a9..d603631601e 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -219,7 +219,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, if (rc) { printk(KERN_ERR "%s: Error attempting to initialize the " "persistent file for the dentry with name [%s]; " - "rc = [%d]\n", __FUNCTION__, dentry->d_name.name, rc); + "rc = [%d]\n", __func__, dentry->d_name.name, rc); goto out; } out: diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 9cc2aec27b0..1b5c20058ac 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -1,7 +1,7 @@ /** * eCryptfs: Linux filesystem encryption layer * - * Copyright (C) 2004-2006 International Business Machines Corp. + * Copyright (C) 2004-2008 International Business Machines Corp. * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> * Tyler Hicks <tyhicks@ou.edu> * @@ -20,19 +20,21 @@ * 02111-1307, USA. */ #include <linux/sched.h> +#include <linux/user_namespace.h> +#include <linux/nsproxy.h> #include "ecryptfs_kernel.h" static LIST_HEAD(ecryptfs_msg_ctx_free_list); static LIST_HEAD(ecryptfs_msg_ctx_alloc_list); static struct mutex ecryptfs_msg_ctx_lists_mux; -static struct hlist_head *ecryptfs_daemon_id_hash; -static struct mutex ecryptfs_daemon_id_hash_mux; +static struct hlist_head *ecryptfs_daemon_hash; +struct mutex ecryptfs_daemon_hash_mux; static int ecryptfs_hash_buckets; #define ecryptfs_uid_hash(uid) \ hash_long((unsigned long)uid, ecryptfs_hash_buckets) -static unsigned int ecryptfs_msg_counter; +static u32 ecryptfs_msg_counter; static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; /** @@ -40,9 +42,10 @@ static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; * @msg_ctx: The context that was acquired from the free list * * Acquires a context element from the free list and locks the mutex - * on the context. Returns zero on success; non-zero on error or upon - * failure to acquire a free context element. Be sure to lock the - * list mutex before calling. + * on the context. Sets the msg_ctx task to current. Returns zero on + * success; non-zero on error or upon failure to acquire a free + * context element. Must be called with ecryptfs_msg_ctx_lists_mux + * held. */ static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx) { @@ -50,11 +53,11 @@ static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx) int rc; if (list_empty(&ecryptfs_msg_ctx_free_list)) { - ecryptfs_printk(KERN_WARNING, "The eCryptfs free " - "context list is empty. It may be helpful to " - "specify the ecryptfs_message_buf_len " - "parameter to be greater than the current " - "value of [%d]\n", ecryptfs_message_buf_len); + printk(KERN_WARNING "%s: The eCryptfs free " + "context list is empty. It may be helpful to " + "specify the ecryptfs_message_buf_len " + "parameter to be greater than the current " + "value of [%d]\n", __func__, ecryptfs_message_buf_len); rc = -ENOMEM; goto out; } @@ -75,8 +78,7 @@ out: * ecryptfs_msg_ctx_free_to_alloc * @msg_ctx: The context to move from the free list to the alloc list * - * Be sure to lock the list mutex and the context mutex before - * calling. + * Must be called with ecryptfs_msg_ctx_lists_mux held. */ static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx) { @@ -89,36 +91,39 @@ static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx) * ecryptfs_msg_ctx_alloc_to_free * @msg_ctx: The context to move from the alloc list to the free list * - * Be sure to lock the list mutex and the context mutex before - * calling. + * Must be called with ecryptfs_msg_ctx_lists_mux held. */ -static void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) +void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) { list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list); if (msg_ctx->msg) kfree(msg_ctx->msg); + msg_ctx->msg = NULL; msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE; } /** - * ecryptfs_find_daemon_id - * @uid: The user id which maps to the desired daemon id - * @id: If return value is zero, points to the desired daemon id - * pointer + * ecryptfs_find_daemon_by_euid + * @euid: The effective user id which maps to the desired daemon id + * @user_ns: The namespace in which @euid applies + * @daemon: If return value is zero, points to the desired daemon pointer * - * Search the hash list for the given user id. Returns zero if the - * user id exists in the list; non-zero otherwise. The daemon id hash - * mutex should be held before calling this function. + * Must be called with ecryptfs_daemon_hash_mux held. + * + * Search the hash list for the given user id. + * + * Returns zero if the user id exists in the list; non-zero otherwise. */ -static int ecryptfs_find_daemon_id(uid_t uid, struct ecryptfs_daemon_id **id) +int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid, + struct user_namespace *user_ns) { struct hlist_node *elem; int rc; - hlist_for_each_entry(*id, elem, - &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)], - id_chain) { - if ((*id)->uid == uid) { + hlist_for_each_entry(*daemon, elem, + &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)], + euid_chain) { + if ((*daemon)->euid == euid && (*daemon)->user_ns == user_ns) { rc = 0; goto out; } @@ -128,181 +133,325 @@ out: return rc; } -static int ecryptfs_send_raw_message(unsigned int transport, u16 msg_type, - pid_t pid) +static int +ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, + u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx); + +/** + * ecryptfs_send_raw_message + * @transport: Transport type + * @msg_type: Message type + * @daemon: Daemon struct for recipient of message + * + * A raw message is one that does not include an ecryptfs_message + * struct. It simply has a type. + * + * Must be called with ecryptfs_daemon_hash_mux held. + * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_send_raw_message(unsigned int transport, u8 msg_type, + struct ecryptfs_daemon *daemon) { + struct ecryptfs_msg_ctx *msg_ctx; int rc; switch(transport) { case ECRYPTFS_TRANSPORT_NETLINK: - rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0, pid); + rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0, + daemon->pid); + break; + case ECRYPTFS_TRANSPORT_MISCDEV: + rc = ecryptfs_send_message_locked(transport, NULL, 0, msg_type, + &msg_ctx); + if (rc) { + printk(KERN_ERR "%s: Error whilst attempting to send " + "message via procfs; rc = [%d]\n", __func__, rc); + goto out; + } + /* Raw messages are logically context-free (e.g., no + * reply is expected), so we set the state of the + * ecryptfs_msg_ctx object to indicate that it should + * be freed as soon as the transport sends out the message. */ + mutex_lock(&msg_ctx->mux); + msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY; + mutex_unlock(&msg_ctx->mux); break; case ECRYPTFS_TRANSPORT_CONNECTOR: case ECRYPTFS_TRANSPORT_RELAYFS: default: rc = -ENOSYS; } +out: + return rc; +} + +/** + * ecryptfs_spawn_daemon - Create and initialize a new daemon struct + * @daemon: Pointer to set to newly allocated daemon struct + * @euid: Effective user id for the daemon + * @user_ns: The namespace in which @euid applies + * @pid: Process id for the daemon + * + * Must be called ceremoniously while in possession of + * ecryptfs_sacred_daemon_hash_mux + * + * Returns zero on success; non-zero otherwise + */ +int +ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, + struct user_namespace *user_ns, struct pid *pid) +{ + int rc = 0; + + (*daemon) = kzalloc(sizeof(**daemon), GFP_KERNEL); + if (!(*daemon)) { + rc = -ENOMEM; + printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of " + "GFP_KERNEL memory\n", __func__, sizeof(**daemon)); + goto out; + } + (*daemon)->euid = euid; + (*daemon)->user_ns = get_user_ns(user_ns); + (*daemon)->pid = get_pid(pid); + (*daemon)->task = current; + mutex_init(&(*daemon)->mux); + INIT_LIST_HEAD(&(*daemon)->msg_ctx_out_queue); + init_waitqueue_head(&(*daemon)->wait); + (*daemon)->num_queued_msg_ctx = 0; + hlist_add_head(&(*daemon)->euid_chain, + &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)]); +out: return rc; } /** * ecryptfs_process_helo * @transport: The underlying transport (netlink, etc.) - * @uid: The user ID owner of the message + * @euid: The user ID owner of the message + * @user_ns: The namespace in which @euid applies * @pid: The process ID for the userspace program that sent the * message * - * Adds the uid and pid values to the daemon id hash. If a uid + * Adds the euid and pid values to the daemon euid hash. If an euid * already has a daemon pid registered, the daemon will be - * unregistered before the new daemon id is put into the hash list. - * Returns zero after adding a new daemon id to the hash list; + * unregistered before the new daemon is put into the hash list. + * Returns zero after adding a new daemon to the hash list; * non-zero otherwise. */ -int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid) +int ecryptfs_process_helo(unsigned int transport, uid_t euid, + struct user_namespace *user_ns, struct pid *pid) { - struct ecryptfs_daemon_id *new_id; - struct ecryptfs_daemon_id *old_id; + struct ecryptfs_daemon *new_daemon; + struct ecryptfs_daemon *old_daemon; int rc; - mutex_lock(&ecryptfs_daemon_id_hash_mux); - new_id = kmalloc(sizeof(*new_id), GFP_KERNEL); - if (!new_id) { - rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Failed to allocate memory; unable " - "to register daemon [%d] for user [%d]\n", - pid, uid); - goto unlock; - } - if (!ecryptfs_find_daemon_id(uid, &old_id)) { + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = ecryptfs_find_daemon_by_euid(&old_daemon, euid, user_ns); + if (rc != 0) { printk(KERN_WARNING "Received request from user [%d] " - "to register daemon [%d]; unregistering daemon " - "[%d]\n", uid, pid, old_id->pid); - hlist_del(&old_id->id_chain); - rc = ecryptfs_send_raw_message(transport, ECRYPTFS_NLMSG_QUIT, - old_id->pid); + "to register daemon [0x%p]; unregistering daemon " + "[0x%p]\n", euid, pid, old_daemon->pid); + rc = ecryptfs_send_raw_message(transport, ECRYPTFS_MSG_QUIT, + old_daemon); if (rc) printk(KERN_WARNING "Failed to send QUIT " - "message to daemon [%d]; rc = [%d]\n", - old_id->pid, rc); - kfree(old_id); + "message to daemon [0x%p]; rc = [%d]\n", + old_daemon->pid, rc); + hlist_del(&old_daemon->euid_chain); + kfree(old_daemon); } - new_id->uid = uid; - new_id->pid = pid; - hlist_add_head(&new_id->id_chain, - &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)]); - rc = 0; -unlock: - mutex_unlock(&ecryptfs_daemon_id_hash_mux); + rc = ecryptfs_spawn_daemon(&new_daemon, euid, user_ns, pid); + if (rc) + printk(KERN_ERR "%s: The gods are displeased with this attempt " + "to create a new daemon object for euid [%d]; pid " + "[0x%p]; rc = [%d]\n", __func__, euid, pid, rc); + mutex_unlock(&ecryptfs_daemon_hash_mux); + return rc; +} + +/** + * ecryptfs_exorcise_daemon - Destroy the daemon struct + * + * Must be called ceremoniously while in possession of + * ecryptfs_daemon_hash_mux and the daemon's own mux. + */ +int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon) +{ + struct ecryptfs_msg_ctx *msg_ctx, *msg_ctx_tmp; + int rc = 0; + + mutex_lock(&daemon->mux); + if ((daemon->flags & ECRYPTFS_DAEMON_IN_READ) + || (daemon->flags & ECRYPTFS_DAEMON_IN_POLL)) { + rc = -EBUSY; + printk(KERN_WARNING "%s: Attempt to destroy daemon with pid " + "[0x%p], but it is in the midst of a read or a poll\n", + __func__, daemon->pid); + mutex_unlock(&daemon->mux); + goto out; + } + list_for_each_entry_safe(msg_ctx, msg_ctx_tmp, + &daemon->msg_ctx_out_queue, daemon_out_list) { + list_del(&msg_ctx->daemon_out_list); + daemon->num_queued_msg_ctx--; + printk(KERN_WARNING "%s: Warning: dropping message that is in " + "the out queue of a dying daemon\n", __func__); + ecryptfs_msg_ctx_alloc_to_free(msg_ctx); + } + hlist_del(&daemon->euid_chain); + if (daemon->task) + wake_up_process(daemon->task); + if (daemon->pid) + put_pid(daemon->pid); + if (daemon->user_ns) + put_user_ns(daemon->user_ns); + mutex_unlock(&daemon->mux); + memset(daemon, 0, sizeof(*daemon)); + kfree(daemon); +out: return rc; } /** * ecryptfs_process_quit - * @uid: The user ID owner of the message + * @euid: The user ID owner of the message + * @user_ns: The namespace in which @euid applies * @pid: The process ID for the userspace program that sent the * message * - * Deletes the corresponding daemon id for the given uid and pid, if + * Deletes the corresponding daemon for the given euid and pid, if * it is the registered that is requesting the deletion. Returns zero - * after deleting the desired daemon id; non-zero otherwise. + * after deleting the desired daemon; non-zero otherwise. */ -int ecryptfs_process_quit(uid_t uid, pid_t pid) +int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns, + struct pid *pid) { - struct ecryptfs_daemon_id *id; + struct ecryptfs_daemon *daemon; int rc; - mutex_lock(&ecryptfs_daemon_id_hash_mux); - if (ecryptfs_find_daemon_id(uid, &id)) { + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, user_ns); + if (rc || !daemon) { rc = -EINVAL; - ecryptfs_printk(KERN_ERR, "Received request from user [%d] to " - "unregister unrecognized daemon [%d]\n", uid, - pid); - goto unlock; + printk(KERN_ERR "Received request from user [%d] to " + "unregister unrecognized daemon [0x%p]\n", euid, pid); + goto out_unlock; } - if (id->pid != pid) { - rc = -EINVAL; - ecryptfs_printk(KERN_WARNING, "Received request from user [%d] " - "with pid [%d] to unregister daemon [%d]\n", - uid, pid, id->pid); - goto unlock; - } - hlist_del(&id->id_chain); - kfree(id); - rc = 0; -unlock: - mutex_unlock(&ecryptfs_daemon_id_hash_mux); + rc = ecryptfs_exorcise_daemon(daemon); +out_unlock: + mutex_unlock(&ecryptfs_daemon_hash_mux); return rc; } /** * ecryptfs_process_reponse * @msg: The ecryptfs message received; the caller should sanity check - * msg->data_len + * msg->data_len and free the memory * @pid: The process ID of the userspace application that sent the * message - * @seq: The sequence number of the message + * @seq: The sequence number of the message; must match the sequence + * number for the existing message context waiting for this + * response + * + * Processes a response message after sending an operation request to + * userspace. Some other process is awaiting this response. Before + * sending out its first communications, the other process allocated a + * msg_ctx from the ecryptfs_msg_ctx_arr at a particular index. The + * response message contains this index so that we can copy over the + * response message into the msg_ctx that the process holds a + * reference to. The other process is going to wake up, check to see + * that msg_ctx->state == ECRYPTFS_MSG_CTX_STATE_DONE, and then + * proceed to read off and process the response message. Returns zero + * upon delivery to desired context element; non-zero upon delivery + * failure or error. * - * Processes a response message after sending a operation request to - * userspace. Returns zero upon delivery to desired context element; - * non-zero upon delivery failure or error. + * Returns zero on success; non-zero otherwise */ -int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid, - pid_t pid, u32 seq) +int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, + struct user_namespace *user_ns, struct pid *pid, + u32 seq) { - struct ecryptfs_daemon_id *id; + struct ecryptfs_daemon *daemon; struct ecryptfs_msg_ctx *msg_ctx; - int msg_size; + size_t msg_size; + struct nsproxy *nsproxy; + struct user_namespace *current_user_ns; int rc; if (msg->index >= ecryptfs_message_buf_len) { rc = -EINVAL; - ecryptfs_printk(KERN_ERR, "Attempt to reference " - "context buffer at index [%d]; maximum " - "allowable is [%d]\n", msg->index, - (ecryptfs_message_buf_len - 1)); + printk(KERN_ERR "%s: Attempt to reference " + "context buffer at index [%d]; maximum " + "allowable is [%d]\n", __func__, msg->index, + (ecryptfs_message_buf_len - 1)); goto out; } msg_ctx = &ecryptfs_msg_ctx_arr[msg->index]; mutex_lock(&msg_ctx->mux); - if (ecryptfs_find_daemon_id(msg_ctx->task->euid, &id)) { + mutex_lock(&ecryptfs_daemon_hash_mux); + rcu_read_lock(); + nsproxy = task_nsproxy(msg_ctx->task); + if (nsproxy == NULL) { rc = -EBADMSG; - ecryptfs_printk(KERN_WARNING, "User [%d] received a " - "message response from process [%d] but does " - "not have a registered daemon\n", - msg_ctx->task->euid, pid); + printk(KERN_ERR "%s: Receiving process is a zombie. Dropping " + "message.\n", __func__); + rcu_read_unlock(); + mutex_unlock(&ecryptfs_daemon_hash_mux); goto wake_up; } - if (msg_ctx->task->euid != uid) { + current_user_ns = nsproxy->user_ns; + rc = ecryptfs_find_daemon_by_euid(&daemon, msg_ctx->task->euid, + current_user_ns); + rcu_read_unlock(); + mutex_unlock(&ecryptfs_daemon_hash_mux); + if (rc) { + rc = -EBADMSG; + printk(KERN_WARNING "%s: User [%d] received a " + "message response from process [0x%p] but does " + "not have a registered daemon\n", __func__, + msg_ctx->task->euid, pid); + goto wake_up; + } + if (msg_ctx->task->euid != euid) { rc = -EBADMSG; - ecryptfs_printk(KERN_WARNING, "Received message from user " - "[%d]; expected message from user [%d]\n", - uid, msg_ctx->task->euid); + printk(KERN_WARNING "%s: Received message from user " + "[%d]; expected message from user [%d]\n", __func__, + euid, msg_ctx->task->euid); goto unlock; } - if (id->pid != pid) { + if (current_user_ns != user_ns) { rc = -EBADMSG; - ecryptfs_printk(KERN_ERR, "User [%d] received a " - "message response from an unrecognized " - "process [%d]\n", msg_ctx->task->euid, pid); + printk(KERN_WARNING "%s: Received message from user_ns " + "[0x%p]; expected message from user_ns [0x%p]\n", + __func__, user_ns, nsproxy->user_ns); + goto unlock; + } + if (daemon->pid != pid) { + rc = -EBADMSG; + printk(KERN_ERR "%s: User [%d] sent a message response " + "from an unrecognized process [0x%p]\n", + __func__, msg_ctx->task->euid, pid); goto unlock; } if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) { rc = -EINVAL; - ecryptfs_printk(KERN_WARNING, "Desired context element is not " - "pending a response\n"); + printk(KERN_WARNING "%s: Desired context element is not " + "pending a response\n", __func__); goto unlock; } else if (msg_ctx->counter != seq) { rc = -EINVAL; - ecryptfs_printk(KERN_WARNING, "Invalid message sequence; " - "expected [%d]; received [%d]\n", - msg_ctx->counter, seq); + printk(KERN_WARNING "%s: Invalid message sequence; " + "expected [%d]; received [%d]\n", __func__, + msg_ctx->counter, seq); goto unlock; } - msg_size = sizeof(*msg) + msg->data_len; + msg_size = (sizeof(*msg) + msg->data_len); msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL); if (!msg_ctx->msg) { rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); + printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of " + "GFP_KERNEL memory\n", __func__, msg_size); goto unlock; } memcpy(msg_ctx->msg, msg, msg_size); @@ -317,34 +466,38 @@ out: } /** - * ecryptfs_send_message + * ecryptfs_send_message_locked * @transport: The transport over which to send the message (i.e., * netlink) * @data: The data to send * @data_len: The length of data * @msg_ctx: The message context allocated for the send + * + * Must be called with ecryptfs_daemon_hash_mux held. + * + * Returns zero on success; non-zero otherwise */ -int ecryptfs_send_message(unsigned int transport, char *data, int data_len, - struct ecryptfs_msg_ctx **msg_ctx) +static int +ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, + u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx) { - struct ecryptfs_daemon_id *id; + struct ecryptfs_daemon *daemon; int rc; - mutex_lock(&ecryptfs_daemon_id_hash_mux); - if (ecryptfs_find_daemon_id(current->euid, &id)) { - mutex_unlock(&ecryptfs_daemon_id_hash_mux); + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, + current->nsproxy->user_ns); + if (rc || !daemon) { rc = -ENOTCONN; - ecryptfs_printk(KERN_ERR, "User [%d] does not have a daemon " - "registered\n", current->euid); + printk(KERN_ERR "%s: User [%d] does not have a daemon " + "registered\n", __func__, current->euid); goto out; } - mutex_unlock(&ecryptfs_daemon_id_hash_mux); mutex_lock(&ecryptfs_msg_ctx_lists_mux); rc = ecryptfs_acquire_free_msg_ctx(msg_ctx); if (rc) { mutex_unlock(&ecryptfs_msg_ctx_lists_mux); - ecryptfs_printk(KERN_WARNING, "Could not claim a free " - "context element\n"); + printk(KERN_WARNING "%s: Could not claim a free " + "context element\n", __func__); goto out; } ecryptfs_msg_ctx_free_to_alloc(*msg_ctx); @@ -352,23 +505,50 @@ int ecryptfs_send_message(unsigned int transport, char *data, int data_len, mutex_unlock(&ecryptfs_msg_ctx_lists_mux); switch (transport) { case ECRYPTFS_TRANSPORT_NETLINK: - rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, - ECRYPTFS_NLMSG_REQUEST, 0, id->pid); + rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, msg_type, + 0, daemon->pid); + break; + case ECRYPTFS_TRANSPORT_MISCDEV: + rc = ecryptfs_send_miscdev(data, data_len, *msg_ctx, msg_type, + 0, daemon); break; case ECRYPTFS_TRANSPORT_CONNECTOR: case ECRYPTFS_TRANSPORT_RELAYFS: default: rc = -ENOSYS; } - if (rc) { - printk(KERN_ERR "Error attempting to send message to userspace " - "daemon; rc = [%d]\n", rc); - } + if (rc) + printk(KERN_ERR "%s: Error attempting to send message to " + "userspace daemon; rc = [%d]\n", __func__, rc); out: return rc; } /** + * ecryptfs_send_message + * @transport: The transport over which to send the message (i.e., + * netlink) + * @data: The data to send + * @data_len: The length of data + * @msg_ctx: The message context allocated for the send + * + * Grabs ecryptfs_daemon_hash_mux. + * + * Returns zero on success; non-zero otherwise + */ +int ecryptfs_send_message(unsigned int transport, char *data, int data_len, + struct ecryptfs_msg_ctx **msg_ctx) +{ + int rc; + + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = ecryptfs_send_message_locked(transport, data, data_len, + ECRYPTFS_MSG_REQUEST, msg_ctx); + mutex_unlock(&ecryptfs_daemon_hash_mux); + return rc; +} + +/** * ecryptfs_wait_for_response * @msg_ctx: The context that was assigned when sending a message * @msg: The incoming message from userspace; not set if rc != 0 @@ -377,7 +557,7 @@ out: * of time exceeds ecryptfs_message_wait_timeout. If zero is * returned, msg will point to a valid message from userspace; a * non-zero value is returned upon failure to receive a message or an - * error occurs. + * error occurs. Callee must free @msg on success. */ int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, struct ecryptfs_message **msg) @@ -413,32 +593,32 @@ int ecryptfs_init_messaging(unsigned int transport) if (ecryptfs_number_of_users > ECRYPTFS_MAX_NUM_USERS) { ecryptfs_number_of_users = ECRYPTFS_MAX_NUM_USERS; - ecryptfs_printk(KERN_WARNING, "Specified number of users is " - "too large, defaulting to [%d] users\n", - ecryptfs_number_of_users); + printk(KERN_WARNING "%s: Specified number of users is " + "too large, defaulting to [%d] users\n", __func__, + ecryptfs_number_of_users); } - mutex_init(&ecryptfs_daemon_id_hash_mux); - mutex_lock(&ecryptfs_daemon_id_hash_mux); + mutex_init(&ecryptfs_daemon_hash_mux); + mutex_lock(&ecryptfs_daemon_hash_mux); ecryptfs_hash_buckets = 1; while (ecryptfs_number_of_users >> ecryptfs_hash_buckets) ecryptfs_hash_buckets++; - ecryptfs_daemon_id_hash = kmalloc(sizeof(struct hlist_head) - * ecryptfs_hash_buckets, GFP_KERNEL); - if (!ecryptfs_daemon_id_hash) { + ecryptfs_daemon_hash = kmalloc((sizeof(struct hlist_head) + * ecryptfs_hash_buckets), GFP_KERNEL); + if (!ecryptfs_daemon_hash) { rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); - mutex_unlock(&ecryptfs_daemon_id_hash_mux); + printk(KERN_ERR "%s: Failed to allocate memory\n", __func__); + mutex_unlock(&ecryptfs_daemon_hash_mux); goto out; } for (i = 0; i < ecryptfs_hash_buckets; i++) - INIT_HLIST_HEAD(&ecryptfs_daemon_id_hash[i]); - mutex_unlock(&ecryptfs_daemon_id_hash_mux); - + INIT_HLIST_HEAD(&ecryptfs_daemon_hash[i]); + mutex_unlock(&ecryptfs_daemon_hash_mux); ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx) - * ecryptfs_message_buf_len), GFP_KERNEL); + * ecryptfs_message_buf_len), + GFP_KERNEL); if (!ecryptfs_msg_ctx_arr) { rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); + printk(KERN_ERR "%s: Failed to allocate memory\n", __func__); goto out; } mutex_init(&ecryptfs_msg_ctx_lists_mux); @@ -446,6 +626,7 @@ int ecryptfs_init_messaging(unsigned int transport) ecryptfs_msg_counter = 0; for (i = 0; i < ecryptfs_message_buf_len; i++) { INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].node); + INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].daemon_out_list); mutex_init(&ecryptfs_msg_ctx_arr[i].mux); mutex_lock(&ecryptfs_msg_ctx_arr[i].mux); ecryptfs_msg_ctx_arr[i].index = i; @@ -464,6 +645,11 @@ int ecryptfs_init_messaging(unsigned int transport) if (rc) ecryptfs_release_messaging(transport); break; + case ECRYPTFS_TRANSPORT_MISCDEV: + rc = ecryptfs_init_ecryptfs_miscdev(); + if (rc) + ecryptfs_release_messaging(transport); + break; case ECRYPTFS_TRANSPORT_CONNECTOR: case ECRYPTFS_TRANSPORT_RELAYFS: default: @@ -488,27 +674,37 @@ void ecryptfs_release_messaging(unsigned int transport) kfree(ecryptfs_msg_ctx_arr); mutex_unlock(&ecryptfs_msg_ctx_lists_mux); } - if (ecryptfs_daemon_id_hash) { + if (ecryptfs_daemon_hash) { struct hlist_node *elem; - struct ecryptfs_daemon_id *id; + struct ecryptfs_daemon *daemon; int i; - mutex_lock(&ecryptfs_daemon_id_hash_mux); + mutex_lock(&ecryptfs_daemon_hash_mux); for (i = 0; i < ecryptfs_hash_buckets; i++) { - hlist_for_each_entry(id, elem, - &ecryptfs_daemon_id_hash[i], - id_chain) { - hlist_del(elem); - kfree(id); + int rc; + + hlist_for_each_entry(daemon, elem, + &ecryptfs_daemon_hash[i], + euid_chain) { + rc = ecryptfs_exorcise_daemon(daemon); + if (rc) + printk(KERN_ERR "%s: Error whilst " + "attempting to destroy daemon; " + "rc = [%d]. Dazed and confused, " + "but trying to continue.\n", + __func__, rc); } } - kfree(ecryptfs_daemon_id_hash); - mutex_unlock(&ecryptfs_daemon_id_hash_mux); + kfree(ecryptfs_daemon_hash); + mutex_unlock(&ecryptfs_daemon_hash_mux); } switch(transport) { case ECRYPTFS_TRANSPORT_NETLINK: ecryptfs_release_netlink(); break; + case ECRYPTFS_TRANSPORT_MISCDEV: + ecryptfs_destroy_ecryptfs_miscdev(); + break; case ECRYPTFS_TRANSPORT_CONNECTOR: case ECRYPTFS_TRANSPORT_RELAYFS: default: diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c new file mode 100644 index 00000000000..6560da1a58c --- /dev/null +++ b/fs/ecryptfs/miscdev.c @@ -0,0 +1,600 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * + * Copyright (C) 2008 International Business Machines Corp. + * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <linux/fs.h> +#include <linux/hash.h> +#include <linux/random.h> +#include <linux/miscdevice.h> +#include <linux/poll.h> +#include <linux/wait.h> +#include <linux/module.h> +#include "ecryptfs_kernel.h" + +static atomic_t ecryptfs_num_miscdev_opens; + +/** + * ecryptfs_miscdev_poll + * @file: dev file (ignored) + * @pt: dev poll table (ignored) + * + * Returns the poll mask + */ +static unsigned int +ecryptfs_miscdev_poll(struct file *file, poll_table *pt) +{ + struct ecryptfs_daemon *daemon; + unsigned int mask = 0; + int rc; + + mutex_lock(&ecryptfs_daemon_hash_mux); + /* TODO: Just use file->private_data? */ + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, + current->nsproxy->user_ns); + BUG_ON(rc || !daemon); + mutex_lock(&daemon->mux); + mutex_unlock(&ecryptfs_daemon_hash_mux); + if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { + printk(KERN_WARNING "%s: Attempt to poll on zombified " + "daemon\n", __func__); + goto out_unlock_daemon; + } + if (daemon->flags & ECRYPTFS_DAEMON_IN_READ) + goto out_unlock_daemon; + if (daemon->flags & ECRYPTFS_DAEMON_IN_POLL) + goto out_unlock_daemon; + daemon->flags |= ECRYPTFS_DAEMON_IN_POLL; + mutex_unlock(&daemon->mux); + poll_wait(file, &daemon->wait, pt); + mutex_lock(&daemon->mux); + if (!list_empty(&daemon->msg_ctx_out_queue)) + mask |= POLLIN | POLLRDNORM; +out_unlock_daemon: + daemon->flags &= ~ECRYPTFS_DAEMON_IN_POLL; + mutex_unlock(&daemon->mux); + return mask; +} + +/** + * ecryptfs_miscdev_open + * @inode: inode of miscdev handle (ignored) + * @file: file for miscdev handle (ignored) + * + * Returns zero on success; non-zero otherwise + */ +static int +ecryptfs_miscdev_open(struct inode *inode, struct file *file) +{ + struct ecryptfs_daemon *daemon = NULL; + int rc; + + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = try_module_get(THIS_MODULE); + if (rc == 0) { + rc = -EIO; + printk(KERN_ERR "%s: Error attempting to increment module use " + "count; rc = [%d]\n", __func__, rc); + goto out_unlock_daemon_list; + } + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, + current->nsproxy->user_ns); + if (rc || !daemon) { + rc = ecryptfs_spawn_daemon(&daemon, current->euid, + current->nsproxy->user_ns, + task_pid(current)); + if (rc) { + printk(KERN_ERR "%s: Error attempting to spawn daemon; " + "rc = [%d]\n", __func__, rc); + goto out_module_put_unlock_daemon_list; + } + } + mutex_lock(&daemon->mux); + if (daemon->pid != task_pid(current)) { + rc = -EINVAL; + printk(KERN_ERR "%s: pid [0x%p] has registered with euid [%d], " + "but pid [0x%p] has attempted to open the handle " + "instead\n", __func__, daemon->pid, daemon->euid, + task_pid(current)); + goto out_unlock_daemon; + } + if (daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN) { + rc = -EBUSY; + printk(KERN_ERR "%s: Miscellaneous device handle may only be " + "opened once per daemon; pid [0x%p] already has this " + "handle open\n", __func__, daemon->pid); + goto out_unlock_daemon; + } + daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN; + atomic_inc(&ecryptfs_num_miscdev_opens); +out_unlock_daemon: + mutex_unlock(&daemon->mux); +out_module_put_unlock_daemon_list: + if (rc) + module_put(THIS_MODULE); +out_unlock_daemon_list: + mutex_unlock(&ecryptfs_daemon_hash_mux); + return rc; +} + +/** + * ecryptfs_miscdev_release + * @inode: inode of fs/ecryptfs/euid handle (ignored) + * @file: file for fs/ecryptfs/euid handle (ignored) + * + * This keeps the daemon registered until the daemon sends another + * ioctl to fs/ecryptfs/ctl or until the kernel module unregisters. + * + * Returns zero on success; non-zero otherwise + */ +static int +ecryptfs_miscdev_release(struct inode *inode, struct file *file) +{ + struct ecryptfs_daemon *daemon = NULL; + int rc; + + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, + current->nsproxy->user_ns); + BUG_ON(rc || !daemon); + mutex_lock(&daemon->mux); + BUG_ON(daemon->pid != task_pid(current)); + BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN)); + daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN; + atomic_dec(&ecryptfs_num_miscdev_opens); + mutex_unlock(&daemon->mux); + rc = ecryptfs_exorcise_daemon(daemon); + if (rc) { + printk(KERN_CRIT "%s: Fatal error whilst attempting to " + "shut down daemon; rc = [%d]. Please report this " + "bug.\n", __func__, rc); + BUG(); + } + module_put(THIS_MODULE); + mutex_unlock(&ecryptfs_daemon_hash_mux); + return rc; +} + +/** + * ecryptfs_send_miscdev + * @data: Data to send to daemon; may be NULL + * @data_size: Amount of data to send to daemon + * @msg_ctx: Message context, which is used to handle the reply. If + * this is NULL, then we do not expect a reply. + * @msg_type: Type of message + * @msg_flags: Flags for message + * @daemon: eCryptfs daemon object + * + * Add msg_ctx to queue and then, if it exists, notify the blocked + * miscdevess about the data being available. Must be called with + * ecryptfs_daemon_hash_mux held. + * + * Returns zero on success; non-zero otherwise + */ +int ecryptfs_send_miscdev(char *data, size_t data_size, + struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, + u16 msg_flags, struct ecryptfs_daemon *daemon) +{ + int rc = 0; + + mutex_lock(&msg_ctx->mux); + if (data) { + msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size), + GFP_KERNEL); + if (!msg_ctx->msg) { + rc = -ENOMEM; + printk(KERN_ERR "%s: Out of memory whilst attempting " + "to kmalloc(%Zd, GFP_KERNEL)\n", __func__, + (sizeof(*msg_ctx->msg) + data_size)); + goto out_unlock; + } + } else + msg_ctx->msg = NULL; + msg_ctx->msg->index = msg_ctx->index; + msg_ctx->msg->data_len = data_size; + msg_ctx->type = msg_type; + if (data) { + memcpy(msg_ctx->msg->data, data, data_size); + msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size); + } else + msg_ctx->msg_size = 0; + mutex_lock(&daemon->mux); + list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue); + daemon->num_queued_msg_ctx++; + wake_up_interruptible(&daemon->wait); + mutex_unlock(&daemon->mux); +out_unlock: + mutex_unlock(&msg_ctx->mux); + return rc; +} + +/** + * ecryptfs_miscdev_read - format and send message from queue + * @file: fs/ecryptfs/euid miscdevfs handle (ignored) + * @buf: User buffer into which to copy the next message on the daemon queue + * @count: Amount of space available in @buf + * @ppos: Offset in file (ignored) + * + * Pulls the most recent message from the daemon queue, formats it for + * being sent via a miscdevfs handle, and copies it into @buf + * + * Returns the number of bytes copied into the user buffer + */ +static ssize_t +ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct ecryptfs_daemon *daemon; + struct ecryptfs_msg_ctx *msg_ctx; + size_t packet_length_size; + u32 counter_nbo; + char packet_length[3]; + size_t i; + size_t total_length; + int rc; + + mutex_lock(&ecryptfs_daemon_hash_mux); + /* TODO: Just use file->private_data? */ + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, + current->nsproxy->user_ns); + BUG_ON(rc || !daemon); + mutex_lock(&daemon->mux); + if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { + rc = 0; + mutex_unlock(&ecryptfs_daemon_hash_mux); + printk(KERN_WARNING "%s: Attempt to read from zombified " + "daemon\n", __func__); + goto out_unlock_daemon; + } + if (daemon->flags & ECRYPTFS_DAEMON_IN_READ) { + rc = 0; + mutex_unlock(&ecryptfs_daemon_hash_mux); + goto out_unlock_daemon; + } + /* This daemon will not go away so long as this flag is set */ + daemon->flags |= ECRYPTFS_DAEMON_IN_READ; + mutex_unlock(&ecryptfs_daemon_hash_mux); +check_list: + if (list_empty(&daemon->msg_ctx_out_queue)) { + mutex_unlock(&daemon->mux); + rc = wait_event_interruptible( + daemon->wait, !list_empty(&daemon->msg_ctx_out_queue)); + mutex_lock(&daemon->mux); + if (rc < 0) { + rc = 0; + goto out_unlock_daemon; + } + } + if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { + rc = 0; + goto out_unlock_daemon; + } + if (list_empty(&daemon->msg_ctx_out_queue)) { + /* Something else jumped in since the + * wait_event_interruptable() and removed the + * message from the queue; try again */ + goto check_list; + } + BUG_ON(current->euid != daemon->euid); + BUG_ON(current->nsproxy->user_ns != daemon->user_ns); + BUG_ON(task_pid(current) != daemon->pid); + msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue, + struct ecryptfs_msg_ctx, daemon_out_list); + BUG_ON(!msg_ctx); + mutex_lock(&msg_ctx->mux); + if (msg_ctx->msg) { + rc = ecryptfs_write_packet_length(packet_length, + msg_ctx->msg_size, + &packet_length_size); + if (rc) { + rc = 0; + printk(KERN_WARNING "%s: Error writing packet length; " + "rc = [%d]\n", __func__, rc); + goto out_unlock_msg_ctx; + } + } else { + packet_length_size = 0; + msg_ctx->msg_size = 0; + } + /* miscdevfs packet format: + * Octet 0: Type + * Octets 1-4: network byte order msg_ctx->counter + * Octets 5-N0: Size of struct ecryptfs_message to follow + * Octets N0-N1: struct ecryptfs_message (including data) + * + * Octets 5-N1 not written if the packet type does not + * include a message */ + total_length = (1 + 4 + packet_length_size + msg_ctx->msg_size); + if (count < total_length) { + rc = 0; + printk(KERN_WARNING "%s: Only given user buffer of " + "size [%Zd], but we need [%Zd] to read the " + "pending message\n", __func__, count, total_length); + goto out_unlock_msg_ctx; + } + i = 0; + buf[i++] = msg_ctx->type; + counter_nbo = cpu_to_be32(msg_ctx->counter); + memcpy(&buf[i], (char *)&counter_nbo, 4); + i += 4; + if (msg_ctx->msg) { + memcpy(&buf[i], packet_length, packet_length_size); + i += packet_length_size; + rc = copy_to_user(&buf[i], msg_ctx->msg, msg_ctx->msg_size); + if (rc) { + printk(KERN_ERR "%s: copy_to_user returned error " + "[%d]\n", __func__, rc); + goto out_unlock_msg_ctx; + } + i += msg_ctx->msg_size; + } + rc = i; + list_del(&msg_ctx->daemon_out_list); + kfree(msg_ctx->msg); + msg_ctx->msg = NULL; + /* We do not expect a reply from the userspace daemon for any + * message type other than ECRYPTFS_MSG_REQUEST */ + if (msg_ctx->type != ECRYPTFS_MSG_REQUEST) + ecryptfs_msg_ctx_alloc_to_free(msg_ctx); +out_unlock_msg_ctx: + mutex_unlock(&msg_ctx->mux); +out_unlock_daemon: + daemon->flags &= ~ECRYPTFS_DAEMON_IN_READ; + mutex_unlock(&daemon->mux); + return rc; +} + +/** + * ecryptfs_miscdev_helo + * @euid: effective user id of miscdevess sending helo packet + * @user_ns: The namespace in which @euid applies + * @pid: miscdevess id of miscdevess sending helo packet + * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_miscdev_helo(uid_t euid, struct user_namespace *user_ns, + struct pid *pid) +{ + int rc; + + rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_MISCDEV, euid, user_ns, + pid); + if (rc) + printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc); + return rc; +} + +/** + * ecryptfs_miscdev_quit + * @euid: effective user id of miscdevess sending quit packet + * @user_ns: The namespace in which @euid applies + * @pid: miscdevess id of miscdevess sending quit packet + * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_miscdev_quit(uid_t euid, struct user_namespace *user_ns, + struct pid *pid) +{ + int rc; + + rc = ecryptfs_process_quit(euid, user_ns, pid); + if (rc) + printk(KERN_WARNING + "Error processing QUIT message; rc = [%d]\n", rc); + return rc; +} + +/** + * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon + * @data: Bytes comprising struct ecryptfs_message + * @data_size: sizeof(struct ecryptfs_message) + data len + * @euid: Effective user id of miscdevess sending the miscdev response + * @user_ns: The namespace in which @euid applies + * @pid: Miscdevess id of miscdevess sending the miscdev response + * @seq: Sequence number for miscdev response packet + * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_miscdev_response(char *data, size_t data_size, + uid_t euid, struct user_namespace *user_ns, + struct pid *pid, u32 seq) +{ + struct ecryptfs_message *msg = (struct ecryptfs_message *)data; + int rc; + + if ((sizeof(*msg) + msg->data_len) != data_size) { + printk(KERN_WARNING "%s: (sizeof(*msg) + msg->data_len) = " + "[%Zd]; data_size = [%Zd]. Invalid packet.\n", __func__, + (sizeof(*msg) + msg->data_len), data_size); + rc = -EINVAL; + goto out; + } + rc = ecryptfs_process_response(msg, euid, user_ns, pid, seq); + if (rc) + printk(KERN_ERR + "Error processing response message; rc = [%d]\n", rc); +out: + return rc; +} + +/** + * ecryptfs_miscdev_write - handle write to daemon miscdev handle + * @file: File for misc dev handle (ignored) + * @buf: Buffer containing user data + * @count: Amount of data in @buf + * @ppos: Pointer to offset in file (ignored) + * + * miscdevfs packet format: + * Octet 0: Type + * Octets 1-4: network byte order msg_ctx->counter (0's for non-response) + * Octets 5-N0: Size of struct ecryptfs_message to follow + * Octets N0-N1: struct ecryptfs_message (including data) + * + * Returns the number of bytes read from @buf + */ +static ssize_t +ecryptfs_miscdev_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + u32 counter_nbo, seq; + size_t packet_size, packet_size_length, i; + ssize_t sz = 0; + char *data; + int rc; + + if (count == 0) + goto out; + data = kmalloc(count, GFP_KERNEL); + if (!data) { + printk(KERN_ERR "%s: Out of memory whilst attempting to " + "kmalloc([%Zd], GFP_KERNEL)\n", __func__, count); + goto out; + } + rc = copy_from_user(data, buf, count); + if (rc) { + printk(KERN_ERR "%s: copy_from_user returned error [%d]\n", + __func__, rc); + goto out_free; + } + sz = count; + i = 0; + switch (data[i++]) { + case ECRYPTFS_MSG_RESPONSE: + if (count < (1 + 4 + 1 + sizeof(struct ecryptfs_message))) { + printk(KERN_WARNING "%s: Minimum acceptable packet " + "size is [%Zd], but amount of data written is " + "only [%Zd]. Discarding response packet.\n", + __func__, + (1 + 4 + 1 + sizeof(struct ecryptfs_message)), + count); + goto out_free; + } + memcpy((char *)&counter_nbo, &data[i], 4); + seq = be32_to_cpu(counter_nbo); + i += 4; + rc = ecryptfs_parse_packet_length(&data[i], &packet_size, + &packet_size_length); + if (rc) { + printk(KERN_WARNING "%s: Error parsing packet length; " + "rc = [%d]\n", __func__, rc); + goto out_free; + } + i += packet_size_length; + if ((1 + 4 + packet_size_length + packet_size) != count) { + printk(KERN_WARNING "%s: (1 + packet_size_length([%Zd])" + " + packet_size([%Zd]))([%Zd]) != " + "count([%Zd]). Invalid packet format.\n", + __func__, packet_size_length, packet_size, + (1 + packet_size_length + packet_size), count); + goto out_free; + } + rc = ecryptfs_miscdev_response(&data[i], packet_size, + current->euid, + current->nsproxy->user_ns, + task_pid(current), seq); + if (rc) + printk(KERN_WARNING "%s: Failed to deliver miscdev " + "response to requesting operation; rc = [%d]\n", + __func__, rc); + break; + case ECRYPTFS_MSG_HELO: + rc = ecryptfs_miscdev_helo(current->euid, + current->nsproxy->user_ns, + task_pid(current)); + if (rc) { + printk(KERN_ERR "%s: Error attempting to process " + "helo from pid [0x%p]; rc = [%d]\n", __func__, + task_pid(current), rc); + goto out_free; + } + break; + case ECRYPTFS_MSG_QUIT: + rc = ecryptfs_miscdev_quit(current->euid, + current->nsproxy->user_ns, + task_pid(current)); + if (rc) { + printk(KERN_ERR "%s: Error attempting to process " + "quit from pid [0x%p]; rc = [%d]\n", __func__, + task_pid(current), rc); + goto out_free; + } + break; + default: + ecryptfs_printk(KERN_WARNING, "Dropping miscdev " + "message of unrecognized type [%d]\n", + data[0]); + break; + } +out_free: + kfree(data); +out: + return sz; +} + + +static const struct file_operations ecryptfs_miscdev_fops = { + .open = ecryptfs_miscdev_open, + .poll = ecryptfs_miscdev_poll, + .read = ecryptfs_miscdev_read, + .write = ecryptfs_miscdev_write, + .release = ecryptfs_miscdev_release, +}; + +static struct miscdevice ecryptfs_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "ecryptfs", + .fops = &ecryptfs_miscdev_fops +}; + +/** + * ecryptfs_init_ecryptfs_miscdev + * + * Messages sent to the userspace daemon from the kernel are placed on + * a queue associated with the daemon. The next read against the + * miscdev handle by that daemon will return the oldest message placed + * on the message queue for the daemon. + * + * Returns zero on success; non-zero otherwise + */ +int ecryptfs_init_ecryptfs_miscdev(void) +{ + int rc; + + atomic_set(&ecryptfs_num_miscdev_opens, 0); + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = misc_register(&ecryptfs_miscdev); + if (rc) + printk(KERN_ERR "%s: Failed to register miscellaneous device " + "for communications with userspace daemons; rc = [%d]\n", + __func__, rc); + mutex_unlock(&ecryptfs_daemon_hash_mux); + return rc; +} + +/** + * ecryptfs_destroy_ecryptfs_miscdev + * + * All of the daemons must be exorcised prior to calling this + * function. + */ +void ecryptfs_destroy_ecryptfs_miscdev(void) +{ + BUG_ON(atomic_read(&ecryptfs_num_miscdev_opens) != 0); + misc_deregister(&ecryptfs_miscdev); +} diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 6df1debdccc..2b6fe1e6e8b 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -153,7 +153,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, flush_dcache_page(page); if (rc) { printk(KERN_ERR "%s: Error reading xattr " - "region; rc = [%d]\n", __FUNCTION__, rc); + "region; rc = [%d]\n", __func__, rc); goto out; } } else { @@ -169,7 +169,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, if (rc) { printk(KERN_ERR "%s: Error attempting to read " "extent at offset [%lld] in the lower " - "file; rc = [%d]\n", __FUNCTION__, + "file; rc = [%d]\n", __func__, lower_offset, rc); goto out; } @@ -212,7 +212,7 @@ static int ecryptfs_readpage(struct file *file, struct page *page) "the encrypted content from the lower " "file whilst inserting the metadata " "from the xattr into the header; rc = " - "[%d]\n", __FUNCTION__, rc); + "[%d]\n", __func__, rc); goto out; } @@ -293,7 +293,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, if (rc) { printk(KERN_ERR "%s: Error attemping to read " "lower page segment; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); ClearPageUptodate(page); goto out; } else @@ -308,7 +308,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, "from the lower file whilst " "inserting the metadata from " "the xattr into the header; rc " - "= [%d]\n", __FUNCTION__, rc); + "= [%d]\n", __func__, rc); ClearPageUptodate(page); goto out; } @@ -320,7 +320,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, if (rc) { printk(KERN_ERR "%s: Error reading " "page; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); ClearPageUptodate(page); goto out; } @@ -331,7 +331,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, if (rc) { printk(KERN_ERR "%s: Error decrypting page " "at index [%ld]; rc = [%d]\n", - __FUNCTION__, page->index, rc); + __func__, page->index, rc); ClearPageUptodate(page); goto out; } @@ -348,7 +348,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, if (rc) { printk(KERN_ERR "%s: Error on attempt to " "truncate to (higher) offset [%lld];" - " rc = [%d]\n", __FUNCTION__, + " rc = [%d]\n", __func__, prev_page_end_size, rc); goto out; } @@ -389,7 +389,7 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode) kfree(file_size_virt); if (rc) printk(KERN_ERR "%s: Error writing file size to header; " - "rc = [%d]\n", __FUNCTION__, rc); + "rc = [%d]\n", __func__, rc); out: return rc; } diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c index f638a698dc5..e0abad62b39 100644 --- a/fs/ecryptfs/netlink.c +++ b/fs/ecryptfs/netlink.c @@ -44,8 +44,8 @@ static struct sock *ecryptfs_nl_sock; * upon sending the message; non-zero upon error. */ int ecryptfs_send_netlink(char *data, int data_len, - struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, - u16 msg_flags, pid_t daemon_pid) + struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, + u16 msg_flags, struct pid *daemon_pid) { struct sk_buff *skb; struct nlmsghdr *nlh; @@ -60,7 +60,7 @@ int ecryptfs_send_netlink(char *data, int data_len, ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n"); goto out; } - nlh = NLMSG_PUT(skb, daemon_pid, msg_ctx ? msg_ctx->counter : 0, + nlh = NLMSG_PUT(skb, pid_nr(daemon_pid), msg_ctx ? msg_ctx->counter : 0, msg_type, payload_len); nlh->nlmsg_flags = msg_flags; if (msg_ctx && payload_len) { @@ -69,7 +69,7 @@ int ecryptfs_send_netlink(char *data, int data_len, msg->data_len = data_len; memcpy(msg->data, data, data_len); } - rc = netlink_unicast(ecryptfs_nl_sock, skb, daemon_pid, 0); + rc = netlink_unicast(ecryptfs_nl_sock, skb, pid_nr(daemon_pid), 0); if (rc < 0) { ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink " "message; rc = [%d]\n", rc); @@ -99,6 +99,7 @@ static int ecryptfs_process_nl_response(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); struct ecryptfs_message *msg = NLMSG_DATA(nlh); + struct pid *pid; int rc; if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) { @@ -107,8 +108,10 @@ static int ecryptfs_process_nl_response(struct sk_buff *skb) "incorrectly specified data length\n"); goto out; } - rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, - NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq); + pid = find_get_pid(NETLINK_CREDS(skb)->pid); + rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, NULL, + pid, nlh->nlmsg_seq); + put_pid(pid); if (rc) printk(KERN_ERR "Error processing response message; rc = [%d]\n", rc); @@ -126,11 +129,13 @@ out: */ static int ecryptfs_process_nl_helo(struct sk_buff *skb) { + struct pid *pid; int rc; + pid = find_get_pid(NETLINK_CREDS(skb)->pid); rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK, - NETLINK_CREDS(skb)->uid, - NETLINK_CREDS(skb)->pid); + NETLINK_CREDS(skb)->uid, NULL, pid); + put_pid(pid); if (rc) printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc); return rc; @@ -147,10 +152,12 @@ static int ecryptfs_process_nl_helo(struct sk_buff *skb) */ static int ecryptfs_process_nl_quit(struct sk_buff *skb) { + struct pid *pid; int rc; - rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, - NETLINK_CREDS(skb)->pid); + pid = find_get_pid(NETLINK_CREDS(skb)->pid); + rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, NULL, pid); + put_pid(pid); if (rc) printk(KERN_WARNING "Error processing QUIT message; rc = [%d]\n", rc); @@ -176,20 +183,20 @@ static void ecryptfs_receive_nl_message(struct sk_buff *skb) goto free; } switch (nlh->nlmsg_type) { - case ECRYPTFS_NLMSG_RESPONSE: + case ECRYPTFS_MSG_RESPONSE: if (ecryptfs_process_nl_response(skb)) { ecryptfs_printk(KERN_WARNING, "Failed to " "deliver netlink response to " "requesting operation\n"); } break; - case ECRYPTFS_NLMSG_HELO: + case ECRYPTFS_MSG_HELO: if (ecryptfs_process_nl_helo(skb)) { ecryptfs_printk(KERN_WARNING, "Failed to " "fulfill HELO request\n"); } break; - case ECRYPTFS_NLMSG_QUIT: + case ECRYPTFS_MSG_QUIT: if (ecryptfs_process_nl_quit(skb)) { ecryptfs_printk(KERN_WARNING, "Failed to " "fulfill QUIT request\n"); diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 0c4928623bb..ebf55150be5 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -55,7 +55,7 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, set_fs(fs_save); if (octets_written < 0) { printk(KERN_ERR "%s: octets_written = [%td]; " - "expected [%td]\n", __FUNCTION__, octets_written, size); + "expected [%td]\n", __func__, octets_written, size); rc = -EINVAL; } mutex_unlock(&inode_info->lower_file_mutex); @@ -153,7 +153,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, rc = PTR_ERR(ecryptfs_page); printk(KERN_ERR "%s: Error getting page at " "index [%ld] from eCryptfs inode " - "mapping; rc = [%d]\n", __FUNCTION__, + "mapping; rc = [%d]\n", __func__, ecryptfs_page_idx, rc); goto out; } @@ -165,7 +165,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, if (rc) { printk(KERN_ERR "%s: Error decrypting " "page; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); ClearPageUptodate(ecryptfs_page); page_cache_release(ecryptfs_page); goto out; @@ -202,7 +202,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, page_cache_release(ecryptfs_page); if (rc) { printk(KERN_ERR "%s: Error encrypting " - "page; rc = [%d]\n", __FUNCTION__, rc); + "page; rc = [%d]\n", __func__, rc); goto out; } pos += num_bytes; @@ -254,7 +254,7 @@ int ecryptfs_read_lower(char *data, loff_t offset, size_t size, set_fs(fs_save); if (octets_read < 0) { printk(KERN_ERR "%s: octets_read = [%td]; " - "expected [%td]\n", __FUNCTION__, octets_read, size); + "expected [%td]\n", __func__, octets_read, size); rc = -EINVAL; } mutex_unlock(&inode_info->lower_file_mutex); @@ -327,7 +327,7 @@ int ecryptfs_read(char *data, loff_t offset, size_t size, printk(KERN_ERR "%s: Attempt to read data past the end of the " "file; offset = [%lld]; size = [%td]; " "ecryptfs_file_size = [%lld]\n", - __FUNCTION__, offset, size, ecryptfs_file_size); + __func__, offset, size, ecryptfs_file_size); goto out; } pos = offset; @@ -345,14 +345,14 @@ int ecryptfs_read(char *data, loff_t offset, size_t size, rc = PTR_ERR(ecryptfs_page); printk(KERN_ERR "%s: Error getting page at " "index [%ld] from eCryptfs inode " - "mapping; rc = [%d]\n", __FUNCTION__, + "mapping; rc = [%d]\n", __func__, ecryptfs_page_idx, rc); goto out; } rc = ecryptfs_decrypt_page(ecryptfs_page); if (rc) { printk(KERN_ERR "%s: Error decrypting " - "page; rc = [%d]\n", __FUNCTION__, rc); + "page; rc = [%d]\n", __func__, rc); ClearPageUptodate(ecryptfs_page); page_cache_release(ecryptfs_page); goto out; diff --git a/fs/eventfd.c b/fs/eventfd.c index a9f130cd50a..343942deeec 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -200,10 +200,8 @@ struct file *eventfd_fget(int fd) asmlinkage long sys_eventfd(unsigned int count) { - int error, fd; + int fd; struct eventfd_ctx *ctx; - struct file *file; - struct inode *inode; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -216,12 +214,9 @@ asmlinkage long sys_eventfd(unsigned int count) * When we call this, the initialization must be complete, since * anon_inode_getfd() will install the fd. */ - error = anon_inode_getfd(&fd, &inode, &file, "[eventfd]", - &eventfd_fops, ctx); - if (!error) - return fd; - - kfree(ctx); - return error; + fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx); + if (fd < 0) + kfree(ctx); + return fd; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index a415f42d32c..990c01d2d66 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -257,25 +257,6 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1, (p1->file < p2->file ? -1 : p1->fd - p2->fd)); } -/* Special initialization for the RB tree node to detect linkage */ -static inline void ep_rb_initnode(struct rb_node *n) -{ - rb_set_parent(n, n); -} - -/* Removes a node from the RB tree and marks it for a fast is-linked check */ -static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r) -{ - rb_erase(n, r); - rb_set_parent(n, n); -} - -/* Fast check to verify that the item is linked to the main RB tree */ -static inline int ep_rb_linked(struct rb_node *n) -{ - return rb_parent(n) != n; -} - /* Tells us if the item is currently linked */ static inline int ep_is_linked(struct list_head *p) { @@ -283,13 +264,13 @@ static inline int ep_is_linked(struct list_head *p) } /* Get the "struct epitem" from a wait queue pointer */ -static inline struct epitem * ep_item_from_wait(wait_queue_t *p) +static inline struct epitem *ep_item_from_wait(wait_queue_t *p) { return container_of(p, struct eppoll_entry, wait)->base; } /* Get the "struct epitem" from an epoll queue wrapper */ -static inline struct epitem * ep_item_from_epqueue(poll_table *p) +static inline struct epitem *ep_item_from_epqueue(poll_table *p) { return container_of(p, struct ep_pqueue, pt)->epi; } @@ -411,8 +392,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) list_del_init(&epi->fllink); spin_unlock(&file->f_ep_lock); - if (ep_rb_linked(&epi->rbn)) - ep_rb_erase(&epi->rbn, &ep->rbr); + rb_erase(&epi->rbn, &ep->rbr); spin_lock_irqsave(&ep->lock, flags); if (ep_is_linked(&epi->rdllink)) @@ -728,7 +708,6 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, goto error_return; /* Item initialization follow here ... */ - ep_rb_initnode(&epi->rbn); INIT_LIST_HEAD(&epi->rdllink); INIT_LIST_HEAD(&epi->fllink); INIT_LIST_HEAD(&epi->pwqlist); @@ -1071,8 +1050,6 @@ asmlinkage long sys_epoll_create(int size) { int error, fd = -1; struct eventpoll *ep; - struct inode *inode; - struct file *file; DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", current, size)); @@ -1082,29 +1059,24 @@ asmlinkage long sys_epoll_create(int size) * structure ( "struct eventpoll" ). */ error = -EINVAL; - if (size <= 0 || (error = ep_alloc(&ep)) != 0) + if (size <= 0 || (error = ep_alloc(&ep)) < 0) { + fd = error; goto error_return; + } /* * Creates all the items needed to setup an eventpoll file. That is, - * a file structure, and inode and a free file descriptor. + * a file structure and a free file descriptor. */ - error = anon_inode_getfd(&fd, &inode, &file, "[eventpoll]", - &eventpoll_fops, ep); - if (error) - goto error_free; + fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep); + if (fd < 0) + ep_free(ep); +error_return: DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", current, size, fd)); return fd; - -error_free: - ep_free(ep); -error_return: - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", - current, size, error)); - return error; } /* @@ -1262,7 +1234,7 @@ error_return: return error; } -#ifdef TIF_RESTORE_SIGMASK +#ifdef HAVE_SET_RESTORE_SIGMASK /* * Implement the event wait interface for the eventpoll file. It is the kernel @@ -1300,7 +1272,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, if (error == -EINTR) { memcpy(¤t->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } else sigprocmask(SIG_SETMASK, &sigsaved, NULL); } @@ -1308,7 +1280,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, return error; } -#endif /* #ifdef TIF_RESTORE_SIGMASK */ +#endif /* HAVE_SET_RESTORE_SIGMASK */ static int __init eventpoll_init(void) { @@ -1330,4 +1302,3 @@ static int __init eventpoll_init(void) return 0; } fs_initcall(eventpoll_init); - diff --git a/fs/exec.c b/fs/exec.c index b152029f18f..1f8a24aa1f8 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -24,6 +24,7 @@ #include <linux/slab.h> #include <linux/file.h> +#include <linux/fdtable.h> #include <linux/mman.h> #include <linux/a.out.h> #include <linux/stat.h> @@ -735,6 +736,7 @@ static int exec_mmap(struct mm_struct *mm) tsk->active_mm = mm; activate_mm(active_mm, mm); task_unlock(tsk); + mm_update_next_owner(old_mm); arch_pick_mmap_layout(mm); if (old_mm) { up_read(&old_mm->mmap_sem); @@ -765,9 +767,7 @@ static int de_thread(struct task_struct *tsk) /* * Kill all other threads in the thread group. - * We must hold tasklist_lock to call zap_other_threads. */ - read_lock(&tasklist_lock); spin_lock_irq(lock); if (signal_group_exit(sig)) { /* @@ -775,21 +775,10 @@ static int de_thread(struct task_struct *tsk) * return so that the signal is processed. */ spin_unlock_irq(lock); - read_unlock(&tasklist_lock); return -EAGAIN; } - - /* - * child_reaper ignores SIGKILL, change it now. - * Reparenting needs write_lock on tasklist_lock, - * so it is safe to do it under read_lock. - */ - if (unlikely(tsk->group_leader == task_child_reaper(tsk))) - task_active_pid_ns(tsk)->child_reaper = tsk; - sig->group_exit_task = tsk; zap_other_threads(tsk); - read_unlock(&tasklist_lock); /* Account for the thread group leader hanging around: */ count = thread_group_leader(tsk) ? 1 : 2; @@ -810,7 +799,7 @@ static int de_thread(struct task_struct *tsk) if (!thread_group_leader(tsk)) { leader = tsk->group_leader; - sig->notify_count = -1; + sig->notify_count = -1; /* for exit_notify() */ for (;;) { write_lock_irq(&tasklist_lock); if (likely(leader->exit_state)) @@ -820,6 +809,8 @@ static int de_thread(struct task_struct *tsk) schedule(); } + if (unlikely(task_child_reaper(tsk) == leader)) + task_active_pid_ns(tsk)->child_reaper = tsk; /* * The only record we have of the real-time age of a * process, regardless of execs it's done, is start_time. @@ -963,6 +954,8 @@ int flush_old_exec(struct linux_binprm * bprm) if (retval) goto out; + set_mm_exe_file(bprm->mm, bprm->file); + /* * Release all of the old mmap stuff */ @@ -1268,7 +1261,6 @@ int do_execve(char * filename, { struct linux_binprm *bprm; struct file *file; - unsigned long env_p; struct files_struct *displaced; int retval; @@ -1321,11 +1313,9 @@ int do_execve(char * filename, if (retval < 0) goto out; - env_p = bprm->p; retval = copy_strings(bprm->argc, argv, bprm); if (retval < 0) goto out; - bprm->argv_len = env_p - bprm->p; retval = search_binary_handler(bprm,regs); if (retval >= 0) { diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 109ab5e44ec..cc91227d3bb 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -150,12 +150,12 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir) if (IS_ERR(ppd)) { err = PTR_ERR(ppd); dprintk("%s: get_parent of %ld failed, err %d\n", - __FUNCTION__, pd->d_inode->i_ino, err); + __func__, pd->d_inode->i_ino, err); dput(pd); break; } - dprintk("%s: find name of %lu in %lu\n", __FUNCTION__, + dprintk("%s: find name of %lu in %lu\n", __func__, pd->d_inode->i_ino, ppd->d_inode->i_ino); err = exportfs_get_name(mnt, ppd, nbuf, pd); if (err) { @@ -168,14 +168,14 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir) continue; break; } - dprintk("%s: found name: %s\n", __FUNCTION__, nbuf); + dprintk("%s: found name: %s\n", __func__, nbuf); mutex_lock(&ppd->d_inode->i_mutex); npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); mutex_unlock(&ppd->d_inode->i_mutex); if (IS_ERR(npd)) { err = PTR_ERR(npd); dprintk("%s: lookup failed: %d\n", - __FUNCTION__, err); + __func__, err); dput(ppd); dput(pd); break; @@ -188,7 +188,7 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir) if (npd == pd) noprogress = 0; else - printk("%s: npd != pd\n", __FUNCTION__); + printk("%s: npd != pd\n", __func__); dput(npd); dput(ppd); if (IS_ROOT(pd)) { diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index cc47b76091b..6ae4ecf3ce4 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1261,10 +1261,11 @@ static int ext3_ordered_write_end(struct file *file, new_i_size = pos + copied; if (new_i_size > EXT3_I(inode)->i_disksize) EXT3_I(inode)->i_disksize = new_i_size; - copied = ext3_generic_write_end(file, mapping, pos, len, copied, + ret2 = ext3_generic_write_end(file, mapping, pos, len, copied, page, fsdata); - if (copied < 0) - ret = copied; + copied = ret2; + if (ret2 < 0) + ret = ret2; } ret2 = ext3_journal_stop(handle); if (!ret) @@ -1289,10 +1290,11 @@ static int ext3_writeback_write_end(struct file *file, if (new_i_size > EXT3_I(inode)->i_disksize) EXT3_I(inode)->i_disksize = new_i_size; - copied = ext3_generic_write_end(file, mapping, pos, len, copied, + ret2 = ext3_generic_write_end(file, mapping, pos, len, copied, page, fsdata); - if (copied < 0) - ret = copied; + copied = ret2; + if (ret2 < 0) + ret = ret2; ret2 = ext3_journal_stop(handle); if (!ret) diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index d4a4f0e9ff6..175414ac221 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -1000,6 +1000,11 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, i.value = NULL; error = ext3_xattr_block_set(handle, inode, &i, &bs); } else if (error == -ENOSPC) { + if (EXT3_I(inode)->i_file_acl && !bs.s.base) { + error = ext3_xattr_block_find(inode, &i, &bs); + if (error) + goto cleanup; + } error = ext3_xattr_block_set(handle, inode, &i, &bs); if (error) goto cleanup; diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index a8bae8cd1d5..3c8dab880d9 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -9,8 +9,8 @@ #include <linux/slab.h> #include <linux/capability.h> #include <linux/fs.h> -#include <linux/ext4_jbd2.h> -#include <linux/ext4_fs.h> +#include "ext4_jbd2.h" +#include "ext4.h" #include "xattr.h" #include "acl.h" @@ -37,7 +37,7 @@ ext4_acl_from_disk(const void *value, size_t size) return ERR_PTR(-EINVAL); if (count == 0) return NULL; - acl = posix_acl_alloc(count, GFP_KERNEL); + acl = posix_acl_alloc(count, GFP_NOFS); if (!acl) return ERR_PTR(-ENOMEM); for (n=0; n < count; n++) { @@ -91,7 +91,7 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size) *size = ext4_acl_size(acl->a_count); ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count * - sizeof(ext4_acl_entry), GFP_KERNEL); + sizeof(ext4_acl_entry), GFP_NOFS); if (!ext_acl) return ERR_PTR(-ENOMEM); ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); @@ -187,7 +187,7 @@ ext4_get_acl(struct inode *inode, int type) } retval = ext4_xattr_get(inode, name_index, "", NULL, 0); if (retval > 0) { - value = kmalloc(retval, GFP_KERNEL); + value = kmalloc(retval, GFP_NOFS); if (!value) return ERR_PTR(-ENOMEM); retval = ext4_xattr_get(inode, name_index, "", value, retval); @@ -335,7 +335,7 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) if (error) goto cleanup; } - clone = posix_acl_clone(acl, GFP_KERNEL); + clone = posix_acl_clone(acl, GFP_NOFS); error = -ENOMEM; if (!clone) goto cleanup; diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 0737e05ba3d..30494c5da84 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -15,12 +15,12 @@ #include <linux/capability.h> #include <linux/fs.h> #include <linux/jbd2.h> -#include <linux/ext4_fs.h> -#include <linux/ext4_jbd2.h> #include <linux/quotaops.h> #include <linux/buffer_head.h> - +#include "ext4.h" +#include "ext4_jbd2.h" #include "group.h" + /* * balloc.c contains the blocks allocation and deallocation routines */ @@ -48,7 +48,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, ext4_group_t block_group, struct ext4_group_desc *gdp) { - unsigned long start; int bit, bit_max; unsigned free_blocks, group_blocks; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -59,7 +58,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, /* If checksum is bad mark all blocks used to prevent allocation * essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "Checksum bad for group %lu\n", block_group); gdp->bg_free_blocks_count = 0; gdp->bg_free_inodes_count = 0; @@ -106,11 +105,12 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, free_blocks = group_blocks - bit_max; if (bh) { + ext4_fsblk_t start; + for (bit = 0; bit < bit_max; bit++) ext4_set_bit(bit, bh->b_data); - start = block_group * EXT4_BLOCKS_PER_GROUP(sb) + - le32_to_cpu(sbi->s_es->s_first_data_block); + start = ext4_group_first_block_no(sb, block_group); /* Set bits for block and inode bitmaps, and inode table */ ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data); @@ -235,7 +235,7 @@ static int ext4_valid_block_bitmap(struct super_block *sb, return 1; err_out: - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "Invalid block bitmap - " "block_group = %d, block = %llu", block_group, bitmap_blk); @@ -264,7 +264,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group) bitmap_blk = ext4_block_bitmap(sb, desc); bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "Cannot read block bitmap - " "block_group = %d, block_bitmap = %llu", (int)block_group, (unsigned long long)bitmap_blk); @@ -281,17 +281,17 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group) } if (bh_submit_read(bh) < 0) { put_bh(bh); - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "Cannot read block bitmap - " "block_group = %d, block_bitmap = %llu", (int)block_group, (unsigned long long)bitmap_blk); return NULL; } - if (!ext4_valid_block_bitmap(sb, desc, block_group, bh)) { - put_bh(bh); - return NULL; - } - + ext4_valid_block_bitmap(sb, desc, block_group, bh); + /* + * file system mounted not to panic on error, + * continue with corrupt bitmap + */ return bh; } /* @@ -360,7 +360,7 @@ restart: BUG(); } #define rsv_window_dump(root, verbose) \ - __rsv_window_dump((root), (verbose), __FUNCTION__) + __rsv_window_dump((root), (verbose), __func__) #else #define rsv_window_dump(root, verbose) do {} while (0) #endif @@ -740,7 +740,7 @@ do_more: if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i, bitmap_bh->b_data)) { jbd_unlock_bh_state(bitmap_bh); - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "bit already cleared for block %llu", (ext4_fsblk_t)(block + i)); jbd_lock_bh_state(bitmap_bh); @@ -752,9 +752,7 @@ do_more: jbd_unlock_bh_state(bitmap_bh); spin_lock(sb_bgl_lock(sbi, block_group)); - desc->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) + - group_freed); + le16_add_cpu(&desc->bg_free_blocks_count, group_freed); desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_add(&sbi->s_freeblocks_counter, count); @@ -1772,7 +1770,12 @@ allocated: "Allocating block in system zone - " "blocks from %llu, length %lu", ret_block, num); - goto out; + /* + * claim_block marked the blocks we allocated + * as in use. So we may want to selectively + * mark some of the blocks as free + */ + goto retry_alloc; } performed_allocation = 1; @@ -1798,7 +1801,7 @@ allocated: if (ext4_test_bit(grp_alloc_blk+i, bh2jh(bitmap_bh)->b_committed_data)) { printk("%s: block was unexpectedly set in " - "b_committed_data\n", __FUNCTION__); + "b_committed_data\n", __func__); } } } @@ -1823,8 +1826,7 @@ allocated: spin_lock(sb_bgl_lock(sbi, group_no)); if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num); + le16_add_cpu(&gdp->bg_free_blocks_count, -num); gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); spin_unlock(sb_bgl_lock(sbi, group_no)); percpu_counter_sub(&sbi->s_freeblocks_counter, num); diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index 420554f8f79..d37ea675045 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -9,7 +9,7 @@ #include <linux/buffer_head.h> #include <linux/jbd2.h> -#include <linux/ext4_fs.h> +#include "ext4.h" #ifdef EXT4FS_DEBUG diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 2c23bade9aa..2bf0331ea19 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -23,10 +23,10 @@ #include <linux/fs.h> #include <linux/jbd2.h> -#include <linux/ext4_fs.h> #include <linux/buffer_head.h> #include <linux/slab.h> #include <linux/rbtree.h> +#include "ext4.h" static unsigned char ext4_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK @@ -42,7 +42,7 @@ const struct file_operations ext4_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = ext4_readdir, /* we take BKL. needed?*/ - .ioctl = ext4_ioctl, /* BKL held */ + .unlocked_ioctl = ext4_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, #endif diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h new file mode 100644 index 00000000000..8158083f7ac --- /dev/null +++ b/fs/ext4/ext4.h @@ -0,0 +1,1205 @@ +/* + * ext4.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _EXT4_H +#define _EXT4_H + +#include <linux/types.h> +#include <linux/blkdev.h> +#include <linux/magic.h> +#include "ext4_i.h" + +/* + * The second extended filesystem constants/structures + */ + +/* + * Define EXT4FS_DEBUG to produce debug messages + */ +#undef EXT4FS_DEBUG + +/* + * Define EXT4_RESERVATION to reserve data blocks for expanding files + */ +#define EXT4_DEFAULT_RESERVE_BLOCKS 8 +/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */ +#define EXT4_MAX_RESERVE_BLOCKS 1027 +#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0 + +/* + * Debug code + */ +#ifdef EXT4FS_DEBUG +#define ext4_debug(f, a...) \ + do { \ + printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ + __FILE__, __LINE__, __FUNCTION__); \ + printk (KERN_DEBUG f, ## a); \ + } while (0) +#else +#define ext4_debug(f, a...) do {} while (0) +#endif + +#define EXT4_MULTIBLOCK_ALLOCATOR 1 + +/* prefer goal again. length */ +#define EXT4_MB_HINT_MERGE 1 +/* blocks already reserved */ +#define EXT4_MB_HINT_RESERVED 2 +/* metadata is being allocated */ +#define EXT4_MB_HINT_METADATA 4 +/* first blocks in the file */ +#define EXT4_MB_HINT_FIRST 8 +/* search for the best chunk */ +#define EXT4_MB_HINT_BEST 16 +/* data is being allocated */ +#define EXT4_MB_HINT_DATA 32 +/* don't preallocate (for tails) */ +#define EXT4_MB_HINT_NOPREALLOC 64 +/* allocate for locality group */ +#define EXT4_MB_HINT_GROUP_ALLOC 128 +/* allocate goal blocks or none */ +#define EXT4_MB_HINT_GOAL_ONLY 256 +/* goal is meaningful */ +#define EXT4_MB_HINT_TRY_GOAL 512 + +struct ext4_allocation_request { + /* target inode for block we're allocating */ + struct inode *inode; + /* logical block in target inode */ + ext4_lblk_t logical; + /* phys. target (a hint) */ + ext4_fsblk_t goal; + /* the closest logical allocated block to the left */ + ext4_lblk_t lleft; + /* phys. block for ^^^ */ + ext4_fsblk_t pleft; + /* the closest logical allocated block to the right */ + ext4_lblk_t lright; + /* phys. block for ^^^ */ + ext4_fsblk_t pright; + /* how many blocks we want to allocate */ + unsigned long len; + /* flags. see above EXT4_MB_HINT_* */ + unsigned long flags; +}; + +/* + * Special inodes numbers + */ +#define EXT4_BAD_INO 1 /* Bad blocks inode */ +#define EXT4_ROOT_INO 2 /* Root inode */ +#define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */ +#define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */ +#define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */ +#define EXT4_JOURNAL_INO 8 /* Journal inode */ + +/* First non-reserved inode for old ext4 filesystems */ +#define EXT4_GOOD_OLD_FIRST_INO 11 + +/* + * Maximal count of links to a file + */ +#define EXT4_LINK_MAX 65000 + +/* + * Macro-instructions used to manage several block sizes + */ +#define EXT4_MIN_BLOCK_SIZE 1024 +#define EXT4_MAX_BLOCK_SIZE 65536 +#define EXT4_MIN_BLOCK_LOG_SIZE 10 +#ifdef __KERNEL__ +# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) +#else +# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) +#endif +#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32)) +#ifdef __KERNEL__ +# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) +#else +# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) +#endif +#ifdef __KERNEL__ +#define EXT4_ADDR_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_addr_per_block_bits) +#define EXT4_INODE_SIZE(s) (EXT4_SB(s)->s_inode_size) +#define EXT4_FIRST_INO(s) (EXT4_SB(s)->s_first_ino) +#else +#define EXT4_INODE_SIZE(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \ + EXT4_GOOD_OLD_INODE_SIZE : \ + (s)->s_inode_size) +#define EXT4_FIRST_INO(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \ + EXT4_GOOD_OLD_FIRST_INO : \ + (s)->s_first_ino) +#endif +#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits))) + +/* + * Structure of a blocks group descriptor + */ +struct ext4_group_desc +{ + __le32 bg_block_bitmap_lo; /* Blocks bitmap block */ + __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */ + __le32 bg_inode_table_lo; /* Inodes table block */ + __le16 bg_free_blocks_count; /* Free blocks count */ + __le16 bg_free_inodes_count; /* Free inodes count */ + __le16 bg_used_dirs_count; /* Directories count */ + __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */ + __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ + __le16 bg_itable_unused; /* Unused inodes count */ + __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ + __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ + __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */ + __le32 bg_inode_table_hi; /* Inodes table block MSB */ + __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */ + __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */ + __le16 bg_used_dirs_count_hi; /* Directories count MSB */ + __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ + __u32 bg_reserved2[3]; +}; + +#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ +#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ +#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ + +#ifdef __KERNEL__ +#include "ext4_sb.h" +#endif +/* + * Macro-instructions used to manage group descriptors + */ +#define EXT4_MIN_DESC_SIZE 32 +#define EXT4_MIN_DESC_SIZE_64BIT 64 +#define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE +#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size) +#ifdef __KERNEL__ +# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group) +# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block) +# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group) +# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits) +#else +# define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) +# define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s)) +# define EXT4_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) +#endif + +/* + * Constants relative to the data blocks + */ +#define EXT4_NDIR_BLOCKS 12 +#define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS +#define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1) +#define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1) +#define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1) + +/* + * Inode flags + */ +#define EXT4_SECRM_FL 0x00000001 /* Secure deletion */ +#define EXT4_UNRM_FL 0x00000002 /* Undelete */ +#define EXT4_COMPR_FL 0x00000004 /* Compress file */ +#define EXT4_SYNC_FL 0x00000008 /* Synchronous updates */ +#define EXT4_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define EXT4_APPEND_FL 0x00000020 /* writes to file may only append */ +#define EXT4_NODUMP_FL 0x00000040 /* do not dump file */ +#define EXT4_NOATIME_FL 0x00000080 /* do not update atime */ +/* Reserved for compression usage... */ +#define EXT4_DIRTY_FL 0x00000100 +#define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ +#define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */ +#define EXT4_ECOMPR_FL 0x00000800 /* Compression error */ +/* End compression flags --- maybe not all used */ +#define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */ +#define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */ +#define EXT4_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ +#define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */ +#define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ +#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ +#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ +#define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */ +#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ + +#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ +#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ + +/* + * Inode dynamic state flags + */ +#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */ +#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ +#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ +#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ + +/* Used to pass group descriptor data when online resize is done */ +struct ext4_new_group_input { + __u32 group; /* Group number for this data */ + __u64 block_bitmap; /* Absolute block number of block bitmap */ + __u64 inode_bitmap; /* Absolute block number of inode bitmap */ + __u64 inode_table; /* Absolute block number of inode table start */ + __u32 blocks_count; /* Total number of blocks in this group */ + __u16 reserved_blocks; /* Number of reserved blocks in this group */ + __u16 unused; +}; + +/* The struct ext4_new_group_input in kernel space, with free_blocks_count */ +struct ext4_new_group_data { + __u32 group; + __u64 block_bitmap; + __u64 inode_bitmap; + __u64 inode_table; + __u32 blocks_count; + __u16 reserved_blocks; + __u16 unused; + __u32 free_blocks_count; +}; + +/* + * Following is used by preallocation code to tell get_blocks() that we + * want uninitialzed extents. + */ +#define EXT4_CREATE_UNINITIALIZED_EXT 2 + +/* + * ioctl commands + */ +#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS +#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS +#define EXT4_IOC_GETVERSION _IOR('f', 3, long) +#define EXT4_IOC_SETVERSION _IOW('f', 4, long) +#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) +#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) +#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION +#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION +#ifdef CONFIG_JBD2_DEBUG +#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) +#endif +#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) +#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) +#define EXT4_IOC_MIGRATE _IO('f', 7) + +/* + * ioctl commands in 32 bit emulation + */ +#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#define EXT4_IOC32_GETVERSION _IOR('f', 3, int) +#define EXT4_IOC32_SETVERSION _IOW('f', 4, int) +#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) +#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) +#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) +#ifdef CONFIG_JBD2_DEBUG +#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) +#endif +#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION +#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION + + +/* + * Mount options + */ +struct ext4_mount_options { + unsigned long s_mount_opt; + uid_t s_resuid; + gid_t s_resgid; + unsigned long s_commit_interval; +#ifdef CONFIG_QUOTA + int s_jquota_fmt; + char *s_qf_names[MAXQUOTAS]; +#endif +}; + +/* + * Structure of an inode on the disk + */ +struct ext4_inode { + __le16 i_mode; /* File mode */ + __le16 i_uid; /* Low 16 bits of Owner Uid */ + __le32 i_size_lo; /* Size in bytes */ + __le32 i_atime; /* Access time */ + __le32 i_ctime; /* Inode Change time */ + __le32 i_mtime; /* Modification time */ + __le32 i_dtime; /* Deletion Time */ + __le16 i_gid; /* Low 16 bits of Group Id */ + __le16 i_links_count; /* Links count */ + __le32 i_blocks_lo; /* Blocks count */ + __le32 i_flags; /* File flags */ + union { + struct { + __le32 l_i_version; + } linux1; + struct { + __u32 h_i_translator; + } hurd1; + struct { + __u32 m_i_reserved1; + } masix1; + } osd1; /* OS dependent 1 */ + __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */ + __le32 i_generation; /* File version (for NFS) */ + __le32 i_file_acl_lo; /* File ACL */ + __le32 i_size_high; + __le32 i_obso_faddr; /* Obsoleted fragment address */ + union { + struct { + __le16 l_i_blocks_high; /* were l_i_reserved1 */ + __le16 l_i_file_acl_high; + __le16 l_i_uid_high; /* these 2 fields */ + __le16 l_i_gid_high; /* were reserved2[0] */ + __u32 l_i_reserved2; + } linux2; + struct { + __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ + __u16 h_i_mode_high; + __u16 h_i_uid_high; + __u16 h_i_gid_high; + __u32 h_i_author; + } hurd2; + struct { + __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ + __le16 m_i_file_acl_high; + __u32 m_i_reserved2[2]; + } masix2; + } osd2; /* OS dependent 2 */ + __le16 i_extra_isize; + __le16 i_pad1; + __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ + __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ + __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ + __le32 i_crtime; /* File Creation time */ + __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */ + __le32 i_version_hi; /* high 32 bits for 64-bit version */ +}; + + +#define EXT4_EPOCH_BITS 2 +#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) +#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS) + +/* + * Extended fields will fit into an inode if the filesystem was formatted + * with large inodes (-I 256 or larger) and there are not currently any EAs + * consuming all of the available space. For new inodes we always reserve + * enough space for the kernel's known extended fields, but for inodes + * created with an old kernel this might not have been the case. None of + * the extended inode fields is critical for correct filesystem operation. + * This macro checks if a certain field fits in the inode. Note that + * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize + */ +#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \ + ((offsetof(typeof(*ext4_inode), field) + \ + sizeof((ext4_inode)->field)) \ + <= (EXT4_GOOD_OLD_INODE_SIZE + \ + (einode)->i_extra_isize)) \ + +static inline __le32 ext4_encode_extra_time(struct timespec *time) +{ + return cpu_to_le32((sizeof(time->tv_sec) > 4 ? + time->tv_sec >> 32 : 0) | + ((time->tv_nsec << 2) & EXT4_NSEC_MASK)); +} + +static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) +{ + if (sizeof(time->tv_sec) > 4) + time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) + << 32; + time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2; +} + +#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ +do { \ + (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ + (raw_inode)->xtime ## _extra = \ + ext4_encode_extra_time(&(inode)->xtime); \ +} while (0) + +#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \ +do { \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ + (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ + (raw_inode)->xtime ## _extra = \ + ext4_encode_extra_time(&(einode)->xtime); \ +} while (0) + +#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \ +do { \ + (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ + ext4_decode_extra_time(&(inode)->xtime, \ + raw_inode->xtime ## _extra); \ +} while (0) + +#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \ +do { \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ + (einode)->xtime.tv_sec = \ + (signed)le32_to_cpu((raw_inode)->xtime); \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ + ext4_decode_extra_time(&(einode)->xtime, \ + raw_inode->xtime ## _extra); \ +} while (0) + +#define i_disk_version osd1.linux1.l_i_version + +#if defined(__KERNEL__) || defined(__linux__) +#define i_reserved1 osd1.linux1.l_i_reserved1 +#define i_file_acl_high osd2.linux2.l_i_file_acl_high +#define i_blocks_high osd2.linux2.l_i_blocks_high +#define i_uid_low i_uid +#define i_gid_low i_gid +#define i_uid_high osd2.linux2.l_i_uid_high +#define i_gid_high osd2.linux2.l_i_gid_high +#define i_reserved2 osd2.linux2.l_i_reserved2 + +#elif defined(__GNU__) + +#define i_translator osd1.hurd1.h_i_translator +#define i_uid_high osd2.hurd2.h_i_uid_high +#define i_gid_high osd2.hurd2.h_i_gid_high +#define i_author osd2.hurd2.h_i_author + +#elif defined(__masix__) + +#define i_reserved1 osd1.masix1.m_i_reserved1 +#define i_file_acl_high osd2.masix2.m_i_file_acl_high +#define i_reserved2 osd2.masix2.m_i_reserved2 + +#endif /* defined(__KERNEL__) || defined(__linux__) */ + +/* + * File system states + */ +#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ +#define EXT4_ERROR_FS 0x0002 /* Errors detected */ +#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */ + +/* + * Misc. filesystem flags + */ +#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */ +#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */ +#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */ + +/* + * Mount flags + */ +#define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */ +#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ +#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ +#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ +#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ +#define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ +#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ +#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ +#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ +#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */ +#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ +#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ +#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ +#define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ +#define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */ +#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ +#define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ +#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ +#define EXT4_MOUNT_RESERVATION 0x10000 /* Preallocation */ +#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ +#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */ +#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ +#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ +#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ +#define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */ +#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ +#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ +#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ +#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */ +/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ +#ifndef _LINUX_EXT2_FS_H +#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt +#define set_opt(o, opt) o |= EXT4_MOUNT_##opt +#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \ + EXT4_MOUNT_##opt) +#else +#define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD +#define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT +#define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS +#endif + +#define ext4_set_bit ext2_set_bit +#define ext4_set_bit_atomic ext2_set_bit_atomic +#define ext4_clear_bit ext2_clear_bit +#define ext4_clear_bit_atomic ext2_clear_bit_atomic +#define ext4_test_bit ext2_test_bit +#define ext4_find_first_zero_bit ext2_find_first_zero_bit +#define ext4_find_next_zero_bit ext2_find_next_zero_bit +#define ext4_find_next_bit ext2_find_next_bit + +/* + * Maximal mount counts between two filesystem checks + */ +#define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ +#define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */ + +/* + * Behaviour when detecting errors + */ +#define EXT4_ERRORS_CONTINUE 1 /* Continue execution */ +#define EXT4_ERRORS_RO 2 /* Remount fs read-only */ +#define EXT4_ERRORS_PANIC 3 /* Panic */ +#define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE + +/* + * Structure of the super block + */ +struct ext4_super_block { +/*00*/ __le32 s_inodes_count; /* Inodes count */ + __le32 s_blocks_count_lo; /* Blocks count */ + __le32 s_r_blocks_count_lo; /* Reserved blocks count */ + __le32 s_free_blocks_count_lo; /* Free blocks count */ +/*10*/ __le32 s_free_inodes_count; /* Free inodes count */ + __le32 s_first_data_block; /* First Data Block */ + __le32 s_log_block_size; /* Block size */ + __le32 s_obso_log_frag_size; /* Obsoleted fragment size */ +/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */ + __le32 s_obso_frags_per_group; /* Obsoleted fragments per group */ + __le32 s_inodes_per_group; /* # Inodes per group */ + __le32 s_mtime; /* Mount time */ +/*30*/ __le32 s_wtime; /* Write time */ + __le16 s_mnt_count; /* Mount count */ + __le16 s_max_mnt_count; /* Maximal mount count */ + __le16 s_magic; /* Magic signature */ + __le16 s_state; /* File system state */ + __le16 s_errors; /* Behaviour when detecting errors */ + __le16 s_minor_rev_level; /* minor revision level */ +/*40*/ __le32 s_lastcheck; /* time of last check */ + __le32 s_checkinterval; /* max. time between checks */ + __le32 s_creator_os; /* OS */ + __le32 s_rev_level; /* Revision level */ +/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */ + __le16 s_def_resgid; /* Default gid for reserved blocks */ + /* + * These fields are for EXT4_DYNAMIC_REV superblocks only. + * + * Note: the difference between the compatible feature set and + * the incompatible feature set is that if there is a bit set + * in the incompatible feature set that the kernel doesn't + * know about, it should refuse to mount the filesystem. + * + * e2fsck's requirements are more strict; if it doesn't know + * about a feature in either the compatible or incompatible + * feature set, it must abort and not try to meddle with + * things it doesn't understand... + */ + __le32 s_first_ino; /* First non-reserved inode */ + __le16 s_inode_size; /* size of inode structure */ + __le16 s_block_group_nr; /* block group # of this superblock */ + __le32 s_feature_compat; /* compatible feature set */ +/*60*/ __le32 s_feature_incompat; /* incompatible feature set */ + __le32 s_feature_ro_compat; /* readonly-compatible feature set */ +/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ +/*78*/ char s_volume_name[16]; /* volume name */ +/*88*/ char s_last_mounted[64]; /* directory where last mounted */ +/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ + /* + * Performance hints. Directory preallocation should only + * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on. + */ + __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ + __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ + __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */ + /* + * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set. + */ +/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */ +/*E0*/ __le32 s_journal_inum; /* inode number of journal file */ + __le32 s_journal_dev; /* device number of journal file */ + __le32 s_last_orphan; /* start of list of inodes to delete */ + __le32 s_hash_seed[4]; /* HTREE hash seed */ + __u8 s_def_hash_version; /* Default hash version to use */ + __u8 s_reserved_char_pad; + __le16 s_desc_size; /* size of group descriptor */ +/*100*/ __le32 s_default_mount_opts; + __le32 s_first_meta_bg; /* First metablock block group */ + __le32 s_mkfs_time; /* When the filesystem was created */ + __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ + /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ +/*150*/ __le32 s_blocks_count_hi; /* Blocks count */ + __le32 s_r_blocks_count_hi; /* Reserved blocks count */ + __le32 s_free_blocks_count_hi; /* Free blocks count */ + __le16 s_min_extra_isize; /* All inodes have at least # bytes */ + __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ + __le32 s_flags; /* Miscellaneous flags */ + __le16 s_raid_stride; /* RAID stride */ + __le16 s_mmp_interval; /* # seconds to wait in MMP checking */ + __le64 s_mmp_block; /* Block for multi-mount protection */ + __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ + __u32 s_reserved[163]; /* Padding to the end of the block */ +}; + +#ifdef __KERNEL__ +static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} +static inline struct ext4_inode_info *EXT4_I(struct inode *inode) +{ + return container_of(inode, struct ext4_inode_info, vfs_inode); +} + +static inline struct timespec ext4_current_time(struct inode *inode) +{ + return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? + current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; +} + + +static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) +{ + return ino == EXT4_ROOT_INO || + ino == EXT4_JOURNAL_INO || + ino == EXT4_RESIZE_INO || + (ino >= EXT4_FIRST_INO(sb) && + ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); +} +#else +/* Assume that user mode programs are passing in an ext4fs superblock, not + * a kernel struct super_block. This will allow us to call the feature-test + * macros from user land. */ +#define EXT4_SB(sb) (sb) +#endif + +#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime + +/* + * Codes for operating systems + */ +#define EXT4_OS_LINUX 0 +#define EXT4_OS_HURD 1 +#define EXT4_OS_MASIX 2 +#define EXT4_OS_FREEBSD 3 +#define EXT4_OS_LITES 4 + +/* + * Revision levels + */ +#define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */ +#define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ + +#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV +#define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV + +#define EXT4_GOOD_OLD_INODE_SIZE 128 + +/* + * Feature set definitions + */ + +#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ + ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) +#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ + ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) +#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ + ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) +#define EXT4_SET_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) +#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask) +#define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask) +#define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask) +#define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask) +#define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask) + +#define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001 +#define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002 +#define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004 +#define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008 +#define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010 +#define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020 + +#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 +#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 +#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 +#define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008 +#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 +#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 +#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 + +#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 +#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 +#define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ +#define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ +#define EXT4_FEATURE_INCOMPAT_META_BG 0x0010 +#define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ +#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 +#define EXT4_FEATURE_INCOMPAT_MMP 0x0100 +#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 + +#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR +#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ + EXT4_FEATURE_INCOMPAT_RECOVER| \ + EXT4_FEATURE_INCOMPAT_META_BG| \ + EXT4_FEATURE_INCOMPAT_EXTENTS| \ + EXT4_FEATURE_INCOMPAT_64BIT| \ + EXT4_FEATURE_INCOMPAT_FLEX_BG) +#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ + EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ + EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ + EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ + EXT4_FEATURE_RO_COMPAT_HUGE_FILE) + +/* + * Default values for user and/or group using reserved blocks + */ +#define EXT4_DEF_RESUID 0 +#define EXT4_DEF_RESGID 0 + +/* + * Default mount options + */ +#define EXT4_DEFM_DEBUG 0x0001 +#define EXT4_DEFM_BSDGROUPS 0x0002 +#define EXT4_DEFM_XATTR_USER 0x0004 +#define EXT4_DEFM_ACL 0x0008 +#define EXT4_DEFM_UID16 0x0010 +#define EXT4_DEFM_JMODE 0x0060 +#define EXT4_DEFM_JMODE_DATA 0x0020 +#define EXT4_DEFM_JMODE_ORDERED 0x0040 +#define EXT4_DEFM_JMODE_WBACK 0x0060 + +/* + * Structure of a directory entry + */ +#define EXT4_NAME_LEN 255 + +struct ext4_dir_entry { + __le32 inode; /* Inode number */ + __le16 rec_len; /* Directory entry length */ + __le16 name_len; /* Name length */ + char name[EXT4_NAME_LEN]; /* File name */ +}; + +/* + * The new version of the directory entry. Since EXT4 structures are + * stored in intel byte order, and the name_len field could never be + * bigger than 255 chars, it's safe to reclaim the extra byte for the + * file_type field. + */ +struct ext4_dir_entry_2 { + __le32 inode; /* Inode number */ + __le16 rec_len; /* Directory entry length */ + __u8 name_len; /* Name length */ + __u8 file_type; + char name[EXT4_NAME_LEN]; /* File name */ +}; + +/* + * Ext4 directory file types. Only the low 3 bits are used. The + * other bits are reserved for now. + */ +#define EXT4_FT_UNKNOWN 0 +#define EXT4_FT_REG_FILE 1 +#define EXT4_FT_DIR 2 +#define EXT4_FT_CHRDEV 3 +#define EXT4_FT_BLKDEV 4 +#define EXT4_FT_FIFO 5 +#define EXT4_FT_SOCK 6 +#define EXT4_FT_SYMLINK 7 + +#define EXT4_FT_MAX 8 + +/* + * EXT4_DIR_PAD defines the directory entries boundaries + * + * NOTE: It must be a multiple of 4 + */ +#define EXT4_DIR_PAD 4 +#define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1) +#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \ + ~EXT4_DIR_ROUND) +#define EXT4_MAX_REC_LEN ((1<<16)-1) + +static inline unsigned ext4_rec_len_from_disk(__le16 dlen) +{ + unsigned len = le16_to_cpu(dlen); + + if (len == EXT4_MAX_REC_LEN) + return 1 << 16; + return len; +} + +static inline __le16 ext4_rec_len_to_disk(unsigned len) +{ + if (len == (1 << 16)) + return cpu_to_le16(EXT4_MAX_REC_LEN); + else if (len > (1 << 16)) + BUG(); + return cpu_to_le16(len); +} + +/* + * Hash Tree Directory indexing + * (c) Daniel Phillips, 2001 + */ + +#define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \ + EXT4_FEATURE_COMPAT_DIR_INDEX) && \ + (EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) +#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) +#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) + +/* Legal values for the dx_root hash_version field: */ + +#define DX_HASH_LEGACY 0 +#define DX_HASH_HALF_MD4 1 +#define DX_HASH_TEA 2 + +#ifdef __KERNEL__ + +/* hash info structure used by the directory hash */ +struct dx_hash_info +{ + u32 hash; + u32 minor_hash; + int hash_version; + u32 *seed; +}; + +#define EXT4_HTREE_EOF 0x7fffffff + +/* + * Control parameters used by ext4_htree_next_block + */ +#define HASH_NB_ALWAYS 1 + + +/* + * Describe an inode's exact location on disk and in memory + */ +struct ext4_iloc +{ + struct buffer_head *bh; + unsigned long offset; + ext4_group_t block_group; +}; + +static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc) +{ + return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset); +} + +/* + * This structure is stuffed into the struct file's private_data field + * for directories. It is where we put information so that we can do + * readdir operations in hash tree order. + */ +struct dir_private_info { + struct rb_root root; + struct rb_node *curr_node; + struct fname *extra_fname; + loff_t last_pos; + __u32 curr_hash; + __u32 curr_minor_hash; + __u32 next_hash; +}; + +/* calculate the first block number of the group */ +static inline ext4_fsblk_t +ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) +{ + return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); +} + +/* + * Special error return code only used by dx_probe() and its callers. + */ +#define ERR_BAD_DX_DIR -75000 + +void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, + unsigned long *blockgrpp, ext4_grpblk_t *offsetp); + +/* + * Function prototypes + */ + +/* + * Ok, these declarations are also in <linux/kernel.h> but none of the + * ext4 source programs needs to include it so they are duplicated here. + */ +# define NORET_TYPE /**/ +# define ATTRIB_NORET __attribute__((noreturn)) +# define NORET_AND noreturn, + +/* balloc.c */ +extern unsigned int ext4_block_group(struct super_block *sb, + ext4_fsblk_t blocknr); +extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, + ext4_fsblk_t blocknr); +extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group); +extern unsigned long ext4_bg_num_gdb(struct super_block *sb, + ext4_group_t group); +extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, int *errp); +extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, unsigned long *count, int *errp); +extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, unsigned long *count, int *errp); +extern void ext4_free_blocks (handle_t *handle, struct inode *inode, + ext4_fsblk_t block, unsigned long count, int metadata); +extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, + ext4_fsblk_t block, unsigned long count, + unsigned long *pdquot_freed_blocks); +extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); +extern void ext4_check_blocks_bitmap (struct super_block *); +extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, + ext4_group_t block_group, + struct buffer_head ** bh); +extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); +extern void ext4_init_block_alloc_info(struct inode *); +extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv); + +/* dir.c */ +extern int ext4_check_dir_entry(const char *, struct inode *, + struct ext4_dir_entry_2 *, + struct buffer_head *, unsigned long); +extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, + __u32 minor_hash, + struct ext4_dir_entry_2 *dirent); +extern void ext4_htree_free_dir_info(struct dir_private_info *p); + +/* fsync.c */ +extern int ext4_sync_file (struct file *, struct dentry *, int); + +/* hash.c */ +extern int ext4fs_dirhash(const char *name, int len, struct + dx_hash_info *hinfo); + +/* ialloc.c */ +extern struct inode * ext4_new_inode (handle_t *, struct inode *, int); +extern void ext4_free_inode (handle_t *, struct inode *); +extern struct inode * ext4_orphan_get (struct super_block *, unsigned long); +extern unsigned long ext4_count_free_inodes (struct super_block *); +extern unsigned long ext4_count_dirs (struct super_block *); +extern void ext4_check_inodes_bitmap (struct super_block *); +extern unsigned long ext4_count_free (struct buffer_head *, unsigned); + +/* mballoc.c */ +extern long ext4_mb_stats; +extern long ext4_mb_max_to_scan; +extern int ext4_mb_init(struct super_block *, int); +extern int ext4_mb_release(struct super_block *); +extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, + struct ext4_allocation_request *, int *); +extern int ext4_mb_reserve_blocks(struct super_block *, int); +extern void ext4_mb_discard_inode_preallocations(struct inode *); +extern int __init init_ext4_mballoc(void); +extern void exit_ext4_mballoc(void); +extern void ext4_mb_free_blocks(handle_t *, struct inode *, + unsigned long, unsigned long, int, unsigned long *); + + +/* inode.c */ +int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, + struct buffer_head *bh, ext4_fsblk_t blocknr); +struct buffer_head *ext4_getblk(handle_t *, struct inode *, + ext4_lblk_t, int, int *); +struct buffer_head *ext4_bread(handle_t *, struct inode *, + ext4_lblk_t, int, int *); +int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, + ext4_lblk_t iblock, unsigned long maxblocks, + struct buffer_head *bh_result, + int create, int extend_disksize); + +extern struct inode *ext4_iget(struct super_block *, unsigned long); +extern int ext4_write_inode (struct inode *, int); +extern int ext4_setattr (struct dentry *, struct iattr *); +extern void ext4_delete_inode (struct inode *); +extern int ext4_sync_inode (handle_t *, struct inode *); +extern void ext4_discard_reservation (struct inode *); +extern void ext4_dirty_inode(struct inode *); +extern int ext4_change_inode_journal_flag(struct inode *, int); +extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); +extern void ext4_truncate (struct inode *); +extern void ext4_set_inode_flags(struct inode *); +extern void ext4_get_inode_flags(struct ext4_inode_info *); +extern void ext4_set_aops(struct inode *inode); +extern int ext4_writepage_trans_blocks(struct inode *); +extern int ext4_block_truncate_page(handle_t *handle, struct page *page, + struct address_space *mapping, loff_t from); + +/* ioctl.c */ +extern long ext4_ioctl(struct file *, unsigned int, unsigned long); +extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); + +/* migrate.c */ +extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int, + unsigned long); +/* namei.c */ +extern int ext4_orphan_add(handle_t *, struct inode *); +extern int ext4_orphan_del(handle_t *, struct inode *); +extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, + __u32 start_minor_hash, __u32 *next_hash); + +/* resize.c */ +extern int ext4_group_add(struct super_block *sb, + struct ext4_new_group_data *input); +extern int ext4_group_extend(struct super_block *sb, + struct ext4_super_block *es, + ext4_fsblk_t n_blocks_count); + +/* super.c */ +extern void ext4_error (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void __ext4_std_error (struct super_block *, const char *, int); +extern void ext4_abort (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void ext4_warning (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void ext4_update_dynamic_rev (struct super_block *sb); +extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, + __u32 compat); +extern int ext4_update_rocompat_feature(handle_t *handle, + struct super_block *sb, __u32 rocompat); +extern int ext4_update_incompat_feature(handle_t *handle, + struct super_block *sb, __u32 incompat); +extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, + struct ext4_group_desc *bg); +extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, + struct ext4_group_desc *bg); +extern ext4_fsblk_t ext4_inode_table(struct super_block *sb, + struct ext4_group_desc *bg); +extern void ext4_block_bitmap_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk); +extern void ext4_inode_bitmap_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk); +extern void ext4_inode_table_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk); + +static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) +{ + return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | + le32_to_cpu(es->s_blocks_count_lo); +} + +static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es) +{ + return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) | + le32_to_cpu(es->s_r_blocks_count_lo); +} + +static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es) +{ + return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) | + le32_to_cpu(es->s_free_blocks_count_lo); +} + +static inline void ext4_blocks_count_set(struct ext4_super_block *es, + ext4_fsblk_t blk) +{ + es->s_blocks_count_lo = cpu_to_le32((u32)blk); + es->s_blocks_count_hi = cpu_to_le32(blk >> 32); +} + +static inline void ext4_free_blocks_count_set(struct ext4_super_block *es, + ext4_fsblk_t blk) +{ + es->s_free_blocks_count_lo = cpu_to_le32((u32)blk); + es->s_free_blocks_count_hi = cpu_to_le32(blk >> 32); +} + +static inline void ext4_r_blocks_count_set(struct ext4_super_block *es, + ext4_fsblk_t blk) +{ + es->s_r_blocks_count_lo = cpu_to_le32((u32)blk); + es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32); +} + +static inline loff_t ext4_isize(struct ext4_inode *raw_inode) +{ + return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) | + le32_to_cpu(raw_inode->i_size_lo); +} + +static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) +{ + raw_inode->i_size_lo = cpu_to_le32(i_size); + raw_inode->i_size_high = cpu_to_le32(i_size >> 32); +} + +static inline +struct ext4_group_info *ext4_get_group_info(struct super_block *sb, + ext4_group_t group) +{ + struct ext4_group_info ***grp_info; + long indexv, indexh; + grp_info = EXT4_SB(sb)->s_group_info; + indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); + indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); + return grp_info[indexv][indexh]; +} + + +#define ext4_std_error(sb, errno) \ +do { \ + if ((errno)) \ + __ext4_std_error((sb), __FUNCTION__, (errno)); \ +} while (0) + +/* + * Inodes and files operations + */ + +/* dir.c */ +extern const struct file_operations ext4_dir_operations; + +/* file.c */ +extern const struct inode_operations ext4_file_inode_operations; +extern const struct file_operations ext4_file_operations; + +/* namei.c */ +extern const struct inode_operations ext4_dir_inode_operations; +extern const struct inode_operations ext4_special_inode_operations; + +/* symlink.c */ +extern const struct inode_operations ext4_symlink_inode_operations; +extern const struct inode_operations ext4_fast_symlink_inode_operations; + +/* extents.c */ +extern int ext4_ext_tree_init(handle_t *handle, struct inode *); +extern int ext4_ext_writepage_trans_blocks(struct inode *, int); +extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, + ext4_lblk_t iblock, + unsigned long max_blocks, struct buffer_head *bh_result, + int create, int extend_disksize); +extern void ext4_ext_truncate(struct inode *, struct page *); +extern void ext4_ext_init(struct super_block *); +extern void ext4_ext_release(struct super_block *); +extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, + loff_t len); +extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, + sector_t block, unsigned long max_blocks, + struct buffer_head *bh, int create, + int extend_disksize); +#endif /* __KERNEL__ */ + +#endif /* _EXT4_H */ diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h new file mode 100644 index 00000000000..75333b595fa --- /dev/null +++ b/fs/ext4/ext4_extents.h @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas <alex@clusterfs.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ + +#ifndef _EXT4_EXTENTS +#define _EXT4_EXTENTS + +#include "ext4.h" + +/* + * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks + * becomes very small, so index split, in-depth growing and + * other hard changes happen much more often. + * This is for debug purposes only. + */ +#define AGGRESSIVE_TEST_ + +/* + * With EXTENTS_STATS defined, the number of blocks and extents + * are collected in the truncate path. They'll be shown at + * umount time. + */ +#define EXTENTS_STATS__ + +/* + * If CHECK_BINSEARCH is defined, then the results of the binary search + * will also be checked by linear search. + */ +#define CHECK_BINSEARCH__ + +/* + * If EXT_DEBUG is defined you can use the 'extdebug' mount option + * to get lots of info about what's going on. + */ +#define EXT_DEBUG__ +#ifdef EXT_DEBUG +#define ext_debug(a...) printk(a) +#else +#define ext_debug(a...) +#endif + +/* + * If EXT_STATS is defined then stats numbers are collected. + * These number will be displayed at umount time. + */ +#define EXT_STATS_ + + +/* + * ext4_inode has i_block array (60 bytes total). + * The first 12 bytes store ext4_extent_header; + * the remainder stores an array of ext4_extent. + */ + +/* + * This is the extent on-disk structure. + * It's used at the bottom of the tree. + */ +struct ext4_extent { + __le32 ee_block; /* first logical block extent covers */ + __le16 ee_len; /* number of blocks covered by extent */ + __le16 ee_start_hi; /* high 16 bits of physical block */ + __le32 ee_start_lo; /* low 32 bits of physical block */ +}; + +/* + * This is index on-disk structure. + * It's used at all the levels except the bottom. + */ +struct ext4_extent_idx { + __le32 ei_block; /* index covers logical blocks from 'block' */ + __le32 ei_leaf_lo; /* pointer to the physical block of the next * + * level. leaf or next index could be there */ + __le16 ei_leaf_hi; /* high 16 bits of physical block */ + __u16 ei_unused; +}; + +/* + * Each block (leaves and indexes), even inode-stored has header. + */ +struct ext4_extent_header { + __le16 eh_magic; /* probably will support different formats */ + __le16 eh_entries; /* number of valid entries */ + __le16 eh_max; /* capacity of store in entries */ + __le16 eh_depth; /* has tree real underlying blocks? */ + __le32 eh_generation; /* generation of the tree */ +}; + +#define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) + +/* + * Array of ext4_ext_path contains path to some extent. + * Creation/lookup routines use it for traversal/splitting/etc. + * Truncate uses it to simulate recursive walking. + */ +struct ext4_ext_path { + ext4_fsblk_t p_block; + __u16 p_depth; + struct ext4_extent *p_ext; + struct ext4_extent_idx *p_idx; + struct ext4_extent_header *p_hdr; + struct buffer_head *p_bh; +}; + +/* + * structure for external API + */ + +#define EXT4_EXT_CACHE_NO 0 +#define EXT4_EXT_CACHE_GAP 1 +#define EXT4_EXT_CACHE_EXTENT 2 + + +#define EXT_MAX_BLOCK 0xffffffff + +/* + * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an + * initialized extent. This is 2^15 and not (2^16 - 1), since we use the + * MSB of ee_len field in the extent datastructure to signify if this + * particular extent is an initialized extent or an uninitialized (i.e. + * preallocated). + * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an + * uninitialized extent. + * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an + * uninitialized one. In other words, if MSB of ee_len is set, it is an + * uninitialized extent with only one special scenario when ee_len = 0x8000. + * In this case we can not have an uninitialized extent of zero length and + * thus we make it as a special case of initialized extent with 0x8000 length. + * This way we get better extent-to-group alignment for initialized extents. + * Hence, the maximum number of blocks we can have in an *initialized* + * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767). + */ +#define EXT_INIT_MAX_LEN (1UL << 15) +#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1) + + +#define EXT_FIRST_EXTENT(__hdr__) \ + ((struct ext4_extent *) (((char *) (__hdr__)) + \ + sizeof(struct ext4_extent_header))) +#define EXT_FIRST_INDEX(__hdr__) \ + ((struct ext4_extent_idx *) (((char *) (__hdr__)) + \ + sizeof(struct ext4_extent_header))) +#define EXT_HAS_FREE_INDEX(__path__) \ + (le16_to_cpu((__path__)->p_hdr->eh_entries) \ + < le16_to_cpu((__path__)->p_hdr->eh_max)) +#define EXT_LAST_EXTENT(__hdr__) \ + (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) +#define EXT_LAST_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) +#define EXT_MAX_EXTENT(__hdr__) \ + (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1) + +static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode) +{ + return (struct ext4_extent_header *) EXT4_I(inode)->i_data; +} + +static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh) +{ + return (struct ext4_extent_header *) bh->b_data; +} + +static inline unsigned short ext_depth(struct inode *inode) +{ + return le16_to_cpu(ext_inode_hdr(inode)->eh_depth); +} + +static inline void ext4_ext_tree_changed(struct inode *inode) +{ + EXT4_I(inode)->i_ext_generation++; +} + +static inline void +ext4_ext_invalidate_cache(struct inode *inode) +{ + EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO; +} + +static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext) +{ + /* We can not have an uninitialized extent of zero length! */ + BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0); + ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN); +} + +static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext) +{ + /* Extent with ee_len of 0x8000 is treated as an initialized extent */ + return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN); +} + +static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) +{ + return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ? + le16_to_cpu(ext->ee_len) : + (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); +} + +extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); +extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); +extern int ext4_extent_tree_init(handle_t *, struct inode *); +extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); +extern int ext4_ext_try_to_merge(struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *); +extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); +extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); +extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, + struct ext4_ext_path *); +extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, + ext4_lblk_t *, ext4_fsblk_t *); +extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *, + ext4_lblk_t *, ext4_fsblk_t *); +extern void ext4_ext_drop_refs(struct ext4_ext_path *); +#endif /* _EXT4_EXTENTS */ + diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h new file mode 100644 index 00000000000..26a4ae255d7 --- /dev/null +++ b/fs/ext4/ext4_i.h @@ -0,0 +1,167 @@ +/* + * ext4_i.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs_i.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _EXT4_I +#define _EXT4_I + +#include <linux/rwsem.h> +#include <linux/rbtree.h> +#include <linux/seqlock.h> +#include <linux/mutex.h> + +/* data type for block offset of block group */ +typedef int ext4_grpblk_t; + +/* data type for filesystem-wide blocks number */ +typedef unsigned long long ext4_fsblk_t; + +/* data type for file logical block number */ +typedef __u32 ext4_lblk_t; + +/* data type for block group number */ +typedef unsigned long ext4_group_t; + +struct ext4_reserve_window { + ext4_fsblk_t _rsv_start; /* First byte reserved */ + ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */ +}; + +struct ext4_reserve_window_node { + struct rb_node rsv_node; + __u32 rsv_goal_size; + __u32 rsv_alloc_hit; + struct ext4_reserve_window rsv_window; +}; + +struct ext4_block_alloc_info { + /* information about reservation window */ + struct ext4_reserve_window_node rsv_window_node; + /* + * was i_next_alloc_block in ext4_inode_info + * is the logical (file-relative) number of the + * most-recently-allocated block in this file. + * We use this for detecting linearly ascending allocation requests. + */ + ext4_lblk_t last_alloc_logical_block; + /* + * Was i_next_alloc_goal in ext4_inode_info + * is the *physical* companion to i_next_alloc_block. + * it the physical block number of the block which was most-recentl + * allocated to this file. This give us the goal (target) for the next + * allocation when we detect linearly ascending requests. + */ + ext4_fsblk_t last_alloc_physical_block; +}; + +#define rsv_start rsv_window._rsv_start +#define rsv_end rsv_window._rsv_end + +/* + * storage for cached extent + */ +struct ext4_ext_cache { + ext4_fsblk_t ec_start; + ext4_lblk_t ec_block; + __u32 ec_len; /* must be 32bit to return holes */ + __u32 ec_type; +}; + +/* + * third extended file system inode data in memory + */ +struct ext4_inode_info { + __le32 i_data[15]; /* unconverted */ + __u32 i_flags; + ext4_fsblk_t i_file_acl; + __u32 i_dtime; + + /* + * i_block_group is the number of the block group which contains + * this file's inode. Constant across the lifetime of the inode, + * it is ued for making block allocation decisions - we try to + * place a file's data blocks near its inode block, and new inodes + * near to their parent directory's inode. + */ + ext4_group_t i_block_group; + __u32 i_state; /* Dynamic state flags for ext4 */ + + /* block reservation info */ + struct ext4_block_alloc_info *i_block_alloc_info; + + ext4_lblk_t i_dir_start_lookup; +#ifdef CONFIG_EXT4DEV_FS_XATTR + /* + * Extended attributes can be read independently of the main file + * data. Taking i_mutex even when reading would cause contention + * between readers of EAs and writers of regular file data, so + * instead we synchronize on xattr_sem when reading or changing + * EAs. + */ + struct rw_semaphore xattr_sem; +#endif +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; +#endif + + struct list_head i_orphan; /* unlinked but open inodes */ + + /* + * i_disksize keeps track of what the inode size is ON DISK, not + * in memory. During truncate, i_size is set to the new size by + * the VFS prior to calling ext4_truncate(), but the filesystem won't + * set i_disksize to 0 until the truncate is actually under way. + * + * The intent is that i_disksize always represents the blocks which + * are used by this file. This allows recovery to restart truncate + * on orphans if we crash during truncate. We actually write i_disksize + * into the on-disk inode when writing inodes out, instead of i_size. + * + * The only time when i_disksize and i_size may be different is when + * a truncate is in progress. The only things which change i_disksize + * are ext4_get_block (growth) and ext4_truncate (shrinkth). + */ + loff_t i_disksize; + + /* on-disk additional length */ + __u16 i_extra_isize; + + /* + * i_data_sem is for serialising ext4_truncate() against + * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's + * data tree are chopped off during truncate. We can't do that in + * ext4 because whenever we perform intermediate commits during + * truncate, the inode and all the metadata blocks *must* be in a + * consistent state which allows truncation of the orphans to restart + * during recovery. Hence we must fix the get_block-vs-truncate race + * by other means, so we have i_data_sem. + */ + struct rw_semaphore i_data_sem; + struct inode vfs_inode; + + unsigned long i_ext_generation; + struct ext4_ext_cache i_cached_extent; + /* + * File creation time. Its function is same as that of + * struct timespec i_{a,c,m}time in the generic inode. + */ + struct timespec i_crtime; + + /* mballoc */ + struct list_head i_prealloc_list; + spinlock_t i_prealloc_lock; +}; + +#endif /* _EXT4_I */ diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index d6afe4e2734..c75384b34f2 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -2,14 +2,14 @@ * Interface between ext4 and JBD */ -#include <linux/ext4_jbd2.h> +#include "ext4_jbd2.h" int __ext4_journal_get_undo_access(const char *where, handle_t *handle, struct buffer_head *bh) { int err = jbd2_journal_get_undo_access(handle, bh); if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + ext4_journal_abort_handle(where, __func__, bh, handle, err); return err; } @@ -18,7 +18,7 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle, { int err = jbd2_journal_get_write_access(handle, bh); if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + ext4_journal_abort_handle(where, __func__, bh, handle, err); return err; } @@ -27,7 +27,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle, { int err = jbd2_journal_forget(handle, bh); if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + ext4_journal_abort_handle(where, __func__, bh, handle, err); return err; } @@ -36,7 +36,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle, { int err = jbd2_journal_revoke(handle, blocknr, bh); if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + ext4_journal_abort_handle(where, __func__, bh, handle, err); return err; } @@ -45,7 +45,7 @@ int __ext4_journal_get_create_access(const char *where, { int err = jbd2_journal_get_create_access(handle, bh); if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + ext4_journal_abort_handle(where, __func__, bh, handle, err); return err; } @@ -54,6 +54,6 @@ int __ext4_journal_dirty_metadata(const char *where, { int err = jbd2_journal_dirty_metadata(handle, bh); if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + ext4_journal_abort_handle(where, __func__, bh, handle, err); return err; } diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h new file mode 100644 index 00000000000..9255a7d28b2 --- /dev/null +++ b/fs/ext4/ext4_jbd2.h @@ -0,0 +1,231 @@ +/* + * ext4_jbd2.h + * + * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 + * + * Copyright 1998--1999 Red Hat corp --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Ext4-specific journaling extensions. + */ + +#ifndef _EXT4_JBD2_H +#define _EXT4_JBD2_H + +#include <linux/fs.h> +#include <linux/jbd2.h> +#include "ext4.h" + +#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal) + +/* Define the number of blocks we need to account to a transaction to + * modify one block of data. + * + * We may have to touch one inode, one bitmap buffer, up to three + * indirection blocks, the group and superblock summaries, and the data + * block to complete the transaction. + * + * For extents-enabled fs we may have to allocate and modify up to + * 5 levels of tree + root which are stored in the inode. */ + +#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \ + (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \ + || test_opt(sb, EXTENTS) ? 27U : 8U) + +/* Extended attribute operations touch at most two data buffers, + * two bitmap buffers, and two group summaries, in addition to the inode + * and the superblock, which are already accounted for. */ + +#define EXT4_XATTR_TRANS_BLOCKS 6U + +/* Define the minimum size for a transaction which modifies data. This + * needs to take into account the fact that we may end up modifying two + * quota files too (one for the group, one for the user quota). The + * superblock only gets updated once, of course, so don't bother + * counting that again for the quota updates. */ + +#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ + EXT4_XATTR_TRANS_BLOCKS - 2 + \ + 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) + +/* Delete operations potentially hit one directory's namespace plus an + * entire inode, plus arbitrary amounts of bitmap/indirection data. Be + * generous. We can grow the delete transaction later if necessary. */ + +#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64) + +/* Define an arbitrary limit for the amount of data we will anticipate + * writing to any given transaction. For unbounded transactions such as + * write(2) and truncate(2) we can write more than this, but we always + * start off at the maximum transaction size and grow the transaction + * optimistically as we go. */ + +#define EXT4_MAX_TRANS_DATA 64U + +/* We break up a large truncate or write transaction once the handle's + * buffer credits gets this low, we need either to extend the + * transaction or to start a new one. Reserve enough space here for + * inode, bitmap, superblock, group and indirection updates for at least + * one block, plus two quota updates. Quota allocations are not + * needed. */ + +#define EXT4_RESERVE_TRANS_BLOCKS 12U + +#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8 + +#ifdef CONFIG_QUOTA +/* Amount of blocks needed for quota update - we know that the structure was + * allocated so we need to update only inode+data */ +#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) +/* Amount of blocks needed for quota insert/delete - we do some block writes + * but inode, sb and group updates are done only once */ +#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ + (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) +#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ + (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) +#else +#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0 +#define EXT4_QUOTA_INIT_BLOCKS(sb) 0 +#define EXT4_QUOTA_DEL_BLOCKS(sb) 0 +#endif + +int +ext4_mark_iloc_dirty(handle_t *handle, + struct inode *inode, + struct ext4_iloc *iloc); + +/* + * On success, We end up with an outstanding reference count against + * iloc->bh. This _must_ be cleaned up later. + */ + +int ext4_reserve_inode_write(handle_t *handle, struct inode *inode, + struct ext4_iloc *iloc); + +int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); + +/* + * Wrapper functions with which ext4 calls into JBD. The intent here is + * to allow these to be turned into appropriate stubs so ext4 can control + * ext2 filesystems, so ext2+ext4 systems only nee one fs. This work hasn't + * been done yet. + */ + +static inline void ext4_journal_release_buffer(handle_t *handle, + struct buffer_head *bh) +{ + jbd2_journal_release_buffer(handle, bh); +} + +void ext4_journal_abort_handle(const char *caller, const char *err_fn, + struct buffer_head *bh, handle_t *handle, int err); + +int __ext4_journal_get_undo_access(const char *where, handle_t *handle, + struct buffer_head *bh); + +int __ext4_journal_get_write_access(const char *where, handle_t *handle, + struct buffer_head *bh); + +int __ext4_journal_forget(const char *where, handle_t *handle, + struct buffer_head *bh); + +int __ext4_journal_revoke(const char *where, handle_t *handle, + ext4_fsblk_t blocknr, struct buffer_head *bh); + +int __ext4_journal_get_create_access(const char *where, + handle_t *handle, struct buffer_head *bh); + +int __ext4_journal_dirty_metadata(const char *where, + handle_t *handle, struct buffer_head *bh); + +#define ext4_journal_get_undo_access(handle, bh) \ + __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh)) +#define ext4_journal_get_write_access(handle, bh) \ + __ext4_journal_get_write_access(__FUNCTION__, (handle), (bh)) +#define ext4_journal_revoke(handle, blocknr, bh) \ + __ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh)) +#define ext4_journal_get_create_access(handle, bh) \ + __ext4_journal_get_create_access(__FUNCTION__, (handle), (bh)) +#define ext4_journal_dirty_metadata(handle, bh) \ + __ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh)) +#define ext4_journal_forget(handle, bh) \ + __ext4_journal_forget(__FUNCTION__, (handle), (bh)) + +int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh); + +handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); +int __ext4_journal_stop(const char *where, handle_t *handle); + +static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) +{ + return ext4_journal_start_sb(inode->i_sb, nblocks); +} + +#define ext4_journal_stop(handle) \ + __ext4_journal_stop(__FUNCTION__, (handle)) + +static inline handle_t *ext4_journal_current_handle(void) +{ + return journal_current_handle(); +} + +static inline int ext4_journal_extend(handle_t *handle, int nblocks) +{ + return jbd2_journal_extend(handle, nblocks); +} + +static inline int ext4_journal_restart(handle_t *handle, int nblocks) +{ + return jbd2_journal_restart(handle, nblocks); +} + +static inline int ext4_journal_blocks_per_page(struct inode *inode) +{ + return jbd2_journal_blocks_per_page(inode); +} + +static inline int ext4_journal_force_commit(journal_t *journal) +{ + return jbd2_journal_force_commit(journal); +} + +/* super.c */ +int ext4_force_commit(struct super_block *sb); + +static inline int ext4_should_journal_data(struct inode *inode) +{ + if (!S_ISREG(inode->i_mode)) + return 1; + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) + return 1; + if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) + return 1; + return 0; +} + +static inline int ext4_should_order_data(struct inode *inode) +{ + if (!S_ISREG(inode->i_mode)) + return 0; + if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) + return 0; + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) + return 1; + return 0; +} + +static inline int ext4_should_writeback_data(struct inode *inode) +{ + if (!S_ISREG(inode->i_mode)) + return 0; + if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) + return 0; + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) + return 1; + return 0; +} + +#endif /* _EXT4_JBD2_H */ diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h new file mode 100644 index 00000000000..5802e69f219 --- /dev/null +++ b/fs/ext4/ext4_sb.h @@ -0,0 +1,148 @@ +/* + * ext4_sb.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs_sb.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _EXT4_SB +#define _EXT4_SB + +#ifdef __KERNEL__ +#include <linux/timer.h> +#include <linux/wait.h> +#include <linux/blockgroup_lock.h> +#include <linux/percpu_counter.h> +#endif +#include <linux/rbtree.h> + +/* + * third extended-fs super-block data in memory + */ +struct ext4_sb_info { + unsigned long s_desc_size; /* Size of a group descriptor in bytes */ + unsigned long s_inodes_per_block;/* Number of inodes per block */ + unsigned long s_blocks_per_group;/* Number of blocks in a group */ + unsigned long s_inodes_per_group;/* Number of inodes in a group */ + unsigned long s_itb_per_group; /* Number of inode table blocks per group */ + unsigned long s_gdb_count; /* Number of group descriptor blocks */ + unsigned long s_desc_per_block; /* Number of group descriptors per block */ + ext4_group_t s_groups_count; /* Number of groups in the fs */ + unsigned long s_overhead_last; /* Last calculated overhead */ + unsigned long s_blocks_last; /* Last seen block count */ + loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ + struct buffer_head * s_sbh; /* Buffer containing the super block */ + struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ + struct buffer_head ** s_group_desc; + unsigned long s_mount_opt; + ext4_fsblk_t s_sb_block; + uid_t s_resuid; + gid_t s_resgid; + unsigned short s_mount_state; + unsigned short s_pad; + int s_addr_per_block_bits; + int s_desc_per_block_bits; + int s_inode_size; + int s_first_ino; + spinlock_t s_next_gen_lock; + u32 s_next_generation; + u32 s_hash_seed[4]; + int s_def_hash_version; + struct percpu_counter s_freeblocks_counter; + struct percpu_counter s_freeinodes_counter; + struct percpu_counter s_dirs_counter; + struct blockgroup_lock s_blockgroup_lock; + + /* root of the per fs reservation window tree */ + spinlock_t s_rsv_window_lock; + struct rb_root s_rsv_window_root; + struct ext4_reserve_window_node s_rsv_window_head; + + /* Journaling */ + struct inode * s_journal_inode; + struct journal_s * s_journal; + struct list_head s_orphan; + unsigned long s_commit_interval; + struct block_device *journal_bdev; +#ifdef CONFIG_JBD2_DEBUG + struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ + wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ +#endif +#ifdef CONFIG_QUOTA + char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ + int s_jquota_fmt; /* Format of quota to use */ +#endif + unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ + +#ifdef EXTENTS_STATS + /* ext4 extents stats */ + unsigned long s_ext_min; + unsigned long s_ext_max; + unsigned long s_depth_max; + spinlock_t s_ext_stats_lock; + unsigned long s_ext_blocks; + unsigned long s_ext_extents; +#endif + + /* for buddy allocator */ + struct ext4_group_info ***s_group_info; + struct inode *s_buddy_cache; + long s_blocks_reserved; + spinlock_t s_reserve_lock; + struct list_head s_active_transaction; + struct list_head s_closed_transaction; + struct list_head s_committed_transaction; + spinlock_t s_md_lock; + tid_t s_last_transaction; + unsigned short *s_mb_offsets, *s_mb_maxs; + + /* tunables */ + unsigned long s_stripe; + unsigned long s_mb_stream_request; + unsigned long s_mb_max_to_scan; + unsigned long s_mb_min_to_scan; + unsigned long s_mb_stats; + unsigned long s_mb_order2_reqs; + unsigned long s_mb_group_prealloc; + /* where last allocation was done - for stream allocation */ + unsigned long s_mb_last_group; + unsigned long s_mb_last_start; + + /* history to debug policy */ + struct ext4_mb_history *s_mb_history; + int s_mb_history_cur; + int s_mb_history_max; + int s_mb_history_num; + struct proc_dir_entry *s_mb_proc; + spinlock_t s_mb_history_lock; + int s_mb_history_filter; + + /* stats for buddy allocator */ + spinlock_t s_mb_pa_lock; + atomic_t s_bal_reqs; /* number of reqs with len > 1 */ + atomic_t s_bal_success; /* we found long enough chunks */ + atomic_t s_bal_allocated; /* in blocks */ + atomic_t s_bal_ex_scanned; /* total extents scanned */ + atomic_t s_bal_goals; /* goal hits */ + atomic_t s_bal_breaks; /* too long searches */ + atomic_t s_bal_2orders; /* 2^order hits */ + spinlock_t s_bal_lock; + unsigned long s_mb_buddies_generated; + unsigned long long s_mb_generation_time; + atomic_t s_mb_lost_chunks; + atomic_t s_mb_preallocated; + atomic_t s_mb_discarded; + + /* locality groups */ + struct ext4_locality_group *s_locality_groups; +}; + +#endif /* _EXT4_SB */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 9ae6e67090c..47929c4e3da 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -32,7 +32,6 @@ #include <linux/module.h> #include <linux/fs.h> #include <linux/time.h> -#include <linux/ext4_jbd2.h> #include <linux/jbd2.h> #include <linux/highuid.h> #include <linux/pagemap.h> @@ -40,8 +39,9 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/falloc.h> -#include <linux/ext4_fs_extents.h> #include <asm/uaccess.h> +#include "ext4_jbd2.h" +#include "ext4_extents.h" /* @@ -308,7 +308,7 @@ corrupted: } #define ext4_ext_check_header(inode, eh, depth) \ - __ext4_ext_check_header(__FUNCTION__, inode, eh, depth) + __ext4_ext_check_header(__func__, inode, eh, depth) #ifdef EXT_DEBUG static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) @@ -614,7 +614,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, ix->ei_block = cpu_to_le32(logical); ext4_idx_store_pblock(ix, ptr); - curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1); + le16_add_cpu(&curp->p_hdr->eh_entries, 1); BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) > le16_to_cpu(curp->p_hdr->eh_max)); @@ -736,7 +736,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, } if (m) { memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); - neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m); + le16_add_cpu(&neh->eh_entries, m); } set_buffer_uptodate(bh); @@ -753,8 +753,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto cleanup; - path[depth].p_hdr->eh_entries = - cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m); + le16_add_cpu(&path[depth].p_hdr->eh_entries, -m); err = ext4_ext_dirty(handle, inode, path + depth); if (err) goto cleanup; @@ -817,8 +816,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, if (m) { memmove(++fidx, path[i].p_idx - m, sizeof(struct ext4_extent_idx) * m); - neh->eh_entries = - cpu_to_le16(le16_to_cpu(neh->eh_entries) + m); + le16_add_cpu(&neh->eh_entries, m); } set_buffer_uptodate(bh); unlock_buffer(bh); @@ -834,7 +832,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path + i); if (err) goto cleanup; - path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m); + le16_add_cpu(&path[i].p_hdr->eh_entries, -m); err = ext4_ext_dirty(handle, inode, path + i); if (err) goto cleanup; @@ -1369,7 +1367,7 @@ int ext4_ext_try_to_merge(struct inode *inode, * sizeof(struct ext4_extent); memmove(ex + 1, ex + 2, len); } - eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1); + le16_add_cpu(&eh->eh_entries, -1); merge_done = 1; WARN_ON(eh->eh_entries == 0); if (!eh->eh_entries) @@ -1560,7 +1558,7 @@ has_space: path[depth].p_ext = nearex; } - eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1); + le16_add_cpu(&eh->eh_entries, 1); nearex = path[depth].p_ext; nearex->ee_block = newext->ee_block; ext4_ext_store_pblock(nearex, ext_pblock(newext)); @@ -1699,7 +1697,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path); if (err) return err; - path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); + le16_add_cpu(&path->p_hdr->eh_entries, -1); err = ext4_ext_dirty(handle, inode, path); if (err) return err; @@ -1902,7 +1900,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, if (num == 0) { /* this extent is removed; mark slot entirely unused */ ext4_ext_store_pblock(ex, 0); - eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); + le16_add_cpu(&eh->eh_entries, -1); } ex->ee_block = cpu_to_le32(block); @@ -1979,7 +1977,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) * We start scanning from right side, freeing all the blocks * after i_size and walking into the tree depth-wise. */ - path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL); + path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS); if (path == NULL) { ext4_journal_stop(handle); return -ENOMEM; @@ -2138,6 +2136,82 @@ void ext4_ext_release(struct super_block *sb) #endif } +static void bi_complete(struct bio *bio, int error) +{ + complete((struct completion *)bio->bi_private); +} + +/* FIXME!! we need to try to merge to left or right after zero-out */ +static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) +{ + int ret = -EIO; + struct bio *bio; + int blkbits, blocksize; + sector_t ee_pblock; + struct completion event; + unsigned int ee_len, len, done, offset; + + + blkbits = inode->i_blkbits; + blocksize = inode->i_sb->s_blocksize; + ee_len = ext4_ext_get_actual_len(ex); + ee_pblock = ext_pblock(ex); + + /* convert ee_pblock to 512 byte sectors */ + ee_pblock = ee_pblock << (blkbits - 9); + + while (ee_len > 0) { + + if (ee_len > BIO_MAX_PAGES) + len = BIO_MAX_PAGES; + else + len = ee_len; + + bio = bio_alloc(GFP_NOIO, len); + if (!bio) + return -ENOMEM; + bio->bi_sector = ee_pblock; + bio->bi_bdev = inode->i_sb->s_bdev; + + done = 0; + offset = 0; + while (done < len) { + ret = bio_add_page(bio, ZERO_PAGE(0), + blocksize, offset); + if (ret != blocksize) { + /* + * We can't add any more pages because of + * hardware limitations. Start a new bio. + */ + break; + } + done++; + offset += blocksize; + if (offset >= PAGE_CACHE_SIZE) + offset = 0; + } + + init_completion(&event); + bio->bi_private = &event; + bio->bi_end_io = bi_complete; + submit_bio(WRITE, bio); + wait_for_completion(&event); + + if (test_bit(BIO_UPTODATE, &bio->bi_flags)) + ret = 0; + else { + ret = -EIO; + break; + } + bio_put(bio); + ee_len -= done; + ee_pblock += done << (blkbits - 9); + } + return ret; +} + +#define EXT4_EXT_ZERO_LEN 7 + /* * This function is called by ext4_ext_get_blocks() if someone tries to write * to an uninitialized extent. It may result in splitting the uninitialized @@ -2154,7 +2228,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ext4_lblk_t iblock, unsigned long max_blocks) { - struct ext4_extent *ex, newex; + struct ext4_extent *ex, newex, orig_ex; struct ext4_extent *ex1 = NULL; struct ext4_extent *ex2 = NULL; struct ext4_extent *ex3 = NULL; @@ -2173,10 +2247,26 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, allocated = ee_len - (iblock - ee_block); newblock = iblock - ee_block + ext_pblock(ex); ex2 = ex; + orig_ex.ee_block = ex->ee_block; + orig_ex.ee_len = cpu_to_le16(ee_len); + ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; + /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ + if (ee_len <= 2*EXT4_EXT_ZERO_LEN) { + err = ext4_ext_zeroout(inode, &orig_ex); + if (err) + goto fix_extent_len; + /* update the extent length and mark as initialized */ + ex->ee_block = orig_ex.ee_block; + ex->ee_len = orig_ex.ee_len; + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); + ext4_ext_dirty(handle, inode, path + depth); + /* zeroed the full extent */ + return allocated; + } /* ex1: ee_block to iblock - 1 : uninitialized */ if (iblock > ee_block) { @@ -2195,19 +2285,103 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, /* ex3: to ee_block + ee_len : uninitialised */ if (allocated > max_blocks) { unsigned int newdepth; + /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ + if (allocated <= EXT4_EXT_ZERO_LEN) { + /* Mark first half uninitialized. + * Mark second half initialized and zero out the + * initialized extent + */ + ex->ee_block = orig_ex.ee_block; + ex->ee_len = cpu_to_le16(ee_len - allocated); + ext4_ext_mark_uninitialized(ex); + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); + ext4_ext_dirty(handle, inode, path + depth); + + ex3 = &newex; + ex3->ee_block = cpu_to_le32(iblock); + ext4_ext_store_pblock(ex3, newblock); + ex3->ee_len = cpu_to_le16(allocated); + err = ext4_ext_insert_extent(handle, inode, path, ex3); + if (err == -ENOSPC) { + err = ext4_ext_zeroout(inode, &orig_ex); + if (err) + goto fix_extent_len; + ex->ee_block = orig_ex.ee_block; + ex->ee_len = orig_ex.ee_len; + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); + ext4_ext_dirty(handle, inode, path + depth); + /* zeroed the full extent */ + return allocated; + + } else if (err) + goto fix_extent_len; + + /* + * We need to zero out the second half because + * an fallocate request can update file size and + * converting the second half to initialized extent + * implies that we can leak some junk data to user + * space. + */ + err = ext4_ext_zeroout(inode, ex3); + if (err) { + /* + * We should actually mark the + * second half as uninit and return error + * Insert would have changed the extent + */ + depth = ext_depth(inode); + ext4_ext_drop_refs(path); + path = ext4_ext_find_extent(inode, + iblock, path); + if (IS_ERR(path)) { + err = PTR_ERR(path); + return err; + } + ex = path[depth].p_ext; + err = ext4_ext_get_access(handle, inode, + path + depth); + if (err) + return err; + ext4_ext_mark_uninitialized(ex); + ext4_ext_dirty(handle, inode, path + depth); + return err; + } + + /* zeroed the second half */ + return allocated; + } ex3 = &newex; ex3->ee_block = cpu_to_le32(iblock + max_blocks); ext4_ext_store_pblock(ex3, newblock + max_blocks); ex3->ee_len = cpu_to_le16(allocated - max_blocks); ext4_ext_mark_uninitialized(ex3); err = ext4_ext_insert_extent(handle, inode, path, ex3); - if (err) - goto out; + if (err == -ENOSPC) { + err = ext4_ext_zeroout(inode, &orig_ex); + if (err) + goto fix_extent_len; + /* update the extent length and mark as initialized */ + ex->ee_block = orig_ex.ee_block; + ex->ee_len = orig_ex.ee_len; + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); + ext4_ext_dirty(handle, inode, path + depth); + /* zeroed the full extent */ + return allocated; + + } else if (err) + goto fix_extent_len; /* * The depth, and hence eh & ex might change * as part of the insert above. */ newdepth = ext_depth(inode); + /* + * update the extent length after successfull insert of the + * split extent + */ + orig_ex.ee_len = cpu_to_le16(ee_len - + ext4_ext_get_actual_len(ex3)); if (newdepth != depth) { depth = newdepth; ext4_ext_drop_refs(path); @@ -2226,6 +2400,24 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, goto out; } allocated = max_blocks; + + /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying + * to insert a extent in the middle zerout directly + * otherwise give the extent a chance to merge to left + */ + if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && + iblock != ee_block) { + err = ext4_ext_zeroout(inode, &orig_ex); + if (err) + goto fix_extent_len; + /* update the extent length and mark as initialized */ + ex->ee_block = orig_ex.ee_block; + ex->ee_len = orig_ex.ee_len; + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); + ext4_ext_dirty(handle, inode, path + depth); + /* zero out the first half */ + return allocated; + } } /* * If there was a change of depth as part of the @@ -2282,8 +2474,29 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, goto out; insert: err = ext4_ext_insert_extent(handle, inode, path, &newex); + if (err == -ENOSPC) { + err = ext4_ext_zeroout(inode, &orig_ex); + if (err) + goto fix_extent_len; + /* update the extent length and mark as initialized */ + ex->ee_block = orig_ex.ee_block; + ex->ee_len = orig_ex.ee_len; + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); + ext4_ext_dirty(handle, inode, path + depth); + /* zero out the first half */ + return allocated; + } else if (err) + goto fix_extent_len; out: return err ? err : allocated; + +fix_extent_len: + ex->ee_block = orig_ex.ee_block; + ex->ee_len = orig_ex.ee_len; + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); + ext4_ext_mark_uninitialized(ex); + ext4_ext_dirty(handle, inode, path + depth); + return err; } /* @@ -2393,8 +2606,20 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, } if (create == EXT4_CREATE_UNINITIALIZED_EXT) goto out; - if (!create) + if (!create) { + /* + * We have blocks reserved already. We + * return allocated blocks so that delalloc + * won't do block reservation for us. But + * the buffer head will be unmapped so that + * a read from the block returns 0s. + */ + if (allocated > max_blocks) + allocated = max_blocks; + /* mark the buffer unwritten */ + __set_bit(BH_Unwritten, &bh_result->b_state); goto out2; + } ret = ext4_ext_convert_to_initialized(handle, inode, path, iblock, @@ -2584,6 +2809,8 @@ out_stop: ext4_orphan_del(handle, inode); up_write(&EXT4_I(inode)->i_data_sem); + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); + ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); } @@ -2608,6 +2835,28 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) return needed; } +static void ext4_falloc_update_inode(struct inode *inode, + int mode, loff_t new_size, int update_ctime) +{ + struct timespec now; + + if (update_ctime) { + now = current_fs_time(inode->i_sb); + if (!timespec_equal(&inode->i_ctime, &now)) + inode->i_ctime = now; + } + /* + * Update only when preallocation was requested beyond + * the file size. + */ + if (!(mode & FALLOC_FL_KEEP_SIZE) && + new_size > i_size_read(inode)) { + i_size_write(inode, new_size); + EXT4_I(inode)->i_disksize = new_size; + } + +} + /* * preallocate space for a file. This implements ext4's fallocate inode * operation, which gets called from sys_fallocate system call. @@ -2619,8 +2868,8 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) { handle_t *handle; ext4_lblk_t block; + loff_t new_size; unsigned long max_blocks; - ext4_fsblk_t nblocks = 0; int ret = 0; int ret2 = 0; int retries = 0; @@ -2639,9 +2888,12 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) return -ENODEV; block = offset >> blkbits; + /* + * We can't just convert len to max_blocks because + * If blocksize = 4096 offset = 3072 and len = 2048 + */ max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - - block; - + - block; /* * credits to insert 1 extent into extent tree + buffers to be able to * modify 1 super block, 1 block bitmap and 1 group descriptor. @@ -2657,7 +2909,6 @@ retry: ret = PTR_ERR(handle); break; } - ret = ext4_get_blocks_wrap(handle, inode, block, max_blocks, &map_bh, EXT4_CREATE_UNINITIALIZED_EXT, 0); @@ -2673,61 +2924,24 @@ retry: ret2 = ext4_journal_stop(handle); break; } - if (ret > 0) { - /* check wrap through sign-bit/zero here */ - if ((block + ret) < 0 || (block + ret) < block) { - ret = -EIO; - ext4_mark_inode_dirty(handle, inode); - ret2 = ext4_journal_stop(handle); - break; - } - if (buffer_new(&map_bh) && ((block + ret) > - (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits) - >> blkbits))) - nblocks = nblocks + ret; - } - - /* Update ctime if new blocks get allocated */ - if (nblocks) { - struct timespec now; - - now = current_fs_time(inode->i_sb); - if (!timespec_equal(&inode->i_ctime, &now)) - inode->i_ctime = now; - } + if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len, + blkbits) >> blkbits)) + new_size = offset + len; + else + new_size = (block + ret) << blkbits; + ext4_falloc_update_inode(inode, mode, new_size, + buffer_new(&map_bh)); ext4_mark_inode_dirty(handle, inode); ret2 = ext4_journal_stop(handle); if (ret2) break; } - - if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) + if (ret == -ENOSPC && + ext4_should_retry_alloc(inode->i_sb, &retries)) { + ret = 0; goto retry; - - /* - * Time to update the file size. - * Update only when preallocation was requested beyond the file size. - */ - if (!(mode & FALLOC_FL_KEEP_SIZE) && - (offset + len) > i_size_read(inode)) { - if (ret > 0) { - /* - * if no error, we assume preallocation succeeded - * completely - */ - i_size_write(inode, offset + len); - EXT4_I(inode)->i_disksize = i_size_read(inode); - } else if (ret < 0 && nblocks) { - /* Handle partial allocation scenario */ - loff_t newsize; - - newsize = (nblocks << blkbits) + i_size_read(inode); - i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits)); - EXT4_I(inode)->i_disksize = i_size_read(inode); - } } - mutex_unlock(&inode->i_mutex); return ret > 0 ? ret2 : ret; } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index ac35ec58db5..4159be6366a 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -21,8 +21,8 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/jbd2.h> -#include <linux/ext4_fs.h> -#include <linux/ext4_jbd2.h> +#include "ext4.h" +#include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" @@ -129,7 +129,7 @@ const struct file_operations ext4_file_operations = { .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = ext4_file_write, - .ioctl = ext4_ioctl, + .unlocked_ioctl = ext4_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, #endif diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 8d50879d1c2..1c8ba48d4f8 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -27,8 +27,8 @@ #include <linux/sched.h> #include <linux/writeback.h> #include <linux/jbd2.h> -#include <linux/ext4_fs.h> -#include <linux/ext4_jbd2.h> +#include "ext4.h" +#include "ext4_jbd2.h" /* * akpm: A new design for ext4_sync_file(). @@ -72,6 +72,9 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) goto out; } + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + goto out; + /* * The VFS has written the file data. If the inode is unaltered * then we need not start a commit. diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index 1555024e3b3..1d6329dbe39 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -11,8 +11,8 @@ #include <linux/fs.h> #include <linux/jbd2.h> -#include <linux/ext4_fs.h> #include <linux/cryptohash.h> +#include "ext4.h" #define DELTA 0x9E3779B9 diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 486e46a3918..c6efbab0c80 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -15,8 +15,6 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/jbd2.h> -#include <linux/ext4_fs.h> -#include <linux/ext4_jbd2.h> #include <linux/stat.h> #include <linux/string.h> #include <linux/quotaops.h> @@ -25,7 +23,8 @@ #include <linux/bitops.h> #include <linux/blkdev.h> #include <asm/byteorder.h> - +#include "ext4.h" +#include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" #include "group.h" @@ -75,7 +74,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, /* If checksum is bad mark all blocks and inodes use to prevent * allocation, essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { - ext4_error(sb, __FUNCTION__, "Checksum bad for group %lu\n", + ext4_error(sb, __func__, "Checksum bad for group %lu\n", block_group); gdp->bg_free_blocks_count = 0; gdp->bg_free_inodes_count = 0; @@ -223,11 +222,9 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) if (gdp) { spin_lock(sb_bgl_lock(sbi, block_group)); - gdp->bg_free_inodes_count = cpu_to_le16( - le16_to_cpu(gdp->bg_free_inodes_count) + 1); + le16_add_cpu(&gdp->bg_free_inodes_count, 1); if (is_directory) - gdp->bg_used_dirs_count = cpu_to_le16( - le16_to_cpu(gdp->bg_used_dirs_count) - 1); + le16_add_cpu(&gdp->bg_used_dirs_count, -1); gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); spin_unlock(sb_bgl_lock(sbi, block_group)); @@ -588,7 +585,7 @@ got: ino++; if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || ino > EXT4_INODES_PER_GROUP(sb)) { - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "reserved inode or inode > inodes count - " "block_group = %lu, inode=%lu", group, ino + group * EXT4_INODES_PER_GROUP(sb)); @@ -664,11 +661,9 @@ got: cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino); } - gdp->bg_free_inodes_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); + le16_add_cpu(&gdp->bg_free_inodes_count, -1); if (S_ISDIR(mode)) { - gdp->bg_used_dirs_count = - cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); + le16_add_cpu(&gdp->bg_used_dirs_count, 1); } gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); spin_unlock(sb_bgl_lock(sbi, group)); @@ -744,23 +739,24 @@ got: if (err) goto fail_free_drop; - err = ext4_mark_inode_dirty(handle, inode); - if (err) { - ext4_std_error(sb, err); - goto fail_free_drop; - } if (test_opt(sb, EXTENTS)) { - /* set extent flag only for directory and file */ - if (S_ISDIR(mode) || S_ISREG(mode)) { + /* set extent flag only for diretory, file and normal symlink*/ + if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; ext4_ext_tree_init(handle, inode); err = ext4_update_incompat_feature(handle, sb, EXT4_FEATURE_INCOMPAT_EXTENTS); if (err) - goto fail; + goto fail_free_drop; } } + err = ext4_mark_inode_dirty(handle, inode); + if (err) { + ext4_std_error(sb, err); + goto fail_free_drop; + } + ext4_debug("allocating inode %lu\n", inode->i_ino); goto really_out; fail: @@ -796,7 +792,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) /* Error cases - e2fsck has already cleaned up for us */ if (ino > max_ino) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "bad orphan ino %lu! e2fsck was run?", ino); goto error; } @@ -805,7 +801,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); bitmap_bh = read_inode_bitmap(sb, block_group); if (!bitmap_bh) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "inode bitmap error for orphan %lu", ino); goto error; } @@ -830,7 +826,7 @@ iget_failed: err = PTR_ERR(inode); inode = NULL; bad_orphan: - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "bad orphan inode %lu! e2fsck was run?", ino); printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", bit, (unsigned long long)bitmap_bh->b_blocknr, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8fab233cb05..8d970774641 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -25,7 +25,6 @@ #include <linux/module.h> #include <linux/fs.h> #include <linux/time.h> -#include <linux/ext4_jbd2.h> #include <linux/jbd2.h> #include <linux/highuid.h> #include <linux/pagemap.h> @@ -36,6 +35,7 @@ #include <linux/mpage.h> #include <linux/uio.h> #include <linux/bio.h> +#include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" @@ -93,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, BUFFER_TRACE(bh, "call ext4_journal_revoke"); err = ext4_journal_revoke(handle, blocknr, bh); if (err) - ext4_abort(inode->i_sb, __FUNCTION__, + ext4_abort(inode->i_sb, __func__, "error %d when attempting revoke", err); BUFFER_TRACE(bh, "exit"); return err; @@ -985,6 +985,16 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, } else { retval = ext4_get_blocks_handle(handle, inode, block, max_blocks, bh, create, extend_disksize); + + if (retval > 0 && buffer_new(bh)) { + /* + * We allocated new blocks which will result in + * i_data's format changing. Force the migrate + * to fail by clearing migrate flags + */ + EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & + ~EXT4_EXT_MIGRATE; + } } up_write((&EXT4_I(inode)->i_data_sem)); return retval; @@ -1230,7 +1240,7 @@ int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh) { int err = jbd2_journal_dirty_data(handle, bh); if (err) - ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__, + ext4_journal_abort_handle(__func__, __func__, bh, handle, err); return err; } @@ -1301,10 +1311,11 @@ static int ext4_ordered_write_end(struct file *file, new_i_size = pos + copied; if (new_i_size > EXT4_I(inode)->i_disksize) EXT4_I(inode)->i_disksize = new_i_size; - copied = ext4_generic_write_end(file, mapping, pos, len, copied, + ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, page, fsdata); - if (copied < 0) - ret = copied; + copied = ret2; + if (ret2 < 0) + ret = ret2; } ret2 = ext4_journal_stop(handle); if (!ret) @@ -1329,10 +1340,11 @@ static int ext4_writeback_write_end(struct file *file, if (new_i_size > EXT4_I(inode)->i_disksize) EXT4_I(inode)->i_disksize = new_i_size; - copied = ext4_generic_write_end(file, mapping, pos, len, copied, + ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, page, fsdata); - if (copied < 0) - ret = copied; + copied = ret2; + if (ret2 < 0) + ret = ret2; ret2 = ext4_journal_stop(handle); if (!ret) @@ -2501,12 +2513,10 @@ out_stop: static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, unsigned long ino, struct ext4_iloc *iloc) { - unsigned long desc, group_desc; ext4_group_t block_group; unsigned long offset; ext4_fsblk_t block; - struct buffer_head *bh; - struct ext4_group_desc * gdp; + struct ext4_group_desc *gdp; if (!ext4_valid_inum(sb, ino)) { /* @@ -2518,22 +2528,10 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, } block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); - if (block_group >= EXT4_SB(sb)->s_groups_count) { - ext4_error(sb,"ext4_get_inode_block","group >= groups count"); + gdp = ext4_get_group_desc(sb, block_group, NULL); + if (!gdp) return 0; - } - smp_rmb(); - group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); - desc = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); - bh = EXT4_SB(sb)->s_group_desc[group_desc]; - if (!bh) { - ext4_error (sb, "ext4_get_inode_block", - "Descriptor not loaded"); - return 0; - } - gdp = (struct ext4_group_desc *)((__u8 *)bh->b_data + - desc * EXT4_DESC_SIZE(sb)); /* * Figure out the offset within the block group inode table */ @@ -2976,7 +2974,8 @@ static int ext4_do_update_inode(handle_t *handle, if (ext4_inode_blocks_set(handle, raw_inode, ei)) goto out_brelse; raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); - raw_inode->i_flags = cpu_to_le32(ei->i_flags); + /* clear the migrate flag in the raw_inode */ + raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE); if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != cpu_to_le32(EXT4_OS_HURD)) raw_inode->i_file_acl_high = @@ -3374,7 +3373,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; if (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count)) { - ext4_warning(inode->i_sb, __FUNCTION__, + ext4_warning(inode->i_sb, __func__, "Unable to expand inode %lu. Delete" " some EAs or run e2fsck.", inode->i_ino); @@ -3415,7 +3414,7 @@ void ext4_dirty_inode(struct inode *inode) current_handle->h_transaction != handle->h_transaction) { /* This task has a transaction open against a different fs */ printk(KERN_EMERG "%s: transactions do not match!\n", - __FUNCTION__); + __func__); } else { jbd_debug(5, "marking dirty. outer handle=%p\n", current_handle); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 25b13ede808..7a6c2f1faba 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -10,17 +10,17 @@ #include <linux/fs.h> #include <linux/jbd2.h> #include <linux/capability.h> -#include <linux/ext4_fs.h> -#include <linux/ext4_jbd2.h> #include <linux/time.h> #include <linux/compat.h> #include <linux/smp_lock.h> #include <linux/mount.h> #include <asm/uaccess.h> +#include "ext4_jbd2.h" +#include "ext4.h" -int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, - unsigned long arg) +long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + struct inode *inode = filp->f_dentry->d_inode; struct ext4_inode_info *ei = EXT4_I(inode); unsigned int flags; unsigned short rsv_window_size; @@ -277,9 +277,6 @@ setversion_out: #ifdef CONFIG_COMPAT long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; - int ret; - /* These are just misnamed, they actually get/put from/to user an int */ switch (cmd) { case EXT4_IOC32_GETFLAGS: @@ -319,9 +316,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) default: return -ENOIOCTLCMD; } - lock_kernel(); - ret = ext4_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); - unlock_kernel(); - return ret; + return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); } #endif diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index ef97f19c2f9..873ad9b3418 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -21,21 +21,7 @@ * mballoc.c contains the multiblocks allocation routines */ -#include <linux/time.h> -#include <linux/fs.h> -#include <linux/namei.h> -#include <linux/ext4_jbd2.h> -#include <linux/ext4_fs.h> -#include <linux/quotaops.h> -#include <linux/buffer_head.h> -#include <linux/module.h> -#include <linux/swap.h> -#include <linux/proc_fs.h> -#include <linux/pagemap.h> -#include <linux/seq_file.h> -#include <linux/version.h> -#include "group.h" - +#include "mballoc.h" /* * MUSTDO: * - test ext4_ext_search_left() and ext4_ext_search_right() @@ -345,288 +331,6 @@ * */ -/* - * with AGGRESSIVE_CHECK allocator runs consistency checks over - * structures. these checks slow things down a lot - */ -#define AGGRESSIVE_CHECK__ - -/* - * with DOUBLE_CHECK defined mballoc creates persistent in-core - * bitmaps, maintains and uses them to check for double allocations - */ -#define DOUBLE_CHECK__ - -/* - */ -#define MB_DEBUG__ -#ifdef MB_DEBUG -#define mb_debug(fmt, a...) printk(fmt, ##a) -#else -#define mb_debug(fmt, a...) -#endif - -/* - * with EXT4_MB_HISTORY mballoc stores last N allocations in memory - * and you can monitor it in /proc/fs/ext4/<dev>/mb_history - */ -#define EXT4_MB_HISTORY -#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ -#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */ -#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */ -#define EXT4_MB_HISTORY_FREE 8 /* free */ - -#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \ - EXT4_MB_HISTORY_PREALLOC) - -/* - * How long mballoc can look for a best extent (in found extents) - */ -#define MB_DEFAULT_MAX_TO_SCAN 200 - -/* - * How long mballoc must look for a best extent - */ -#define MB_DEFAULT_MIN_TO_SCAN 10 - -/* - * How many groups mballoc will scan looking for the best chunk - */ -#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5 - -/* - * with 'ext4_mb_stats' allocator will collect stats that will be - * shown at umount. The collecting costs though! - */ -#define MB_DEFAULT_STATS 1 - -/* - * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served - * by the stream allocator, which purpose is to pack requests - * as close each to other as possible to produce smooth I/O traffic - * We use locality group prealloc space for stream request. - * We can tune the same via /proc/fs/ext4/<parition>/stream_req - */ -#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */ - -/* - * for which requests use 2^N search using buddies - */ -#define MB_DEFAULT_ORDER2_REQS 2 - -/* - * default group prealloc size 512 blocks - */ -#define MB_DEFAULT_GROUP_PREALLOC 512 - -static struct kmem_cache *ext4_pspace_cachep; -static struct kmem_cache *ext4_ac_cachep; - -#ifdef EXT4_BB_MAX_BLOCKS -#undef EXT4_BB_MAX_BLOCKS -#endif -#define EXT4_BB_MAX_BLOCKS 30 - -struct ext4_free_metadata { - ext4_group_t group; - unsigned short num; - ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS]; - struct list_head list; -}; - -struct ext4_group_info { - unsigned long bb_state; - unsigned long bb_tid; - struct ext4_free_metadata *bb_md_cur; - unsigned short bb_first_free; - unsigned short bb_free; - unsigned short bb_fragments; - struct list_head bb_prealloc_list; -#ifdef DOUBLE_CHECK - void *bb_bitmap; -#endif - unsigned short bb_counters[]; -}; - -#define EXT4_GROUP_INFO_NEED_INIT_BIT 0 -#define EXT4_GROUP_INFO_LOCKED_BIT 1 - -#define EXT4_MB_GRP_NEED_INIT(grp) \ - (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) - - -struct ext4_prealloc_space { - struct list_head pa_inode_list; - struct list_head pa_group_list; - union { - struct list_head pa_tmp_list; - struct rcu_head pa_rcu; - } u; - spinlock_t pa_lock; - atomic_t pa_count; - unsigned pa_deleted; - ext4_fsblk_t pa_pstart; /* phys. block */ - ext4_lblk_t pa_lstart; /* log. block */ - unsigned short pa_len; /* len of preallocated chunk */ - unsigned short pa_free; /* how many blocks are free */ - unsigned short pa_linear; /* consumed in one direction - * strictly, for grp prealloc */ - spinlock_t *pa_obj_lock; - struct inode *pa_inode; /* hack, for history only */ -}; - - -struct ext4_free_extent { - ext4_lblk_t fe_logical; - ext4_grpblk_t fe_start; - ext4_group_t fe_group; - int fe_len; -}; - -/* - * Locality group: - * we try to group all related changes together - * so that writeback can flush/allocate them together as well - */ -struct ext4_locality_group { - /* for allocator */ - struct mutex lg_mutex; /* to serialize allocates */ - struct list_head lg_prealloc_list;/* list of preallocations */ - spinlock_t lg_prealloc_lock; -}; - -struct ext4_allocation_context { - struct inode *ac_inode; - struct super_block *ac_sb; - - /* original request */ - struct ext4_free_extent ac_o_ex; - - /* goal request (after normalization) */ - struct ext4_free_extent ac_g_ex; - - /* the best found extent */ - struct ext4_free_extent ac_b_ex; - - /* copy of the bext found extent taken before preallocation efforts */ - struct ext4_free_extent ac_f_ex; - - /* number of iterations done. we have to track to limit searching */ - unsigned long ac_ex_scanned; - __u16 ac_groups_scanned; - __u16 ac_found; - __u16 ac_tail; - __u16 ac_buddy; - __u16 ac_flags; /* allocation hints */ - __u8 ac_status; - __u8 ac_criteria; - __u8 ac_repeats; - __u8 ac_2order; /* if request is to allocate 2^N blocks and - * N > 0, the field stores N, otherwise 0 */ - __u8 ac_op; /* operation, for history only */ - struct page *ac_bitmap_page; - struct page *ac_buddy_page; - struct ext4_prealloc_space *ac_pa; - struct ext4_locality_group *ac_lg; -}; - -#define AC_STATUS_CONTINUE 1 -#define AC_STATUS_FOUND 2 -#define AC_STATUS_BREAK 3 - -struct ext4_mb_history { - struct ext4_free_extent orig; /* orig allocation */ - struct ext4_free_extent goal; /* goal allocation */ - struct ext4_free_extent result; /* result allocation */ - unsigned pid; - unsigned ino; - __u16 found; /* how many extents have been found */ - __u16 groups; /* how many groups have been scanned */ - __u16 tail; /* what tail broke some buddy */ - __u16 buddy; /* buddy the tail ^^^ broke */ - __u16 flags; - __u8 cr:3; /* which phase the result extent was found at */ - __u8 op:4; - __u8 merged:1; -}; - -struct ext4_buddy { - struct page *bd_buddy_page; - void *bd_buddy; - struct page *bd_bitmap_page; - void *bd_bitmap; - struct ext4_group_info *bd_info; - struct super_block *bd_sb; - __u16 bd_blkbits; - ext4_group_t bd_group; -}; -#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) -#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) - -#ifndef EXT4_MB_HISTORY -static inline void ext4_mb_store_history(struct ext4_allocation_context *ac) -{ - return; -} -#else -static void ext4_mb_store_history(struct ext4_allocation_context *ac); -#endif - -#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) - -static struct proc_dir_entry *proc_root_ext4; -struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); -ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, unsigned long *count, int *errp); - -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, - ext4_group_t group); -static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *); -static void ext4_mb_free_committed_blocks(struct super_block *); -static void ext4_mb_return_to_preallocation(struct inode *inode, - struct ext4_buddy *e4b, sector_t block, - int count); -static void ext4_mb_put_pa(struct ext4_allocation_context *, - struct super_block *, struct ext4_prealloc_space *pa); -static int ext4_mb_init_per_dev_proc(struct super_block *sb); -static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); - - -static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) -{ - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); - - bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); -} - -static inline void ext4_unlock_group(struct super_block *sb, - ext4_group_t group) -{ - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); - - bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); -} - -static inline int ext4_is_group_locked(struct super_block *sb, - ext4_group_t group) -{ - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); - - return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT, - &(grinfo->bb_state)); -} - -static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, - struct ext4_free_extent *fex) -{ - ext4_fsblk_t block; - - block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb) - + fex->fe_start - + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); - return block; -} - static inline void *mb_correct_addr_and_bit(int *bit, void *addr) { #if BITS_PER_LONG == 64 @@ -736,7 +440,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, blocknr += le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); - ext4_error(sb, __FUNCTION__, "double-free of inode" + ext4_error(sb, __func__, "double-free of inode" " %lu's block %llu(bit %u in group %lu)\n", inode ? inode->i_ino : 0, blocknr, first + i, e4b->bd_group); @@ -898,17 +602,17 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, list_for_each(cur, &grp->bb_prealloc_list) { ext4_group_t groupnr; struct ext4_prealloc_space *pa; - pa = list_entry(cur, struct ext4_prealloc_space, group_list); - ext4_get_group_no_and_offset(sb, pa->pstart, &groupnr, &k); + pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); + ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k); MB_CHECK_ASSERT(groupnr == e4b->bd_group); - for (i = 0; i < pa->len; i++) + for (i = 0; i < pa->pa_len; i++) MB_CHECK_ASSERT(mb_test_bit(k + i, buddy)); } return 0; } #undef MB_CHECK_ASSERT #define mb_check_buddy(e4b) __mb_check_buddy(e4b, \ - __FILE__, __FUNCTION__, __LINE__) + __FILE__, __func__, __LINE__) #else #define mb_check_buddy(e4b) #endif @@ -982,7 +686,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb, grp->bb_fragments = fragments; if (free != grp->bb_free) { - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", group, free, grp->bb_free); /* @@ -1168,8 +872,9 @@ out: return err; } -static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, - struct ext4_buddy *e4b) +static noinline_for_stack int +ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, + struct ext4_buddy *e4b) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct inode *inode = sbi->s_buddy_cache; @@ -1367,7 +1072,7 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, blocknr += le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); - ext4_error(sb, __FUNCTION__, "double-free of inode" + ext4_error(sb, __func__, "double-free of inode" " %lu's block %llu(bit %u in group %lu)\n", inode ? inode->i_ino : 0, blocknr, block, e4b->bd_group); @@ -1848,7 +1553,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, * free blocks even though group info says we * we have free blocks */ - ext4_error(sb, __FUNCTION__, "%d free blocks as per " + ext4_error(sb, __func__, "%d free blocks as per " "group info. But bitmap says 0\n", free); break; @@ -1857,7 +1562,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); BUG_ON(ex.fe_len <= 0); if (free < ex.fe_len) { - ext4_error(sb, __FUNCTION__, "%d free blocks as per " + ext4_error(sb, __func__, "%d free blocks as per " "group info. But got %d blocks\n", free, ex.fe_len); /* @@ -1965,7 +1670,8 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, return 0; } -static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) +static noinline_for_stack int +ext4_mb_regular_allocator(struct ext4_allocation_context *ac) { ext4_group_t group; ext4_group_t i; @@ -2449,17 +2155,10 @@ static void ext4_mb_history_init(struct super_block *sb) int i; if (sbi->s_mb_proc != NULL) { - struct proc_dir_entry *p; - p = create_proc_entry("mb_history", S_IRUGO, sbi->s_mb_proc); - if (p) { - p->proc_fops = &ext4_mb_seq_history_fops; - p->data = sb; - } - p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc); - if (p) { - p->proc_fops = &ext4_mb_seq_groups_fops; - p->data = sb; - } + proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc, + &ext4_mb_seq_history_fops, sb); + proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc, + &ext4_mb_seq_groups_fops, sb); } sbi->s_mb_history_max = 1000; @@ -2472,7 +2171,8 @@ static void ext4_mb_history_init(struct super_block *sb) /* if we can't allocate history, then we simple won't use it */ } -static void ext4_mb_store_history(struct ext4_allocation_context *ac) +static noinline_for_stack void +ext4_mb_store_history(struct ext4_allocation_context *ac) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_mb_history h; @@ -2572,13 +2272,13 @@ static int ext4_mb_init_backend(struct super_block *sb) meta_group_info[j] = kzalloc(len, GFP_KERNEL); if (meta_group_info[j] == NULL) { printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); - i--; goto err_freebuddy; } desc = ext4_get_group_desc(sb, i, NULL); if (desc == NULL) { printk(KERN_ERR "EXT4-fs: can't read descriptor %lu\n", i); + i++; goto err_freebuddy; } memset(meta_group_info[j], 0, len); @@ -2618,13 +2318,11 @@ static int ext4_mb_init_backend(struct super_block *sb) return 0; err_freebuddy: - while (i >= 0) { + while (i-- > 0) kfree(ext4_get_group_info(sb, i)); - i--; - } i = num_meta_group_infos; err_freemeta: - while (--i >= 0) + while (i-- > 0) kfree(sbi->s_group_info[i]); iput(sbi->s_buddy_cache); err_freesgi: @@ -2808,7 +2506,8 @@ int ext4_mb_release(struct super_block *sb) return 0; } -static void ext4_mb_free_committed_blocks(struct super_block *sb) +static noinline_for_stack void +ext4_mb_free_committed_blocks(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); int err; @@ -2867,7 +2566,6 @@ static void ext4_mb_free_committed_blocks(struct super_block *sb) mb_debug("freed %u blocks in %u structures\n", count, count2); } -#define EXT4_ROOT "ext4" #define EXT4_MB_STATS_NAME "stats" #define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan" #define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan" @@ -2941,8 +2639,7 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb) struct proc_dir_entry *proc; char devname[64]; - snprintf(devname, sizeof(devname) - 1, "%s", - bdevname(sb->s_bdev, devname)); + bdevname(sb->s_bdev, devname); sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats); @@ -2976,8 +2673,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) if (sbi->s_mb_proc == NULL) return -EINVAL; - snprintf(devname, sizeof(devname) - 1, "%s", - bdevname(sb->s_bdev, devname)); + bdevname(sb->s_bdev, devname); remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); @@ -3007,9 +2703,9 @@ int __init init_ext4_mballoc(void) return -ENOMEM; } #ifdef CONFIG_PROC_FS - proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs); + proc_root_ext4 = proc_mkdir("fs/ext4", NULL); if (proc_root_ext4 == NULL) - printk(KERN_ERR "EXT4-fs: Unable to create %s\n", EXT4_ROOT); + printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n"); #endif return 0; } @@ -3020,7 +2716,7 @@ void exit_ext4_mballoc(void) kmem_cache_destroy(ext4_pspace_cachep); kmem_cache_destroy(ext4_ac_cachep); #ifdef CONFIG_PROC_FS - remove_proc_entry(EXT4_ROOT, proc_root_fs); + remove_proc_entry("fs/ext4", NULL); #endif } @@ -3029,7 +2725,8 @@ void exit_ext4_mballoc(void) * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps * Returns 0 if success or error code */ -static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, +static noinline_for_stack int +ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, handle_t *handle) { struct buffer_head *bitmap_bh = NULL; @@ -3039,7 +2736,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, struct ext4_sb_info *sbi; struct super_block *sb; ext4_fsblk_t block; - int err; + int err, len; BUG_ON(ac->ac_status != AC_STATUS_FOUND); BUG_ON(ac->ac_b_ex.fe_len <= 0); @@ -3073,14 +2770,27 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, + ac->ac_b_ex.fe_start + le32_to_cpu(es->s_first_data_block); - if (block == ext4_block_bitmap(sb, gdp) || - block == ext4_inode_bitmap(sb, gdp) || - in_range(block, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group)) { - - ext4_error(sb, __FUNCTION__, + len = ac->ac_b_ex.fe_len; + if (in_range(ext4_block_bitmap(sb, gdp), block, len) || + in_range(ext4_inode_bitmap(sb, gdp), block, len) || + in_range(block, ext4_inode_table(sb, gdp), + EXT4_SB(sb)->s_itb_per_group) || + in_range(block + len - 1, ext4_inode_table(sb, gdp), + EXT4_SB(sb)->s_itb_per_group)) { + ext4_error(sb, __func__, "Allocating block in system zone - block = %llu", block); + /* File system mounted not to panic on error + * Fix the bitmap and repeat the block allocation + * We leak some of the blocks here. + */ + mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), + bitmap_bh->b_data, ac->ac_b_ex.fe_start, + ac->ac_b_ex.fe_len); + err = ext4_journal_dirty_metadata(handle, bitmap_bh); + if (!err) + err = -EAGAIN; + goto out_err; } #ifdef AGGRESSIVE_CHECK { @@ -3102,9 +2812,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ac->ac_b_ex.fe_group, gdp)); } - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - - ac->ac_b_ex.fe_len); + le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); @@ -3138,7 +2846,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac) ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; else ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; - mb_debug("#%u: goal %lu blocks for locality group\n", + mb_debug("#%u: goal %u blocks for locality group\n", current->pid, ac->ac_g_ex.fe_len); } @@ -3146,15 +2854,16 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac) * Normalization means making request better in terms of * size and alignment */ -static void ext4_mb_normalize_request(struct ext4_allocation_context *ac, +static noinline_for_stack void +ext4_mb_normalize_request(struct ext4_allocation_context *ac, struct ext4_allocation_request *ar) { int bsbits, max; ext4_lblk_t end; - struct list_head *cur; loff_t size, orig_size, start_off; ext4_lblk_t start, orig_start; struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); + struct ext4_prealloc_space *pa; /* do normalize only data requests, metadata requests do not need preallocation */ @@ -3184,12 +2893,11 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac, if (size < i_size_read(ac->ac_inode)) size = i_size_read(ac->ac_inode); - /* max available blocks in a free group */ - max = EXT4_BLOCKS_PER_GROUP(ac->ac_sb) - 1 - 1 - - EXT4_SB(ac->ac_sb)->s_itb_per_group; + /* max size of free chunks */ + max = 2 << bsbits; -#define NRL_CHECK_SIZE(req, size, max,bits) \ - (req <= (size) || max <= ((size) >> bits)) +#define NRL_CHECK_SIZE(req, size, max, chunk_size) \ + (req <= (size) || max <= (chunk_size)) /* first, try to predict filesize */ /* XXX: should this table be tunable? */ @@ -3208,16 +2916,16 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac, size = 512 * 1024; } else if (size <= 1024 * 1024) { size = 1024 * 1024; - } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, bsbits)) { + } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) { start_off = ((loff_t)ac->ac_o_ex.fe_logical >> - (20 - bsbits)) << 20; - size = 1024 * 1024; - } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, bsbits)) { + (21 - bsbits)) << 21; + size = 2 * 1024 * 1024; + } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) { start_off = ((loff_t)ac->ac_o_ex.fe_logical >> (22 - bsbits)) << 22; size = 4 * 1024 * 1024; } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len, - (8<<20)>>bsbits, max, bsbits)) { + (8<<20)>>bsbits, max, 8 * 1024)) { start_off = ((loff_t)ac->ac_o_ex.fe_logical >> (23 - bsbits)) << 23; size = 8 * 1024 * 1024; @@ -3240,12 +2948,9 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac, /* check we don't cross already preallocated blocks */ rcu_read_lock(); - list_for_each_rcu(cur, &ei->i_prealloc_list) { - struct ext4_prealloc_space *pa; + list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { unsigned long pa_end; - pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list); - if (pa->pa_deleted) continue; spin_lock(&pa->pa_lock); @@ -3287,10 +2992,8 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac, /* XXX: extra loop to check we really don't overlap preallocations */ rcu_read_lock(); - list_for_each_rcu(cur, &ei->i_prealloc_list) { - struct ext4_prealloc_space *pa; + list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { unsigned long pa_end; - pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list); spin_lock(&pa->pa_lock); if (pa->pa_deleted == 0) { pa_end = pa->pa_lstart + pa->pa_len; @@ -3382,7 +3085,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, BUG_ON(pa->pa_free < len); pa->pa_free -= len; - mb_debug("use %llu/%lu from inode pa %p\n", start, len, pa); + mb_debug("use %llu/%u from inode pa %p\n", start, len, pa); } /* @@ -3412,12 +3115,12 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, /* * search goal blocks in preallocated space */ -static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac) +static noinline_for_stack int +ext4_mb_use_preallocated(struct ext4_allocation_context *ac) { struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); struct ext4_locality_group *lg; struct ext4_prealloc_space *pa; - struct list_head *cur; /* only data can be preallocated */ if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) @@ -3425,8 +3128,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac) /* first, try per-file preallocation */ rcu_read_lock(); - list_for_each_rcu(cur, &ei->i_prealloc_list) { - pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list); + list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { /* all fields in this condition don't change, * so we can skip locking for them */ @@ -3458,8 +3160,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac) return 0; rcu_read_lock(); - list_for_each_rcu(cur, &lg->lg_prealloc_list) { - pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list); + list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) { spin_lock(&pa->pa_lock); if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { atomic_inc(&pa->pa_count); @@ -3579,7 +3280,8 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac, /* * creates new preallocated space for given inode */ -static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) +static noinline_for_stack int +ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) { struct super_block *sb = ac->ac_sb; struct ext4_prealloc_space *pa; @@ -3666,7 +3368,8 @@ static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) /* * creates new preallocated space for locality group inodes belongs to */ -static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac) +static noinline_for_stack int +ext4_mb_new_group_pa(struct ext4_allocation_context *ac) { struct super_block *sb = ac->ac_sb; struct ext4_locality_group *lg; @@ -3739,11 +3442,11 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac) * the caller MUST hold group/inode locks. * TODO: optimize the case when there are no in-core structures yet */ -static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, - struct buffer_head *bitmap_bh, - struct ext4_prealloc_space *pa) +static noinline_for_stack int +ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, + struct ext4_prealloc_space *pa, + struct ext4_allocation_context *ac) { - struct ext4_allocation_context *ac; struct super_block *sb = e4b->bd_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned long end; @@ -3759,8 +3462,6 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, BUG_ON(group != e4b->bd_group && pa->pa_len != 0); end = bit + pa->pa_len; - ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); - if (ac) { ac->ac_sb = sb; ac->ac_inode = pa->pa_inode; @@ -3797,7 +3498,7 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, pa, (unsigned long) pa->pa_lstart, (unsigned long) pa->pa_pstart, (unsigned long) pa->pa_len); - ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n", + ext4_error(sb, __func__, "free %u, pa_free %u\n", free, pa->pa_free); /* * pa is already deleted so we use the value obtained @@ -3805,22 +3506,19 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, */ } atomic_add(free, &sbi->s_mb_discarded); - if (ac) - kmem_cache_free(ext4_ac_cachep, ac); return err; } -static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, - struct ext4_prealloc_space *pa) +static noinline_for_stack int +ext4_mb_release_group_pa(struct ext4_buddy *e4b, + struct ext4_prealloc_space *pa, + struct ext4_allocation_context *ac) { - struct ext4_allocation_context *ac; struct super_block *sb = e4b->bd_sb; ext4_group_t group; ext4_grpblk_t bit; - ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); - if (ac) ac->ac_op = EXT4_MB_HISTORY_DISCARD; @@ -3838,7 +3536,6 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, ac->ac_b_ex.fe_len = pa->pa_len; ac->ac_b_ex.fe_logical = 0; ext4_mb_store_history(ac); - kmem_cache_free(ext4_ac_cachep, ac); } return 0; @@ -3853,12 +3550,14 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, * - how many do we discard * 1) how many requested */ -static int ext4_mb_discard_group_preallocations(struct super_block *sb, +static noinline_for_stack int +ext4_mb_discard_group_preallocations(struct super_block *sb, ext4_group_t group, int needed) { struct ext4_group_info *grp = ext4_get_group_info(sb, group); struct buffer_head *bitmap_bh = NULL; struct ext4_prealloc_space *pa, *tmp; + struct ext4_allocation_context *ac; struct list_head list; struct ext4_buddy e4b; int err; @@ -3886,6 +3585,7 @@ static int ext4_mb_discard_group_preallocations(struct super_block *sb, grp = ext4_get_group_info(sb, group); INIT_LIST_HEAD(&list); + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); repeat: ext4_lock_group(sb, group); list_for_each_entry_safe(pa, tmp, @@ -3940,9 +3640,9 @@ repeat: spin_unlock(pa->pa_obj_lock); if (pa->pa_linear) - ext4_mb_release_group_pa(&e4b, pa); + ext4_mb_release_group_pa(&e4b, pa, ac); else - ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); + ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); list_del(&pa->u.pa_tmp_list); call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); @@ -3950,6 +3650,8 @@ repeat: out: ext4_unlock_group(sb, group); + if (ac) + kmem_cache_free(ext4_ac_cachep, ac); ext4_mb_release_desc(&e4b); put_bh(bitmap_bh); return free; @@ -3970,6 +3672,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode) struct super_block *sb = inode->i_sb; struct buffer_head *bitmap_bh = NULL; struct ext4_prealloc_space *pa, *tmp; + struct ext4_allocation_context *ac; ext4_group_t group = 0; struct list_head list; struct ext4_buddy e4b; @@ -3984,6 +3687,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode) INIT_LIST_HEAD(&list); + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); repeat: /* first, collect all pa's in the inode */ spin_lock(&ei->i_prealloc_lock); @@ -4048,7 +3752,7 @@ repeat: ext4_lock_group(sb, group); list_del(&pa->pa_group_list); - ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); + ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); ext4_unlock_group(sb, group); ext4_mb_release_desc(&e4b); @@ -4057,6 +3761,8 @@ repeat: list_del(&pa->u.pa_tmp_list); call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); } + if (ac) + kmem_cache_free(ext4_ac_cachep, ac); } /* @@ -4116,7 +3822,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) printk(KERN_ERR "PA:%lu:%d:%u \n", i, start, pa->pa_len); } - ext4_lock_group(sb, i); + ext4_unlock_group(sb, i); if (grp->bb_free == 0) continue; @@ -4175,7 +3881,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) mutex_lock(&ac->ac_lg->lg_mutex); } -static int ext4_mb_initialize_context(struct ext4_allocation_context *ac, +static noinline_for_stack int +ext4_mb_initialize_context(struct ext4_allocation_context *ac, struct ext4_allocation_request *ar) { struct super_block *sb = ar->inode->i_sb; @@ -4338,7 +4045,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, ac->ac_op = EXT4_MB_HISTORY_ALLOC; ext4_mb_normalize_request(ac, ar); - repeat: /* allocate space in core */ ext4_mb_regular_allocator(ac); @@ -4352,10 +4058,21 @@ repeat: } if (likely(ac->ac_status == AC_STATUS_FOUND)) { - ext4_mb_mark_diskspace_used(ac, handle); - *errp = 0; - block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); - ar->len = ac->ac_b_ex.fe_len; + *errp = ext4_mb_mark_diskspace_used(ac, handle); + if (*errp == -EAGAIN) { + ac->ac_b_ex.fe_group = 0; + ac->ac_b_ex.fe_start = 0; + ac->ac_b_ex.fe_len = 0; + ac->ac_status = AC_STATUS_CONTINUE; + goto repeat; + } else if (*errp) { + ac->ac_b_ex.fe_len = 0; + ar->len = 0; + ext4_mb_show_ac(ac); + } else { + block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); + ar->len = ac->ac_b_ex.fe_len; + } } else { freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len); if (freed) @@ -4406,7 +4123,8 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb, ext4_mb_free_committed_blocks(sb); } -static int ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, +static noinline_for_stack int +ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, ext4_group_t group, ext4_grpblk_t block, int count) { struct ext4_group_info *db = e4b->bd_info; @@ -4497,7 +4215,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, if (block < le32_to_cpu(es->s_first_data_block) || block + count < block || block + count > ext4_blocks_count(es)) { - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "Freeing blocks not in datazone - " "block = %lu, count = %lu", block, count); goto error_return; @@ -4538,9 +4256,11 @@ do_more: in_range(block + count - 1, ext4_inode_table(sb, gdp), EXT4_SB(sb)->s_itb_per_group)) { - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "Freeing blocks in system zone - " "Block = %lu, count = %lu", block, count); + /* err = 0. ext4_std_error should be a no op */ + goto error_return; } BUFFER_TRACE(bitmap_bh, "getting write access"); @@ -4596,8 +4316,7 @@ do_more: } spin_lock(sb_bgl_lock(sbi, block_group)); - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); + le16_add_cpu(&gdp->bg_free_blocks_count, count); gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_add(&sbi->s_freeblocks_counter, count); diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h new file mode 100644 index 00000000000..bfe6add46bc --- /dev/null +++ b/fs/ext4/mballoc.h @@ -0,0 +1,304 @@ +/* + * fs/ext4/mballoc.h + * + * Written by: Alex Tomas <alex@clusterfs.com> + * + */ +#ifndef _EXT4_MBALLOC_H +#define _EXT4_MBALLOC_H + +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/quotaops.h> +#include <linux/buffer_head.h> +#include <linux/module.h> +#include <linux/swap.h> +#include <linux/proc_fs.h> +#include <linux/pagemap.h> +#include <linux/seq_file.h> +#include <linux/version.h> +#include "ext4_jbd2.h" +#include "ext4.h" +#include "group.h" + +/* + * with AGGRESSIVE_CHECK allocator runs consistency checks over + * structures. these checks slow things down a lot + */ +#define AGGRESSIVE_CHECK__ + +/* + * with DOUBLE_CHECK defined mballoc creates persistent in-core + * bitmaps, maintains and uses them to check for double allocations + */ +#define DOUBLE_CHECK__ + +/* + */ +#define MB_DEBUG__ +#ifdef MB_DEBUG +#define mb_debug(fmt, a...) printk(fmt, ##a) +#else +#define mb_debug(fmt, a...) +#endif + +/* + * with EXT4_MB_HISTORY mballoc stores last N allocations in memory + * and you can monitor it in /proc/fs/ext4/<dev>/mb_history + */ +#define EXT4_MB_HISTORY +#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ +#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */ +#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */ +#define EXT4_MB_HISTORY_FREE 8 /* free */ + +#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \ + EXT4_MB_HISTORY_PREALLOC) + +/* + * How long mballoc can look for a best extent (in found extents) + */ +#define MB_DEFAULT_MAX_TO_SCAN 200 + +/* + * How long mballoc must look for a best extent + */ +#define MB_DEFAULT_MIN_TO_SCAN 10 + +/* + * How many groups mballoc will scan looking for the best chunk + */ +#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5 + +/* + * with 'ext4_mb_stats' allocator will collect stats that will be + * shown at umount. The collecting costs though! + */ +#define MB_DEFAULT_STATS 1 + +/* + * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served + * by the stream allocator, which purpose is to pack requests + * as close each to other as possible to produce smooth I/O traffic + * We use locality group prealloc space for stream request. + * We can tune the same via /proc/fs/ext4/<parition>/stream_req + */ +#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */ + +/* + * for which requests use 2^N search using buddies + */ +#define MB_DEFAULT_ORDER2_REQS 2 + +/* + * default group prealloc size 512 blocks + */ +#define MB_DEFAULT_GROUP_PREALLOC 512 + +static struct kmem_cache *ext4_pspace_cachep; +static struct kmem_cache *ext4_ac_cachep; + +#ifdef EXT4_BB_MAX_BLOCKS +#undef EXT4_BB_MAX_BLOCKS +#endif +#define EXT4_BB_MAX_BLOCKS 30 + +struct ext4_free_metadata { + ext4_group_t group; + unsigned short num; + ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS]; + struct list_head list; +}; + +struct ext4_group_info { + unsigned long bb_state; + unsigned long bb_tid; + struct ext4_free_metadata *bb_md_cur; + unsigned short bb_first_free; + unsigned short bb_free; + unsigned short bb_fragments; + struct list_head bb_prealloc_list; +#ifdef DOUBLE_CHECK + void *bb_bitmap; +#endif + unsigned short bb_counters[]; +}; + +#define EXT4_GROUP_INFO_NEED_INIT_BIT 0 +#define EXT4_GROUP_INFO_LOCKED_BIT 1 + +#define EXT4_MB_GRP_NEED_INIT(grp) \ + (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) + + +struct ext4_prealloc_space { + struct list_head pa_inode_list; + struct list_head pa_group_list; + union { + struct list_head pa_tmp_list; + struct rcu_head pa_rcu; + } u; + spinlock_t pa_lock; + atomic_t pa_count; + unsigned pa_deleted; + ext4_fsblk_t pa_pstart; /* phys. block */ + ext4_lblk_t pa_lstart; /* log. block */ + unsigned short pa_len; /* len of preallocated chunk */ + unsigned short pa_free; /* how many blocks are free */ + unsigned short pa_linear; /* consumed in one direction + * strictly, for grp prealloc */ + spinlock_t *pa_obj_lock; + struct inode *pa_inode; /* hack, for history only */ +}; + + +struct ext4_free_extent { + ext4_lblk_t fe_logical; + ext4_grpblk_t fe_start; + ext4_group_t fe_group; + int fe_len; +}; + +/* + * Locality group: + * we try to group all related changes together + * so that writeback can flush/allocate them together as well + */ +struct ext4_locality_group { + /* for allocator */ + struct mutex lg_mutex; /* to serialize allocates */ + struct list_head lg_prealloc_list;/* list of preallocations */ + spinlock_t lg_prealloc_lock; +}; + +struct ext4_allocation_context { + struct inode *ac_inode; + struct super_block *ac_sb; + + /* original request */ + struct ext4_free_extent ac_o_ex; + + /* goal request (after normalization) */ + struct ext4_free_extent ac_g_ex; + + /* the best found extent */ + struct ext4_free_extent ac_b_ex; + + /* copy of the bext found extent taken before preallocation efforts */ + struct ext4_free_extent ac_f_ex; + + /* number of iterations done. we have to track to limit searching */ + unsigned long ac_ex_scanned; + __u16 ac_groups_scanned; + __u16 ac_found; + __u16 ac_tail; + __u16 ac_buddy; + __u16 ac_flags; /* allocation hints */ + __u8 ac_status; + __u8 ac_criteria; + __u8 ac_repeats; + __u8 ac_2order; /* if request is to allocate 2^N blocks and + * N > 0, the field stores N, otherwise 0 */ + __u8 ac_op; /* operation, for history only */ + struct page *ac_bitmap_page; + struct page *ac_buddy_page; + struct ext4_prealloc_space *ac_pa; + struct ext4_locality_group *ac_lg; +}; + +#define AC_STATUS_CONTINUE 1 +#define AC_STATUS_FOUND 2 +#define AC_STATUS_BREAK 3 + +struct ext4_mb_history { + struct ext4_free_extent orig; /* orig allocation */ + struct ext4_free_extent goal; /* goal allocation */ + struct ext4_free_extent result; /* result allocation */ + unsigned pid; + unsigned ino; + __u16 found; /* how many extents have been found */ + __u16 groups; /* how many groups have been scanned */ + __u16 tail; /* what tail broke some buddy */ + __u16 buddy; /* buddy the tail ^^^ broke */ + __u16 flags; + __u8 cr:3; /* which phase the result extent was found at */ + __u8 op:4; + __u8 merged:1; +}; + +struct ext4_buddy { + struct page *bd_buddy_page; + void *bd_buddy; + struct page *bd_bitmap_page; + void *bd_bitmap; + struct ext4_group_info *bd_info; + struct super_block *bd_sb; + __u16 bd_blkbits; + ext4_group_t bd_group; +}; +#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) +#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) + +#ifndef EXT4_MB_HISTORY +static inline void ext4_mb_store_history(struct ext4_allocation_context *ac) +{ + return; +} +#else +static void ext4_mb_store_history(struct ext4_allocation_context *ac); +#endif + +#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) + +static struct proc_dir_entry *proc_root_ext4; +struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); + +static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, + ext4_group_t group); +static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *); +static void ext4_mb_free_committed_blocks(struct super_block *); +static void ext4_mb_return_to_preallocation(struct inode *inode, + struct ext4_buddy *e4b, sector_t block, + int count); +static void ext4_mb_put_pa(struct ext4_allocation_context *, + struct super_block *, struct ext4_prealloc_space *pa); +static int ext4_mb_init_per_dev_proc(struct super_block *sb); +static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); + + +static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) +{ + struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); + + bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); +} + +static inline void ext4_unlock_group(struct super_block *sb, + ext4_group_t group) +{ + struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); + + bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); +} + +static inline int ext4_is_group_locked(struct super_block *sb, + ext4_group_t group) +{ + struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); + + return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT, + &(grinfo->bb_state)); +} + +static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, + struct ext4_free_extent *fex) +{ + ext4_fsblk_t block; + + block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb) + + fex->fe_start + + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); + return block; +} +#endif diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 5c1e27de775..b9e077ba07e 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -13,8 +13,8 @@ */ #include <linux/module.h> -#include <linux/ext4_jbd2.h> -#include <linux/ext4_fs_extents.h> +#include "ext4_jbd2.h" +#include "ext4_extents.h" /* * The contiguous blocks details which can be @@ -327,7 +327,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) } static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, - struct inode *tmp_inode) + struct inode *tmp_inode) { int retval; __le32 i_data[3]; @@ -339,7 +339,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, * i_data field of the original inode */ retval = ext4_journal_extend(handle, 1); - if (retval != 0) { + if (retval) { retval = ext4_journal_restart(handle, 1); if (retval) goto err_out; @@ -351,6 +351,18 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, down_write(&EXT4_I(inode)->i_data_sem); /* + * if EXT4_EXT_MIGRATE is cleared a block allocation + * happened after we started the migrate. We need to + * fail the migrate + */ + if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) { + retval = -EAGAIN; + up_write(&EXT4_I(inode)->i_data_sem); + goto err_out; + } else + EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & + ~EXT4_EXT_MIGRATE; + /* * We have the extent map build with the tmp inode. * Now copy the i_data across */ @@ -508,6 +520,17 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, * switch the inode format to prevent read. */ mutex_lock(&(inode->i_mutex)); + /* + * Even though we take i_mutex we can still cause block allocation + * via mmap write to holes. If we have allocated new blocks we fail + * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. + * The flag is updated with i_data_sem held to prevent racing with + * block allocation. + */ + down_read((&EXT4_I(inode)->i_data_sem)); + EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE; + up_read((&EXT4_I(inode)->i_data_sem)); + handle = ext4_journal_start(inode, 1); ei = EXT4_I(inode); @@ -559,9 +582,15 @@ err_out: * tmp_inode */ free_ext_block(handle, tmp_inode); - else - retval = ext4_ext_swap_inode_data(handle, inode, - tmp_inode); + else { + retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode); + if (retval) + /* + * if we fail to swap inode data free the extent + * details of the tmp inode + */ + free_ext_block(handle, tmp_inode); + } /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ if (ext4_journal_extend(handle, 1) != 0) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 28aa2ed4297..ab16beaa830 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -28,14 +28,14 @@ #include <linux/pagemap.h> #include <linux/jbd2.h> #include <linux/time.h> -#include <linux/ext4_fs.h> -#include <linux/ext4_jbd2.h> #include <linux/fcntl.h> #include <linux/stat.h> #include <linux/string.h> #include <linux/quotaops.h> #include <linux/buffer_head.h> #include <linux/bio.h> +#include "ext4.h" +#include "ext4_jbd2.h" #include "namei.h" #include "xattr.h" @@ -57,10 +57,15 @@ static struct buffer_head *ext4_append(handle_t *handle, *block = inode->i_size >> inode->i_sb->s_blocksize_bits; - if ((bh = ext4_bread(handle, inode, *block, 1, err))) { + bh = ext4_bread(handle, inode, *block, 1, err); + if (bh) { inode->i_size += inode->i_sb->s_blocksize; EXT4_I(inode)->i_disksize = inode->i_size; - ext4_journal_get_write_access(handle,bh); + *err = ext4_journal_get_write_access(handle, bh); + if (*err) { + brelse(bh); + bh = NULL; + } } return bh; } @@ -348,7 +353,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, if (root->info.hash_version != DX_HASH_TEA && root->info.hash_version != DX_HASH_HALF_MD4 && root->info.hash_version != DX_HASH_LEGACY) { - ext4_warning(dir->i_sb, __FUNCTION__, + ext4_warning(dir->i_sb, __func__, "Unrecognised inode hash code %d", root->info.hash_version); brelse(bh); @@ -362,7 +367,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, hash = hinfo->hash; if (root->info.unused_flags & 1) { - ext4_warning(dir->i_sb, __FUNCTION__, + ext4_warning(dir->i_sb, __func__, "Unimplemented inode hash flags: %#06x", root->info.unused_flags); brelse(bh); @@ -371,7 +376,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, } if ((indirect = root->info.indirect_levels) > 1) { - ext4_warning(dir->i_sb, __FUNCTION__, + ext4_warning(dir->i_sb, __func__, "Unimplemented inode hash depth: %#06x", root->info.indirect_levels); brelse(bh); @@ -384,7 +389,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, if (dx_get_limit(entries) != dx_root_limit(dir, root->info.info_length)) { - ext4_warning(dir->i_sb, __FUNCTION__, + ext4_warning(dir->i_sb, __func__, "dx entry: limit != root limit"); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -396,7 +401,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, { count = dx_get_count(entries); if (!count || count > dx_get_limit(entries)) { - ext4_warning(dir->i_sb, __FUNCTION__, + ext4_warning(dir->i_sb, __func__, "dx entry: no count or count > limit"); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -441,7 +446,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, goto fail2; at = entries = ((struct dx_node *) bh->b_data)->entries; if (dx_get_limit(entries) != dx_node_limit (dir)) { - ext4_warning(dir->i_sb, __FUNCTION__, + ext4_warning(dir->i_sb, __func__, "dx entry: limit != node limit"); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -457,7 +462,7 @@ fail2: } fail: if (*err == ERR_BAD_DX_DIR) - ext4_warning(dir->i_sb, __FUNCTION__, + ext4_warning(dir->i_sb, __func__, "Corrupt dir inode %ld, running e2fsck is " "recommended.", dir->i_ino); return NULL; @@ -914,7 +919,7 @@ restart: wait_on_buffer(bh); if (!buffer_uptodate(bh)) { /* read error, skip block & hope for the best */ - ext4_error(sb, __FUNCTION__, "reading directory #%lu " + ext4_error(sb, __func__, "reading directory #%lu " "offset %lu", dir->i_ino, (unsigned long)block); brelse(bh); @@ -1007,7 +1012,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, retval = ext4_htree_next_block(dir, hash, frame, frames, NULL); if (retval < 0) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "error reading index page in directory #%lu", dir->i_ino); *err = retval; @@ -1532,7 +1537,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (levels && (dx_get_count(frames->entries) == dx_get_limit(frames->entries))) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Directory index full!"); err = -ENOSPC; goto cleanup; @@ -1860,11 +1865,11 @@ static int empty_dir (struct inode * inode) if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { if (err) - ext4_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __func__, "error %d reading directory #%lu offset 0", err, inode->i_ino); else - ext4_warning(inode->i_sb, __FUNCTION__, + ext4_warning(inode->i_sb, __func__, "bad directory (dir #%lu) - no data block", inode->i_ino); return 1; @@ -1893,7 +1898,7 @@ static int empty_dir (struct inode * inode) offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); if (!bh) { if (err) - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "error %d reading directory" " #%lu offset %lu", err, inode->i_ino, offset); @@ -2217,6 +2222,8 @@ retry: goto out_stop; } } else { + /* clear the extent format for fast symlink */ + EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; inode->i_op = &ext4_fast_symlink_inode_operations; memcpy((char*)&EXT4_I(inode)->i_data,symname,l); inode->i_size = l-1; @@ -2347,6 +2354,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, EXT4_FEATURE_INCOMPAT_FILETYPE)) new_de->file_type = old_de->file_type; new_dir->i_version++; + new_dir->i_ctime = new_dir->i_mtime = + ext4_current_time(new_dir); + ext4_mark_inode_dirty(handle, new_dir); BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata"); ext4_journal_dirty_metadata(handle, new_bh); brelse(new_bh); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index e29efa0f9d6..9f086a6a472 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -11,11 +11,10 @@ #define EXT4FS_DEBUG -#include <linux/ext4_jbd2.h> - #include <linux/errno.h> #include <linux/slab.h> +#include "ext4_jbd2.h" #include "group.h" #define outside(b, first, last) ((b) < (first) || (b) >= (last)) @@ -50,63 +49,63 @@ static int verify_group_input(struct super_block *sb, ext4_get_group_no_and_offset(sb, start, NULL, &offset); if (group != sbi->s_groups_count) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Cannot add at group %u (only %lu groups)", input->group, sbi->s_groups_count); else if (offset != 0) - ext4_warning(sb, __FUNCTION__, "Last group not full"); + ext4_warning(sb, __func__, "Last group not full"); else if (input->reserved_blocks > input->blocks_count / 5) - ext4_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", + ext4_warning(sb, __func__, "Reserved blocks too high (%u)", input->reserved_blocks); else if (free_blocks_count < 0) - ext4_warning(sb, __FUNCTION__, "Bad blocks count %u", + ext4_warning(sb, __func__, "Bad blocks count %u", input->blocks_count); else if (!(bh = sb_bread(sb, end - 1))) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Cannot read last block (%llu)", end - 1); else if (outside(input->block_bitmap, start, end)) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Block bitmap not in group (block %llu)", (unsigned long long)input->block_bitmap); else if (outside(input->inode_bitmap, start, end)) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Inode bitmap not in group (block %llu)", (unsigned long long)input->inode_bitmap); else if (outside(input->inode_table, start, end) || outside(itend - 1, start, end)) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Inode table not in group (blocks %llu-%llu)", (unsigned long long)input->inode_table, itend - 1); else if (input->inode_bitmap == input->block_bitmap) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Block bitmap same as inode bitmap (%llu)", (unsigned long long)input->block_bitmap); else if (inside(input->block_bitmap, input->inode_table, itend)) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Block bitmap (%llu) in inode table (%llu-%llu)", (unsigned long long)input->block_bitmap, (unsigned long long)input->inode_table, itend - 1); else if (inside(input->inode_bitmap, input->inode_table, itend)) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Inode bitmap (%llu) in inode table (%llu-%llu)", (unsigned long long)input->inode_bitmap, (unsigned long long)input->inode_table, itend - 1); else if (inside(input->block_bitmap, start, metaend)) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Block bitmap (%llu) in GDT table" " (%llu-%llu)", (unsigned long long)input->block_bitmap, start, metaend - 1); else if (inside(input->inode_bitmap, start, metaend)) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Inode bitmap (%llu) in GDT table" " (%llu-%llu)", (unsigned long long)input->inode_bitmap, start, metaend - 1); else if (inside(input->inode_table, start, metaend) || inside(itend - 1, start, metaend)) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Inode table (%llu-%llu) overlaps" "GDT table (%llu-%llu)", (unsigned long long)input->inode_table, @@ -368,7 +367,7 @@ static int verify_reserved_gdb(struct super_block *sb, while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { if (le32_to_cpu(*p++) != grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "reserved GDT %llu" " missing grp %d (%llu)", blk, grp, @@ -424,7 +423,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, */ if (EXT4_SB(sb)->s_sbh->b_blocknr != le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "won't resize using backup superblock at %llu", (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); return -EPERM; @@ -448,7 +447,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, data = (__le32 *)dind->b_data; if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "new group %u GDT block %llu not reserved", input->group, gdblock); err = -EINVAL; @@ -469,10 +468,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, goto exit_dindj; n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), - GFP_KERNEL); + GFP_NOFS); if (!n_group_desc) { err = -ENOMEM; - ext4_warning (sb, __FUNCTION__, + ext4_warning(sb, __func__, "not enough memory for %lu groups", gdb_num + 1); goto exit_inode; } @@ -502,8 +501,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, EXT4_SB(sb)->s_gdb_count++; kfree(o_group_desc); - es->s_reserved_gdt_blocks = - cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1); + le16_add_cpu(&es->s_reserved_gdt_blocks, -1); ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); return 0; @@ -553,7 +551,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, int res, i; int err; - primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL); + primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS); if (!primary) return -ENOMEM; @@ -571,7 +569,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, /* Get each reserved primary GDT block and verify it holds backups */ for (res = 0; res < reserved_gdb; res++, blk++) { if (le32_to_cpu(*data) != blk) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "reserved block %llu" " not at offset %ld", blk, @@ -715,7 +713,7 @@ static void update_backups(struct super_block *sb, */ exit_err: if (err) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "can't update backup for group %lu (err %d), " "forcing fsck on next reboot", group, err); sbi->s_mount_state &= ~EXT4_VALID_FS; @@ -755,33 +753,33 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Can't resize non-sparse filesystem further"); return -EPERM; } if (ext4_blocks_count(es) + input->blocks_count < ext4_blocks_count(es)) { - ext4_warning(sb, __FUNCTION__, "blocks_count overflow\n"); + ext4_warning(sb, __func__, "blocks_count overflow\n"); return -EINVAL; } if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < le32_to_cpu(es->s_inodes_count)) { - ext4_warning(sb, __FUNCTION__, "inodes_count overflow\n"); + ext4_warning(sb, __func__, "inodes_count overflow\n"); return -EINVAL; } if (reserved_gdb || gdb_off == 0) { if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)){ - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "No reserved GDT blocks, can't resize"); return -EPERM; } inode = ext4_iget(sb, EXT4_RESIZE_INO); if (IS_ERR(inode)) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Error opening resize inode"); return PTR_ERR(inode); } @@ -810,7 +808,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) lock_super(sb); if (input->group != sbi->s_groups_count) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "multiple resizers run on filesystem!"); err = -EBUSY; goto exit_journal; @@ -877,8 +875,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) */ ext4_blocks_count_set(es, ext4_blocks_count(es) + input->blocks_count); - es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) + - EXT4_INODES_PER_GROUP(sb)); + le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb)); /* * We need to protect s_groups_count against other CPUs seeing @@ -977,13 +974,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, " too large to resize to %llu blocks safely\n", sb->s_id, n_blocks_count); if (sizeof(sector_t) < 8) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "CONFIG_LBD not enabled\n"); return -EINVAL; } if (n_blocks_count < o_blocks_count) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "can't shrink FS - resize aborted"); return -EBUSY; } @@ -992,7 +989,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last); if (last == 0) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "need to use ext2online to resize further"); return -EPERM; } @@ -1000,7 +997,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, add = EXT4_BLOCKS_PER_GROUP(sb) - last; if (o_blocks_count + add < o_blocks_count) { - ext4_warning(sb, __FUNCTION__, "blocks_count overflow"); + ext4_warning(sb, __func__, "blocks_count overflow"); return -EINVAL; } @@ -1008,7 +1005,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, add = n_blocks_count - o_blocks_count; if (o_blocks_count + add < n_blocks_count) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "will only finish group (%llu" " blocks, %u new)", o_blocks_count + add, add); @@ -1016,7 +1013,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, /* See if the device is actually as big as what was requested */ bh = sb_bread(sb, o_blocks_count + add -1); if (!bh) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "can't read last block, resize aborted"); return -ENOSPC; } @@ -1028,13 +1025,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, handle = ext4_journal_start_sb(sb, 3); if (IS_ERR(handle)) { err = PTR_ERR(handle); - ext4_warning(sb, __FUNCTION__, "error %d on journal start",err); + ext4_warning(sb, __func__, "error %d on journal start", err); goto exit_put; } lock_super(sb); if (o_blocks_count != ext4_blocks_count(es)) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "multiple resizers run on filesystem!"); unlock_super(sb); ext4_journal_stop(handle); @@ -1044,7 +1041,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, if ((err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh))) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "error %d on journal write access", err); unlock_super(sb); ext4_journal_stop(handle); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c81a8e759ba..09d9359c805 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -21,8 +21,6 @@ #include <linux/fs.h> #include <linux/time.h> #include <linux/jbd2.h> -#include <linux/ext4_fs.h> -#include <linux/ext4_jbd2.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/blkdev.h> @@ -38,9 +36,10 @@ #include <linux/seq_file.h> #include <linux/log2.h> #include <linux/crc16.h> - #include <asm/uaccess.h> +#include "ext4.h" +#include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" #include "namei.h" @@ -135,7 +134,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) * take the FS itself readonly cleanly. */ journal = EXT4_SB(sb)->s_journal; if (is_journal_aborted(journal)) { - ext4_abort(sb, __FUNCTION__, + ext4_abort(sb, __func__, "Detected aborted journal"); return ERR_PTR(-EROFS); } @@ -355,7 +354,7 @@ void ext4_update_dynamic_rev(struct super_block *sb) if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) return; - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "updating to rev %d because of new feature flag, " "running e2fsck is recommended", EXT4_DYNAMIC_REV); @@ -945,8 +944,8 @@ static match_table_t tokens = { {Opt_mballoc, "mballoc"}, {Opt_nomballoc, "nomballoc"}, {Opt_stripe, "stripe=%u"}, - {Opt_err, NULL}, {Opt_resize, "resize"}, + {Opt_err, NULL}, }; static ext4_fsblk_t get_sb_block(void **data) @@ -980,7 +979,7 @@ static int parse_options (char *options, struct super_block *sb, int data_opt = 0; int option; #ifdef CONFIG_QUOTA - int qtype; + int qtype, qfmt; char *qname; #endif @@ -1163,9 +1162,11 @@ static int parse_options (char *options, struct super_block *sb, case Opt_grpjquota: qtype = GRPQUOTA; set_qf_name: - if (sb_any_quota_enabled(sb)) { + if ((sb_any_quota_enabled(sb) || + sb_any_quota_suspended(sb)) && + !sbi->s_qf_names[qtype]) { printk(KERN_ERR - "EXT4-fs: Cannot change journalled " + "EXT4-fs: Cannot change journaled " "quota options when quota turned on.\n"); return 0; } @@ -1201,9 +1202,11 @@ set_qf_name: case Opt_offgrpjquota: qtype = GRPQUOTA; clear_qf_name: - if (sb_any_quota_enabled(sb)) { + if ((sb_any_quota_enabled(sb) || + sb_any_quota_suspended(sb)) && + sbi->s_qf_names[qtype]) { printk(KERN_ERR "EXT4-fs: Cannot change " - "journalled quota options when " + "journaled quota options when " "quota turned on.\n"); return 0; } @@ -1214,10 +1217,20 @@ clear_qf_name: sbi->s_qf_names[qtype] = NULL; break; case Opt_jqfmt_vfsold: - sbi->s_jquota_fmt = QFMT_VFS_OLD; - break; + qfmt = QFMT_VFS_OLD; + goto set_qf_format; case Opt_jqfmt_vfsv0: - sbi->s_jquota_fmt = QFMT_VFS_V0; + qfmt = QFMT_VFS_V0; +set_qf_format: + if ((sb_any_quota_enabled(sb) || + sb_any_quota_suspended(sb)) && + sbi->s_jquota_fmt != qfmt) { + printk(KERN_ERR "EXT4-fs: Cannot change " + "journaled quota options when " + "quota turned on.\n"); + return 0; + } + sbi->s_jquota_fmt = qfmt; break; case Opt_quota: case Opt_usrquota: @@ -1242,6 +1255,9 @@ clear_qf_name: case Opt_quota: case Opt_usrquota: case Opt_grpquota: + printk(KERN_ERR + "EXT4-fs: quota options not supported.\n"); + break; case Opt_usrjquota: case Opt_grpjquota: case Opt_offusrjquota: @@ -1249,7 +1265,7 @@ clear_qf_name: case Opt_jqfmt_vfsold: case Opt_jqfmt_vfsv0: printk(KERN_ERR - "EXT4-fs: journalled quota options not " + "EXT4-fs: journaled quota options not " "supported.\n"); break; case Opt_noquota: @@ -1334,14 +1350,14 @@ clear_qf_name: } if (!sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT4-fs: journalled quota format " + printk(KERN_ERR "EXT4-fs: journaled quota format " "not specified.\n"); return 0; } } else { if (sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT4-fs: journalled quota format " - "specified with no journalling " + printk(KERN_ERR "EXT4-fs: journaled quota format " + "specified with no journaling " "enabled.\n"); return 0; } @@ -1388,11 +1404,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, * a plain journaled filesystem we can keep it set as * valid forever! :) */ - es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS); + es->s_state &= cpu_to_le16(~EXT4_VALID_FS); #endif if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); - es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); + le16_add_cpu(&es->s_mnt_count, 1); es->s_mtime = cpu_to_le32(get_seconds()); ext4_update_dynamic_rev(sb); EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); @@ -1485,36 +1501,33 @@ static int ext4_check_descriptors(struct super_block *sb) block_bitmap = ext4_block_bitmap(sb, gdp); if (block_bitmap < first_block || block_bitmap > last_block) { - ext4_error (sb, "ext4_check_descriptors", - "Block bitmap for group %lu" - " not in group (block %llu)!", - i, block_bitmap); + printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " + "Block bitmap for group %lu not in group " + "(block %llu)!", i, block_bitmap); return 0; } inode_bitmap = ext4_inode_bitmap(sb, gdp); if (inode_bitmap < first_block || inode_bitmap > last_block) { - ext4_error (sb, "ext4_check_descriptors", - "Inode bitmap for group %lu" - " not in group (block %llu)!", - i, inode_bitmap); + printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " + "Inode bitmap for group %lu not in group " + "(block %llu)!", i, inode_bitmap); return 0; } inode_table = ext4_inode_table(sb, gdp); if (inode_table < first_block || inode_table + sbi->s_itb_per_group - 1 > last_block) { - ext4_error (sb, "ext4_check_descriptors", - "Inode table for group %lu" - " not in group (block %llu)!", - i, inode_table); + printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " + "Inode table for group %lu not in group " + "(block %llu)!", i, inode_table); return 0; } if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { - ext4_error(sb, __FUNCTION__, - "Checksum for group %lu failed (%u!=%u)\n", - i, le16_to_cpu(ext4_group_desc_csum(sbi, i, - gdp)), le16_to_cpu(gdp->bg_checksum)); + printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " + "Checksum for group %lu failed (%u!=%u)\n", + i, le16_to_cpu(ext4_group_desc_csum(sbi, i, + gdp)), le16_to_cpu(gdp->bg_checksum)); return 0; } if (!flexbg_flag) @@ -1585,7 +1598,7 @@ static void ext4_orphan_cleanup (struct super_block * sb, int ret = ext4_quota_on_mount(sb, i); if (ret < 0) printk(KERN_ERR - "EXT4-fs: Cannot turn on journalled " + "EXT4-fs: Cannot turn on journaled " "quota: error %d\n", ret); } } @@ -1594,8 +1607,8 @@ static void ext4_orphan_cleanup (struct super_block * sb, while (es->s_last_orphan) { struct inode *inode; - if (!(inode = - ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { + inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); + if (IS_ERR(inode)) { es->s_last_orphan = 0; break; } @@ -1605,7 +1618,7 @@ static void ext4_orphan_cleanup (struct super_block * sb, if (inode->i_nlink) { printk(KERN_DEBUG "%s: truncating inode %lu to %Ld bytes\n", - __FUNCTION__, inode->i_ino, inode->i_size); + __func__, inode->i_ino, inode->i_size); jbd_debug(2, "truncating inode %lu to %Ld bytes\n", inode->i_ino, inode->i_size); ext4_truncate(inode); @@ -1613,7 +1626,7 @@ static void ext4_orphan_cleanup (struct super_block * sb, } else { printk(KERN_DEBUG "%s: deleting unreferenced inode %lu\n", - __FUNCTION__, inode->i_ino); + __func__, inode->i_ino); jbd_debug(2, "deleting unreferenced inode %lu\n", inode->i_ino); nr_orphans++; @@ -2699,9 +2712,9 @@ static void ext4_clear_journal_err(struct super_block * sb, char nbuf[16]; errstr = ext4_decode_error(sb, j_errno, nbuf); - ext4_warning(sb, __FUNCTION__, "Filesystem error recorded " + ext4_warning(sb, __func__, "Filesystem error recorded " "from previous mount: %s", errstr); - ext4_warning(sb, __FUNCTION__, "Marking fs in need of " + ext4_warning(sb, __func__, "Marking fs in need of " "filesystem check."); EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; @@ -2828,7 +2841,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data) } if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) - ext4_abort(sb, __FUNCTION__, "Abort forced by user"); + ext4_abort(sb, __func__, "Abort forced by user"); sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); @@ -3040,8 +3053,14 @@ static int ext4_dquot_drop(struct inode *inode) /* We may delete quota structure so we need to reserve enough blocks */ handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) + if (IS_ERR(handle)) { + /* + * We call dquot_drop() anyway to at least release references + * to quota structures so that umount does not hang. + */ + dquot_drop(inode); return PTR_ERR(handle); + } ret = dquot_drop(inode); err = ext4_journal_stop(handle); if (!ret) @@ -3104,7 +3123,7 @@ static int ext4_release_dquot(struct dquot *dquot) static int ext4_mark_dquot_dirty(struct dquot *dquot) { - /* Are we journalling quotas? */ + /* Are we journaling quotas? */ if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { dquot_mark_dquot_dirty(dquot); @@ -3151,23 +3170,42 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, if (!test_opt(sb, QUOTA)) return -EINVAL; - /* Not journalling quota? */ - if ((!EXT4_SB(sb)->s_qf_names[USRQUOTA] && - !EXT4_SB(sb)->s_qf_names[GRPQUOTA]) || remount) + /* When remounting, no checks are needed and in fact, path is NULL */ + if (remount) return vfs_quota_on(sb, type, format_id, path, remount); + err = path_lookup(path, LOOKUP_FOLLOW, &nd); if (err) return err; + /* Quotafile not on the same filesystem? */ if (nd.path.mnt->mnt_sb != sb) { path_put(&nd.path); return -EXDEV; } - /* Quotafile not of fs root? */ - if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) - printk(KERN_WARNING - "EXT4-fs: Quota file not on filesystem root. " - "Journalled quota will not work.\n"); + /* Journaling quota? */ + if (EXT4_SB(sb)->s_qf_names[type]) { + /* Quotafile not of fs root? */ + if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) + printk(KERN_WARNING + "EXT4-fs: Quota file not on filesystem root. " + "Journaled quota will not work.\n"); + } + + /* + * When we journal data on quota file, we have to flush journal to see + * all updates to the file when we bypass pagecache... + */ + if (ext4_should_journal_data(nd.path.dentry->d_inode)) { + /* + * We don't need to lock updates but journal_flush() could + * otherwise be livelocked... + */ + jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); + jbd2_journal_flush(EXT4_SB(sb)->s_journal); + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + } + path_put(&nd.path); return vfs_quota_on(sb, type, format_id, path, remount); } diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index e6f9da4287c..e9178643dc0 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -19,8 +19,8 @@ #include <linux/fs.h> #include <linux/jbd2.h> -#include <linux/ext4_fs.h> #include <linux/namei.h> +#include "ext4.h" #include "xattr.h" static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e9054c1c7d9..ff08633f398 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -53,11 +53,11 @@ #include <linux/init.h> #include <linux/fs.h> #include <linux/slab.h> -#include <linux/ext4_jbd2.h> -#include <linux/ext4_fs.h> #include <linux/mbcache.h> #include <linux/quotaops.h> #include <linux/rwsem.h> +#include "ext4_jbd2.h" +#include "ext4.h" #include "xattr.h" #include "acl.h" @@ -92,6 +92,8 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *, struct mb_cache_entry **); static void ext4_xattr_rehash(struct ext4_xattr_header *, struct ext4_xattr_entry *); +static int ext4_xattr_list(struct inode *inode, char *buffer, + size_t buffer_size); static struct mb_cache *ext4_xattr_cache; @@ -225,7 +227,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); if (ext4_xattr_check_block(bh)) { -bad_block: ext4_error(inode->i_sb, __FUNCTION__, +bad_block: ext4_error(inode->i_sb, __func__, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; @@ -367,7 +369,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); if (ext4_xattr_check_block(bh)) { - ext4_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __func__, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; @@ -420,7 +422,7 @@ cleanup: * Returns a negative error number on failure, or the number of bytes * used / required on success. */ -int +static int ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) { int i_error, b_error; @@ -484,8 +486,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, get_bh(bh); ext4_forget(handle, 1, inode, bh, bh->b_blocknr); } else { - BHDR(bh)->h_refcount = cpu_to_le32( - le32_to_cpu(BHDR(bh)->h_refcount) - 1); + le32_add_cpu(&BHDR(bh)->h_refcount, -1); error = ext4_journal_dirty_metadata(handle, bh); if (IS_SYNC(inode)) handle->h_sync = 1; @@ -660,7 +661,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, atomic_read(&(bs->bh->b_count)), le32_to_cpu(BHDR(bs->bh)->h_refcount)); if (ext4_xattr_check_block(bs->bh)) { - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; @@ -738,7 +739,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ce = NULL; } ea_bdebug(bs->bh, "cloning"); - s->base = kmalloc(bs->bh->b_size, GFP_KERNEL); + s->base = kmalloc(bs->bh->b_size, GFP_NOFS); error = -ENOMEM; if (s->base == NULL) goto cleanup; @@ -750,7 +751,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, } } else { /* Allocate a buffer where we construct the new block. */ - s->base = kzalloc(sb->s_blocksize, GFP_KERNEL); + s->base = kzalloc(sb->s_blocksize, GFP_NOFS); /* assert(header == s->base) */ error = -ENOMEM; if (s->base == NULL) @@ -789,8 +790,7 @@ inserted: if (error) goto cleanup_dquot; lock_buffer(new_bh); - BHDR(new_bh)->h_refcount = cpu_to_le32(1 + - le32_to_cpu(BHDR(new_bh)->h_refcount)); + le32_add_cpu(&BHDR(new_bh)->h_refcount, 1); ea_bdebug(new_bh, "reusing; refcount now=%d", le32_to_cpu(BHDR(new_bh)->h_refcount)); unlock_buffer(new_bh); @@ -808,10 +808,8 @@ inserted: get_bh(new_bh); } else { /* We need to allocate a new block */ - ext4_fsblk_t goal = le32_to_cpu( - EXT4_SB(sb)->s_es->s_first_data_block) + - (ext4_fsblk_t)EXT4_I(inode)->i_block_group * - EXT4_BLOCKS_PER_GROUP(sb); + ext4_fsblk_t goal = ext4_group_first_block_no(sb, + EXT4_I(inode)->i_block_group); ext4_fsblk_t block = ext4_new_block(handle, inode, goal, &error); if (error) @@ -863,7 +861,7 @@ cleanup_dquot: goto cleanup; bad_block: - ext4_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __func__, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); goto cleanup; @@ -1011,6 +1009,11 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, i.value = NULL; error = ext4_xattr_block_set(handle, inode, &i, &bs); } else if (error == -ENOSPC) { + if (EXT4_I(inode)->i_file_acl && !bs.s.base) { + error = ext4_xattr_block_find(inode, &i, &bs); + if (error) + goto cleanup; + } error = ext4_xattr_block_set(handle, inode, &i, &bs); if (error) goto cleanup; @@ -1166,7 +1169,7 @@ retry: if (!bh) goto cleanup; if (ext4_xattr_check_block(bh)) { - ext4_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __func__, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; @@ -1341,14 +1344,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) goto cleanup; bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); if (!bh) { - ext4_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __func__, "inode %lu: block %llu read error", inode->i_ino, EXT4_I(inode)->i_file_acl); goto cleanup; } if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) { - ext4_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __func__, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); goto cleanup; @@ -1475,7 +1478,7 @@ again: } bh = sb_bread(inode->i_sb, ce->e_block); if (!bh) { - ext4_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __func__, "inode %lu: block %lu read error", inode->i_ino, (unsigned long) ce->e_block); } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index d7f5d6a1265..5992fe979bb 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -74,7 +74,6 @@ extern struct xattr_handler ext4_xattr_security_handler; extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); -extern int ext4_xattr_list(struct inode *, char *, size_t); extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); @@ -99,12 +98,6 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name, } static inline int -ext4_xattr_list(struct inode *inode, void *buffer, size_t size) -{ - return -EOPNOTSUPP; -} - -static inline int ext4_xattr_set(struct inode *inode, int name_index, const char *name, const void *value, size_t size, int flags) { diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index f17eaf2321b..ca5f89fc6ca 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -6,9 +6,9 @@ #include <linux/module.h> #include <linux/string.h> #include <linux/fs.h> -#include <linux/ext4_jbd2.h> -#include <linux/ext4_fs.h> #include <linux/security.h> +#include "ext4_jbd2.h" +#include "ext4.h" #include "xattr.h" static size_t diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index e0f05acdafe..fff33382cad 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c @@ -9,8 +9,8 @@ #include <linux/string.h> #include <linux/capability.h> #include <linux/fs.h> -#include <linux/ext4_jbd2.h> -#include <linux/ext4_fs.h> +#include "ext4_jbd2.h" +#include "ext4.h" #include "xattr.h" #define XATTR_TRUSTED_PREFIX "trusted." diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index 7ed3d8ebf09..67be723fcc4 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c @@ -8,8 +8,8 @@ #include <linux/module.h> #include <linux/string.h> #include <linux/fs.h> -#include <linux/ext4_jbd2.h> -#include <linux/ext4_fs.h> +#include "ext4_jbd2.h" +#include "ext4.h" #include "xattr.h" #define XATTR_USER_PREFIX "user." diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 639b3b4f86d..fda25479af2 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -242,7 +242,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus) /* prevent the infinite loop of cluster chain */ if (*fclus > limit) { fat_fs_panic(sb, "%s: detected the cluster chain loop" - " (i_pos %lld)", __FUNCTION__, + " (i_pos %lld)", __func__, MSDOS_I(inode)->i_pos); nr = -EIO; goto out; @@ -253,7 +253,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus) goto out; else if (nr == FAT_ENT_FREE) { fat_fs_panic(sb, "%s: invalid cluster chain" - " (i_pos %lld)", __FUNCTION__, + " (i_pos %lld)", __func__, MSDOS_I(inode)->i_pos); nr = -EIO; goto out; @@ -286,7 +286,7 @@ static int fat_bmap_cluster(struct inode *inode, int cluster) return ret; else if (ret == FAT_ENT_EOF) { fat_fs_panic(sb, "%s: request beyond EOF (i_pos %lld)", - __FUNCTION__, MSDOS_I(inode)->i_pos); + __func__, MSDOS_I(inode)->i_pos); return -EIO; } return dclus; diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 13ab763cc51..302e95c4af7 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -546,7 +546,7 @@ int fat_free_clusters(struct inode *inode, int cluster) goto error; } else if (cluster == FAT_ENT_FREE) { fat_fs_panic(sb, "%s: deleting FAT entry beyond EOF", - __FUNCTION__); + __func__); err = -EIO; goto error; } diff --git a/fs/fat/file.c b/fs/fat/file.c index d604bb13242..27cc1164ec3 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -208,7 +208,7 @@ static int fat_free(struct inode *inode, int skip) } else if (ret == FAT_ENT_FREE) { fat_fs_panic(sb, "%s: invalid cluster chain (i_pos %lld)", - __FUNCTION__, MSDOS_I(inode)->i_pos); + __func__, MSDOS_I(inode)->i_pos); ret = -EIO; } else if (ret > 0) { err = fat_ent_write(inode, &fatent, FAT_ENT_EOF, wait); diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 5f522a55b59..4e0a3dd9d67 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1222,8 +1222,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, brelse(bh); goto out_invalid; } - logical_sector_size = - le16_to_cpu(get_unaligned((__le16 *)&b->sector_size)); + logical_sector_size = get_unaligned_le16(&b->sector_size); if (!is_power_of_2(logical_sector_size) || (logical_sector_size < 512) || (logical_sector_size > 4096)) { @@ -1322,8 +1321,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1; sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length; - sbi->dir_entries = - le16_to_cpu(get_unaligned((__le16 *)&b->dir_entries)); + sbi->dir_entries = get_unaligned_le16(&b->dir_entries); if (sbi->dir_entries & (sbi->dir_per_block - 1)) { if (!silent) printk(KERN_ERR "FAT: bogus directroy-entries per block" @@ -1335,7 +1333,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, rootdir_sectors = sbi->dir_entries * sizeof(struct msdos_dir_entry) / sb->s_blocksize; sbi->data_start = sbi->dir_start + rootdir_sectors; - total_sectors = le16_to_cpu(get_unaligned((__le16 *)&b->sectors)); + total_sectors = get_unaligned_le16(&b->sectors); if (total_sectors == 0) total_sectors = le32_to_cpu(b->total_sect); diff --git a/fs/fcntl.c b/fs/fcntl.c index 3f3ac630ccd..bfd776509a7 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -9,6 +9,7 @@ #include <linux/mm.h> #include <linux/fs.h> #include <linux/file.h> +#include <linux/fdtable.h> #include <linux/capability.h> #include <linux/dnotify.h> #include <linux/smp_lock.h> diff --git a/fs/file.c b/fs/file.c index 5110acb1c9e..4c6f0ea12c4 100644 --- a/fs/file.c +++ b/fs/file.c @@ -12,6 +12,7 @@ #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/file.h> +#include <linux/fdtable.h> #include <linux/bitops.h> #include <linux/interrupt.h> #include <linux/spinlock.h> @@ -149,8 +150,16 @@ static struct fdtable * alloc_fdtable(unsigned int nr) nr /= (1024 / sizeof(struct file *)); nr = roundup_pow_of_two(nr + 1); nr *= (1024 / sizeof(struct file *)); - if (nr > sysctl_nr_open) - nr = sysctl_nr_open; + /* + * Note that this can drive nr *below* what we had passed if sysctl_nr_open + * had been set lower between the check in expand_files() and here. Deal + * with that in caller, it's cheaper that way. + * + * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise + * bitmaps handling below becomes unpleasant, to put it mildly... + */ + if (unlikely(nr > sysctl_nr_open)) + nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1; fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); if (!fdt) @@ -199,6 +208,16 @@ static int expand_fdtable(struct files_struct *files, int nr) if (!new_fdt) return -ENOMEM; /* + * extremely unlikely race - sysctl_nr_open decreased between the check in + * caller and alloc_fdtable(). Cheaper to catch it here... + */ + if (unlikely(new_fdt->max_fds <= nr)) { + free_fdarr(new_fdt); + free_fdset(new_fdt); + kfree(new_fdt); + return -EMFILE; + } + /* * Check again since another task may have expanded the fd table while * we dropped the lock */ diff --git a/fs/file_table.c b/fs/file_table.c index 7a0a9b87225..83084225b4c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -8,6 +8,7 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/file.h> +#include <linux/fdtable.h> #include <linux/init.h> #include <linux/module.h> #include <linux/fs.h> diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h index 2b46064f66b..50ab5eecb99 100644 --- a/fs/freevxfs/vxfs_extern.h +++ b/fs/freevxfs/vxfs_extern.h @@ -50,7 +50,11 @@ extern daddr_t vxfs_bmap1(struct inode *, long); /* vxfs_fshead.c */ extern int vxfs_read_fshead(struct super_block *); +/* vxfs_immed.c */ +extern const struct inode_operations vxfs_immed_symlink_iops; + /* vxfs_inode.c */ +extern const struct address_space_operations vxfs_immed_aops; extern struct kmem_cache *vxfs_inode_cachep; extern void vxfs_dumpi(struct vxfs_inode_info *, ino_t); extern struct inode * vxfs_get_fake_inode(struct super_block *, @@ -69,6 +73,7 @@ extern const struct file_operations vxfs_dir_operations; extern int vxfs_read_olt(struct super_block *, u_long); /* vxfs_subr.c */ +extern const struct address_space_operations vxfs_aops; extern struct page * vxfs_get_page(struct address_space *, u_long); extern void vxfs_put_page(struct page *); extern struct buffer_head * vxfs_bread(struct inode *, int); diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c index 8a5959a61ba..c36aeaf92e4 100644 --- a/fs/freevxfs/vxfs_immed.c +++ b/fs/freevxfs/vxfs_immed.c @@ -35,6 +35,7 @@ #include <linux/namei.h> #include "vxfs.h" +#include "vxfs_extern.h" #include "vxfs_inode.h" diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index ad88d2364bc..9f3f2ceb73f 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -41,11 +41,6 @@ #include "vxfs_extern.h" -extern const struct address_space_operations vxfs_aops; -extern const struct address_space_operations vxfs_immed_aops; - -extern const struct inode_operations vxfs_immed_symlink_iops; - struct kmem_cache *vxfs_inode_cachep; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 06557679ca4..ae45f77765c 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -25,6 +25,45 @@ #include <linux/buffer_head.h> #include "internal.h" + +/** + * writeback_acquire - attempt to get exclusive writeback access to a device + * @bdi: the device's backing_dev_info structure + * + * It is a waste of resources to have more than one pdflush thread blocked on + * a single request queue. Exclusion at the request_queue level is obtained + * via a flag in the request_queue's backing_dev_info.state. + * + * Non-request_queue-backed address_spaces will share default_backing_dev_info, + * unless they implement their own. Which is somewhat inefficient, as this + * may prevent concurrent writeback against multiple devices. + */ +static int writeback_acquire(struct backing_dev_info *bdi) +{ + return !test_and_set_bit(BDI_pdflush, &bdi->state); +} + +/** + * writeback_in_progress - determine whether there is writeback in progress + * @bdi: the device's backing_dev_info structure. + * + * Determine whether there is writeback in progress against a backing device. + */ +int writeback_in_progress(struct backing_dev_info *bdi) +{ + return test_bit(BDI_pdflush, &bdi->state); +} + +/** + * writeback_release - relinquish exclusive writeback access against a device. + * @bdi: the device's backing_dev_info structure + */ +static void writeback_release(struct backing_dev_info *bdi) +{ + BUG_ON(!writeback_in_progress(bdi)); + clear_bit(BDI_pdflush, &bdi->state); +} + /** * __mark_inode_dirty - internal function * @inode: inode to mark @@ -747,43 +786,4 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int return err; } - EXPORT_SYMBOL(generic_osync_inode); - -/** - * writeback_acquire - attempt to get exclusive writeback access to a device - * @bdi: the device's backing_dev_info structure - * - * It is a waste of resources to have more than one pdflush thread blocked on - * a single request queue. Exclusion at the request_queue level is obtained - * via a flag in the request_queue's backing_dev_info.state. - * - * Non-request_queue-backed address_spaces will share default_backing_dev_info, - * unless they implement their own. Which is somewhat inefficient, as this - * may prevent concurrent writeback against multiple devices. - */ -int writeback_acquire(struct backing_dev_info *bdi) -{ - return !test_and_set_bit(BDI_pdflush, &bdi->state); -} - -/** - * writeback_in_progress - determine whether there is writeback in progress - * @bdi: the device's backing_dev_info structure. - * - * Determine whether there is writeback in progress against a backing device. - */ -int writeback_in_progress(struct backing_dev_info *bdi) -{ - return test_bit(BDI_pdflush, &bdi->state); -} - -/** - * writeback_release - relinquish exclusive writeback access against a device. - * @bdi: the device's backing_dev_info structure - */ -void writeback_release(struct backing_dev_info *bdi) -{ - BUG_ON(!writeback_in_progress(bdi)); - clear_bit(BDI_pdflush, &bdi->state); -} diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 105d4a271e0..4f3cab32141 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -117,7 +117,7 @@ int fuse_ctl_add_conn(struct fuse_conn *fc) parent = fuse_control_sb->s_root; inc_nlink(parent->d_inode); - sprintf(name, "%llu", (unsigned long long) fc->id); + sprintf(name, "%u", fc->dev); parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2, &simple_dir_inode_operations, &simple_dir_operations); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index af639807524..87250b6a868 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -47,6 +47,14 @@ struct fuse_req *fuse_request_alloc(void) return req; } +struct fuse_req *fuse_request_alloc_nofs(void) +{ + struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS); + if (req) + fuse_request_init(req); + return req; +} + void fuse_request_free(struct fuse_req *req) { kmem_cache_free(fuse_req_cachep, req); @@ -291,6 +299,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) static void wait_answer_interruptible(struct fuse_conn *fc, struct fuse_req *req) + __releases(fc->lock) __acquires(fc->lock) { if (signal_pending(current)) return; @@ -307,8 +316,8 @@ static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req) kill_fasync(&fc->fasync, SIGIO, POLL_IN); } -/* Called with fc->lock held. Releases, and then reacquires it. */ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) + __releases(fc->lock) __acquires(fc->lock) { if (!fc->no_interrupt) { /* Any signal may interrupt this */ @@ -430,6 +439,17 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req) } /* + * Called under fc->lock + * + * fc->connected must have been checked previously + */ +void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req) +{ + req->isreply = 1; + request_send_nowait_locked(fc, req); +} + +/* * Lock the request. Up to the next unlock_request() there mustn't be * anything that could cause a page-fault. If the request was already * aborted bail out. @@ -968,6 +988,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head) * locked). */ static void end_io_requests(struct fuse_conn *fc) + __releases(fc->lock) __acquires(fc->lock) { while (!list_empty(&fc->io)) { struct fuse_req *req = diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c4807b3fc8a..2060bf06b90 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -132,7 +132,7 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, req->out.args[0].value = outarg; } -static u64 fuse_get_attr_version(struct fuse_conn *fc) +u64 fuse_get_attr_version(struct fuse_conn *fc) { u64 curr_version; @@ -1107,6 +1107,50 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) } /* + * Prevent concurrent writepages on inode + * + * This is done by adding a negative bias to the inode write counter + * and waiting for all pending writes to finish. + */ +void fuse_set_nowrite(struct inode *inode) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + + BUG_ON(!mutex_is_locked(&inode->i_mutex)); + + spin_lock(&fc->lock); + BUG_ON(fi->writectr < 0); + fi->writectr += FUSE_NOWRITE; + spin_unlock(&fc->lock); + wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE); +} + +/* + * Allow writepages on inode + * + * Remove the bias from the writecounter and send any queued + * writepages. + */ +static void __fuse_release_nowrite(struct inode *inode) +{ + struct fuse_inode *fi = get_fuse_inode(inode); + + BUG_ON(fi->writectr != FUSE_NOWRITE); + fi->writectr = 0; + fuse_flush_writepages(inode); +} + +void fuse_release_nowrite(struct inode *inode) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + + spin_lock(&fc->lock); + __fuse_release_nowrite(inode); + spin_unlock(&fc->lock); +} + +/* * Set attributes, and at the same time refresh them. * * Truncation is slightly complicated, because the 'truncate' request @@ -1122,6 +1166,8 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, struct fuse_req *req; struct fuse_setattr_in inarg; struct fuse_attr_out outarg; + bool is_truncate = false; + loff_t oldsize; int err; if (!fuse_allow_task(fc, current)) @@ -1145,12 +1191,16 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, send_sig(SIGXFSZ, current, 0); return -EFBIG; } + is_truncate = true; } req = fuse_get_req(fc); if (IS_ERR(req)) return PTR_ERR(req); + if (is_truncate) + fuse_set_nowrite(inode); + memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); iattr_to_fattr(attr, &inarg); @@ -1181,16 +1231,44 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, if (err) { if (err == -EINTR) fuse_invalidate_attr(inode); - return err; + goto error; } if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { make_bad_inode(inode); - return -EIO; + err = -EIO; + goto error; + } + + spin_lock(&fc->lock); + fuse_change_attributes_common(inode, &outarg.attr, + attr_timeout(&outarg)); + oldsize = inode->i_size; + i_size_write(inode, outarg.attr.size); + + if (is_truncate) { + /* NOTE: this may release/reacquire fc->lock */ + __fuse_release_nowrite(inode); + } + spin_unlock(&fc->lock); + + /* + * Only call invalidate_inode_pages2() after removing + * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. + */ + if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { + if (outarg.attr.size < oldsize) + fuse_truncate(inode->i_mapping, outarg.attr.size); + invalidate_inode_pages2(inode->i_mapping); } - fuse_change_attributes(inode, &outarg.attr, attr_timeout(&outarg), 0); return 0; + +error: + if (is_truncate) + fuse_release_nowrite(inode); + + return err; } static int fuse_setattr(struct dentry *entry, struct iattr *attr) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 676b0bc8a86..8092f0d9fd1 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -210,6 +210,49 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) return (u64) v0 + ((u64) v1 << 32); } +/* + * Check if page is under writeback + * + * This is currently done by walking the list of writepage requests + * for the inode, which can be pretty inefficient. + */ +static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_req *req; + bool found = false; + + spin_lock(&fc->lock); + list_for_each_entry(req, &fi->writepages, writepages_entry) { + pgoff_t curr_index; + + BUG_ON(req->inode != inode); + curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; + if (curr_index == index) { + found = true; + break; + } + } + spin_unlock(&fc->lock); + + return found; +} + +/* + * Wait for page writeback to be completed. + * + * Since fuse doesn't rely on the VM writeback tracking, this has to + * use some other means. + */ +static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index) +{ + struct fuse_inode *fi = get_fuse_inode(inode); + + wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index)); + return 0; +} + static int fuse_flush(struct file *file, fl_owner_t id) { struct inode *inode = file->f_path.dentry->d_inode; @@ -245,6 +288,21 @@ static int fuse_flush(struct file *file, fl_owner_t id) return err; } +/* + * Wait for all pending writepages on the inode to finish. + * + * This is currently done by blocking further writes with FUSE_NOWRITE + * and waiting for all sent writes to complete. + * + * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage + * could conflict with truncation. + */ +static void fuse_sync_writes(struct inode *inode) +{ + fuse_set_nowrite(inode); + fuse_release_nowrite(inode); +} + int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, int isdir) { @@ -261,6 +319,17 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) return 0; + /* + * Start writeback against all dirty pages of the inode, then + * wait for all outstanding writes, before sending the FSYNC + * request. + */ + err = write_inode_now(inode, 0); + if (err) + return err; + + fuse_sync_writes(inode); + req = fuse_get_req(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -294,7 +363,7 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync) void fuse_read_fill(struct fuse_req *req, struct file *file, struct inode *inode, loff_t pos, size_t count, int opcode) { - struct fuse_read_in *inarg = &req->misc.read_in; + struct fuse_read_in *inarg = &req->misc.read.in; struct fuse_file *ff = file->private_data; inarg->fh = ff->fh; @@ -320,7 +389,7 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file, fuse_read_fill(req, file, inode, pos, count, FUSE_READ); if (owner != NULL) { - struct fuse_read_in *inarg = &req->misc.read_in; + struct fuse_read_in *inarg = &req->misc.read.in; inarg->read_flags |= FUSE_READ_LOCKOWNER; inarg->lock_owner = fuse_lock_owner_id(fc, owner); @@ -329,31 +398,66 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file, return req->out.args[0].size; } +static void fuse_read_update_size(struct inode *inode, loff_t size, + u64 attr_ver) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + + spin_lock(&fc->lock); + if (attr_ver == fi->attr_version && size < inode->i_size) { + fi->attr_version = ++fc->attr_version; + i_size_write(inode, size); + } + spin_unlock(&fc->lock); +} + static int fuse_readpage(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_req *req; + size_t num_read; + loff_t pos = page_offset(page); + size_t count = PAGE_CACHE_SIZE; + u64 attr_ver; int err; err = -EIO; if (is_bad_inode(inode)) goto out; + /* + * Page writeback can extend beyond the liftime of the + * page-cache page, so make sure we read a properly synced + * page. + */ + fuse_wait_on_page_writeback(inode, page->index); + req = fuse_get_req(fc); err = PTR_ERR(req); if (IS_ERR(req)) goto out; + attr_ver = fuse_get_attr_version(fc); + req->out.page_zeroing = 1; req->num_pages = 1; req->pages[0] = page; - fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE, - NULL); + num_read = fuse_send_read(req, file, inode, pos, count, NULL); err = req->out.h.error; fuse_put_request(fc, req); - if (!err) + + if (!err) { + /* + * Short read means EOF. If file size is larger, truncate it + */ + if (num_read < count) + fuse_read_update_size(inode, pos + num_read, attr_ver); + SetPageUptodate(page); + } + fuse_invalidate_attr(inode); /* atime changed */ out: unlock_page(page); @@ -363,8 +467,19 @@ static int fuse_readpage(struct file *file, struct page *page) static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) { int i; + size_t count = req->misc.read.in.size; + size_t num_read = req->out.args[0].size; + struct inode *inode = req->pages[0]->mapping->host; + + /* + * Short read means EOF. If file size is larger, truncate it + */ + if (!req->out.h.error && num_read < count) { + loff_t pos = page_offset(req->pages[0]) + num_read; + fuse_read_update_size(inode, pos, req->misc.read.attr_ver); + } - fuse_invalidate_attr(req->pages[0]->mapping->host); /* atime changed */ + fuse_invalidate_attr(inode); /* atime changed */ for (i = 0; i < req->num_pages; i++) { struct page *page = req->pages[i]; @@ -387,6 +502,7 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file, size_t count = req->num_pages << PAGE_CACHE_SHIFT; req->out.page_zeroing = 1; fuse_read_fill(req, file, inode, pos, count, FUSE_READ); + req->misc.read.attr_ver = fuse_get_attr_version(fc); if (fc->async_read) { struct fuse_file *ff = file->private_data; req->ff = fuse_file_get(ff); @@ -411,6 +527,8 @@ static int fuse_readpages_fill(void *_data, struct page *page) struct inode *inode = data->inode; struct fuse_conn *fc = get_fuse_conn(inode); + fuse_wait_on_page_writeback(inode, page->index); + if (req->num_pages && (req->num_pages == FUSE_MAX_PAGES_PER_REQ || (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || @@ -477,11 +595,10 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, } static void fuse_write_fill(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, size_t count, - int writepage) + struct fuse_file *ff, struct inode *inode, + loff_t pos, size_t count, int writepage) { struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_file *ff = file->private_data; struct fuse_write_in *inarg = &req->misc.write.in; struct fuse_write_out *outarg = &req->misc.write.out; @@ -490,7 +607,7 @@ static void fuse_write_fill(struct fuse_req *req, struct file *file, inarg->offset = pos; inarg->size = count; inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0; - inarg->flags = file->f_flags; + inarg->flags = file ? file->f_flags : 0; req->in.h.opcode = FUSE_WRITE; req->in.h.nodeid = get_node_id(inode); req->in.argpages = 1; @@ -511,7 +628,7 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file, fl_owner_t owner) { struct fuse_conn *fc = get_fuse_conn(inode); - fuse_write_fill(req, file, inode, pos, count, 0); + fuse_write_fill(req, file, file->private_data, inode, pos, count, 0); if (owner != NULL) { struct fuse_write_in *inarg = &req->misc.write.in; inarg->write_flags |= FUSE_WRITE_LOCKOWNER; @@ -533,19 +650,36 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping, return 0; } +static void fuse_write_update_size(struct inode *inode, loff_t pos) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + + spin_lock(&fc->lock); + fi->attr_version = ++fc->attr_version; + if (pos > inode->i_size) + i_size_write(inode, pos); + spin_unlock(&fc->lock); +} + static int fuse_buffered_write(struct file *file, struct inode *inode, loff_t pos, unsigned count, struct page *page) { int err; size_t nres; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_inode *fi = get_fuse_inode(inode); unsigned offset = pos & (PAGE_CACHE_SIZE - 1); struct fuse_req *req; if (is_bad_inode(inode)) return -EIO; + /* + * Make sure writepages on the same page are not mixed up with + * plain writes. + */ + fuse_wait_on_page_writeback(inode, page->index); + req = fuse_get_req(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -560,12 +694,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode, err = -EIO; if (!err) { pos += nres; - spin_lock(&fc->lock); - fi->attr_version = ++fc->attr_version; - if (pos > inode->i_size) - i_size_write(inode, pos); - spin_unlock(&fc->lock); - + fuse_write_update_size(inode, pos); if (count == PAGE_CACHE_SIZE) SetPageUptodate(page); } @@ -588,6 +717,200 @@ static int fuse_write_end(struct file *file, struct address_space *mapping, return res; } +static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, + struct inode *inode, loff_t pos, + size_t count) +{ + size_t res; + unsigned offset; + unsigned i; + + for (i = 0; i < req->num_pages; i++) + fuse_wait_on_page_writeback(inode, req->pages[i]->index); + + res = fuse_send_write(req, file, inode, pos, count, NULL); + + offset = req->page_offset; + count = res; + for (i = 0; i < req->num_pages; i++) { + struct page *page = req->pages[i]; + + if (!req->out.h.error && !offset && count >= PAGE_CACHE_SIZE) + SetPageUptodate(page); + + if (count > PAGE_CACHE_SIZE - offset) + count -= PAGE_CACHE_SIZE - offset; + else + count = 0; + offset = 0; + + unlock_page(page); + page_cache_release(page); + } + + return res; +} + +static ssize_t fuse_fill_write_pages(struct fuse_req *req, + struct address_space *mapping, + struct iov_iter *ii, loff_t pos) +{ + struct fuse_conn *fc = get_fuse_conn(mapping->host); + unsigned offset = pos & (PAGE_CACHE_SIZE - 1); + size_t count = 0; + int err; + + req->page_offset = offset; + + do { + size_t tmp; + struct page *page; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + size_t bytes = min_t(size_t, PAGE_CACHE_SIZE - offset, + iov_iter_count(ii)); + + bytes = min_t(size_t, bytes, fc->max_write - count); + + again: + err = -EFAULT; + if (iov_iter_fault_in_readable(ii, bytes)) + break; + + err = -ENOMEM; + page = __grab_cache_page(mapping, index); + if (!page) + break; + + pagefault_disable(); + tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes); + pagefault_enable(); + flush_dcache_page(page); + + if (!tmp) { + unlock_page(page); + page_cache_release(page); + bytes = min(bytes, iov_iter_single_seg_count(ii)); + goto again; + } + + err = 0; + req->pages[req->num_pages] = page; + req->num_pages++; + + iov_iter_advance(ii, tmp); + count += tmp; + pos += tmp; + offset += tmp; + if (offset == PAGE_CACHE_SIZE) + offset = 0; + + if (!fc->big_writes) + break; + } while (iov_iter_count(ii) && count < fc->max_write && + req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0); + + return count > 0 ? count : err; +} + +static ssize_t fuse_perform_write(struct file *file, + struct address_space *mapping, + struct iov_iter *ii, loff_t pos) +{ + struct inode *inode = mapping->host; + struct fuse_conn *fc = get_fuse_conn(inode); + int err = 0; + ssize_t res = 0; + + if (is_bad_inode(inode)) + return -EIO; + + do { + struct fuse_req *req; + ssize_t count; + + req = fuse_get_req(fc); + if (IS_ERR(req)) { + err = PTR_ERR(req); + break; + } + + count = fuse_fill_write_pages(req, mapping, ii, pos); + if (count <= 0) { + err = count; + } else { + size_t num_written; + + num_written = fuse_send_write_pages(req, file, inode, + pos, count); + err = req->out.h.error; + if (!err) { + res += num_written; + pos += num_written; + + /* break out of the loop on short write */ + if (num_written != count) + err = -EIO; + } + } + fuse_put_request(fc, req); + } while (!err && iov_iter_count(ii)); + + if (res > 0) + fuse_write_update_size(inode, pos); + + fuse_invalidate_attr(inode); + + return res > 0 ? res : err; +} + +static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + size_t count = 0; + ssize_t written = 0; + struct inode *inode = mapping->host; + ssize_t err; + struct iov_iter i; + + WARN_ON(iocb->ki_pos != pos); + + err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); + if (err) + return err; + + mutex_lock(&inode->i_mutex); + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + + /* We can write back this queue in page reclaim */ + current->backing_dev_info = mapping->backing_dev_info; + + err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); + if (err) + goto out; + + if (count == 0) + goto out; + + err = remove_suid(file->f_path.dentry); + if (err) + goto out; + + file_update_time(file); + + iov_iter_init(&i, iov, nr_segs, count, 0); + written = fuse_perform_write(file, mapping, &i, pos); + if (written >= 0) + iocb->ki_pos = pos + written; + +out: + current->backing_dev_info = NULL; + mutex_unlock(&inode->i_mutex); + + return written ? written : err; +} + static void fuse_release_user_pages(struct fuse_req *req, int write) { unsigned i; @@ -613,7 +936,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; - npages = min(max(npages, 1), FUSE_MAX_PAGES_PER_REQ); + npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); down_read(¤t->mm->mmap_sem); npages = get_user_pages(current, current->mm, user_addr, npages, write, 0, req->pages, NULL); @@ -645,14 +968,15 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, while (count) { size_t nres; - size_t nbytes = min(count, nmax); - int err = fuse_get_user_pages(req, buf, nbytes, !write); + size_t nbytes_limit = min(count, nmax); + size_t nbytes; + int err = fuse_get_user_pages(req, buf, nbytes_limit, !write); if (err) { res = err; break; } nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; - nbytes = min(count, nbytes); + nbytes = min(nbytes_limit, nbytes); if (write) nres = fuse_send_write(req, file, inode, pos, nbytes, current->files); @@ -683,12 +1007,8 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, } fuse_put_request(fc, req); if (res > 0) { - if (write) { - spin_lock(&fc->lock); - if (pos > inode->i_size) - i_size_write(inode, pos); - spin_unlock(&fc->lock); - } + if (write) + fuse_write_update_size(inode, pos); *ppos = pos; } fuse_invalidate_attr(inode); @@ -716,21 +1036,225 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, return res; } -static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) +static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) { - if ((vma->vm_flags & VM_SHARED)) { - if ((vma->vm_flags & VM_WRITE)) - return -ENODEV; - else - vma->vm_flags &= ~VM_MAYWRITE; + __free_page(req->pages[0]); + fuse_file_put(req->ff); + fuse_put_request(fc, req); +} + +static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) +{ + struct inode *inode = req->inode; + struct fuse_inode *fi = get_fuse_inode(inode); + struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; + + list_del(&req->writepages_entry); + dec_bdi_stat(bdi, BDI_WRITEBACK); + dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP); + bdi_writeout_inc(bdi); + wake_up(&fi->page_waitq); +} + +/* Called under fc->lock, may release and reacquire it */ +static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) +{ + struct fuse_inode *fi = get_fuse_inode(req->inode); + loff_t size = i_size_read(req->inode); + struct fuse_write_in *inarg = &req->misc.write.in; + + if (!fc->connected) + goto out_free; + + if (inarg->offset + PAGE_CACHE_SIZE <= size) { + inarg->size = PAGE_CACHE_SIZE; + } else if (inarg->offset < size) { + inarg->size = size & (PAGE_CACHE_SIZE - 1); + } else { + /* Got truncated off completely */ + goto out_free; + } + + req->in.args[1].size = inarg->size; + fi->writectr++; + request_send_background_locked(fc, req); + return; + + out_free: + fuse_writepage_finish(fc, req); + spin_unlock(&fc->lock); + fuse_writepage_free(fc, req); + spin_lock(&fc->lock); +} + +/* + * If fi->writectr is positive (no truncate or fsync going on) send + * all queued writepage requests. + * + * Called with fc->lock + */ +void fuse_flush_writepages(struct inode *inode) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_req *req; + + while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) { + req = list_entry(fi->queued_writes.next, struct fuse_req, list); + list_del_init(&req->list); + fuse_send_writepage(fc, req); + } +} + +static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) +{ + struct inode *inode = req->inode; + struct fuse_inode *fi = get_fuse_inode(inode); + + mapping_set_error(inode->i_mapping, req->out.h.error); + spin_lock(&fc->lock); + fi->writectr--; + fuse_writepage_finish(fc, req); + spin_unlock(&fc->lock); + fuse_writepage_free(fc, req); +} + +static int fuse_writepage_locked(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_req *req; + struct fuse_file *ff; + struct page *tmp_page; + + set_page_writeback(page); + + req = fuse_request_alloc_nofs(); + if (!req) + goto err; + + tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (!tmp_page) + goto err_free; + + spin_lock(&fc->lock); + BUG_ON(list_empty(&fi->write_files)); + ff = list_entry(fi->write_files.next, struct fuse_file, write_entry); + req->ff = fuse_file_get(ff); + spin_unlock(&fc->lock); + + fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1); + + copy_highpage(tmp_page, page); + req->num_pages = 1; + req->pages[0] = tmp_page; + req->page_offset = 0; + req->end = fuse_writepage_end; + req->inode = inode; + + inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK); + inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); + end_page_writeback(page); + + spin_lock(&fc->lock); + list_add(&req->writepages_entry, &fi->writepages); + list_add_tail(&req->list, &fi->queued_writes); + fuse_flush_writepages(inode); + spin_unlock(&fc->lock); + + return 0; + +err_free: + fuse_request_free(req); +err: + end_page_writeback(page); + return -ENOMEM; +} + +static int fuse_writepage(struct page *page, struct writeback_control *wbc) +{ + int err; + + err = fuse_writepage_locked(page); + unlock_page(page); + + return err; +} + +static int fuse_launder_page(struct page *page) +{ + int err = 0; + if (clear_page_dirty_for_io(page)) { + struct inode *inode = page->mapping->host; + err = fuse_writepage_locked(page); + if (!err) + fuse_wait_on_page_writeback(inode, page->index); } - return generic_file_mmap(file, vma); + return err; } -static int fuse_set_page_dirty(struct page *page) +/* + * Write back dirty pages now, because there may not be any suitable + * open files later + */ +static void fuse_vma_close(struct vm_area_struct *vma) { - printk("fuse_set_page_dirty: should not happen\n"); - dump_stack(); + filemap_write_and_wait(vma->vm_file->f_mapping); +} + +/* + * Wait for writeback against this page to complete before allowing it + * to be marked dirty again, and hence written back again, possibly + * before the previous writepage completed. + * + * Block here, instead of in ->writepage(), so that the userspace fs + * can only block processes actually operating on the filesystem. + * + * Otherwise unprivileged userspace fs would be able to block + * unrelated: + * + * - page migration + * - sync(2) + * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER + */ +static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + /* + * Don't use page->mapping as it may become NULL from a + * concurrent truncate. + */ + struct inode *inode = vma->vm_file->f_mapping->host; + + fuse_wait_on_page_writeback(inode, page->index); + return 0; +} + +static struct vm_operations_struct fuse_file_vm_ops = { + .close = fuse_vma_close, + .fault = filemap_fault, + .page_mkwrite = fuse_page_mkwrite, +}; + +static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) { + struct inode *inode = file->f_dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_file *ff = file->private_data; + /* + * file may be written through mmap, so chain it onto the + * inodes's write_file list + */ + spin_lock(&fc->lock); + if (list_empty(&ff->write_entry)) + list_add(&ff->write_entry, &fi->write_files); + spin_unlock(&fc->lock); + } + file_accessed(file); + vma->vm_ops = &fuse_file_vm_ops; return 0; } @@ -909,12 +1433,37 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block) return err ? 0 : outarg.block; } +static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin) +{ + loff_t retval; + struct inode *inode = file->f_path.dentry->d_inode; + + mutex_lock(&inode->i_mutex); + switch (origin) { + case SEEK_END: + offset += i_size_read(inode); + break; + case SEEK_CUR: + offset += file->f_pos; + } + retval = -EINVAL; + if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) { + if (offset != file->f_pos) { + file->f_pos = offset; + file->f_version = 0; + } + retval = offset; + } + mutex_unlock(&inode->i_mutex); + return retval; +} + static const struct file_operations fuse_file_operations = { - .llseek = generic_file_llseek, + .llseek = fuse_file_llseek, .read = do_sync_read, .aio_read = fuse_file_aio_read, .write = do_sync_write, - .aio_write = generic_file_aio_write, + .aio_write = fuse_file_aio_write, .mmap = fuse_file_mmap, .open = fuse_open, .flush = fuse_flush, @@ -926,7 +1475,7 @@ static const struct file_operations fuse_file_operations = { }; static const struct file_operations fuse_direct_io_file_operations = { - .llseek = generic_file_llseek, + .llseek = fuse_file_llseek, .read = fuse_direct_read, .write = fuse_direct_write, .open = fuse_open, @@ -940,10 +1489,12 @@ static const struct file_operations fuse_direct_io_file_operations = { static const struct address_space_operations fuse_file_aops = { .readpage = fuse_readpage, + .writepage = fuse_writepage, + .launder_page = fuse_launder_page, .write_begin = fuse_write_begin, .write_end = fuse_write_end, .readpages = fuse_readpages, - .set_page_dirty = fuse_set_page_dirty, + .set_page_dirty = __set_page_dirty_nobuffers, .bmap = fuse_bmap, }; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 67aaf6ee38e..bae948657c4 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -15,6 +15,7 @@ #include <linux/mm.h> #include <linux/backing-dev.h> #include <linux/mutex.h> +#include <linux/rwsem.h> /** Max number of pages that can be used in a single read request */ #define FUSE_MAX_PAGES_PER_REQ 32 @@ -25,6 +26,9 @@ /** Congestion starts at 75% of maximum */ #define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100) +/** Bias for fi->writectr, meaning new writepages must not be sent */ +#define FUSE_NOWRITE INT_MIN + /** It could be as large as PATH_MAX, but would that have any uses? */ #define FUSE_NAME_MAX 1024 @@ -73,6 +77,19 @@ struct fuse_inode { /** Files usable in writepage. Protected by fc->lock */ struct list_head write_files; + + /** Writepages pending on truncate or fsync */ + struct list_head queued_writes; + + /** Number of sent writes, a negative bias (FUSE_NOWRITE) + * means more writes are blocked */ + int writectr; + + /** Waitq for writepage completion */ + wait_queue_head_t page_waitq; + + /** List of writepage requestst (pending or sent) */ + struct list_head writepages; }; /** FUSE specific file data */ @@ -222,7 +239,10 @@ struct fuse_req { } release; struct fuse_init_in init_in; struct fuse_init_out init_out; - struct fuse_read_in read_in; + struct { + struct fuse_read_in in; + u64 attr_ver; + } read; struct { struct fuse_write_in in; struct fuse_write_out out; @@ -242,6 +262,12 @@ struct fuse_req { /** File used in the request (or NULL) */ struct fuse_file *ff; + /** Inode used in the request or NULL */ + struct inode *inode; + + /** Link on fi->writepages */ + struct list_head writepages_entry; + /** Request completion callback */ void (*end)(struct fuse_conn *, struct fuse_req *); @@ -378,6 +404,9 @@ struct fuse_conn { /** Is bmap not implemented by fs? */ unsigned no_bmap : 1; + /** Do multi-page cached writes */ + unsigned big_writes : 1; + /** The number of requests waiting for completion */ atomic_t num_waiting; @@ -390,8 +419,8 @@ struct fuse_conn { /** Entry on the fuse_conn_list */ struct list_head entry; - /** Unique ID */ - u64 id; + /** Device ID from super block */ + dev_t dev; /** Dentries in the control filesystem */ struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES]; @@ -438,7 +467,7 @@ extern const struct file_operations fuse_dev_operations; /** * Get a filled in inode */ -struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, +struct inode *fuse_iget(struct super_block *sb, u64 nodeid, int generation, struct fuse_attr *attr, u64 attr_valid, u64 attr_version); @@ -446,7 +475,7 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, * Send FORGET command */ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, - unsigned long nodeid, u64 nlookup); + u64 nodeid, u64 nlookup); /** * Initialize READ or READDIR request @@ -504,6 +533,11 @@ void fuse_init_symlink(struct inode *inode); void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, u64 attr_valid, u64 attr_version); +void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, + u64 attr_valid); + +void fuse_truncate(struct address_space *mapping, loff_t offset); + /** * Initialize the client device */ @@ -522,6 +556,8 @@ void fuse_ctl_cleanup(void); */ struct fuse_req *fuse_request_alloc(void); +struct fuse_req *fuse_request_alloc_nofs(void); + /** * Free a request */ @@ -558,6 +594,8 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req); */ void request_send_background(struct fuse_conn *fc, struct fuse_req *req); +void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req); + /* Abort all requests */ void fuse_abort_conn(struct fuse_conn *fc); @@ -600,3 +638,10 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); int fuse_update_attributes(struct inode *inode, struct kstat *stat, struct file *file, bool *refreshed); + +void fuse_flush_writepages(struct inode *inode); + +void fuse_set_nowrite(struct inode *inode); +void fuse_release_nowrite(struct inode *inode); + +u64 fuse_get_attr_version(struct fuse_conn *fc); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 4df34da2284..fb77e096213 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -59,7 +59,11 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) fi->nodeid = 0; fi->nlookup = 0; fi->attr_version = 0; + fi->writectr = 0; INIT_LIST_HEAD(&fi->write_files); + INIT_LIST_HEAD(&fi->queued_writes); + INIT_LIST_HEAD(&fi->writepages); + init_waitqueue_head(&fi->page_waitq); fi->forget_req = fuse_request_alloc(); if (!fi->forget_req) { kmem_cache_free(fuse_inode_cachep, inode); @@ -73,13 +77,14 @@ static void fuse_destroy_inode(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); BUG_ON(!list_empty(&fi->write_files)); + BUG_ON(!list_empty(&fi->queued_writes)); if (fi->forget_req) fuse_request_free(fi->forget_req); kmem_cache_free(fuse_inode_cachep, inode); } void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, - unsigned long nodeid, u64 nlookup) + u64 nodeid, u64 nlookup) { struct fuse_forget_in *inarg = &req->misc.forget_in; inarg->nlookup = nlookup; @@ -109,7 +114,7 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) return 0; } -static void fuse_truncate(struct address_space *mapping, loff_t offset) +void fuse_truncate(struct address_space *mapping, loff_t offset) { /* See vmtruncate() */ unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); @@ -117,19 +122,12 @@ static void fuse_truncate(struct address_space *mapping, loff_t offset) unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); } - -void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, - u64 attr_valid, u64 attr_version) +void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, + u64 attr_valid) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - loff_t oldsize; - spin_lock(&fc->lock); - if (attr_version != 0 && fi->attr_version > attr_version) { - spin_unlock(&fc->lock); - return; - } fi->attr_version = ++fc->attr_version; fi->i_time = attr_valid; @@ -159,6 +157,22 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, fi->orig_i_mode = inode->i_mode; if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) inode->i_mode &= ~S_ISVTX; +} + +void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, + u64 attr_valid, u64 attr_version) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + loff_t oldsize; + + spin_lock(&fc->lock); + if (attr_version != 0 && fi->attr_version > attr_version) { + spin_unlock(&fc->lock); + return; + } + + fuse_change_attributes_common(inode, attr, attr_valid); oldsize = inode->i_size; i_size_write(inode, attr->size); @@ -193,7 +207,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) static int fuse_inode_eq(struct inode *inode, void *_nodeidp) { - unsigned long nodeid = *(unsigned long *) _nodeidp; + u64 nodeid = *(u64 *) _nodeidp; if (get_node_id(inode) == nodeid) return 1; else @@ -202,12 +216,12 @@ static int fuse_inode_eq(struct inode *inode, void *_nodeidp) static int fuse_inode_set(struct inode *inode, void *_nodeidp) { - unsigned long nodeid = *(unsigned long *) _nodeidp; + u64 nodeid = *(u64 *) _nodeidp; get_fuse_inode(inode)->nodeid = nodeid; return 0; } -struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, +struct inode *fuse_iget(struct super_block *sb, u64 nodeid, int generation, struct fuse_attr *attr, u64 attr_valid, u64 attr_version) { @@ -447,7 +461,7 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt) return 0; } -static struct fuse_conn *new_conn(void) +static struct fuse_conn *new_conn(struct super_block *sb) { struct fuse_conn *fc; int err; @@ -468,19 +482,41 @@ static struct fuse_conn *new_conn(void) atomic_set(&fc->num_waiting, 0); fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; fc->bdi.unplug_io_fn = default_unplug_io_fn; + /* fuse does it's own writeback accounting */ + fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; + fc->dev = sb->s_dev; err = bdi_init(&fc->bdi); - if (err) { - kfree(fc); - fc = NULL; - goto out; - } + if (err) + goto error_kfree; + err = bdi_register_dev(&fc->bdi, fc->dev); + if (err) + goto error_bdi_destroy; + /* + * For a single fuse filesystem use max 1% of dirty + + * writeback threshold. + * + * This gives about 1M of write buffer for memory maps on a + * machine with 1G and 10% dirty_ratio, which should be more + * than enough. + * + * Privileged users can raise it by writing to + * + * /sys/class/bdi/<bdi>/max_ratio + */ + bdi_set_max_ratio(&fc->bdi, 1); fc->reqctr = 0; fc->blocked = 1; fc->attr_version = 1; get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); } -out: return fc; + +error_bdi_destroy: + bdi_destroy(&fc->bdi); +error_kfree: + mutex_destroy(&fc->inst_mutex); + kfree(fc); + return NULL; } void fuse_conn_put(struct fuse_conn *fc) @@ -540,6 +576,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->no_lock = 1; if (arg->flags & FUSE_ATOMIC_O_TRUNC) fc->atomic_o_trunc = 1; + if (arg->flags & FUSE_BIG_WRITES) + fc->big_writes = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -548,6 +586,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); fc->minor = arg->minor; fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; + fc->max_write = min_t(unsigned, 4096, fc->max_write); fc->conn_init = 1; } fuse_put_request(fc, req); @@ -562,7 +601,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->major = FUSE_KERNEL_VERSION; arg->minor = FUSE_KERNEL_MINOR_VERSION; arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; - arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC; + arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | + FUSE_BIG_WRITES; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); @@ -578,12 +618,6 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) request_send_background(fc, req); } -static u64 conn_id(void) -{ - static u64 ctr = 1; - return ctr++; -} - static int fuse_fill_super(struct super_block *sb, void *data, int silent) { struct fuse_conn *fc; @@ -621,14 +655,14 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (file->f_op != &fuse_dev_operations) return -EINVAL; - fc = new_conn(); + fc = new_conn(sb); if (!fc) return -ENOMEM; fc->flags = d.flags; fc->user_id = d.user_id; fc->group_id = d.group_id; - fc->max_read = d.max_read; + fc->max_read = min_t(unsigned, 4096, d.max_read); /* Used by get_root_inode() */ sb->s_fs_info = fc; @@ -659,7 +693,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (file->private_data) goto err_unlock; - fc->id = conn_id(); err = fuse_ctl_add_conn(fc); if (err) goto err_unlock; diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c index 8479da47049..a4ff271df9e 100644 --- a/fs/gfs2/locking/dlm/sysfs.c +++ b/fs/gfs2/locking/dlm/sysfs.c @@ -212,7 +212,7 @@ int gdlm_sysfs_init(void) { gdlm_kset = kset_create_and_add("lock_dlm", NULL, kernel_kobj); if (!gdlm_kset) { - printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__); + printk(KERN_WARNING "%s: can not create kset\n", __func__); return -ENOMEM; } return 0; diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 509c5d60bd8..7f48576289c 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -41,7 +41,7 @@ int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, #define gfs2_assert_withdraw(sdp, assertion) \ ((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \ - __FUNCTION__, __FILE__, __LINE__)) + __func__, __FILE__, __LINE__)) int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, @@ -49,28 +49,28 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, #define gfs2_assert_warn(sdp, assertion) \ ((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \ - __FUNCTION__, __FILE__, __LINE__)) + __func__, __FILE__, __LINE__)) int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function, char *file, unsigned int line); #define gfs2_consist(sdp) \ -gfs2_consist_i((sdp), 0, __FUNCTION__, __FILE__, __LINE__) +gfs2_consist_i((sdp), 0, __func__, __FILE__, __LINE__) int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, const char *function, char *file, unsigned int line); #define gfs2_consist_inode(ip) \ -gfs2_consist_inode_i((ip), 0, __FUNCTION__, __FILE__, __LINE__) +gfs2_consist_inode_i((ip), 0, __func__, __FILE__, __LINE__) int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, const char *function, char *file, unsigned int line); #define gfs2_consist_rgrpd(rgd) \ -gfs2_consist_rgrpd_i((rgd), 0, __FUNCTION__, __FILE__, __LINE__) +gfs2_consist_rgrpd_i((rgd), 0, __func__, __FILE__, __LINE__) int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, @@ -91,7 +91,7 @@ static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp, } #define gfs2_meta_check(sdp, bh) \ -gfs2_meta_check_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__) +gfs2_meta_check_i((sdp), (bh), __func__, __FILE__, __LINE__) int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, @@ -118,7 +118,7 @@ static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp, } #define gfs2_metatype_check(sdp, bh, type) \ -gfs2_metatype_check_i((sdp), (bh), (type), __FUNCTION__, __FILE__, __LINE__) +gfs2_metatype_check_i((sdp), (bh), (type), __func__, __FILE__, __LINE__) static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type, u16 format) @@ -134,14 +134,14 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, unsigned int line); #define gfs2_io_error(sdp) \ -gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__); +gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__); int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, const char *function, char *file, unsigned int line); #define gfs2_io_error_bh(sdp, bh) \ -gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__); +gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__); extern struct kmem_cache *gfs2_glock_cachep; diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 24cf6fc4302..f6621a78520 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -208,7 +208,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) struct hfs_bnode *node, *next_node; struct page **pagep; u32 nidx, idx; - u16 off, len; + unsigned off; + u16 off16; + u16 len; u8 *data, byte, m; int i; @@ -235,7 +237,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) node = hfs_bnode_find(tree, nidx); if (IS_ERR(node)) return node; - len = hfs_brec_lenoff(node, 2, &off); + len = hfs_brec_lenoff(node, 2, &off16); + off = off16; off += node->page_offset; pagep = node->page + (off >> PAGE_CACHE_SHIFT); @@ -280,7 +283,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) return next_node; node = next_node; - len = hfs_brec_lenoff(node, 0, &off); + len = hfs_brec_lenoff(node, 0, &off16); + off = off16; off += node->page_offset; pagep = node->page + (off >> PAGE_CACHE_SHIFT); data = kmap(*pagep); diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index b4651e128d7..36ca2e1a4fa 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -215,7 +215,7 @@ int hfs_mdb_get(struct super_block *sb) attrib &= cpu_to_be16(~HFS_SB_ATTRIB_UNMNT); attrib |= cpu_to_be16(HFS_SB_ATTRIB_INCNSTNT); mdb->drAtrb = attrib; - mdb->drWrCnt = cpu_to_be32(be32_to_cpu(mdb->drWrCnt) + 1); + be32_add_cpu(&mdb->drWrCnt, 1); mdb->drLsMod = hfs_mtime(); mark_buffer_dirty(HFS_SB(sb)->mdb_bh); diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 32de44ed002..8cf67974adf 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -297,7 +297,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) return 0; } p = match_strdup(&args[0]); - hsb->nls_disk = load_nls(p); + if (p) + hsb->nls_disk = load_nls(p); if (!hsb->nls_disk) { printk(KERN_ERR "hfs: unable to load codepage \"%s\"\n", p); kfree(p); @@ -311,7 +312,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) return 0; } p = match_strdup(&args[0]); - hsb->nls_io = load_nls(p); + if (p) + hsb->nls_io = load_nls(p); if (!hsb->nls_io) { printk(KERN_ERR "hfs: unable to load iocharset \"%s\"\n", p); kfree(p); diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index bb5433608a4..e49fcee1e29 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -184,7 +184,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) struct hfs_bnode *node, *next_node; struct page **pagep; u32 nidx, idx; - u16 off, len; + unsigned off; + u16 off16; + u16 len; u8 *data, byte, m; int i; @@ -211,7 +213,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) node = hfs_bnode_find(tree, nidx); if (IS_ERR(node)) return node; - len = hfs_brec_lenoff(node, 2, &off); + len = hfs_brec_lenoff(node, 2, &off16); + off = off16; off += node->page_offset; pagep = node->page + (off >> PAGE_CACHE_SHIFT); @@ -256,7 +259,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) return next_node; node = next_node; - len = hfs_brec_lenoff(node, 0, &off); + len = hfs_brec_lenoff(node, 0, &off16); + off = off16; off += node->page_offset; pagep = node->page + (off >> PAGE_CACHE_SHIFT); data = kmap(*pagep); diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index d72d0a8b25a..9e59537b43d 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -311,6 +311,10 @@ int hfsplus_delete_cat(u32, struct inode *, struct qstr *); int hfsplus_rename_cat(u32, struct inode *, struct qstr *, struct inode *, struct qstr *); +/* dir.c */ +extern const struct inode_operations hfsplus_dir_inode_operations; +extern const struct file_operations hfsplus_dir_operations; + /* extents.c */ int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); void hfsplus_ext_write_extent(struct inode *); diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 37744cf3706..67e1c8b467c 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -65,6 +65,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) BUG(); return 0; } + if (!tree) + return 0; if (tree->node_size >= PAGE_CACHE_SIZE) { nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT); spin_lock(&tree->hash_lock); @@ -278,9 +280,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) return 0; } -extern const struct inode_operations hfsplus_dir_inode_operations; -extern struct file_operations hfsplus_dir_operations; - static const struct inode_operations hfsplus_file_inode_operations = { .lookup = hfsplus_file_lookup, .truncate = hfsplus_file_truncate, diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index dc64fac0083..9997cbf8beb 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -132,7 +132,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) return 0; } p = match_strdup(&args[0]); - sbi->nls = load_nls(p); + if (p) + sbi->nls = load_nls(p); if (!sbi->nls) { printk(KERN_ERR "hfs: unable to load nls mapping \"%s\"\n", p); kfree(p); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index b0f9ad362d1..ce97a54518d 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -357,7 +357,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { - printk(KERN_WARNING "hfs: write access to a jounaled filesystem is not supported, " + printk(KERN_WARNING "hfs: write access to a journaled filesystem is not supported, " "use the force option at your own risk, mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } @@ -423,7 +423,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) */ vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION); vhdr->modify_date = hfsp_now2mt(); - vhdr->write_count = cpu_to_be32(be32_to_cpu(vhdr->write_count) + 1); + be32_add_cpu(&vhdr->write_count, 1); vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 72cab78f050..175d08eacc8 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -47,7 +47,7 @@ static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) return 0; wd->ablk_start = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART)); - extent = be32_to_cpu(get_unaligned((__be32 *)(bufptr + HFSP_WRAPOFF_EMBEDEXT))); + extent = get_unaligned_be32(bufptr + HFSP_WRAPOFF_EMBEDEXT); wd->embed_start = (extent >> 16) & 0xFFFF; wd->embed_count = extent & 0xFFFF; diff --git a/fs/hppfs/Makefile b/fs/hppfs/Makefile index 6890433f759..8a1f5034436 100644 --- a/fs/hppfs/Makefile +++ b/fs/hppfs/Makefile @@ -1,9 +1,9 @@ # -# Copyright (C) 2002, 2003 Jeff Dike (jdike@karaya.com) +# Copyright (C) 2002 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) # Licensed under the GPL # -hppfs-objs := hppfs_kern.o +hppfs-objs := hppfs.o obj-y = -obj-$(CONFIG_HPPFS) += hppfs.o +obj-$(CONFIG_HPPFS) += $(hppfs-objs) diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs.c index 8601d8ef3b5..65077aa90f0 100644 --- a/fs/hppfs/hppfs_kern.c +++ b/fs/hppfs/hppfs.c @@ -33,7 +33,7 @@ struct hppfs_private { }; struct hppfs_inode_info { - struct dentry *proc_dentry; + struct dentry *proc_dentry; struct inode vfs_inode; }; @@ -52,7 +52,7 @@ static int is_pid(struct dentry *dentry) int i; sb = dentry->d_sb; - if ((sb->s_op != &hppfs_sbops) || (dentry->d_parent != sb->s_root)) + if (dentry->d_parent != sb->s_root) return 0; for (i = 0; i < dentry->d_name.len; i++) { @@ -136,7 +136,7 @@ static int file_removed(struct dentry *dentry, const char *file) } static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, - struct nameidata *nd) + struct nameidata *nd) { struct dentry *proc_dentry, *new, *parent; struct inode *inode; @@ -254,6 +254,8 @@ static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count, int err; if (hppfs->contents != NULL) { + int rem; + if (*ppos >= hppfs->len) return 0; @@ -267,8 +269,10 @@ static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count, if (off + count > hppfs->len) count = hppfs->len - off; - copy_to_user(buf, &data->contents[off], count); - *ppos += count; + rem = copy_to_user(buf, &data->contents[off], count); + *ppos += count - rem; + if (rem > 0) + return -EFAULT; } else if (hppfs->host_fd != -1) { err = os_seek_file(hppfs->host_fd, *ppos); if (err) { @@ -285,21 +289,15 @@ static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count, return count; } -static ssize_t hppfs_write(struct file *file, const char __user *buf, size_t len, - loff_t *ppos) +static ssize_t hppfs_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) { struct hppfs_private *data = file->private_data; struct file *proc_file = data->proc_file; ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); - int err; write = proc_file->f_path.dentry->d_inode->i_fop->write; - - proc_file->f_pos = file->f_pos; - err = (*write)(proc_file, buf, len, &proc_file->f_pos); - file->f_pos = proc_file->f_pos; - - return err; + return (*write)(proc_file, buf, len, ppos); } static int open_host_sock(char *host_file, int *filter_out) @@ -357,7 +355,7 @@ static struct hppfs_data *hppfs_get_data(int fd, int filter, if (filter) { while ((n = read_proc(proc_file, data->contents, - sizeof(data->contents), NULL, 0)) > 0) + sizeof(data->contents), NULL, 0)) > 0) os_write_file(fd, data->contents, n); err = os_shutdown_socket(fd, 0, 1); if (err) { @@ -429,8 +427,8 @@ static int file_mode(int fmode) static int hppfs_open(struct inode *inode, struct file *file) { struct hppfs_private *data; - struct dentry *proc_dentry; struct vfsmount *proc_mnt; + struct dentry *proc_dentry; char *host_file; int err, fd, type, filter; @@ -492,8 +490,8 @@ static int hppfs_open(struct inode *inode, struct file *file) static int hppfs_dir_open(struct inode *inode, struct file *file) { struct hppfs_private *data; - struct dentry *proc_dentry; struct vfsmount *proc_mnt; + struct dentry *proc_dentry; int err; err = -ENOMEM; @@ -620,6 +618,9 @@ static struct inode *hppfs_alloc_inode(struct super_block *sb) void hppfs_delete_inode(struct inode *ino) { + dput(HPPFS_I(ino)->proc_dentry); + mntput(ino->i_sb->s_fs_info); + clear_inode(ino); } @@ -628,69 +629,46 @@ static void hppfs_destroy_inode(struct inode *inode) kfree(HPPFS_I(inode)); } -static void hppfs_put_super(struct super_block *sb) -{ - mntput(sb->s_fs_info); -} - static const struct super_operations hppfs_sbops = { .alloc_inode = hppfs_alloc_inode, .destroy_inode = hppfs_destroy_inode, .delete_inode = hppfs_delete_inode, .statfs = hppfs_statfs, - .put_super = hppfs_put_super, }; static int hppfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) { - struct file *proc_file; struct dentry *proc_dentry; - struct vfsmount *proc_mnt; - int ret; proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; - proc_mnt = dentry->d_sb->s_fs_info; - - proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), O_RDONLY); - if (IS_ERR(proc_file)) - return PTR_ERR(proc_file); - - ret = proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, buflen); - - fput(proc_file); - - return ret; + return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, + buflen); } -static void* hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) { - struct file *proc_file; struct dentry *proc_dentry; - struct vfsmount *proc_mnt; - void *ret; proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; - proc_mnt = dentry->d_sb->s_fs_info; - - proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), O_RDONLY); - if (IS_ERR(proc_file)) - return proc_file; - - ret = proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); - fput(proc_file); + return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); +} - return ret; +int hppfs_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + return generic_permission(inode, mask, NULL); } static const struct inode_operations hppfs_dir_iops = { .lookup = hppfs_lookup, + .permission = hppfs_permission, }; static const struct inode_operations hppfs_link_iops = { .readlink = hppfs_readlink, .follow_link = hppfs_follow_link, + .permission = hppfs_permission, }; static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) @@ -712,7 +690,7 @@ static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) inode->i_fop = &hppfs_file_fops; } - HPPFS_I(inode)->proc_dentry = dentry; + HPPFS_I(inode)->proc_dentry = dget(dentry); inode->i_uid = proc_ino->i_uid; inode->i_gid = proc_ino->i_gid; @@ -725,7 +703,7 @@ static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) inode->i_size = proc_ino->i_size; inode->i_blocks = proc_ino->i_blocks; - return 0; + return inode; } static int hppfs_fill_super(struct super_block *sb, void *d, int silent) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 9783723e8ff..aeabf80f81a 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -45,7 +45,7 @@ static const struct inode_operations hugetlbfs_inode_operations; static struct backing_dev_info hugetlbfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, }; int sysctl_hugetlb_shm_group; diff --git a/fs/inode.c b/fs/inode.c index 27ee1af50d0..c36d9480335 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -495,8 +495,7 @@ static struct inode * find_inode(struct super_block * sb, struct hlist_head *hea struct inode * inode = NULL; repeat: - hlist_for_each (node, head) { - inode = hlist_entry(node, struct inode, i_hash); + hlist_for_each_entry(inode, node, head, i_hash) { if (inode->i_sb != sb) continue; if (!test(inode, data)) @@ -520,8 +519,7 @@ static struct inode * find_inode_fast(struct super_block * sb, struct hlist_head struct inode * inode = NULL; repeat: - hlist_for_each (node, head) { - inode = hlist_entry(node, struct inode, i_hash); + hlist_for_each_entry(inode, node, head, i_hash) { if (inode->i_ino != ino) continue; if (inode->i_sb != sb) @@ -1151,13 +1149,8 @@ static inline void iput_final(struct inode *inode) void iput(struct inode *inode) { if (inode) { - const struct super_operations *op = inode->i_sb->s_op; - BUG_ON(inode->i_state == I_CLEAR); - if (op && op->put_inode) - op->put_inode(inode); - if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) iput_final(inode); } diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 7b94a1e3c01..6676c06bb7c 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -598,7 +598,7 @@ asmlinkage long sys_inotify_init(void) } ih = inotify_init(&inotify_user_ops); - if (unlikely(IS_ERR(ih))) { + if (IS_ERR(ih)) { ret = PTR_ERR(ih); goto out_free_dev; } diff --git a/fs/ioctl.c b/fs/ioctl.c index f32fbde2175..7db32b3382d 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -28,8 +28,8 @@ * * Returns 0 on success, -errno on error. */ -long vfs_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) +static long vfs_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) { int error = -ENOTTY; diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index 1ba407c64df..2f0dc5a1463 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -145,6 +145,14 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, } de = tmpde; } + /* Basic sanity check, whether name doesn't exceed dir entry */ + if (de_len < de->name_len[0] + + sizeof(struct iso_directory_record)) { + printk(KERN_NOTICE "iso9660: Corrupted directory entry" + " in block %lu of inode %lu\n", block, + inode->i_ino); + return -EIO; + } if (first_de) { isofs_normalize_block_and_offset(de, diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index d1bdf8adb35..ccbf72faf27 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -78,29 +78,29 @@ static inline int isonum_712(char *p) } static inline unsigned int isonum_721(char *p) { - return le16_to_cpu(get_unaligned((__le16 *)p)); + return get_unaligned_le16(p); } static inline unsigned int isonum_722(char *p) { - return be16_to_cpu(get_unaligned((__le16 *)p)); + return get_unaligned_be16(p); } static inline unsigned int isonum_723(char *p) { /* Ignore bigendian datum due to broken mastering programs */ - return le16_to_cpu(get_unaligned((__le16 *)p)); + return get_unaligned_le16(p); } static inline unsigned int isonum_731(char *p) { - return le32_to_cpu(get_unaligned((__le32 *)p)); + return get_unaligned_le32(p); } static inline unsigned int isonum_732(char *p) { - return be32_to_cpu(get_unaligned((__le32 *)p)); + return get_unaligned_be32(p); } static inline unsigned int isonum_733(char *p) { /* Ignore bigendian datum due to broken mastering programs */ - return le32_to_cpu(get_unaligned((__le32 *)p)); + return get_unaligned_le32(p); } extern int iso_date(char *, int); diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 344b247bc29..8299889a835 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -111,6 +111,13 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, dlen = de->name_len[0]; dpnt = de->name; + /* Basic sanity check, whether name doesn't exceed dir entry */ + if (de_len < dlen + sizeof(struct iso_directory_record)) { + printk(KERN_NOTICE "iso9660: Corrupted directory entry" + " in block %lu of inode %lu\n", block, + dir->i_ino); + return 0; + } if (sbi->s_rock && ((i = get_rock_ridge_filename(de, tmpname, dir)))) { diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index cd931ef1f00..5a8ca61498c 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -470,7 +470,9 @@ void journal_commit_transaction(journal_t *journal) * transaction! Now comes the tricky part: we need to write out * metadata. Loop over the transaction's entire buffer list: */ + spin_lock(&journal->j_state_lock); commit_transaction->t_state = T_COMMIT; + spin_unlock(&journal->j_state_lock); J_ASSERT(commit_transaction->t_nr_buffers <= commit_transaction->t_outstanding_credits); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index a8173081f83..4d99685fdce 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -520,22 +520,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) jbd_debug (3, "JBD: commit phase 2\n"); /* - * First, drop modified flag: all accesses to the buffers - * will be tracked for a new trasaction only -bzzz - */ - spin_lock(&journal->j_list_lock); - if (commit_transaction->t_buffers) { - new_jh = jh = commit_transaction->t_buffers->b_tnext; - do { - J_ASSERT_JH(new_jh, new_jh->b_modified == 1 || - new_jh->b_modified == 0); - new_jh->b_modified = 0; - new_jh = new_jh->b_tnext; - } while (new_jh != jh); - } - spin_unlock(&journal->j_list_lock); - - /* * Now start flushing things to disk, in the order they appear * on the transaction lists. Data blocks go first. */ @@ -576,7 +560,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) * transaction! Now comes the tricky part: we need to write out * metadata. Loop over the transaction's entire buffer list: */ + spin_lock(&journal->j_state_lock); commit_transaction->t_state = T_COMMIT; + spin_unlock(&journal->j_state_lock); stats.u.run.rs_logging = jiffies; stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing, @@ -584,6 +570,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits; stats.u.run.rs_blocks_logged = 0; + J_ASSERT(commit_transaction->t_nr_buffers <= + commit_transaction->t_outstanding_credits); + descriptor = NULL; bufs = 0; while (commit_transaction->t_buffers) { diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 954cff001df..2e24567c4a7 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -534,7 +534,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) if (!tid_geq(journal->j_commit_request, tid)) { printk(KERN_EMERG "%s: error: j_commit_request=%d, tid=%d\n", - __FUNCTION__, journal->j_commit_request, tid); + __func__, journal->j_commit_request, tid); } spin_unlock(&journal->j_state_lock); #endif @@ -599,7 +599,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, printk(KERN_ALERT "%s: journal block not found " "at offset %lu on %s\n", - __FUNCTION__, + __func__, blocknr, bdevname(journal->j_dev, b)); err = -EIO; @@ -901,22 +901,13 @@ static void jbd2_stats_proc_init(journal_t *journal) { char name[BDEVNAME_SIZE]; - snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); + bdevname(journal->j_dev, name); journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); if (journal->j_proc_entry) { - struct proc_dir_entry *p; - p = create_proc_entry("history", S_IRUGO, - journal->j_proc_entry); - if (p) { - p->proc_fops = &jbd2_seq_history_fops; - p->data = journal; - p = create_proc_entry("info", S_IRUGO, - journal->j_proc_entry); - if (p) { - p->proc_fops = &jbd2_seq_info_fops; - p->data = journal; - } - } + proc_create_data("history", S_IRUGO, journal->j_proc_entry, + &jbd2_seq_history_fops, journal); + proc_create_data("info", S_IRUGO, journal->j_proc_entry, + &jbd2_seq_info_fops, journal); } } @@ -924,7 +915,7 @@ static void jbd2_stats_proc_exit(journal_t *journal) { char name[BDEVNAME_SIZE]; - snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); + bdevname(journal->j_dev, name); remove_proc_entry("info", journal->j_proc_entry); remove_proc_entry("history", journal->j_proc_entry); remove_proc_entry(name, proc_jbd2_stats); @@ -1006,13 +997,14 @@ fail: */ /** - * journal_t * jbd2_journal_init_dev() - creates an initialises a journal structure + * journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure * @bdev: Block device on which to create the journal * @fs_dev: Device which hold journalled filesystem for this journal. * @start: Block nr Start of journal. * @len: Length of the journal in blocks. * @blocksize: blocksize of journalling device - * @returns: a newly created journal_t * + * + * Returns: a newly created journal_t * * * jbd2_journal_init_dev creates a journal which maps a fixed contiguous * range of blocks on an arbitrary block device. @@ -1036,7 +1028,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); if (!journal->j_wbuf) { printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", - __FUNCTION__); + __func__); kfree(journal); journal = NULL; goto out; @@ -1092,7 +1084,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); if (!journal->j_wbuf) { printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", - __FUNCTION__); + __func__); kfree(journal); return NULL; } @@ -1101,7 +1093,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) /* If that failed, give up */ if (err) { printk(KERN_ERR "%s: Cannnot locate journal superblock\n", - __FUNCTION__); + __func__); kfree(journal); return NULL; } @@ -1187,7 +1179,7 @@ int jbd2_journal_create(journal_t *journal) */ printk(KERN_EMERG "%s: creation of journal on external device!\n", - __FUNCTION__); + __func__); BUG(); } @@ -1985,9 +1977,10 @@ static int journal_init_jbd2_journal_head_cache(void) static void jbd2_journal_destroy_jbd2_journal_head_cache(void) { - J_ASSERT(jbd2_journal_head_cache != NULL); - kmem_cache_destroy(jbd2_journal_head_cache); - jbd2_journal_head_cache = NULL; + if (jbd2_journal_head_cache) { + kmem_cache_destroy(jbd2_journal_head_cache); + jbd2_journal_head_cache = NULL; + } } /* @@ -2006,7 +1999,7 @@ static struct journal_head *journal_alloc_journal_head(void) jbd_debug(1, "out of memory for journal_head\n"); if (time_after(jiffies, last_warning + 5*HZ)) { printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", - __FUNCTION__); + __func__); last_warning = jiffies; } while (!ret) { @@ -2143,13 +2136,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh) if (jh->b_frozen_data) { printk(KERN_WARNING "%s: freeing " "b_frozen_data\n", - __FUNCTION__); + __func__); jbd2_free(jh->b_frozen_data, bh->b_size); } if (jh->b_committed_data) { printk(KERN_WARNING "%s: freeing " "b_committed_data\n", - __FUNCTION__); + __func__); jbd2_free(jh->b_committed_data, bh->b_size); } bh->b_private = NULL; @@ -2314,10 +2307,12 @@ static int __init journal_init(void) BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); ret = journal_init_caches(); - if (ret != 0) + if (ret == 0) { + jbd2_create_debugfs_entry(); + jbd2_create_jbd_stats_proc_entry(); + } else { jbd2_journal_destroy_caches(); - jbd2_create_debugfs_entry(); - jbd2_create_jbd_stats_proc_entry(); + } return ret; } diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 2e1453a5e99..257ff262576 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -139,7 +139,7 @@ repeat: oom: if (!journal_oom_retry) return -ENOMEM; - jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__); + jbd_debug(1, "ENOMEM in %s, retrying\n", __func__); yield(); goto repeat; } @@ -167,138 +167,121 @@ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal, return NULL; } +void jbd2_journal_destroy_revoke_caches(void) +{ + if (jbd2_revoke_record_cache) { + kmem_cache_destroy(jbd2_revoke_record_cache); + jbd2_revoke_record_cache = NULL; + } + if (jbd2_revoke_table_cache) { + kmem_cache_destroy(jbd2_revoke_table_cache); + jbd2_revoke_table_cache = NULL; + } +} + int __init jbd2_journal_init_revoke_caches(void) { + J_ASSERT(!jbd2_revoke_record_cache); + J_ASSERT(!jbd2_revoke_table_cache); + jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", sizeof(struct jbd2_revoke_record_s), 0, SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, NULL); if (!jbd2_revoke_record_cache) - return -ENOMEM; + goto record_cache_failure; jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", sizeof(struct jbd2_revoke_table_s), 0, SLAB_TEMPORARY, NULL); - if (!jbd2_revoke_table_cache) { - kmem_cache_destroy(jbd2_revoke_record_cache); - jbd2_revoke_record_cache = NULL; - return -ENOMEM; - } + if (!jbd2_revoke_table_cache) + goto table_cache_failure; return 0; +table_cache_failure: + jbd2_journal_destroy_revoke_caches(); +record_cache_failure: + return -ENOMEM; } -void jbd2_journal_destroy_revoke_caches(void) +static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size) { - kmem_cache_destroy(jbd2_revoke_record_cache); - jbd2_revoke_record_cache = NULL; - kmem_cache_destroy(jbd2_revoke_table_cache); - jbd2_revoke_table_cache = NULL; -} - -/* Initialise the revoke table for a given journal to a given size. */ - -int jbd2_journal_init_revoke(journal_t *journal, int hash_size) -{ - int shift, tmp; + int shift = 0; + int tmp = hash_size; + struct jbd2_revoke_table_s *table; - J_ASSERT (journal->j_revoke_table[0] == NULL); + table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); + if (!table) + goto out; - shift = 0; - tmp = hash_size; while((tmp >>= 1UL) != 0UL) shift++; - journal->j_revoke_table[0] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); - if (!journal->j_revoke_table[0]) - return -ENOMEM; - journal->j_revoke = journal->j_revoke_table[0]; - - /* Check that the hash_size is a power of two */ - J_ASSERT(is_power_of_2(hash_size)); - - journal->j_revoke->hash_size = hash_size; - - journal->j_revoke->hash_shift = shift; - - journal->j_revoke->hash_table = + table->hash_size = hash_size; + table->hash_shift = shift; + table->hash_table = kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); - if (!journal->j_revoke->hash_table) { - kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); - journal->j_revoke = NULL; - return -ENOMEM; + if (!table->hash_table) { + kmem_cache_free(jbd2_revoke_table_cache, table); + table = NULL; + goto out; } for (tmp = 0; tmp < hash_size; tmp++) - INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); + INIT_LIST_HEAD(&table->hash_table[tmp]); - journal->j_revoke_table[1] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); - if (!journal->j_revoke_table[1]) { - kfree(journal->j_revoke_table[0]->hash_table); - kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); - return -ENOMEM; +out: + return table; +} + +static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table) +{ + int i; + struct list_head *hash_list; + + for (i = 0; i < table->hash_size; i++) { + hash_list = &table->hash_table[i]; + J_ASSERT(list_empty(hash_list)); } - journal->j_revoke = journal->j_revoke_table[1]; + kfree(table->hash_table); + kmem_cache_free(jbd2_revoke_table_cache, table); +} - /* Check that the hash_size is a power of two */ +/* Initialise the revoke table for a given journal to a given size. */ +int jbd2_journal_init_revoke(journal_t *journal, int hash_size) +{ + J_ASSERT(journal->j_revoke_table[0] == NULL); J_ASSERT(is_power_of_2(hash_size)); - journal->j_revoke->hash_size = hash_size; - - journal->j_revoke->hash_shift = shift; + journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size); + if (!journal->j_revoke_table[0]) + goto fail0; - journal->j_revoke->hash_table = - kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); - if (!journal->j_revoke->hash_table) { - kfree(journal->j_revoke_table[0]->hash_table); - kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); - kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[1]); - journal->j_revoke = NULL; - return -ENOMEM; - } + journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size); + if (!journal->j_revoke_table[1]) + goto fail1; - for (tmp = 0; tmp < hash_size; tmp++) - INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); + journal->j_revoke = journal->j_revoke_table[1]; spin_lock_init(&journal->j_revoke_lock); return 0; -} -/* Destoy a journal's revoke table. The table must already be empty! */ +fail1: + jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]); +fail0: + return -ENOMEM; +} +/* Destroy a journal's revoke table. The table must already be empty! */ void jbd2_journal_destroy_revoke(journal_t *journal) { - struct jbd2_revoke_table_s *table; - struct list_head *hash_list; - int i; - - table = journal->j_revoke_table[0]; - if (!table) - return; - - for (i=0; i<table->hash_size; i++) { - hash_list = &table->hash_table[i]; - J_ASSERT (list_empty(hash_list)); - } - - kfree(table->hash_table); - kmem_cache_free(jbd2_revoke_table_cache, table); - journal->j_revoke = NULL; - - table = journal->j_revoke_table[1]; - if (!table) - return; - - for (i=0; i<table->hash_size; i++) { - hash_list = &table->hash_table[i]; - J_ASSERT (list_empty(hash_list)); - } - - kfree(table->hash_table); - kmem_cache_free(jbd2_revoke_table_cache, table); journal->j_revoke = NULL; + if (journal->j_revoke_table[0]) + jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]); + if (journal->j_revoke_table[1]) + jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]); } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index b9b0b6f899b..d6e006e6780 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -618,6 +618,12 @@ repeat: goto done; /* + * this is the first time this transaction is touching this buffer, + * reset the modified flag + */ + jh->b_modified = 0; + + /* * If there is already a copy-out version of this buffer, then we don't * need to make another one */ @@ -690,7 +696,7 @@ repeat: if (!frozen_buffer) { printk(KERN_EMERG "%s: OOM for frozen_buffer\n", - __FUNCTION__); + __func__); JBUFFER_TRACE(jh, "oom!"); error = -ENOMEM; jbd_lock_bh_state(bh); @@ -829,9 +835,16 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) if (jh->b_transaction == NULL) { jh->b_transaction = transaction; + + /* first access by this transaction */ + jh->b_modified = 0; + JBUFFER_TRACE(jh, "file as BJ_Reserved"); __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); } else if (jh->b_transaction == journal->j_committing_transaction) { + /* first access by this transaction */ + jh->b_modified = 0; + JBUFFER_TRACE(jh, "set next transaction"); jh->b_next_transaction = transaction; } @@ -901,7 +914,7 @@ repeat: committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!committed_data) { printk(KERN_EMERG "%s: No memory for committed data\n", - __FUNCTION__); + __func__); err = -ENOMEM; goto out; } @@ -1230,6 +1243,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) struct journal_head *jh; int drop_reserve = 0; int err = 0; + int was_modified = 0; BUFFER_TRACE(bh, "entry"); @@ -1248,6 +1262,9 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) goto not_jbd; } + /* keep track of wether or not this transaction modified us */ + was_modified = jh->b_modified; + /* * The buffer's going from the transaction, we must drop * all references -bzzz @@ -1265,7 +1282,12 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) JBUFFER_TRACE(jh, "belongs to current transaction: unfile"); - drop_reserve = 1; + /* + * we only want to drop a reference if this transaction + * modified the buffer + */ + if (was_modified) + drop_reserve = 1; /* * We are no longer going to journal this buffer. @@ -1305,7 +1327,13 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) if (jh->b_next_transaction) { J_ASSERT(jh->b_next_transaction == transaction); jh->b_next_transaction = NULL; - drop_reserve = 1; + + /* + * only drop a reference if this transaction modified + * the buffer + */ + if (was_modified) + drop_reserve = 1; } } @@ -1434,7 +1462,8 @@ int jbd2_journal_stop(handle_t *handle) return err; } -/**int jbd2_journal_force_commit() - force any uncommitted transactions +/** + * int jbd2_journal_force_commit() - force any uncommitted transactions * @journal: journal to force * * For synchronous operations: force any uncommitted transactions @@ -2077,7 +2106,7 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) jh->b_transaction = jh->b_next_transaction; jh->b_next_transaction = NULL; __jbd2_journal_file_buffer(jh, jh->b_transaction, - was_dirty ? BJ_Metadata : BJ_Reserved); + jh->b_modified ? BJ_Metadata : BJ_Reserved); J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); if (was_dirty) diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index d58f845ccb8..c5e1450d79f 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c @@ -46,7 +46,7 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c) static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, - struct jffs2_inode_cache *ic) + struct jffs2_inode_cache *ic) { struct jffs2_full_dirent *fd; @@ -68,11 +68,17 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, continue; } - if (child_ic->nlink++ && fd->type == DT_DIR) { - JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n", - fd->name, fd->ino, ic->ino); - /* TODO: What do we do about it? */ - } + if (fd->type == DT_DIR) { + if (child_ic->pino_nlink) { + JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n", + fd->name, fd->ino, ic->ino); + /* TODO: What do we do about it? */ + } else { + child_ic->pino_nlink = ic->ino; + } + } else + child_ic->pino_nlink++; + dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino); /* Can't free scan_dents so far. We might need them in pass 2 */ } @@ -125,7 +131,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) dbg_fsbuild("pass 2 starting\n"); for_each_inode(i, c, ic) { - if (ic->nlink) + if (ic->pino_nlink) continue; jffs2_build_remove_unlinked_inode(c, ic, &dead_fds); @@ -232,16 +238,19 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c, /* Reduce nlink of the child. If it's now zero, stick it on the dead_fds list to be cleaned up later. Else just free the fd */ - child_ic->nlink--; + if (fd->type == DT_DIR) + child_ic->pino_nlink = 0; + else + child_ic->pino_nlink--; - if (!child_ic->nlink) { - dbg_fsbuild("inode #%u (\"%s\") has now got zero nlink, adding to dead_fds list.\n", + if (!child_ic->pino_nlink) { + dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n", fd->ino, fd->name); fd->next = *dead_fds; *dead_fds = fd; } else { dbg_fsbuild("inode #%u (\"%s\") has now got nlink %d. Ignoring.\n", - fd->ino, fd->name, child_ic->nlink); + fd->ino, fd->name, child_ic->pino_nlink); jffs2_free_full_dirent(fd); } } diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h index 9645275023e..a113ecc3baf 100644 --- a/fs/jffs2/debug.h +++ b/fs/jffs2/debug.h @@ -82,28 +82,28 @@ do { \ printk(JFFS2_ERR_MSG_PREFIX \ " (%d) %s: " fmt, task_pid_nr(current), \ - __FUNCTION__ , ##__VA_ARGS__); \ + __func__ , ##__VA_ARGS__); \ } while(0) #define JFFS2_WARNING(fmt, ...) \ do { \ printk(JFFS2_WARN_MSG_PREFIX \ " (%d) %s: " fmt, task_pid_nr(current), \ - __FUNCTION__ , ##__VA_ARGS__); \ + __func__ , ##__VA_ARGS__); \ } while(0) #define JFFS2_NOTICE(fmt, ...) \ do { \ printk(JFFS2_NOTICE_MSG_PREFIX \ " (%d) %s: " fmt, task_pid_nr(current), \ - __FUNCTION__ , ##__VA_ARGS__); \ + __func__ , ##__VA_ARGS__); \ } while(0) #define JFFS2_DEBUG(fmt, ...) \ do { \ printk(JFFS2_DBG_MSG_PREFIX \ " (%d) %s: " fmt, task_pid_nr(current), \ - __FUNCTION__ , ##__VA_ARGS__); \ + __func__ , ##__VA_ARGS__); \ } while(0) /* diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index c63e7a96af0..c0c141f6fde 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -208,6 +208,13 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode, f = JFFS2_INODE_INFO(inode); dir_f = JFFS2_INODE_INFO(dir_i); + /* jffs2_do_create() will want to lock it, _after_ reserving + space and taking c-alloc_sem. If we keep it locked here, + lockdep gets unhappy (although it's a false positive; + nothing else will be looking at this inode yet so there's + no chance of AB-BA deadlock involving its f->sem). */ + mutex_unlock(&f->sem); + ret = jffs2_do_create(c, dir_f, f, ri, dentry->d_name.name, dentry->d_name.len); if (ret) @@ -219,7 +226,8 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode, d_instantiate(dentry, inode); D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n", - inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->nlink, inode->i_mapping->nrpages)); + inode->i_ino, inode->i_mode, inode->i_nlink, + f->inocache->pino_nlink, inode->i_mapping->nrpages)); return 0; fail: @@ -243,7 +251,7 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry) ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name, dentry->d_name.len, dead_f, now); if (dead_f->inocache) - dentry->d_inode->i_nlink = dead_f->inocache->nlink; + dentry->d_inode->i_nlink = dead_f->inocache->pino_nlink; if (!ret) dir_i->i_mtime = dir_i->i_ctime = ITIME(now); return ret; @@ -276,7 +284,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de if (!ret) { mutex_lock(&f->sem); - old_dentry->d_inode->i_nlink = ++f->inocache->nlink; + old_dentry->d_inode->i_nlink = ++f->inocache->pino_nlink; mutex_unlock(&f->sem); d_instantiate(dentry, old_dentry->d_inode); dir_i->i_mtime = dir_i->i_ctime = ITIME(now); @@ -493,11 +501,14 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode) inode->i_op = &jffs2_dir_inode_operations; inode->i_fop = &jffs2_dir_operations; - /* Directories get nlink 2 at start */ - inode->i_nlink = 2; f = JFFS2_INODE_INFO(inode); + /* Directories get nlink 2 at start */ + inode->i_nlink = 2; + /* but ic->pino_nlink is the parent ino# */ + f->inocache->pino_nlink = dir_i->i_ino; + ri->data_crc = cpu_to_je32(0); ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); @@ -594,17 +605,25 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode) static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) { + struct jffs2_sb_info *c = JFFS2_SB_INFO(dir_i->i_sb); + struct jffs2_inode_info *dir_f = JFFS2_INODE_INFO(dir_i); struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode); struct jffs2_full_dirent *fd; int ret; + uint32_t now = get_seconds(); for (fd = f->dents ; fd; fd = fd->next) { if (fd->ino) return -ENOTEMPTY; } - ret = jffs2_unlink(dir_i, dentry); - if (!ret) + + ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name, + dentry->d_name.len, f, now); + if (!ret) { + dir_i->i_mtime = dir_i->i_ctime = ITIME(now); + clear_nlink(dentry->d_inode); drop_nlink(dir_i); + } return ret; } @@ -817,7 +836,10 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, inode which didn't exist. */ if (victim_f->inocache) { mutex_lock(&victim_f->sem); - victim_f->inocache->nlink--; + if (S_ISDIR(new_dentry->d_inode->i_mode)) + victim_f->inocache->pino_nlink = 0; + else + victim_f->inocache->pino_nlink--; mutex_unlock(&victim_f->sem); } } @@ -838,8 +860,8 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, struct jffs2_inode_info *f = JFFS2_INODE_INFO(old_dentry->d_inode); mutex_lock(&f->sem); inc_nlink(old_dentry->d_inode); - if (f->inocache) - f->inocache->nlink++; + if (f->inocache && !S_ISDIR(old_dentry->d_inode->i_mode)) + f->inocache->pino_nlink++; mutex_unlock(&f->sem); printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret); diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index 25a640e566d..dddb2a6c9e2 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -294,7 +294,7 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c, break; #endif default: - if (ic->nodes == (void *)ic && ic->nlink == 0) + if (ic->nodes == (void *)ic && ic->pino_nlink == 0) jffs2_del_ino_cache(c, ic); } } @@ -332,7 +332,8 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl if (c->mtd->point) { unsigned long *wordebuf; - ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size, &retlen, (unsigned char **)&ebuf); + ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size, + &retlen, &ebuf, NULL); if (ret) { D1(printk(KERN_DEBUG "MTD point failed %d\n", ret)); goto do_flash_read; @@ -340,7 +341,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl if (retlen < c->sector_size) { /* Don't muck about if it won't let us point to the whole erase sector */ D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen)); - c->mtd->unpoint(c->mtd, ebuf, jeb->offset, retlen); + c->mtd->unpoint(c->mtd, jeb->offset, retlen); goto do_flash_read; } wordebuf = ebuf-sizeof(*wordebuf); @@ -349,7 +350,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl if (*++wordebuf != ~0) break; } while(--retlen); - c->mtd->unpoint(c->mtd, ebuf, jeb->offset, c->sector_size); + c->mtd->unpoint(c->mtd, jeb->offset, c->sector_size); if (retlen) { printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n", *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf)); diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 3eb1c84b0a3..086c4383022 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -273,7 +273,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino) inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime)); inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime)); - inode->i_nlink = f->inocache->nlink; + inode->i_nlink = f->inocache->pino_nlink; inode->i_blocks = (inode->i_size + 511) >> 9; @@ -286,13 +286,12 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino) case S_IFDIR: { struct jffs2_full_dirent *fd; + inode->i_nlink = 2; /* parent and '.' */ for (fd=f->dents; fd; fd = fd->next) { if (fd->type == DT_DIR && fd->ino) inc_nlink(inode); } - /* and '..' */ - inc_nlink(inode); /* Root dir gets i_nlink 3 for some reason */ if (inode->i_ino == 1) inc_nlink(inode); @@ -586,11 +585,12 @@ void jffs2_gc_release_inode(struct jffs2_sb_info *c, } struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c, - int inum, int nlink) + int inum, int unlinked) { struct inode *inode; struct jffs2_inode_cache *ic; - if (!nlink) { + + if (unlinked) { /* The inode has zero nlink but its nodes weren't yet marked obsolete. This has to be because we're still waiting for the final (close() and) iput() to happen. @@ -638,8 +638,8 @@ struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c, return ERR_CAST(inode); } if (is_bad_inode(inode)) { - printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. nlink %d\n", - inum, nlink); + printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. unlinked %d\n", + inum, unlinked); /* NB. This will happen again. We need to do something appropriate here. */ iput(inode); return ERR_PTR(-EIO); diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c index bad005664e3..090c556ffed 100644 --- a/fs/jffs2/gc.c +++ b/fs/jffs2/gc.c @@ -161,8 +161,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c) continue; } - if (!ic->nlink) { - D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n", + if (!ic->pino_nlink) { + D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink/pino zero\n", ic->ino)); spin_unlock(&c->inocache_lock); jffs2_xattr_delete_inode(c, ic); @@ -398,10 +398,10 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c) it's vaguely possible. */ inum = ic->ino; - nlink = ic->nlink; + nlink = ic->pino_nlink; spin_unlock(&c->inocache_lock); - f = jffs2_gc_fetch_inode(c, inum, nlink); + f = jffs2_gc_fetch_inode(c, inum, !nlink); if (IS_ERR(f)) { ret = PTR_ERR(f); goto release_sem; diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index 8219df6eb6d..1750445556c 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -177,7 +177,10 @@ struct jffs2_inode_cache { #ifdef CONFIG_JFFS2_FS_XATTR struct jffs2_xattr_ref *xref; #endif - int nlink; + uint32_t pino_nlink; /* Directories store parent inode + here; other inodes store nlink. + Zero always means that it's + completely unlinked. */ }; /* Inode states for 'state' above. We need the 'GC' state to prevent diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index 9df8f3ef20d..a9bf9603c1b 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -709,7 +709,7 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref break; #endif default: - if (ic->nodes == (void *)ic && ic->nlink == 0) + if (ic->nodes == (void *)ic && ic->pino_nlink == 0) jffs2_del_ino_cache(c, ic); break; } diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 1b10d259409..2cc866cf134 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -187,7 +187,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent); void jffs2_gc_release_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f); struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c, - int inum, int nlink); + int inum, int unlinked); unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c, struct jffs2_inode_info *f, diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 4cb4d76de07..6ca08ad887c 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -63,10 +63,11 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(), * adding and jffs2_flash_read_end() interface. */ if (c->mtd->point) { - err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer); + err = c->mtd->point(c->mtd, ofs, len, &retlen, + (void **)&buffer, NULL); if (!err && retlen < len) { JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize); - c->mtd->unpoint(c->mtd, buffer, ofs, retlen); + c->mtd->unpoint(c->mtd, ofs, retlen); } else if (err) JFFS2_WARNING("MTD point failed: error code %d.\n", err); else @@ -100,7 +101,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info kfree(buffer); #ifndef __ECOS else - c->mtd->unpoint(c->mtd, buffer, ofs, len); + c->mtd->unpoint(c->mtd, ofs, len); #endif if (crc != tn->data_crc) { @@ -136,7 +137,7 @@ free_out: kfree(buffer); #ifndef __ECOS else - c->mtd->unpoint(c->mtd, buffer, ofs, len); + c->mtd->unpoint(c->mtd, ofs, len); #endif return err; } @@ -1123,7 +1124,8 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c, size_t retlen; int ret; - dbg_readinode("ino #%u nlink is %d\n", f->inocache->ino, f->inocache->nlink); + dbg_readinode("ino #%u pino/nlink is %d\n", f->inocache->ino, + f->inocache->pino_nlink); memset(&rii, 0, sizeof(rii)); @@ -1358,7 +1360,7 @@ int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, } dbg_readinode("creating inocache for root inode\n"); memset(f->inocache, 0, sizeof(struct jffs2_inode_cache)); - f->inocache->ino = f->inocache->nlink = 1; + f->inocache->ino = f->inocache->pino_nlink = 1; f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache; f->inocache->state = INO_STATE_READING; jffs2_add_ino_cache(c, f->inocache); @@ -1401,7 +1403,7 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f) jffs2_clear_acl(f); jffs2_xattr_delete_inode(c, f->inocache); mutex_lock(&f->sem); - deleted = f->inocache && !f->inocache->nlink; + deleted = f->inocache && !f->inocache->pino_nlink; if (f->inocache && f->inocache->state != INO_STATE_CHECKING) jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING); diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 272872d27fd..1d437de1e9a 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -97,11 +97,12 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) size_t pointlen; if (c->mtd->point) { - ret = c->mtd->point (c->mtd, 0, c->mtd->size, &pointlen, &flashbuf); + ret = c->mtd->point(c->mtd, 0, c->mtd->size, &pointlen, + (void **)&flashbuf, NULL); if (!ret && pointlen < c->mtd->size) { /* Don't muck about if it won't let us point to the whole flash */ D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen)); - c->mtd->unpoint(c->mtd, flashbuf, 0, pointlen); + c->mtd->unpoint(c->mtd, 0, pointlen); flashbuf = NULL; } if (ret) @@ -267,7 +268,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) kfree(flashbuf); #ifndef __ECOS else - c->mtd->unpoint(c->mtd, flashbuf, 0, c->mtd->size); + c->mtd->unpoint(c->mtd, 0, c->mtd->size); #endif if (s) kfree(s); @@ -940,7 +941,7 @@ struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uin ic->nodes = (void *)ic; jffs2_add_ino_cache(c, ic); if (ino == 1) - ic->nlink = 1; + ic->pino_nlink = 1; return ic; } diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index f3353df178e..7da69eae49e 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -31,11 +31,12 @@ static struct kmem_cache *jffs2_inode_cachep; static struct inode *jffs2_alloc_inode(struct super_block *sb) { - struct jffs2_inode_info *ei; - ei = (struct jffs2_inode_info *)kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL); - if (!ei) + struct jffs2_inode_info *f; + + f = kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL); + if (!f) return NULL; - return &ei->vfs_inode; + return &f->vfs_inode; } static void jffs2_destroy_inode(struct inode *inode) @@ -45,10 +46,10 @@ static void jffs2_destroy_inode(struct inode *inode) static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo) { - struct jffs2_inode_info *ei = (struct jffs2_inode_info *) foo; + struct jffs2_inode_info *f = foo; - mutex_init(&ei->sem); - inode_init_once(&ei->vfs_inode); + mutex_init(&f->sem); + inode_init_once(&f->vfs_inode); } static int jffs2_sync_fs(struct super_block *sb, int wait) diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 8de52b60767..0e78b00035e 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -494,7 +494,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c) /* If it's an in-core inode, then we have to adjust any full_dirent or full_dnode structure to point to the new version instead of the old */ - f = jffs2_gc_fetch_inode(c, ic->ino, ic->nlink); + f = jffs2_gc_fetch_inode(c, ic->ino, !ic->pino_nlink); if (IS_ERR(f)) { /* Should never happen; it _must_ be present */ JFFS2_ERROR("Failed to iget() ino #%u, err %ld\n", diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c index 665fce9797d..ca29440e943 100644 --- a/fs/jffs2/write.c +++ b/fs/jffs2/write.c @@ -19,7 +19,8 @@ #include "compr.h" -int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint32_t mode, struct jffs2_raw_inode *ri) +int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, + uint32_t mode, struct jffs2_raw_inode *ri) { struct jffs2_inode_cache *ic; @@ -31,7 +32,7 @@ int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint memset(ic, 0, sizeof(*ic)); f->inocache = ic; - f->inocache->nlink = 1; + f->inocache->pino_nlink = 1; /* Will be overwritten shortly for directories */ f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache; f->inocache->state = INO_STATE_PRESENT; @@ -438,10 +439,10 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); D1(printk(KERN_DEBUG "jffs2_do_create(): reserved 0x%x bytes\n", alloclen)); - if (ret) { - mutex_unlock(&f->sem); + if (ret) return ret; - } + + mutex_lock(&f->sem); ri->data_crc = cpu_to_je32(0); ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); @@ -635,9 +636,9 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, jffs2_mark_node_obsolete(c, fd->raw); jffs2_free_full_dirent(fd); } - } - - dead_f->inocache->nlink--; + dead_f->inocache->pino_nlink = 0; + } else + dead_f->inocache->pino_nlink--; /* NB: Caller must set inode nlink if appropriate */ mutex_unlock(&dead_f->sem); } diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index e48665984cb..082e844ab2d 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -82,7 +82,7 @@ static int is_xattr_datum_unchecked(struct jffs2_sb_info *c, struct jffs2_xattr_ static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) { /* must be called under down_write(xattr_sem) */ - D1(dbg_xattr("%s: xid=%u, version=%u\n", __FUNCTION__, xd->xid, xd->version)); + D1(dbg_xattr("%s: xid=%u, version=%u\n", __func__, xd->xid, xd->version)); if (xd->xname) { c->xdatum_mem_usage -= (xd->name_len + 1 + xd->value_len); kfree(xd->xname); @@ -592,7 +592,7 @@ void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache When an inode with XATTR is removed, those XATTRs must be removed. */ struct jffs2_xattr_ref *ref, *_ref; - if (!ic || ic->nlink > 0) + if (!ic || ic->pino_nlink > 0) return; down_write(&c->xattr_sem); @@ -829,7 +829,7 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c) ref->xd and ref->ic are not valid yet. */ xd = jffs2_find_xattr_datum(c, ref->xid); ic = jffs2_get_ino_cache(c, ref->ino); - if (!xd || !ic || !ic->nlink) { + if (!xd || !ic || !ic->pino_nlink) { dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n", ref->ino, ref->xid, ref->xseqno); ref->xseqno |= XREF_DELETE_MARKER; @@ -1252,7 +1252,7 @@ int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE); if (rc) { JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n", - __FUNCTION__, rc, totlen); + __func__, rc, totlen); rc = rc ? rc : -EBADFD; goto out; } diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c index 887f5759e53..bf6ab19b86e 100644 --- a/fs/jfs/jfs_debug.c +++ b/fs/jfs/jfs_debug.c @@ -89,7 +89,7 @@ void jfs_proc_init(void) { int i; - if (!(base = proc_mkdir("jfs", proc_root_fs))) + if (!(base = proc_mkdir("fs/jfs", NULL))) return; base->owner = THIS_MODULE; @@ -109,7 +109,7 @@ void jfs_proc_clean(void) if (base) { for (i = 0; i < NPROCENT; i++) remove_proc_entry(Entries[i].name, base); - remove_proc_entry("jfs", proc_root_fs); + remove_proc_entry("fs/jfs", NULL); } } diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 40b16f23e49..5df517b81f3 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -573,7 +573,7 @@ again: /* Ensure the resulting lock will get added to granted list */ fl->fl_flags |= FL_SLEEP; if (do_vfs_lock(fl) < 0) - printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); + printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__); up_read(&host->h_rwsem); fl->fl_flags = fl_flags; status = 0; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 4d81553d294..81aca859bfd 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -752,7 +752,7 @@ nlmsvc_grant_blocked(struct nlm_block *block) return; default: printk(KERN_WARNING "lockd: unexpected error %d in %s!\n", - -error, __FUNCTION__); + -error, __func__); nlmsvc_insert_block(block, 10 * HZ); nlmsvc_release_block(block); return; diff --git a/fs/locks.c b/fs/locks.c index 44d9a6a7ec5..11dbf08651b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -116,6 +116,7 @@ #include <linux/capability.h> #include <linux/file.h> +#include <linux/fdtable.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/module.h> @@ -772,7 +773,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) * give it the opportunity to lock the file. */ if (found) - cond_resched(); + cond_resched_bkl(); find_conflict: for_each_lock(inode, before) { @@ -1752,6 +1753,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, struct file_lock *file_lock = locks_alloc_lock(); struct flock flock; struct inode *inode; + struct file *f; int error; if (file_lock == NULL) @@ -1824,7 +1826,15 @@ again: * Attempt to detect a close/fcntl race and recover by * releasing the lock that was just acquired. */ - if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) { + /* + * we need that spin_lock here - it prevents reordering between + * update of inode->i_flock and check for it done in close(). + * rcu_read_lock() wouldn't do. + */ + spin_lock(¤t->files->file_lock); + f = fcheck(fd); + spin_unlock(¤t->files->file_lock); + if (!error && f != filp && flock.l_type != F_UNLCK) { flock.l_type = F_UNLCK; goto again; } @@ -1880,6 +1890,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, struct file_lock *file_lock = locks_alloc_lock(); struct flock64 flock; struct inode *inode; + struct file *f; int error; if (file_lock == NULL) @@ -1952,7 +1963,10 @@ again: * Attempt to detect a close/fcntl race and recover by * releasing the lock that was just acquired. */ - if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) { + spin_lock(¤t->files->file_lock); + f = fcheck(fd); + spin_unlock(¤t->files->file_lock); + if (!error && f != filp && flock.l_type != F_UNLCK) { flock.l_type = F_UNLCK; goto again; } diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index 2d4358c59f6..05ff4f1d702 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -609,7 +609,7 @@ error_inode: if (corrupt < 0) { fat_fs_panic(new_dir->i_sb, "%s: Filesystem corrupted (i_pos %lld)", - __FUNCTION__, sinfo.i_pos); + __func__, sinfo.i_pos); } goto out; } diff --git a/fs/namei.c b/fs/namei.c index e179f71bfcb..32fd9655485 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -30,6 +30,7 @@ #include <linux/capability.h> #include <linux/file.h> #include <linux/fcntl.h> +#include <linux/device_cgroup.h> #include <asm/namei.h> #include <asm/uaccess.h> @@ -281,6 +282,10 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) if (retval) return retval; + retval = devcgroup_inode_permission(inode, mask); + if (retval) + return retval; + return security_inode_permission(inode, mask, nd); } @@ -2028,6 +2033,10 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) if (!dir->i_op || !dir->i_op->mknod) return -EPERM; + error = devcgroup_inode_mknod(mode, dev); + if (error) + return error; + error = security_inode_mknod(dir, dentry, mode, dev); if (error) return error; diff --git a/fs/namespace.c b/fs/namespace.c index fe376805cf5..4fc302c2a0e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1176,17 +1176,6 @@ static int mount_is_safe(struct nameidata *nd) #endif } -static int lives_below_in_same_fs(struct dentry *d, struct dentry *dentry) -{ - while (1) { - if (d == dentry) - return 1; - if (d == NULL || d == d->d_parent) - return 0; - d = d->d_parent; - } -} - struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, int flag) { @@ -1203,7 +1192,7 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, p = mnt; list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) { - if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry)) + if (!is_subdir(r->mnt_mountpoint, dentry)) continue; for (s = r; s; s = next_mnt(s, r)) { @@ -2340,10 +2329,10 @@ void __init mnt_init(void) err = sysfs_init(); if (err) printk(KERN_WARNING "%s: sysfs_init error: %d\n", - __FUNCTION__, err); + __func__, err); fs_kobj = kobject_create_and_add("fs", NULL); if (!fs_kobj) - printk(KERN_WARNING "%s: kobj create error\n", __FUNCTION__); + printk(KERN_WARNING "%s: kobj create error\n", __func__); init_rootfs(); init_mount_tree(); } diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c index df6d60bdfcd..97645f11211 100644 --- a/fs/ncpfs/ncplib_kernel.c +++ b/fs/ncpfs/ncplib_kernel.c @@ -102,48 +102,47 @@ static inline void ncp_init_request_s(struct ncp_server *server, int subfunction } static inline char * - ncp_reply_data(struct ncp_server *server, int offset) +ncp_reply_data(struct ncp_server *server, int offset) { return &(server->packet[sizeof(struct ncp_reply_header) + offset]); } -static inline __u8 BVAL(void* data) +static inline u8 BVAL(void *data) { - return get_unaligned((__u8*)data); + return *(u8 *)data; } -static __u8 - ncp_reply_byte(struct ncp_server *server, int offset) +static u8 ncp_reply_byte(struct ncp_server *server, int offset) { - return get_unaligned((__u8 *) ncp_reply_data(server, offset)); + return *(u8 *)ncp_reply_data(server, offset); } -static inline __u16 WVAL_LH(void* data) +static inline u16 WVAL_LH(void *data) { - return le16_to_cpu(get_unaligned((__le16*)data)); + return get_unaligned_le16(data); } -static __u16 - ncp_reply_le16(struct ncp_server *server, int offset) +static u16 +ncp_reply_le16(struct ncp_server *server, int offset) { - return le16_to_cpu(get_unaligned((__le16 *) ncp_reply_data(server, offset))); + return get_unaligned_le16(ncp_reply_data(server, offset)); } -static __u16 - ncp_reply_be16(struct ncp_server *server, int offset) +static u16 +ncp_reply_be16(struct ncp_server *server, int offset) { - return be16_to_cpu(get_unaligned((__be16 *) ncp_reply_data(server, offset))); + return get_unaligned_be16(ncp_reply_data(server, offset)); } -static inline __u32 DVAL_LH(void* data) +static inline u32 DVAL_LH(void *data) { - return le32_to_cpu(get_unaligned((__le32*)data)); + return get_unaligned_le32(data); } static __le32 - ncp_reply_dword(struct ncp_server *server, int offset) +ncp_reply_dword(struct ncp_server *server, int offset) { - return get_unaligned((__le32 *) ncp_reply_data(server, offset)); + return get_unaligned((__le32 *)ncp_reply_data(server, offset)); } static inline __u32 ncp_reply_dword_lh(struct ncp_server* server, int offset) { @@ -1006,8 +1005,8 @@ ncp_read_bounce(struct ncp_server *server, const char *file_id, result = ncp_request2(server, 72, bounce, bufsize); ncp_unlock_server(server); if (!result) { - int len = be16_to_cpu(get_unaligned((__be16*)((char*)bounce + - sizeof(struct ncp_reply_header)))); + int len = get_unaligned_be16((char *)bounce + + sizeof(struct ncp_reply_header)); result = -EIO; if (len <= to_read) { char* source; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f2f3b284e6d..89ac5bb0401 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1321,6 +1321,7 @@ static const struct file_operations nfs_server_list_fops = { .read = seq_read, .llseek = seq_lseek, .release = seq_release, + .owner = THIS_MODULE, }; static int nfs_volume_list_open(struct inode *inode, struct file *file); @@ -1341,6 +1342,7 @@ static const struct file_operations nfs_volume_list_fops = { .read = seq_read, .llseek = seq_lseek, .release = seq_release, + .owner = THIS_MODULE, }; /* @@ -1500,33 +1502,29 @@ int __init nfs_fs_proc_init(void) { struct proc_dir_entry *p; - proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs); + proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL); if (!proc_fs_nfs) goto error_0; proc_fs_nfs->owner = THIS_MODULE; /* a file of servers with which we're dealing */ - p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs); + p = proc_create("servers", S_IFREG|S_IRUGO, + proc_fs_nfs, &nfs_server_list_fops); if (!p) goto error_1; - p->proc_fops = &nfs_server_list_fops; - p->owner = THIS_MODULE; - /* a file of volumes that we have mounted */ - p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs); + p = proc_create("volumes", S_IFREG|S_IRUGO, + proc_fs_nfs, &nfs_volume_list_fops); if (!p) goto error_2; - - p->proc_fops = &nfs_volume_list_fops; - p->owner = THIS_MODULE; return 0; error_2: remove_proc_entry("servers", proc_fs_nfs); error_1: - remove_proc_entry("nfsfs", proc_root_fs); + remove_proc_entry("fs/nfsfs", NULL); error_0: return -ENOMEM; } @@ -1538,7 +1536,7 @@ void nfs_fs_proc_exit(void) { remove_proc_entry("volumes", proc_fs_nfs); remove_proc_entry("servers", proc_fs_nfs); - remove_proc_entry("nfsfs", proc_root_fs); + remove_proc_entry("fs/nfsfs", NULL); } #endif /* CONFIG_PROC_FS */ diff --git a/fs/nfs/super.c b/fs/nfs/super.c index fa220dc7460..7226a506f3c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1575,6 +1575,11 @@ static int nfs_compare_super(struct super_block *sb, void *data) return nfs_compare_mount_options(sb, server, mntflags); } +static int nfs_bdi_register(struct nfs_server *server) +{ + return bdi_register_dev(&server->backing_dev_info, server->s_dev); +} + static int nfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { @@ -1617,6 +1622,10 @@ static int nfs_get_sb(struct file_system_type *fs_type, if (s->s_fs_info != server) { nfs_free_server(server); server = NULL; + } else { + error = nfs_bdi_register(server); + if (error) + goto error_splat_super; } if (!s->s_root) { @@ -1664,6 +1673,7 @@ static void nfs_kill_super(struct super_block *s) { struct nfs_server *server = NFS_SB(s); + bdi_unregister(&server->backing_dev_info); kill_anon_super(s); nfs_free_server(server); } @@ -1708,6 +1718,10 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, if (s->s_fs_info != server) { nfs_free_server(server); server = NULL; + } else { + error = nfs_bdi_register(server); + if (error) + goto error_splat_super; } if (!s->s_root) { @@ -1984,6 +1998,10 @@ static int nfs4_get_sb(struct file_system_type *fs_type, if (s->s_fs_info != server) { nfs_free_server(server); server = NULL; + } else { + error = nfs_bdi_register(server); + if (error) + goto error_splat_super; } if (!s->s_root) { @@ -2070,6 +2088,10 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, if (s->s_fs_info != server) { nfs_free_server(server); server = NULL; + } else { + error = nfs_bdi_register(server); + if (error) + goto error_splat_super; } if (!s->s_root) { @@ -2149,6 +2171,10 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, if (s->s_fs_info != server) { nfs_free_server(server); server = NULL; + } else { + error = nfs_bdi_register(server); + if (error) + goto error_splat_super; } if (!s->s_root) { diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 562abf3380d..0b3ffa9840c 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -104,7 +104,7 @@ xdr_writemem(__be32 *p, const void *ptr, int nbytes) } while (0) #define RESERVE_SPACE(nbytes) do { \ p = xdr_reserve_space(xdr, nbytes); \ - if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \ + if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __func__); \ BUG_ON(!p); \ } while (0) @@ -134,7 +134,7 @@ xdr_error: \ p = xdr_inline_decode(xdr, nbytes); \ if (!p) { \ dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \ - __FUNCTION__, __LINE__); \ + __func__, __LINE__); \ return -EIO; \ } \ } while (0) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 42f3820ee8f..5ac00c4fee9 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -169,6 +169,7 @@ static const struct file_operations exports_operations = { .read = seq_read, .llseek = seq_lseek, .release = seq_release, + .owner = THIS_MODULE, }; /*----------------------------------------------------------------------------*/ @@ -801,10 +802,9 @@ static int create_proc_exports_entry(void) entry = proc_mkdir("fs/nfs", NULL); if (!entry) return -ENOMEM; - entry = create_proc_entry("fs/nfs/exports", 0, NULL); + entry = proc_create("exports", 0, entry, &exports_operations); if (!entry) return -ENOMEM; - entry->proc_fops = &exports_operations; return 0; } #else /* CONFIG_PROC_FS */ diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h index 8ac37c33d12..5e6724c1afd 100644 --- a/fs/ntfs/debug.h +++ b/fs/ntfs/debug.h @@ -45,7 +45,7 @@ static void ntfs_debug(const char *f, ...); extern void __ntfs_debug (const char *file, int line, const char *function, const char *format, ...) __attribute__ ((format (printf, 4, 5))); #define ntfs_debug(f, a...) \ - __ntfs_debug(__FILE__, __LINE__, __FUNCTION__, f, ##a) + __ntfs_debug(__FILE__, __LINE__, __func__, f, ##a) extern void ntfs_debug_dump_runlist(const runlist_element *rl); @@ -58,10 +58,10 @@ extern void ntfs_debug_dump_runlist(const runlist_element *rl); extern void __ntfs_warning(const char *function, const struct super_block *sb, const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); -#define ntfs_warning(sb, f, a...) __ntfs_warning(__FUNCTION__, sb, f, ##a) +#define ntfs_warning(sb, f, a...) __ntfs_warning(__func__, sb, f, ##a) extern void __ntfs_error(const char *function, const struct super_block *sb, const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); -#define ntfs_error(sb, f, a...) __ntfs_error(__FUNCTION__, sb, f, ##a) +#define ntfs_error(sb, f, a...) __ntfs_error(__func__, sb, f, ##a) #endif /* _LINUX_NTFS_DEBUG_H */ diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 2ad5c8b104b..790defb847e 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -1191,7 +1191,7 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol, if (size) { page = ntfs_map_page(mftbmp_mapping, ofs >> PAGE_CACHE_SHIFT); - if (unlikely(IS_ERR(page))) { + if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to read mft " "bitmap, aborting."); return PTR_ERR(page); @@ -2118,7 +2118,7 @@ static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no) } /* Read, map, and pin the page containing the mft record. */ page = ntfs_map_page(mft_vi->i_mapping, index); - if (unlikely(IS_ERR(page))) { + if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to map page containing mft record " "to format 0x%llx.", (long long)mft_no); return PTR_ERR(page); @@ -2519,7 +2519,7 @@ mft_rec_already_initialized: ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; /* Read, map, and pin the page containing the mft record. */ page = ntfs_map_page(vol->mft_ino->i_mapping, index); - if (unlikely(IS_ERR(page))) { + if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to map page containing allocated " "mft record 0x%llx.", (long long)bit); err = PTR_ERR(page); diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c index 98429fd6849..bc702dab5d1 100644 --- a/fs/ocfs2/cluster/sys.c +++ b/fs/ocfs2/cluster/sys.c @@ -65,7 +65,7 @@ int o2cb_sys_init(void) { int ret; - o2cb_kset = kset_create_and_add("o2cb", NULL, NULL); + o2cb_kset = kset_create_and_add("o2cb", NULL, fs_kobj); if (!o2cb_kset) return -ENOMEM; diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 5f6d858770a..1b81dcba175 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -44,7 +44,8 @@ #define MLOG_MASK_PREFIX ML_DLM #include "cluster/masklog.h" -int stringify_lockname(const char *lockname, int locklen, char *buf, int len); +static int stringify_lockname(const char *lockname, int locklen, char *buf, + int len); void dlm_print_one_lock_resource(struct dlm_lock_resource *res) { @@ -251,7 +252,8 @@ EXPORT_SYMBOL_GPL(dlm_errname); * * For more on lockname formats, please refer to dlmglue.c and ocfs2_lockid.h. */ -int stringify_lockname(const char *lockname, int locklen, char *buf, int len) +static int stringify_lockname(const char *lockname, int locklen, char *buf, + int len) { int out = 0; __be64 inode_blkno_be; @@ -368,7 +370,7 @@ static void dlm_debug_free(struct kref *kref) kfree(dc); } -void dlm_debug_put(struct dlm_debug_ctxt *dc) +static void dlm_debug_put(struct dlm_debug_ctxt *dc) { if (dc) kref_put(&dc->debug_refcnt, dlm_debug_free); diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 61a000f8524..e48aba698b7 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -327,7 +327,7 @@ clear_fields: static struct backing_dev_info dlmfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, }; static struct inode *dlmfs_get_root_inode(struct super_block *sb) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 9154c82d325..57e0d30cde9 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1048,6 +1048,10 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) mlog_entry("(0x%p, '%.*s')\n", dentry, dentry->d_name.len, dentry->d_name.name); + /* ensuring we don't even attempt to truncate a symlink */ + if (S_ISLNK(inode->i_mode)) + attr->ia_valid &= ~ATTR_SIZE; + if (attr->ia_valid & ATTR_MODE) mlog(0, "mode change: %d\n", attr->ia_mode); if (attr->ia_valid & ATTR_UID) diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index ce0dc147602..be774bdc8b3 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -260,7 +260,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) bh = osb->local_alloc_bh; alloc = (struct ocfs2_dinode *) bh->b_data; - alloc_copy = kmalloc(bh->b_size, GFP_KERNEL); + alloc_copy = kmalloc(bh->b_size, GFP_NOFS); if (!alloc_copy) { status = -ENOMEM; goto out_commit; @@ -931,7 +931,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, * local alloc shutdown won't try to double free main bitmap * bits. Make a copy so the sync function knows which bits to * free. */ - alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_KERNEL); + alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS); if (!alloc_copy) { status = -ENOMEM; mlog_errno(status); diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index ac1d74c63bf..bbd1667aa7d 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -385,7 +385,7 @@ static int o2cb_cluster_this_node(unsigned int *node) return 0; } -struct ocfs2_stack_operations o2cb_stack_ops = { +static struct ocfs2_stack_operations o2cb_stack_ops = { .connect = o2cb_cluster_connect, .disconnect = o2cb_cluster_disconnect, .hangup = o2cb_cluster_hangup, diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 7428663f9cb..b503772cd0e 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -635,7 +635,7 @@ static const struct file_operations ocfs2_control_fops = { .owner = THIS_MODULE, }; -struct miscdevice ocfs2_control_device = { +static struct miscdevice ocfs2_control_device = { .minor = MISC_DYNAMIC_MINOR, .name = "ocfs2_control", .fops = &ocfs2_control_fops, diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index 7134007ba22..ba9dbb51d25 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -167,9 +167,11 @@ const struct inode_operations ocfs2_symlink_inode_operations = { .readlink = page_readlink, .follow_link = ocfs2_follow_link, .getattr = ocfs2_getattr, + .setattr = ocfs2_setattr, }; const struct inode_operations ocfs2_fast_symlink_inode_operations = { .readlink = ocfs2_readlink, .follow_link = ocfs2_follow_link, .getattr = ocfs2_getattr, + .setattr = ocfs2_setattr, }; diff --git a/fs/open.c b/fs/open.c index 7af1f05d597..a1450086e92 100644 --- a/fs/open.c +++ b/fs/open.c @@ -7,6 +7,7 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/file.h> +#include <linux/fdtable.h> #include <linux/quotaops.h> #include <linux/fsnotify.h> #include <linux/module.h> diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index e7dd1d4e347..0fdda2e8a4c 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c @@ -41,12 +41,12 @@ #ifndef CONFIG_LDM_DEBUG #define ldm_debug(...) do {} while (0) #else -#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __FUNCTION__, f, ##a) +#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __func__, f, ##a) #endif -#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __FUNCTION__, f, ##a) -#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __FUNCTION__, f, ##a) -#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __FUNCTION__, f, ##a) +#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __func__, f, ##a) +#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __func__, f, ##a) +#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __func__, f, ##a) __attribute__ ((format (printf, 3, 4))) static void _ldm_printk (const char *level, const char *function, diff --git a/fs/pipe.c b/fs/pipe.c index f73492b6817..ec228bc9f88 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -17,6 +17,7 @@ #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/audit.h> +#include <linux/syscalls.h> #include <asm/uaccess.h> #include <asm/ioctls.h> @@ -1076,6 +1077,26 @@ int do_pipe(int *fd) } /* + * sys_pipe() is the normal C calling standard for creating + * a pipe. It's not the way Unix traditionally does this, though. + */ +asmlinkage long __weak sys_pipe(int __user *fildes) +{ + int fd[2]; + int error; + + error = do_pipe(fd); + if (!error) { + if (copy_to_user(fildes, fd, sizeof(fd))) { + sys_close(fd[0]); + sys_close(fd[1]); + error = -EFAULT; + } + } + return error; +} + +/* * pipefs should _never_ be mounted by userland - too much of security hassle, * no real gain from having the whole whorehouse mounted. So we don't need * any operations on the root directory. However, we need a non-trivial diff --git a/fs/proc/array.c b/fs/proc/array.c index 07d6c4853fe..9e3b8c33c24 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -73,6 +73,7 @@ #include <linux/signal.h> #include <linux/highmem.h> #include <linux/file.h> +#include <linux/fdtable.h> #include <linux/times.h> #include <linux/cpuset.h> #include <linux/rcupdate.h> @@ -297,6 +298,7 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p) render_cap_t(m, "CapInh:\t", &p->cap_inheritable); render_cap_t(m, "CapPrm:\t", &p->cap_permitted); render_cap_t(m, "CapEff:\t", &p->cap_effective); + render_cap_t(m, "CapBnd:\t", &p->cap_bset); } static inline void task_context_switch_counts(struct seq_file *m, @@ -425,12 +427,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, cutime = cstime = utime = stime = cputime_zero; cgtime = gtime = cputime_zero; - rcu_read_lock(); if (lock_task_sighand(task, &flags)) { struct signal_struct *sig = task->signal; if (sig->tty) { - tty_pgrp = pid_nr_ns(sig->tty->pgrp, ns); + struct pid *pgrp = tty_get_pgrp(sig->tty); + tty_pgrp = pid_nr_ns(pgrp, ns); + put_pid(pgrp); tty_nr = new_encode_dev(tty_devnum(sig->tty)); } @@ -469,7 +472,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, unlock_task_sighand(task, &flags); } - rcu_read_unlock(); if (!whole || num_threads < 2) wchan = get_wchan(task); diff --git a/fs/proc/base.c b/fs/proc/base.c index c5e412a00b1..808cbdc193d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -56,6 +56,7 @@ #include <linux/init.h> #include <linux/capability.h> #include <linux/file.h> +#include <linux/fdtable.h> #include <linux/string.h> #include <linux/seq_file.h> #include <linux/namei.h> @@ -195,12 +196,32 @@ static int proc_root_link(struct inode *inode, struct path *path) return result; } -#define MAY_PTRACE(task) \ - (task == current || \ - (task->parent == current && \ - (task->ptrace & PT_PTRACED) && \ - (task_is_stopped_or_traced(task)) && \ - security_ptrace(current,task) == 0)) +/* + * Return zero if current may access user memory in @task, -error if not. + */ +static int check_mem_permission(struct task_struct *task) +{ + /* + * A task can always look at itself, in case it chooses + * to use system calls instead of load instructions. + */ + if (task == current) + return 0; + + /* + * If current is actively ptrace'ing, and would also be + * permitted to freshly attach with ptrace now, permit it. + */ + if (task->parent == current && (task->ptrace & PT_PTRACED) && + task_is_stopped_or_traced(task) && + ptrace_may_attach(task)) + return 0; + + /* + * Noone else is allowed. + */ + return -EPERM; +} struct mm_struct *mm_for_maps(struct task_struct *task) { @@ -722,7 +743,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, if (!task) goto out_no_task; - if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) + if (check_mem_permission(task)) goto out; ret = -ENOMEM; @@ -748,7 +769,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; retval = access_process_vm(task, src, page, this_len, 0); - if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { + if (!retval || check_mem_permission(task)) { if (!ret) ret = -EIO; break; @@ -792,7 +813,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf, if (!task) goto out_no_task; - if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) + if (check_mem_permission(task)) goto out; copied = -ENOMEM; @@ -1181,6 +1202,81 @@ static const struct file_operations proc_pid_sched_operations = { #endif +/* + * We added or removed a vma mapping the executable. The vmas are only mapped + * during exec and are not mapped with the mmap system call. + * Callers must hold down_write() on the mm's mmap_sem for these + */ +void added_exe_file_vma(struct mm_struct *mm) +{ + mm->num_exe_file_vmas++; +} + +void removed_exe_file_vma(struct mm_struct *mm) +{ + mm->num_exe_file_vmas--; + if ((mm->num_exe_file_vmas == 0) && mm->exe_file){ + fput(mm->exe_file); + mm->exe_file = NULL; + } + +} + +void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) +{ + if (new_exe_file) + get_file(new_exe_file); + if (mm->exe_file) + fput(mm->exe_file); + mm->exe_file = new_exe_file; + mm->num_exe_file_vmas = 0; +} + +struct file *get_mm_exe_file(struct mm_struct *mm) +{ + struct file *exe_file; + + /* We need mmap_sem to protect against races with removal of + * VM_EXECUTABLE vmas */ + down_read(&mm->mmap_sem); + exe_file = mm->exe_file; + if (exe_file) + get_file(exe_file); + up_read(&mm->mmap_sem); + return exe_file; +} + +void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) +{ + /* It's safe to write the exe_file pointer without exe_file_lock because + * this is called during fork when the task is not yet in /proc */ + newmm->exe_file = get_mm_exe_file(oldmm); +} + +static int proc_exe_link(struct inode *inode, struct path *exe_path) +{ + struct task_struct *task; + struct mm_struct *mm; + struct file *exe_file; + + task = get_proc_task(inode); + if (!task) + return -ENOENT; + mm = get_task_mm(task); + put_task_struct(task); + if (!mm) + return -ENOENT; + exe_file = get_mm_exe_file(mm); + mmput(mm); + if (exe_file) { + *exe_path = exe_file->f_path; + path_get(&exe_file->f_path); + fput(exe_file); + return 0; + } else + return -ENOENT; +} + static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index a36ad3c75cf..43e54e86cef 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -69,12 +69,7 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes, count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); start = NULL; - if (dp->get_info) { - /* Handle old net routines */ - n = dp->get_info(page, &start, *ppos, count); - if (n < count) - eof = 1; - } else if (dp->read_proc) { + if (dp->read_proc) { /* * How to be a proc read function * ------------------------------ @@ -277,8 +272,11 @@ static int xlate_proc_name(const char *name, int len; int rtn = 0; + de = *ret; + if (!de) + de = &proc_root; + spin_lock(&proc_subdir_lock); - de = &proc_root; while (1) { next = strchr(cp, '/'); if (!next) @@ -385,20 +383,18 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, lock_kernel(); spin_lock(&proc_subdir_lock); - if (de) { - for (de = de->subdir; de ; de = de->next) { - if (de->namelen != dentry->d_name.len) - continue; - if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { - unsigned int ino; + for (de = de->subdir; de ; de = de->next) { + if (de->namelen != dentry->d_name.len) + continue; + if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { + unsigned int ino; - ino = de->low_ino; - de_get(de); - spin_unlock(&proc_subdir_lock); - error = -EINVAL; - inode = proc_get_inode(dir->i_sb, ino, de); - goto out_unlock; - } + ino = de->low_ino; + de_get(de); + spin_unlock(&proc_subdir_lock); + error = -EINVAL; + inode = proc_get_inode(dir->i_sb, ino, de); + goto out_unlock; } } spin_unlock(&proc_subdir_lock); @@ -410,7 +406,8 @@ out_unlock: d_add(dentry, inode); return NULL; } - de_put(de); + if (de) + de_put(de); return ERR_PTR(error); } @@ -440,10 +437,6 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, lock_kernel(); ino = inode->i_ino; - if (!de) { - ret = -EINVAL; - goto out; - } i = filp->f_pos; switch (i) { case 0: @@ -582,7 +575,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, /* make sure name is valid */ if (!name || !strlen(name)) goto out; - if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) + if (xlate_proc_name(name, parent, &fn) != 0) goto out; /* At this point there must not be any '/' characters beyond *fn */ @@ -648,6 +641,23 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, return ent; } +struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, + struct proc_dir_entry *parent) +{ + struct proc_dir_entry *ent; + + ent = __proc_create(&parent, name, S_IFDIR | S_IRUGO | S_IXUGO, 2); + if (ent) { + ent->data = net; + if (proc_register(parent, ent) < 0) { + kfree(ent); + ent = NULL; + } + } + return ent; +} +EXPORT_SYMBOL_GPL(proc_net_mkdir); + struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) { @@ -682,9 +692,10 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, return ent; } -struct proc_dir_entry *proc_create(const char *name, mode_t mode, - struct proc_dir_entry *parent, - const struct file_operations *proc_fops) +struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, + struct proc_dir_entry *parent, + const struct file_operations *proc_fops, + void *data) { struct proc_dir_entry *pde; nlink_t nlink; @@ -705,6 +716,7 @@ struct proc_dir_entry *proc_create(const char *name, mode_t mode, if (!pde) goto out; pde->proc_fops = proc_fops; + pde->data = data; if (proc_register(parent, pde) < 0) goto out_free; return pde; @@ -734,55 +746,58 @@ void free_proc_entry(struct proc_dir_entry *de) void remove_proc_entry(const char *name, struct proc_dir_entry *parent) { struct proc_dir_entry **p; - struct proc_dir_entry *de; + struct proc_dir_entry *de = NULL; const char *fn = name; int len; - if (!parent && xlate_proc_name(name, &parent, &fn) != 0) - goto out; + if (xlate_proc_name(name, &parent, &fn) != 0) + return; len = strlen(fn); spin_lock(&proc_subdir_lock); for (p = &parent->subdir; *p; p=&(*p)->next ) { - if (!proc_match(len, fn, *p)) - continue; - de = *p; - *p = de->next; - de->next = NULL; - - spin_lock(&de->pde_unload_lock); - /* - * Stop accepting new callers into module. If you're - * dynamically allocating ->proc_fops, save a pointer somewhere. - */ - de->proc_fops = NULL; - /* Wait until all existing callers into module are done. */ - if (de->pde_users > 0) { - DECLARE_COMPLETION_ONSTACK(c); - - if (!de->pde_unload_completion) - de->pde_unload_completion = &c; - - spin_unlock(&de->pde_unload_lock); - spin_unlock(&proc_subdir_lock); + if (proc_match(len, fn, *p)) { + de = *p; + *p = de->next; + de->next = NULL; + break; + } + } + spin_unlock(&proc_subdir_lock); + if (!de) + return; - wait_for_completion(de->pde_unload_completion); + spin_lock(&de->pde_unload_lock); + /* + * Stop accepting new callers into module. If you're + * dynamically allocating ->proc_fops, save a pointer somewhere. + */ + de->proc_fops = NULL; + /* Wait until all existing callers into module are done. */ + if (de->pde_users > 0) { + DECLARE_COMPLETION_ONSTACK(c); + + if (!de->pde_unload_completion) + de->pde_unload_completion = &c; - spin_lock(&proc_subdir_lock); - goto continue_removing; - } spin_unlock(&de->pde_unload_lock); + wait_for_completion(de->pde_unload_completion); + + goto continue_removing; + } + spin_unlock(&de->pde_unload_lock); + continue_removing: - if (S_ISDIR(de->mode)) - parent->nlink--; - de->nlink = 0; - WARN_ON(de->subdir); - if (atomic_dec_and_test(&de->count)) - free_proc_entry(de); - break; + if (S_ISDIR(de->mode)) + parent->nlink--; + de->nlink = 0; + if (de->subdir) { + printk(KERN_WARNING "%s: removing non-empty directory " + "'%s/%s', leaking at least '%s'\n", __func__, + de->parent->name, de->name, de->subdir->name); + WARN_ON(1); } - spin_unlock(&proc_subdir_lock); -out: - return; + if (atomic_dec_and_test(&de->count)) + free_proc_entry(de); } diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 82b3a1b5a70..6f4e8dc97da 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -25,8 +25,7 @@ struct proc_dir_entry *de_get(struct proc_dir_entry *de) { - if (de) - atomic_inc(&de->count); + atomic_inc(&de->count); return de; } @@ -35,18 +34,16 @@ struct proc_dir_entry *de_get(struct proc_dir_entry *de) */ void de_put(struct proc_dir_entry *de) { - if (de) { - lock_kernel(); - if (!atomic_read(&de->count)) { - printk("de_put: entry %s already free!\n", de->name); - unlock_kernel(); - return; - } - - if (atomic_dec_and_test(&de->count)) - free_proc_entry(de); + lock_kernel(); + if (!atomic_read(&de->count)) { + printk("de_put: entry %s already free!\n", de->name); unlock_kernel(); + return; } + + if (atomic_dec_and_test(&de->count)) + free_proc_entry(de); + unlock_kernel(); } /* @@ -392,7 +389,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, { struct inode * inode; - if (de != NULL && !try_module_get(de->owner)) + if (!try_module_get(de->owner)) goto out_mod; inode = iget_locked(sb, ino); @@ -402,30 +399,29 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; PROC_I(inode)->fd = 0; PROC_I(inode)->pde = de; - if (de) { - if (de->mode) { - inode->i_mode = de->mode; - inode->i_uid = de->uid; - inode->i_gid = de->gid; - } - if (de->size) - inode->i_size = de->size; - if (de->nlink) - inode->i_nlink = de->nlink; - if (de->proc_iops) - inode->i_op = de->proc_iops; - if (de->proc_fops) { - if (S_ISREG(inode->i_mode)) { + + if (de->mode) { + inode->i_mode = de->mode; + inode->i_uid = de->uid; + inode->i_gid = de->gid; + } + if (de->size) + inode->i_size = de->size; + if (de->nlink) + inode->i_nlink = de->nlink; + if (de->proc_iops) + inode->i_op = de->proc_iops; + if (de->proc_fops) { + if (S_ISREG(inode->i_mode)) { #ifdef CONFIG_COMPAT - if (!de->proc_fops->compat_ioctl) - inode->i_fop = - &proc_reg_file_ops_no_compat; - else + if (!de->proc_fops->compat_ioctl) + inode->i_fop = + &proc_reg_file_ops_no_compat; + else #endif - inode->i_fop = &proc_reg_file_ops; - } else { - inode->i_fop = de->proc_fops; - } + inode->i_fop = &proc_reg_file_ops; + } else { + inode->i_fop = de->proc_fops; } } unlock_new_inode(inode); @@ -433,8 +429,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, return inode; out_ino: - if (de != NULL) - module_put(de->owner); + module_put(de->owner); out_mod: return NULL; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index bc72f5c8c47..28cbca80590 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -11,6 +11,7 @@ #include <linux/proc_fs.h> +extern struct proc_dir_entry proc_root; #ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); #else @@ -46,9 +47,6 @@ extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); extern int maps_protect; -extern void create_seq_entry(char *name, mode_t mode, - const struct file_operations *f); -extern int proc_exe_link(struct inode *, struct path *); extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 941e95114b5..79ecd281d2c 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -137,7 +137,7 @@ static const struct file_operations proc_nommu_vma_list_operations = { static int __init proc_nommu_init(void) { - create_seq_entry("maps", S_IRUGO, &proc_nommu_vma_list_operations); + proc_create("maps", S_IRUGO, NULL, &proc_nommu_vma_list_operations); return 0; } diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 441a32f0e5f..74a323d2b85 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -179,6 +179,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, "PageTables: %8lu kB\n" "NFS_Unstable: %8lu kB\n" "Bounce: %8lu kB\n" + "WritebackTmp: %8lu kB\n" "CommitLimit: %8lu kB\n" "Committed_AS: %8lu kB\n" "VmallocTotal: %8lu kB\n" @@ -210,6 +211,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, K(global_page_state(NR_PAGETABLE)), K(global_page_state(NR_UNSTABLE_NFS)), K(global_page_state(NR_BOUNCE)), + K(global_page_state(NR_WRITEBACK_TEMP)), K(allowed), K(committed), (unsigned long)VMALLOC_TOTAL >> 10, @@ -826,14 +828,6 @@ static struct file_operations proc_kpageflags_operations = { struct proc_dir_entry *proc_root_kcore; -void create_seq_entry(char *name, mode_t mode, const struct file_operations *f) -{ - struct proc_dir_entry *entry; - entry = create_proc_entry(name, mode, NULL); - if (entry) - entry->proc_fops = f; -} - void __init proc_misc_init(void) { static struct { @@ -862,66 +856,52 @@ void __init proc_misc_init(void) /* And now for trickier ones */ #ifdef CONFIG_PRINTK - { - struct proc_dir_entry *entry; - entry = create_proc_entry("kmsg", S_IRUSR, &proc_root); - if (entry) - entry->proc_fops = &proc_kmsg_operations; - } + proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); #endif - create_seq_entry("locks", 0, &proc_locks_operations); - create_seq_entry("devices", 0, &proc_devinfo_operations); - create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); + proc_create("locks", 0, NULL, &proc_locks_operations); + proc_create("devices", 0, NULL, &proc_devinfo_operations); + proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); #ifdef CONFIG_BLOCK - create_seq_entry("partitions", 0, &proc_partitions_operations); + proc_create("partitions", 0, NULL, &proc_partitions_operations); #endif - create_seq_entry("stat", 0, &proc_stat_operations); - create_seq_entry("interrupts", 0, &proc_interrupts_operations); + proc_create("stat", 0, NULL, &proc_stat_operations); + proc_create("interrupts", 0, NULL, &proc_interrupts_operations); #ifdef CONFIG_SLABINFO - create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); + proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); #ifdef CONFIG_DEBUG_SLAB_LEAK - create_seq_entry("slab_allocators", 0 ,&proc_slabstats_operations); + proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations); #endif #endif #ifdef CONFIG_MMU proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations); #endif - create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); - create_seq_entry("pagetypeinfo", S_IRUGO, &pagetypeinfo_file_ops); - create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); - create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); + proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations); + proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops); + proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations); + proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations); #ifdef CONFIG_BLOCK - create_seq_entry("diskstats", 0, &proc_diskstats_operations); + proc_create("diskstats", 0, NULL, &proc_diskstats_operations); #endif #ifdef CONFIG_MODULES - create_seq_entry("modules", 0, &proc_modules_operations); + proc_create("modules", 0, NULL, &proc_modules_operations); #endif #ifdef CONFIG_SCHEDSTATS - create_seq_entry("schedstat", 0, &proc_schedstat_operations); + proc_create("schedstat", 0, NULL, &proc_schedstat_operations); #endif #ifdef CONFIG_PROC_KCORE - proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); - if (proc_root_kcore) { - proc_root_kcore->proc_fops = &proc_kcore_operations; + proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations); + if (proc_root_kcore) proc_root_kcore->size = (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; - } #endif #ifdef CONFIG_PROC_PAGE_MONITOR - create_seq_entry("kpagecount", S_IRUSR, &proc_kpagecount_operations); - create_seq_entry("kpageflags", S_IRUSR, &proc_kpageflags_operations); + proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations); + proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations); #endif #ifdef CONFIG_PROC_VMCORE - proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL); - if (proc_vmcore) - proc_vmcore->proc_fops = &proc_vmcore_operations; + proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); #endif #ifdef CONFIG_MAGIC_SYSRQ - { - struct proc_dir_entry *entry; - entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL); - if (entry) - entry->proc_fops = &proc_sysrq_trigger_operations; - } + proc_create("sysrq-trigger", S_IWUSR, NULL, &proc_sysrq_trigger_operations); #endif } diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 13cd7835d0d..83f357b30d7 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -159,17 +159,6 @@ struct net *get_proc_net(const struct inode *inode) } EXPORT_SYMBOL_GPL(get_proc_net); -struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, - struct proc_dir_entry *parent) -{ - struct proc_dir_entry *pde; - pde = proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); - if (pde != NULL) - pde->data = net; - return pde; -} -EXPORT_SYMBOL_GPL(proc_net_mkdir); - static __net_init int proc_net_ns_init(struct net *net) { struct proc_dir_entry *netd, *net_statd; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 614c34b6d1c..5acc001d49f 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -165,8 +165,8 @@ out: return err; } -static ssize_t proc_sys_read(struct file *filp, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, + size_t count, loff_t *ppos, int write) { struct dentry *dentry = filp->f_dentry; struct ctl_table_header *head; @@ -190,12 +190,12 @@ static ssize_t proc_sys_read(struct file *filp, char __user *buf, * and won't be until we finish. */ error = -EPERM; - if (sysctl_perm(table, MAY_READ)) + if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ)) goto out; /* careful: calling conventions are nasty here */ res = count; - error = table->proc_handler(table, 0, filp, buf, &res, ppos); + error = table->proc_handler(table, write, filp, buf, &res, ppos); if (!error) error = res; out: @@ -204,44 +204,16 @@ out: return error; } -static ssize_t proc_sys_write(struct file *filp, const char __user *buf, +static ssize_t proc_sys_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct dentry *dentry = filp->f_dentry; - struct ctl_table_header *head; - struct ctl_table *table; - ssize_t error; - size_t res; - - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); - /* Has the sysctl entry disappeared on us? */ - error = -ENOENT; - if (!table) - goto out; - - /* Has the sysctl entry been replaced by a directory? */ - error = -EISDIR; - if (!table->proc_handler) - goto out; - - /* - * At this point we know that the sysctl was not unregistered - * and won't be until we finish. - */ - error = -EPERM; - if (sysctl_perm(table, MAY_WRITE)) - goto out; - - /* careful: calling conventions are nasty here */ - res = count; - error = table->proc_handler(table, 1, filp, (char __user *)buf, - &res, ppos); - if (!error) - error = res; -out: - sysctl_head_finish(head); + return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0); +} - return error; +static ssize_t proc_sys_write(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1); } @@ -416,7 +388,7 @@ static int proc_sys_permission(struct inode *inode, int mask, struct nameidata * goto out; /* Use the permissions on the sysctl table entry */ - error = sysctl_perm(table, mask); + error = sysctl_perm(head->root, table, mask); out: sysctl_head_finish(head); return error; diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index 49816e00b51..21f490f5d65 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -5,7 +5,7 @@ */ #include <asm/uaccess.h> - +#include <linux/module.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/time.h> @@ -136,39 +136,54 @@ static const struct file_operations proc_tty_drivers_operations = { .release = seq_release, }; -/* - * This is the handler for /proc/tty/ldiscs - */ -static int tty_ldiscs_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) { - int i; - int len = 0; - off_t begin = 0; + return (*pos < NR_LDISCS) ? pos : NULL; +} + +static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + (*pos)++; + return (*pos < NR_LDISCS) ? pos : NULL; +} + +static void tty_ldiscs_seq_stop(struct seq_file *m, void *v) +{ +} + +static int tty_ldiscs_seq_show(struct seq_file *m, void *v) +{ + int i = *(loff_t *)v; struct tty_ldisc *ld; - for (i=0; i < NR_LDISCS; i++) { - ld = tty_ldisc_get(i); - if (ld == NULL) - continue; - len += sprintf(page+len, "%-10s %2d\n", - ld->name ? ld->name : "???", i); - tty_ldisc_put(i); - if (len+begin > off+count) - break; - if (len+begin < off) { - begin += len; - len = 0; - } - } - if (i >= NR_LDISCS) - *eof = 1; - if (off >= len+begin) + ld = tty_ldisc_get(i); + if (ld == NULL) return 0; - *start = page + (off-begin); - return ((count < begin+len-off) ? count : begin+len-off); + seq_printf(m, "%-10s %2d\n", ld->name ? ld->name : "???", i); + tty_ldisc_put(i); + return 0; +} + +static const struct seq_operations tty_ldiscs_seq_ops = { + .start = tty_ldiscs_seq_start, + .next = tty_ldiscs_seq_next, + .stop = tty_ldiscs_seq_stop, + .show = tty_ldiscs_seq_show, +}; + +static int proc_tty_ldiscs_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &tty_ldiscs_seq_ops); } +static const struct file_operations tty_ldiscs_proc_fops = { + .owner = THIS_MODULE, + .open = proc_tty_ldiscs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + /* * This function is called by tty_register_driver() to handle * registering the driver's /proc handler into /proc/tty/driver/<foo> @@ -177,16 +192,14 @@ void proc_tty_register_driver(struct tty_driver *driver) { struct proc_dir_entry *ent; - if ((!driver->read_proc && !driver->write_proc) || - !driver->driver_name || + if (!driver->ops->read_proc || !driver->driver_name || driver->proc_entry) return; ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver); if (!ent) return; - ent->read_proc = driver->read_proc; - ent->write_proc = driver->write_proc; + ent->read_proc = driver->ops->read_proc; ent->owner = driver->owner; ent->data = driver; @@ -214,7 +227,6 @@ void proc_tty_unregister_driver(struct tty_driver *driver) */ void __init proc_tty_init(void) { - struct proc_dir_entry *entry; if (!proc_mkdir("tty", NULL)) return; proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL); @@ -224,10 +236,7 @@ void __init proc_tty_init(void) * password lengths and inter-keystroke timings during password * entry. */ - proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR | S_IXUSR, NULL); - - create_proc_read_entry("tty/ldiscs", 0, NULL, tty_ldiscs_read_proc, NULL); - entry = create_proc_entry("tty/drivers", 0, NULL); - if (entry) - entry->proc_fops = &proc_tty_drivers_operations; + proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR|S_IXUSR, NULL); + proc_create("tty/ldiscs", 0, NULL, &tty_ldiscs_proc_fops); + proc_create("tty/drivers", 0, NULL, &proc_tty_drivers_operations); } diff --git a/fs/proc/root.c b/fs/proc/root.c index ef0fb57fc9e..95117538a4f 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -22,8 +22,6 @@ #include "internal.h" -struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver; - static int proc_test_super(struct super_block *sb, void *data) { return sb->s_fs_info == data; @@ -126,8 +124,8 @@ void __init proc_root_init(void) #ifdef CONFIG_SYSVIPC proc_mkdir("sysvipc", NULL); #endif - proc_root_fs = proc_mkdir("fs", NULL); - proc_root_driver = proc_mkdir("driver", NULL); + proc_mkdir("fs", NULL); + proc_mkdir("driver", NULL); proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) /* just give it a mountpoint */ @@ -137,7 +135,7 @@ void __init proc_root_init(void) #ifdef CONFIG_PROC_DEVICETREE proc_device_tree_init(); #endif - proc_bus = proc_mkdir("bus", NULL); + proc_mkdir("bus", NULL); proc_sys_init(); } @@ -232,9 +230,5 @@ void pid_ns_release_proc(struct pid_namespace *ns) EXPORT_SYMBOL(proc_symlink); EXPORT_SYMBOL(proc_mkdir); EXPORT_SYMBOL(create_proc_entry); -EXPORT_SYMBOL(proc_create); +EXPORT_SYMBOL(proc_create_data); EXPORT_SYMBOL(remove_proc_entry); -EXPORT_SYMBOL(proc_root); -EXPORT_SYMBOL(proc_root_fs); -EXPORT_SYMBOL(proc_bus); -EXPORT_SYMBOL(proc_root_driver); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7415eeb7cc3..88717c0f941 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -5,11 +5,9 @@ #include <linux/highmem.h> #include <linux/ptrace.h> #include <linux/pagemap.h> -#include <linux/ptrace.h> #include <linux/mempolicy.h> #include <linux/swap.h> #include <linux/swapops.h> -#include <linux/seq_file.h> #include <asm/elf.h> #include <asm/uaccess.h> @@ -75,40 +73,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, return mm->total_vm; } -int proc_exe_link(struct inode *inode, struct path *path) -{ - struct vm_area_struct * vma; - int result = -ENOENT; - struct task_struct *task = get_proc_task(inode); - struct mm_struct * mm = NULL; - - if (task) { - mm = get_task_mm(task); - put_task_struct(task); - } - if (!mm) - goto out; - down_read(&mm->mmap_sem); - - vma = mm->mmap; - while (vma) { - if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) - break; - vma = vma->vm_next; - } - - if (vma) { - *path = vma->vm_file->f_path; - path_get(&vma->vm_file->f_path); - result = 0; - } - - up_read(&mm->mmap_sem); - mmput(mm); -out: - return result; -} - static void pad_len_spaces(struct seq_file *m, int len) { len = 25 + sizeof(void*) * 6 - len; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 8011528518b..4b4f9cc2f18 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -1,6 +1,7 @@ #include <linux/mm.h> #include <linux/file.h> +#include <linux/fdtable.h> #include <linux/mount.h> #include <linux/ptrace.h> #include <linux/seq_file.h> @@ -103,40 +104,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, return size; } -int proc_exe_link(struct inode *inode, struct path *path) -{ - struct vm_list_struct *vml; - struct vm_area_struct *vma; - struct task_struct *task = get_proc_task(inode); - struct mm_struct *mm = get_task_mm(task); - int result = -ENOENT; - - if (!mm) - goto out; - down_read(&mm->mmap_sem); - - vml = mm->context.vmlist; - vma = NULL; - while (vml) { - if ((vml->vma->vm_flags & VM_EXECUTABLE) && vml->vma->vm_file) { - vma = vml->vma; - break; - } - vml = vml->next; - } - - if (vma) { - *path = vma->vm_file->f_path; - path_get(&vma->vm_file->f_path); - result = 0; - } - - up_read(&mm->mmap_sem); - mmput(mm); -out: - return result; -} - /* * display mapping lines for a particular process's /proc/pid/maps */ diff --git a/fs/quota_v2.c b/fs/quota_v2.c index 23b647f25d0..234ada90363 100644 --- a/fs/quota_v2.c +++ b/fs/quota_v2.c @@ -306,7 +306,7 @@ static uint find_free_dqentry(struct dquot *dquot, int *err) printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk); goto out_buf; } - dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1); + le16_add_cpu(&dh->dqdh_entries, 1); memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk)); /* Find free structure in block */ for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++); @@ -448,7 +448,7 @@ static int free_dqentry(struct dquot *dquot, uint blk) goto out_buf; } dh = (struct v2_disk_dqdbheader *)buf; - dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1); + le16_add_cpu(&dh->dqdh_entries, -1); if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */ if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 || (ret = put_free_dqblk(sb, type, buf, blk)) < 0) { diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index b41a514b097..9590b902430 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -26,6 +26,9 @@ #include <linux/fs.h> #include <linux/mm.h> +#include <linux/ramfs.h> + +#include "internal.h" const struct address_space_operations ramfs_aops = { .readpage = simple_readpage, diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 8428d5b2711..b13123424e4 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -44,7 +44,7 @@ static const struct inode_operations ramfs_dir_inode_operations; static struct backing_dev_info ramfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, }; diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h index af7cc074a47..6b330639b51 100644 --- a/fs/ramfs/internal.h +++ b/fs/ramfs/internal.h @@ -11,5 +11,4 @@ extern const struct address_space_operations ramfs_aops; -extern const struct file_operations ramfs_file_operations; extern const struct inode_operations ramfs_file_inode_operations; diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index da86042b3e0..e396b2fa474 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2574,11 +2574,9 @@ static int release_journal_dev(struct super_block *super, result = 0; - if (journal->j_dev_file != NULL) { - result = filp_close(journal->j_dev_file, NULL); - journal->j_dev_file = NULL; - journal->j_dev_bd = NULL; - } else if (journal->j_dev_bd != NULL) { + if (journal->j_dev_bd != NULL) { + if (journal->j_dev_bd->bd_dev != super->s_dev) + bd_release(journal->j_dev_bd); result = blkdev_put(journal->j_dev_bd); journal->j_dev_bd = NULL; } @@ -2603,7 +2601,6 @@ static int journal_init_dev(struct super_block *super, result = 0; journal->j_dev_bd = NULL; - journal->j_dev_file = NULL; jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; @@ -2620,35 +2617,34 @@ static int journal_init_dev(struct super_block *super, "cannot init journal device '%s': %i", __bdevname(jdev, b), result); return result; - } else if (jdev != super->s_dev) + } else if (jdev != super->s_dev) { + result = bd_claim(journal->j_dev_bd, journal); + if (result) { + blkdev_put(journal->j_dev_bd); + return result; + } + set_blocksize(journal->j_dev_bd, super->s_blocksize); + } + return 0; } - journal->j_dev_file = filp_open(jdev_name, 0, 0); - if (!IS_ERR(journal->j_dev_file)) { - struct inode *jdev_inode = journal->j_dev_file->f_mapping->host; - if (!S_ISBLK(jdev_inode->i_mode)) { - reiserfs_warning(super, "journal_init_dev: '%s' is " - "not a block device", jdev_name); - result = -ENOTBLK; - release_journal_dev(super, journal); - } else { - /* ok */ - journal->j_dev_bd = I_BDEV(jdev_inode); - set_blocksize(journal->j_dev_bd, super->s_blocksize); - reiserfs_info(super, - "journal_init_dev: journal device: %s\n", - bdevname(journal->j_dev_bd, b)); - } - } else { - result = PTR_ERR(journal->j_dev_file); - journal->j_dev_file = NULL; + journal->j_dev_bd = open_bdev_excl(jdev_name, 0, journal); + if (IS_ERR(journal->j_dev_bd)) { + result = PTR_ERR(journal->j_dev_bd); + journal->j_dev_bd = NULL; reiserfs_warning(super, "journal_init_dev: Cannot open '%s': %i", jdev_name, result); + return result; } - return result; + + set_blocksize(journal->j_dev_bd, super->s_blocksize); + reiserfs_info(super, + "journal_init_dev: journal device: %s\n", + bdevname(journal->j_dev_bd, b)); + return 0; } /** diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 8f86c52b30d..b9dbeeca704 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -467,6 +467,7 @@ static const struct file_operations r_file_operations = { .read = seq_read, .llseek = seq_lseek, .release = seq_release, + .owner = THIS_MODULE, }; static struct proc_dir_entry *proc_info_root = NULL; @@ -475,12 +476,8 @@ static const char proc_info_root_name[] = "fs/reiserfs"; static void add_file(struct super_block *sb, char *name, int (*func) (struct seq_file *, struct super_block *)) { - struct proc_dir_entry *de; - de = create_proc_entry(name, 0, REISERFS_SB(sb)->procdir); - if (de) { - de->data = func; - de->proc_fops = &r_file_operations; - } + proc_create_data(name, 0, REISERFS_SB(sb)->procdir, + &r_file_operations, func); } int reiserfs_proc_info_init(struct super_block *sb) diff --git a/fs/select.c b/fs/select.c index 00f58c5c7e0..8dda969614a 100644 --- a/fs/select.c +++ b/fs/select.c @@ -21,6 +21,7 @@ #include <linux/poll.h> #include <linux/personality.h> /* for STICKY_TIMEOUTS */ #include <linux/file.h> +#include <linux/fdtable.h> #include <linux/fs.h> #include <linux/rcupdate.h> @@ -298,7 +299,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) #define MAX_SELECT_SECONDS \ ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) -static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, +int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s64 *timeout) { fd_set_bits fds; @@ -425,7 +426,7 @@ sticky: return ret; } -#ifdef TIF_RESTORE_SIGMASK +#ifdef HAVE_SET_RESTORE_SIGMASK asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timespec __user *tsp, const sigset_t __user *sigmask, size_t sigsetsize) @@ -498,7 +499,7 @@ sticky: if (sigmask) { memcpy(¤t->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } } else if (sigmask) sigprocmask(SIG_SETMASK, &sigsaved, NULL); @@ -528,7 +529,7 @@ asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp, return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize); } -#endif /* TIF_RESTORE_SIGMASK */ +#endif /* HAVE_SET_RESTORE_SIGMASK */ struct poll_list { struct poll_list *next; @@ -759,7 +760,7 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, return ret; } -#ifdef TIF_RESTORE_SIGMASK +#ifdef HAVE_SET_RESTORE_SIGMASK asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, struct timespec __user *tsp, const sigset_t __user *sigmask, size_t sigsetsize) @@ -805,7 +806,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, if (sigmask) { memcpy(¤t->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } ret = -ERESTARTNOHAND; } else if (sigmask) @@ -839,4 +840,4 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, return ret; } -#endif /* TIF_RESTORE_SIGMASK */ +#endif /* HAVE_SET_RESTORE_SIGMASK */ diff --git a/fs/signalfd.c b/fs/signalfd.c index 8ead0db3593..619725644c7 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -207,11 +207,8 @@ static const struct file_operations signalfd_fops = { asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) { - int error; sigset_t sigmask; struct signalfd_ctx *ctx; - struct file *file; - struct inode *inode; if (sizemask != sizeof(sigset_t) || copy_from_user(&sigmask, user_mask, sizeof(sigmask))) @@ -230,12 +227,11 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas * When we call this, the initialization must be complete, since * anon_inode_getfd() will install the fd. */ - error = anon_inode_getfd(&ufd, &inode, &file, "[signalfd]", - &signalfd_fops, ctx); - if (error) - goto err_fdalloc; + ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx); + if (ufd < 0) + kfree(ctx); } else { - file = fget(ufd); + struct file *file = fget(ufd); if (!file) return -EBADF; ctx = file->private_data; @@ -252,9 +248,4 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas } return ufd; - -err_fdalloc: - kfree(ctx); - return error; } - diff --git a/fs/smbfs/smb_debug.h b/fs/smbfs/smb_debug.h index 734972b9269..fc4b1a5dd75 100644 --- a/fs/smbfs/smb_debug.h +++ b/fs/smbfs/smb_debug.h @@ -11,14 +11,14 @@ * these are normally enabled. */ #ifdef SMBFS_PARANOIA -# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __FUNCTION__ , ## a) +# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __func__ , ## a) #else # define PARANOIA(f, a...) do { ; } while(0) #endif /* lots of debug messages */ #ifdef SMBFS_DEBUG_VERBOSE -# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __FUNCTION__ , ## a) +# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a) #else # define VERBOSE(f, a...) do { ; } while(0) #endif @@ -28,7 +28,7 @@ * too common name. */ #ifdef SMBFS_DEBUG -#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __FUNCTION__ , ## a) +#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a) #else #define DEBUG1(f, a...) do { ; } while(0) #endif diff --git a/fs/splice.c b/fs/splice.c index eeb1a86a701..78150038b58 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -811,24 +811,19 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, { struct address_space *mapping = out->f_mapping; struct inode *inode = mapping->host; - int killsuid, killpriv; + struct splice_desc sd = { + .total_len = len, + .flags = flags, + .pos = *ppos, + .u.file = out, + }; ssize_t ret; - int err = 0; - - killpriv = security_inode_need_killpriv(out->f_path.dentry); - killsuid = should_remove_suid(out->f_path.dentry); - if (unlikely(killsuid || killpriv)) { - mutex_lock(&inode->i_mutex); - if (killpriv) - err = security_inode_killpriv(out->f_path.dentry); - if (!err && killsuid) - err = __remove_suid(out->f_path.dentry, killsuid); - mutex_unlock(&inode->i_mutex); - if (err) - return err; - } - ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); + inode_double_lock(inode, pipe->inode); + ret = remove_suid(out->f_path.dentry); + if (likely(!ret)) + ret = __splice_from_pipe(pipe, &sd, pipe_to_file); + inode_double_unlock(inode, pipe->inode); if (ret > 0) { unsigned long nr_pages; @@ -840,6 +835,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, * sync it. */ if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { + int err; + mutex_lock(&inode->i_mutex); err = generic_osync_inode(inode, mapping, OSYNC_METADATA|OSYNC_DATA); diff --git a/fs/super.c b/fs/super.c index a5a4aca7e22..453877c5697 100644 --- a/fs/super.c +++ b/fs/super.c @@ -117,7 +117,7 @@ static inline void destroy_super(struct super_block *s) * Drop a superblock's refcount. Returns non-zero if the superblock was * destroyed. The caller must hold sb_lock. */ -int __put_super(struct super_block *sb) +static int __put_super(struct super_block *sb) { int ret = 0; diff --git a/fs/sync.c b/fs/sync.c index 7cd005ea763..228e17b5e9e 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -64,7 +64,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync) /* sync the superblock to buffers */ sb = inode->i_sb; lock_super(sb); - if (sb->s_op->write_super) + if (sb->s_dirt && sb->s_op->write_super) sb->s_op->write_super(sb); unlock_super(sb); diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index dbdfabbfd60..e7735f643cd 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -135,7 +135,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) goto out; } pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", - __FUNCTION__, count, *ppos, buffer->page); + __func__, count, *ppos, buffer->page); retval = simple_read_from_buffer(buf, count, ppos, buffer->page, buffer->count); out: diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index d9262f74f94..eb53c632f85 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -30,7 +30,7 @@ static const struct address_space_operations sysfs_aops = { static struct backing_dev_info sysfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, }; static const struct inode_operations sysfs_inode_operations ={ @@ -59,6 +59,8 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) if (error) return error; + iattr->ia_valid &= ~ATTR_SIZE; /* ignore size changes */ + error = inode_setattr(inode, iattr); if (error) return error; diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 74168266cd5..14f0023984d 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -61,7 +61,7 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent) /* instantiate and link root dentry */ root = d_alloc_root(inode); if (!root) { - pr_debug("%s: could not get root dentry!\n",__FUNCTION__); + pr_debug("%s: could not get root dentry!\n",__func__); iput(inode); return -ENOMEM; } diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 42d51d1c05c..38ebe3f85b3 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -217,9 +217,9 @@ static inline __fs32 fs32_add(struct sysv_sb_info *sbi, __fs32 *n, int d) if (sbi->s_bytesex == BYTESEX_PDP) *(__u32*)n = PDP_swab(PDP_swab(*(__u32*)n)+d); else if (sbi->s_bytesex == BYTESEX_LE) - *(__le32*)n = cpu_to_le32(le32_to_cpu(*(__le32*)n)+d); + le32_add_cpu((__le32 *)n, d); else - *(__be32*)n = cpu_to_be32(be32_to_cpu(*(__be32*)n)+d); + be32_add_cpu((__be32 *)n, d); return *n; } @@ -242,9 +242,9 @@ static inline __fs16 cpu_to_fs16(struct sysv_sb_info *sbi, __u16 n) static inline __fs16 fs16_add(struct sysv_sb_info *sbi, __fs16 *n, int d) { if (sbi->s_bytesex != BYTESEX_BE) - *(__le16*)n = cpu_to_le16(le16_to_cpu(*(__le16 *)n)+d); + le16_add_cpu((__le16 *)n, d); else - *(__be16*)n = cpu_to_be16(be16_to_cpu(*(__be16 *)n)+d); + be16_add_cpu((__be16 *)n, d); return *n; } diff --git a/fs/timerfd.c b/fs/timerfd.c index 10c80b59ec4..d87d354ec42 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -20,6 +20,7 @@ #include <linux/hrtimer.h> #include <linux/anon_inodes.h> #include <linux/timerfd.h> +#include <linux/syscalls.h> struct timerfd_ctx { struct hrtimer tmr; @@ -180,10 +181,8 @@ static struct file *timerfd_fget(int fd) asmlinkage long sys_timerfd_create(int clockid, int flags) { - int error, ufd; + int ufd; struct timerfd_ctx *ctx; - struct file *file; - struct inode *inode; if (flags) return -EINVAL; @@ -199,12 +198,9 @@ asmlinkage long sys_timerfd_create(int clockid, int flags) ctx->clockid = clockid; hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); - error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]", - &timerfd_fops, ctx); - if (error) { + ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx); + if (ufd < 0) kfree(ctx); - return error; - } return ufd; } diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 2b34c8ca6c8..d3231947db1 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -32,6 +32,7 @@ #include <linux/buffer_head.h> #include <linux/sched.h> #include <linux/crc-itu-t.h> +#include <linux/exportfs.h> static inline int udf_match(int len1, const char *name1, int len2, const char *name2) @@ -158,6 +159,8 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, sector_t offset; struct extent_position epos = {}; struct udf_inode_info *dinfo = UDF_I(dir); + int isdotdot = dentry->d_name.len == 2 && + dentry->d_name.name[0] == '.' && dentry->d_name.name[1] == '.'; size = udf_ext0_offset(dir) + dir->i_size; f_pos = udf_ext0_offset(dir); @@ -225,6 +228,12 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, continue; } + if ((cfi->fileCharacteristics & FID_FILE_CHAR_PARENT) && + isdotdot) { + brelse(epos.bh); + return fi; + } + if (!lfi) continue; @@ -286,9 +295,8 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, } } unlock_kernel(); - d_add(dentry, inode); - return NULL; + return d_splice_alias(inode, dentry); } static struct fileIdentDesc *udf_add_entry(struct inode *dir, @@ -307,7 +315,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, uint16_t liu; int block; kernel_lb_addr eloc; - uint32_t elen; + uint32_t elen = 0; sector_t offset; struct extent_position epos = {}; struct udf_inode_info *dinfo; @@ -398,7 +406,8 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, } add: - if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { + /* Is there any extent whose size we need to round up? */ + if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && elen) { elen = (elen + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1); if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) epos.offset -= sizeof(short_ad); @@ -1232,6 +1241,134 @@ end_rename: return retval; } +static struct dentry *udf_get_parent(struct dentry *child) +{ + struct dentry *parent; + struct inode *inode = NULL; + struct dentry dotdot; + struct fileIdentDesc cfi; + struct udf_fileident_bh fibh; + + dotdot.d_name.name = ".."; + dotdot.d_name.len = 2; + + lock_kernel(); + if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi)) + goto out_unlock; + + if (fibh.sbh != fibh.ebh) + brelse(fibh.ebh); + brelse(fibh.sbh); + + inode = udf_iget(child->d_inode->i_sb, + lelb_to_cpu(cfi.icb.extLocation)); + if (!inode) + goto out_unlock; + unlock_kernel(); + + parent = d_alloc_anon(inode); + if (!parent) { + iput(inode); + parent = ERR_PTR(-ENOMEM); + } + + return parent; +out_unlock: + unlock_kernel(); + return ERR_PTR(-EACCES); +} + + +static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block, + u16 partref, __u32 generation) +{ + struct inode *inode; + struct dentry *result; + kernel_lb_addr loc; + + if (block == 0) + return ERR_PTR(-ESTALE); + + loc.logicalBlockNum = block; + loc.partitionReferenceNum = partref; + inode = udf_iget(sb, loc); + + if (inode == NULL) + return ERR_PTR(-ENOMEM); + + if (generation && inode->i_generation != generation) { + iput(inode); + return ERR_PTR(-ESTALE); + } + result = d_alloc_anon(inode); + if (!result) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + return result; +} + +static struct dentry *udf_fh_to_dentry(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) +{ + if ((fh_len != 3 && fh_len != 5) || + (fh_type != FILEID_UDF_WITH_PARENT && + fh_type != FILEID_UDF_WITHOUT_PARENT)) + return NULL; + + return udf_nfs_get_inode(sb, fid->udf.block, fid->udf.partref, + fid->udf.generation); +} + +static struct dentry *udf_fh_to_parent(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) +{ + if (fh_len != 5 || fh_type != FILEID_UDF_WITH_PARENT) + return NULL; + + return udf_nfs_get_inode(sb, fid->udf.parent_block, + fid->udf.parent_partref, + fid->udf.parent_generation); +} +static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp, + int connectable) +{ + int len = *lenp; + struct inode *inode = de->d_inode; + kernel_lb_addr location = UDF_I(inode)->i_location; + struct fid *fid = (struct fid *)fh; + int type = FILEID_UDF_WITHOUT_PARENT; + + if (len < 3 || (connectable && len < 5)) + return 255; + + *lenp = 3; + fid->udf.block = location.logicalBlockNum; + fid->udf.partref = location.partitionReferenceNum; + fid->udf.generation = inode->i_generation; + + if (connectable && !S_ISDIR(inode->i_mode)) { + spin_lock(&de->d_lock); + inode = de->d_parent->d_inode; + location = UDF_I(inode)->i_location; + fid->udf.parent_block = location.logicalBlockNum; + fid->udf.parent_partref = location.partitionReferenceNum; + fid->udf.parent_generation = inode->i_generation; + spin_unlock(&de->d_lock); + *lenp = 5; + type = FILEID_UDF_WITH_PARENT; + } + + return type; +} + +const struct export_operations udf_export_ops = { + .encode_fh = udf_encode_fh, + .fh_to_dentry = udf_fh_to_dentry, + .fh_to_parent = udf_fh_to_parent, + .get_parent = udf_get_parent, +}; + const struct inode_operations udf_dir_inode_operations = { .lookup = udf_lookup, .create = udf_create, diff --git a/fs/udf/partition.c b/fs/udf/partition.c index 63610f026ae..96dfd207c3d 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -27,8 +27,8 @@ #include <linux/slab.h> #include <linux/buffer_head.h> -inline uint32_t udf_get_pblock(struct super_block *sb, uint32_t block, - uint16_t partition, uint32_t offset) +uint32_t udf_get_pblock(struct super_block *sb, uint32_t block, + uint16_t partition, uint32_t offset) { struct udf_sb_info *sbi = UDF_SB(sb); struct udf_part_map *map; diff --git a/fs/udf/super.c b/fs/udf/super.c index b564fc140fe..7a5f69be6ac 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -240,7 +240,7 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count) sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map), GFP_KERNEL); if (!sbi->s_partmaps) { - udf_error(sb, __FUNCTION__, + udf_error(sb, __func__, "Unable to allocate space for %d partition maps", count); sbi->s_partitions = 0; @@ -1086,7 +1086,7 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index) bitmap = vmalloc(size); /* TODO: get rid of vmalloc */ if (bitmap == NULL) { - udf_error(sb, __FUNCTION__, + udf_error(sb, __func__, "Unable to allocate space for bitmap " "and %d buffer_head pointers", nr_groups); return NULL; @@ -1933,6 +1933,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) /* Fill in the rest of the superblock */ sb->s_op = &udf_sb_ops; + sb->s_export_op = &udf_export_ops; sb->dq_op = NULL; sb->s_dirt = 0; sb->s_magic = UDF_SUPER_MAGIC; diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index f3f45d02927..8fa9c2d7091 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -73,6 +73,7 @@ struct task_struct; struct buffer_head; struct super_block; +extern const struct export_operations udf_export_ops; extern const struct inode_operations udf_dir_inode_operations; extern const struct file_operations udf_dir_operations; extern const struct inode_operations udf_file_inode_operations; diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 244a1aaa940..11c035168ea 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -107,7 +107,6 @@ extern struct inode * ufs_new_inode (struct inode *, int); /* inode.c */ extern struct inode *ufs_iget(struct super_block *, unsigned long); -extern void ufs_put_inode (struct inode *); extern int ufs_write_inode (struct inode *, int); extern int ufs_sync_inode (struct inode *); extern void ufs_delete_inode (struct inode *); diff --git a/fs/utimes.c b/fs/utimes.c index a2bef77dc9c..af059d5cb48 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -40,9 +40,14 @@ asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times) #endif +static bool nsec_special(long nsec) +{ + return nsec == UTIME_OMIT || nsec == UTIME_NOW; +} + static bool nsec_valid(long nsec) { - if (nsec == UTIME_OMIT || nsec == UTIME_NOW) + if (nsec_special(nsec)) return true; return nsec >= 0 && nsec <= 999999999; @@ -119,7 +124,15 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; newattrs.ia_valid |= ATTR_MTIME_SET; } - } else { + } + + /* + * If times is NULL or both times are either UTIME_OMIT or + * UTIME_NOW, then need to check permissions, because + * inode_change_ok() won't do it. + */ + if (!times || (nsec_special(times[0].tv_nsec) && + nsec_special(times[1].tv_nsec))) { error = -EACCES; if (IS_IMMUTABLE(inode)) goto mnt_drop_write_and_out; diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index 5b66162d074..a3522727ea5 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -986,7 +986,7 @@ error_inode: if (corrupt < 0) { fat_fs_panic(new_dir->i_sb, "%s: Filesystem corrupted (i_pos %lld)", - __FUNCTION__, sinfo.i_pos); + __func__, sinfo.i_pos); } goto out; } diff --git a/fs/xattr.c b/fs/xattr.c index 89a942f07e1..4706a8b1f49 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -67,7 +67,7 @@ xattr_permission(struct inode *inode, const char *name, int mask) } int -vfs_setxattr(struct dentry *dentry, char *name, void *value, +vfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct inode *inode = dentry->d_inode; @@ -131,7 +131,7 @@ out_noalloc: EXPORT_SYMBOL_GPL(xattr_getsecurity); ssize_t -vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) +vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) { struct inode *inode = dentry->d_inode; int error; @@ -187,7 +187,7 @@ vfs_listxattr(struct dentry *d, char *list, size_t size) EXPORT_SYMBOL_GPL(vfs_listxattr); int -vfs_removexattr(struct dentry *dentry, char *name) +vfs_removexattr(struct dentry *dentry, const char *name) { struct inode *inode = dentry->d_inode; int error; @@ -218,7 +218,7 @@ EXPORT_SYMBOL_GPL(vfs_removexattr); * Extended attribute SET operations */ static long -setxattr(struct dentry *d, char __user *name, void __user *value, +setxattr(struct dentry *d, const char __user *name, const void __user *value, size_t size, int flags) { int error; @@ -252,8 +252,8 @@ setxattr(struct dentry *d, char __user *name, void __user *value, } asmlinkage long -sys_setxattr(char __user *path, char __user *name, void __user *value, - size_t size, int flags) +sys_setxattr(const char __user *path, const char __user *name, + const void __user *value, size_t size, int flags) { struct nameidata nd; int error; @@ -271,8 +271,8 @@ sys_setxattr(char __user *path, char __user *name, void __user *value, } asmlinkage long -sys_lsetxattr(char __user *path, char __user *name, void __user *value, - size_t size, int flags) +sys_lsetxattr(const char __user *path, const char __user *name, + const void __user *value, size_t size, int flags) { struct nameidata nd; int error; @@ -290,7 +290,7 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value, } asmlinkage long -sys_fsetxattr(int fd, char __user *name, void __user *value, +sys_fsetxattr(int fd, const char __user *name, const void __user *value, size_t size, int flags) { struct file *f; @@ -315,7 +315,8 @@ sys_fsetxattr(int fd, char __user *name, void __user *value, * Extended attribute GET operations */ static ssize_t -getxattr(struct dentry *d, char __user *name, void __user *value, size_t size) +getxattr(struct dentry *d, const char __user *name, void __user *value, + size_t size) { ssize_t error; void *kvalue = NULL; @@ -349,8 +350,8 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size) } asmlinkage ssize_t -sys_getxattr(char __user *path, char __user *name, void __user *value, - size_t size) +sys_getxattr(const char __user *path, const char __user *name, + void __user *value, size_t size) { struct nameidata nd; ssize_t error; @@ -364,7 +365,7 @@ sys_getxattr(char __user *path, char __user *name, void __user *value, } asmlinkage ssize_t -sys_lgetxattr(char __user *path, char __user *name, void __user *value, +sys_lgetxattr(const char __user *path, const char __user *name, void __user *value, size_t size) { struct nameidata nd; @@ -379,7 +380,7 @@ sys_lgetxattr(char __user *path, char __user *name, void __user *value, } asmlinkage ssize_t -sys_fgetxattr(int fd, char __user *name, void __user *value, size_t size) +sys_fgetxattr(int fd, const char __user *name, void __user *value, size_t size) { struct file *f; ssize_t error = -EBADF; @@ -424,7 +425,7 @@ listxattr(struct dentry *d, char __user *list, size_t size) } asmlinkage ssize_t -sys_listxattr(char __user *path, char __user *list, size_t size) +sys_listxattr(const char __user *path, char __user *list, size_t size) { struct nameidata nd; ssize_t error; @@ -438,7 +439,7 @@ sys_listxattr(char __user *path, char __user *list, size_t size) } asmlinkage ssize_t -sys_llistxattr(char __user *path, char __user *list, size_t size) +sys_llistxattr(const char __user *path, char __user *list, size_t size) { struct nameidata nd; ssize_t error; @@ -470,7 +471,7 @@ sys_flistxattr(int fd, char __user *list, size_t size) * Extended attribute REMOVE operations */ static long -removexattr(struct dentry *d, char __user *name) +removexattr(struct dentry *d, const char __user *name) { int error; char kname[XATTR_NAME_MAX + 1]; @@ -485,7 +486,7 @@ removexattr(struct dentry *d, char __user *name) } asmlinkage long -sys_removexattr(char __user *path, char __user *name) +sys_removexattr(const char __user *path, const char __user *name) { struct nameidata nd; int error; @@ -503,7 +504,7 @@ sys_removexattr(char __user *path, char __user *name) } asmlinkage long -sys_lremovexattr(char __user *path, char __user *name) +sys_lremovexattr(const char __user *path, const char __user *name) { struct nameidata nd; int error; @@ -521,7 +522,7 @@ sys_lremovexattr(char __user *path, char __user *name) } asmlinkage long -sys_fremovexattr(int fd, char __user *name) +sys_fremovexattr(int fd, const char __user *name) { struct file *f; struct dentry *dentry; |