From 454e2398be9b9fa30433fccc548db34d19aa9958 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Jun 2006 02:02:57 -0700 Subject: [PATCH] VFS: Permit filesystem to override root dentry on mount Extend the get_sb() filesystem operation to take an extra argument that permits the VFS to pass in the target vfsmount that defines the mountpoint. The filesystem is then required to manually set the superblock and root dentry pointers. For most filesystems, this should be done with simple_set_mnt() which will set the superblock pointer and then set the root dentry to the superblock's s_root (as per the old default behaviour). The get_sb() op now returns an integer as there's now no need to return the superblock pointer. This patch permits a superblock to be implicitly shared amongst several mount points, such as can be done with NFS to avoid potential inode aliasing. In such a case, simple_set_mnt() would not be called, and instead the mnt_root and mnt_sb would be set directly. The patch also makes the following changes: (*) the get_sb_*() convenience functions in the core kernel now take a vfsmount pointer argument and return an integer, so most filesystems have to change very little. (*) If one of the convenience function is not used, then get_sb() should normally call simple_set_mnt() to instantiate the vfsmount. This will always return 0, and so can be tail-called from get_sb(). (*) generic_shutdown_super() now calls shrink_dcache_sb() to clean up the dcache upon superblock destruction rather than shrink_dcache_anon(). This is required because the superblock may now have multiple trees that aren't actually bound to s_root, but that still need to be cleaned up. The currently called functions assume that the whole tree is rooted at s_root, and that anonymous dentries are not the roots of trees which results in dentries being left unculled. However, with the way NFS superblock sharing are currently set to be implemented, these assumptions are violated: the root of the filesystem is simply a dummy dentry and inode (the real inode for '/' may well be inaccessible), and all the vfsmounts are rooted on anonymous[*] dentries with child trees. [*] Anonymous until discovered from another tree. (*) The documentation has been adjusted, including the additional bit of changing ext2_* into foo_* in the documentation. [akpm@osdl.org: convert ipath_fs, do other stuff] Signed-off-by: David Howells Acked-by: Al Viro Cc: Nathan Scott Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/inode.c | 96 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 58 insertions(+), 38 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d0b991a9232..ff645a961bc 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1690,8 +1690,8 @@ static int nfs_compare_super(struct super_block *sb, void *data) return !nfs_compare_fh(&old->fh, &server->fh); } -static struct super_block *nfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) +static int nfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { int error; struct nfs_server *server = NULL; @@ -1699,14 +1699,14 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, struct nfs_fh *root; struct nfs_mount_data *data = raw_data; - s = ERR_PTR(-EINVAL); + error = -EINVAL; if (data == NULL) { dprintk("%s: missing data argument\n", __FUNCTION__); - goto out_err; + goto out_err_noserver; } if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { dprintk("%s: bad mount version\n", __FUNCTION__); - goto out_err; + goto out_err_noserver; } switch (data->version) { case 1: @@ -1718,7 +1718,7 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, dprintk("%s: mount structure version %d does not support NFSv3\n", __FUNCTION__, data->version); - goto out_err; + goto out_err_noserver; } data->root.size = NFS2_FHSIZE; memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); @@ -1727,24 +1727,24 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, dprintk("%s: mount structure version %d does not support strong security\n", __FUNCTION__, data->version); - goto out_err; + goto out_err_noserver; } case 5: memset(data->context, 0, sizeof(data->context)); } #ifndef CONFIG_NFS_V3 /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ - s = ERR_PTR(-EPROTONOSUPPORT); + error = -EPROTONOSUPPORT; if (data->flags & NFS_MOUNT_VER3) { dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); - goto out_err; + goto out_err_noserver; } #endif /* CONFIG_NFS_V3 */ - s = ERR_PTR(-ENOMEM); + error = -ENOMEM; server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) - goto out_err; + goto out_err_noserver; /* Zero out the NFS state stuff */ init_nfsv4_state(server); server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); @@ -1754,7 +1754,7 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, root->size = data->root.size; else root->size = NFS2_FHSIZE; - s = ERR_PTR(-EINVAL); + error = -EINVAL; if (root->size > sizeof(root->data)) { dprintk("%s: invalid root filehandle\n", __FUNCTION__); goto out_err; @@ -1770,15 +1770,20 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, } /* Fire up rpciod if not yet running */ - s = ERR_PTR(rpciod_up()); - if (IS_ERR(s)) { - dprintk("%s: couldn't start rpciod! Error = %ld\n", - __FUNCTION__, PTR_ERR(s)); + error = rpciod_up(); + if (error < 0) { + dprintk("%s: couldn't start rpciod! Error = %d\n", + __FUNCTION__, error); goto out_err; } s = sget(fs_type, nfs_compare_super, nfs_set_super, server); - if (IS_ERR(s) || s->s_root) + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_rpciod; + } + + if (s->s_root) goto out_rpciod_down; s->s_flags = flags; @@ -1787,15 +1792,22 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, if (error) { up_write(&s->s_umount); deactivate_super(s); - return ERR_PTR(error); + return error; } s->s_flags |= MS_ACTIVE; - return s; + return simple_set_mnt(mnt, s); + out_rpciod_down: rpciod_down(); + kfree(server); + return simple_set_mnt(mnt, s); + +out_err_rpciod: + rpciod_down(); out_err: kfree(server); - return s; +out_err_noserver: + return error; } static void nfs_kill_super(struct super_block *s) @@ -2032,8 +2044,8 @@ nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) return dst; } -static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) +static int nfs4_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { int error; struct nfs_server *server; @@ -2043,16 +2055,16 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, if (data == NULL) { dprintk("%s: missing data argument\n", __FUNCTION__); - return ERR_PTR(-EINVAL); + return -EINVAL; } if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { dprintk("%s: bad mount version\n", __FUNCTION__); - return ERR_PTR(-EINVAL); + return -EINVAL; } server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) - return ERR_PTR(-ENOMEM); + return -ENOMEM; /* Zero out the NFS state stuff */ init_nfsv4_state(server); server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); @@ -2074,33 +2086,41 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, /* We now require that the mount process passes the remote address */ if (data->host_addrlen != sizeof(server->addr)) { - s = ERR_PTR(-EINVAL); + error = -EINVAL; goto out_free; } if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) { - s = ERR_PTR(-EFAULT); + error = -EFAULT; goto out_free; } if (server->addr.sin_family != AF_INET || server->addr.sin_addr.s_addr == INADDR_ANY) { dprintk("%s: mount program didn't pass remote IP address!\n", __FUNCTION__); - s = ERR_PTR(-EINVAL); + error = -EINVAL; goto out_free; } /* Fire up rpciod if not yet running */ - s = ERR_PTR(rpciod_up()); - if (IS_ERR(s)) { - dprintk("%s: couldn't start rpciod! Error = %ld\n", - __FUNCTION__, PTR_ERR(s)); + error = rpciod_up(); + if (error < 0) { + dprintk("%s: couldn't start rpciod! Error = %d\n", + __FUNCTION__, error); goto out_free; } s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); - - if (IS_ERR(s) || s->s_root) + if (IS_ERR(s)) { + error = PTR_ERR(s); goto out_free; + } + + if (s->s_root) { + kfree(server->mnt_path); + kfree(server->hostname); + kfree(server); + return simple_set_mnt(mnt, s); + } s->s_flags = flags; @@ -2108,17 +2128,17 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, if (error) { up_write(&s->s_umount); deactivate_super(s); - return ERR_PTR(error); + return error; } s->s_flags |= MS_ACTIVE; - return s; + return simple_set_mnt(mnt, s); out_err: - s = (struct super_block *)p; + error = PTR_ERR(p); out_free: kfree(server->mnt_path); kfree(server->hostname); kfree(server); - return s; + return error; } static void nfs4_kill_super(struct super_block *sb) -- cgit v1.2.3 From 726c334223180e3c0197cc980a432681370d4baf Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Jun 2006 02:02:58 -0700 Subject: [PATCH] VFS: Permit filesystem to perform statfs with a known root dentry Give the statfs superblock operation a dentry pointer rather than a superblock pointer. This complements the get_sb() patch. That reduced the significance of sb->s_root, allowing NFS to place a fake root there. However, NFS does require a dentry to use as a target for the statfs operation. This permits the root in the vfsmount to be used instead. linux/mount.h has been added where necessary to make allyesconfig build successfully. Interest has also been expressed for use with the FUSE and XFS filesystems. Signed-off-by: David Howells Acked-by: Al Viro Cc: Nathan Scott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index ff645a961bc..937fbfc381b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -65,7 +65,7 @@ static int nfs_write_inode(struct inode *,int); static void nfs_delete_inode(struct inode *); static void nfs_clear_inode(struct inode *); static void nfs_umount_begin(struct super_block *); -static int nfs_statfs(struct super_block *, struct kstatfs *); +static int nfs_statfs(struct dentry *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); static int nfs_show_stats(struct seq_file *, struct vfsmount *); static void nfs_zap_acl_cache(struct inode *); @@ -534,8 +534,9 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) } static int -nfs_statfs(struct super_block *sb, struct kstatfs *buf) +nfs_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct super_block *sb = dentry->d_sb; struct nfs_server *server = NFS_SB(sb); unsigned char blockbits; unsigned long blockres; -- cgit v1.2.3 From 75e1fcc0b18df0a65ab113198e9dc0e98999a08c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 23 Jun 2006 02:05:12 -0700 Subject: [PATCH] vfs: add lock owner argument to flush operation Pass the POSIX lock owner ID to the flush operation. This is useful for filesystems which don't want to store any locking state in inode->i_flock but want to handle locking/unlocking POSIX locks internally. FUSE is one such filesystem but I think it possible that some network filesystems would need this also. Also add a flag to indicate that a POSIX locking request was generated by close(), so filesystems using the above feature won't send an extra locking request in this case. Signed-off-by: Miklos Szeredi Cc: Trond Myklebust Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index fade02c15e6..fa05c027ea1 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -43,7 +43,7 @@ static int nfs_file_mmap(struct file *, struct vm_area_struct *); static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t); static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t); -static int nfs_file_flush(struct file *); +static int nfs_file_flush(struct file *, fl_owner_t id); static int nfs_fsync(struct file *, struct dentry *dentry, int datasync); static int nfs_check_flags(int flags); static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); @@ -188,7 +188,7 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) * */ static int -nfs_file_flush(struct file *file) +nfs_file_flush(struct file *file, fl_owner_t id) { struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; struct inode *inode = file->f_dentry->d_inode; -- cgit v1.2.3