From d296d30a9948e895bff005d92c38309e8bd30975 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Jan 2009 02:02:57 +0100 Subject: xfs: fix dentry aliasing issues in open_by_handle Open by handle just grabs an inode by handle and then creates itself a dentry for it. While this works for regular files it is horribly broken for directories, where the VFS locking relies on the fact that there is only just one single dentry for a given inode, and that these are always connected to the root of the filesystem so that it's locking algorithms work (see Documentations/filesystems/Locking) Remove all the existing open by handle code and replace it with a small wrapper around the exportfs code which deals with all these issues. At the same time we also make the checks for a valid handle strict enough to reject all not perfectly well formed handles - given that we never hand out others that's okay and simplifies the code. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/Kconfig | 1 + fs/xfs/linux-2.6/xfs_ioctl.c | 305 ++++++++++++++++++----------------------- fs/xfs/linux-2.6/xfs_ioctl.h | 15 +- fs/xfs/linux-2.6/xfs_ioctl32.c | 175 +++++++---------------- 4 files changed, 196 insertions(+), 300 deletions(-) diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 3f53dd101f9..29228f5899c 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -1,6 +1,7 @@ config XFS_FS tristate "XFS filesystem support" depends on BLOCK + select EXPORTFS help XFS is a high performance journaling filesystem which originated on the SGI IRIX platform. It is completely multi-threaded, can diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index e5be1e0be80..4bd112313f3 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -50,12 +50,14 @@ #include "xfs_vnodeops.h" #include "xfs_quota.h" #include "xfs_inode_item.h" +#include "xfs_export.h" #include #include #include #include #include +#include /* * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to @@ -164,97 +166,69 @@ xfs_find_handle( return 0; } - /* - * Convert userspace handle data into inode. - * - * We use the fact that all the fsop_handlereq ioctl calls have a data - * structure argument whose first component is always a xfs_fsop_handlereq_t, - * so we can pass that sub structure into this handy, shared routine. - * - * If no error, caller must always iput the returned inode. + * No need to do permission checks on the various pathname components + * as the handle operations are privileged. */ STATIC int -xfs_vget_fsop_handlereq( - xfs_mount_t *mp, - struct inode *parinode, /* parent inode pointer */ - xfs_fsop_handlereq_t *hreq, - struct inode **inode) +xfs_handle_acceptable( + void *context, + struct dentry *dentry) +{ + return 1; +} + +/* + * Convert userspace handle data into a dentry. + */ +struct dentry * +xfs_handle_to_dentry( + struct file *parfilp, + void __user *uhandle, + u32 hlen) { - void __user *hanp; - size_t hlen; - xfs_fid_t *xfid; - xfs_handle_t *handlep; xfs_handle_t handle; - xfs_inode_t *ip; - xfs_ino_t ino; - __u32 igen; - int error; + struct xfs_fid64 fid; /* * Only allow handle opens under a directory. */ - if (!S_ISDIR(parinode->i_mode)) - return XFS_ERROR(ENOTDIR); - - hanp = hreq->ihandle; - hlen = hreq->ihandlen; - handlep = &handle; - - if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep)) - return XFS_ERROR(EINVAL); - if (copy_from_user(handlep, hanp, hlen)) - return XFS_ERROR(EFAULT); - if (hlen < sizeof(*handlep)) - memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen); - if (hlen > sizeof(handlep->ha_fsid)) { - if (handlep->ha_fid.fid_len != - (hlen - sizeof(handlep->ha_fsid) - - sizeof(handlep->ha_fid.fid_len)) || - handlep->ha_fid.fid_pad) - return XFS_ERROR(EINVAL); - } - - /* - * Crack the handle, obtain the inode # & generation # - */ - xfid = (struct xfs_fid *)&handlep->ha_fid; - if (xfid->fid_len == sizeof(*xfid) - sizeof(xfid->fid_len)) { - ino = xfid->fid_ino; - igen = xfid->fid_gen; - } else { - return XFS_ERROR(EINVAL); - } - - /* - * Get the XFS inode, building a Linux inode to go with it. - */ - error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0); - if (error) - return error; - if (ip == NULL) - return XFS_ERROR(EIO); - if (ip->i_d.di_gen != igen) { - xfs_iput_new(ip, XFS_ILOCK_SHARED); - return XFS_ERROR(ENOENT); - } - - xfs_iunlock(ip, XFS_ILOCK_SHARED); + if (!S_ISDIR(parfilp->f_path.dentry->d_inode->i_mode)) + return ERR_PTR(-ENOTDIR); + + if (hlen != sizeof(xfs_handle_t)) + return ERR_PTR(-EINVAL); + if (copy_from_user(&handle, uhandle, hlen)) + return ERR_PTR(-EFAULT); + if (handle.ha_fid.fid_len != + sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len)) + return ERR_PTR(-EINVAL); + + memset(&fid, 0, sizeof(struct fid)); + fid.ino = handle.ha_fid.fid_ino; + fid.gen = handle.ha_fid.fid_gen; + + return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3, + FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG, + xfs_handle_acceptable, NULL); +} - *inode = VFS_I(ip); - return 0; +STATIC struct dentry * +xfs_handlereq_to_dentry( + struct file *parfilp, + xfs_fsop_handlereq_t *hreq) +{ + return xfs_handle_to_dentry(parfilp, hreq->ihandle, hreq->ihandlen); } int xfs_open_by_handle( - xfs_mount_t *mp, - xfs_fsop_handlereq_t *hreq, struct file *parfilp, - struct inode *parinode) + xfs_fsop_handlereq_t *hreq) { const struct cred *cred = current_cred(); int error; - int new_fd; + int fd; int permflag; struct file *filp; struct inode *inode; @@ -263,19 +237,21 @@ xfs_open_by_handle( if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); - error = xfs_vget_fsop_handlereq(mp, parinode, hreq, &inode); - if (error) - return -error; + dentry = xfs_handlereq_to_dentry(parfilp, hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + inode = dentry->d_inode; /* Restrict xfs_open_by_handle to directories & regular files. */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { - iput(inode); - return -XFS_ERROR(EINVAL); + error = -XFS_ERROR(EPERM); + goto out_dput; } #if BITS_PER_LONG != 32 hreq->oflags |= O_LARGEFILE; #endif + /* Put open permission in namei format. */ permflag = hreq->oflags; if ((permflag+1) & O_ACCMODE) @@ -285,50 +261,45 @@ xfs_open_by_handle( if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && (permflag & FMODE_WRITE) && IS_APPEND(inode)) { - iput(inode); - return -XFS_ERROR(EPERM); + error = -XFS_ERROR(EPERM); + goto out_dput; } if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) { - iput(inode); - return -XFS_ERROR(EACCES); + error = -XFS_ERROR(EACCES); + goto out_dput; } /* Can't write directories. */ - if ( S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) { - iput(inode); - return -XFS_ERROR(EISDIR); + if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) { + error = -XFS_ERROR(EISDIR); + goto out_dput; } - if ((new_fd = get_unused_fd()) < 0) { - iput(inode); - return new_fd; + fd = get_unused_fd(); + if (fd < 0) { + error = fd; + goto out_dput; } - dentry = d_obtain_alias(inode); - if (IS_ERR(dentry)) { - put_unused_fd(new_fd); - return PTR_ERR(dentry); - } - - /* Ensure umount returns EBUSY on umounts while this file is open. */ - mntget(parfilp->f_path.mnt); - - /* Create file pointer. */ - filp = dentry_open(dentry, parfilp->f_path.mnt, hreq->oflags, cred); + filp = dentry_open(dentry, mntget(parfilp->f_path.mnt), + hreq->oflags, cred); if (IS_ERR(filp)) { - put_unused_fd(new_fd); - return -XFS_ERROR(-PTR_ERR(filp)); + put_unused_fd(fd); + return PTR_ERR(filp); } if (inode->i_mode & S_IFREG) { - /* invisible operation should not change atime */ filp->f_flags |= O_NOATIME; filp->f_mode |= FMODE_NOCMTIME; } - fd_install(new_fd, filp); - return new_fd; + fd_install(fd, filp); + return fd; + + out_dput: + dput(dentry); + return error; } /* @@ -359,11 +330,10 @@ do_readlink( int xfs_readlink_by_handle( - xfs_mount_t *mp, - xfs_fsop_handlereq_t *hreq, - struct inode *parinode) + struct file *parfilp, + xfs_fsop_handlereq_t *hreq) { - struct inode *inode; + struct dentry *dentry; __u32 olen; void *link; int error; @@ -371,26 +341,28 @@ xfs_readlink_by_handle( if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); - error = xfs_vget_fsop_handlereq(mp, parinode, hreq, &inode); - if (error) - return -error; + dentry = xfs_handlereq_to_dentry(parfilp, hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); /* Restrict this handle operation to symlinks only. */ - if (!S_ISLNK(inode->i_mode)) { + if (!S_ISLNK(dentry->d_inode->i_mode)) { error = -XFS_ERROR(EINVAL); - goto out_iput; + goto out_dput; } if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) { error = -XFS_ERROR(EFAULT); - goto out_iput; + goto out_dput; } link = kmalloc(MAXPATHLEN+1, GFP_KERNEL); - if (!link) - goto out_iput; + if (!link) { + error = -XFS_ERROR(ENOMEM); + goto out_dput; + } - error = -xfs_readlink(XFS_I(inode), link); + error = -xfs_readlink(XFS_I(dentry->d_inode), link); if (error) goto out_kfree; error = do_readlink(hreq->ohandle, olen, link); @@ -399,32 +371,31 @@ xfs_readlink_by_handle( out_kfree: kfree(link); - out_iput: - iput(inode); + out_dput: + dput(dentry); return error; } STATIC int xfs_fssetdm_by_handle( - xfs_mount_t *mp, - void __user *arg, - struct inode *parinode) + struct file *parfilp, + void __user *arg) { int error; struct fsdmidata fsd; xfs_fsop_setdm_handlereq_t dmhreq; - struct inode *inode; + struct dentry *dentry; if (!capable(CAP_MKNOD)) return -XFS_ERROR(EPERM); if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) return -XFS_ERROR(EFAULT); - error = xfs_vget_fsop_handlereq(mp, parinode, &dmhreq.hreq, &inode); - if (error) - return -error; + dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { error = -XFS_ERROR(EPERM); goto out; } @@ -434,24 +405,23 @@ xfs_fssetdm_by_handle( goto out; } - error = -xfs_set_dmattrs(XFS_I(inode), fsd.fsd_dmevmask, + error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, fsd.fsd_dmstate); out: - iput(inode); + dput(dentry); return error; } STATIC int xfs_attrlist_by_handle( - xfs_mount_t *mp, - void __user *arg, - struct inode *parinode) + struct file *parfilp, + void __user *arg) { - int error; + int error = -ENOMEM; attrlist_cursor_kern_t *cursor; xfs_fsop_attrlist_handlereq_t al_hreq; - struct inode *inode; + struct dentry *dentry; char *kbuf; if (!capable(CAP_SYS_ADMIN)) @@ -467,16 +437,16 @@ xfs_attrlist_by_handle( if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) return -XFS_ERROR(EINVAL); - error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, &inode); - if (error) - goto out; + dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL); if (!kbuf) - goto out_vn_rele; + goto out_dput; cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; - error = xfs_attr_list(XFS_I(inode), kbuf, al_hreq.buflen, + error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, al_hreq.flags, cursor); if (error) goto out_kfree; @@ -486,10 +456,9 @@ xfs_attrlist_by_handle( out_kfree: kfree(kbuf); - out_vn_rele: - iput(inode); - out: - return -error; + out_dput: + dput(dentry); + return error; } int @@ -564,15 +533,13 @@ xfs_attrmulti_attr_remove( STATIC int xfs_attrmulti_by_handle( - xfs_mount_t *mp, - void __user *arg, struct file *parfilp, - struct inode *parinode) + void __user *arg) { int error; xfs_attr_multiop_t *ops; xfs_fsop_attrmulti_handlereq_t am_hreq; - struct inode *inode; + struct dentry *dentry; unsigned int i, size; char *attr_name; @@ -581,19 +548,19 @@ xfs_attrmulti_by_handle( if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) return -XFS_ERROR(EFAULT); - error = xfs_vget_fsop_handlereq(mp, parinode, &am_hreq.hreq, &inode); - if (error) - goto out; + dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); error = E2BIG; size = am_hreq.opcount * sizeof(xfs_attr_multiop_t); if (!size || size > 16 * PAGE_SIZE) - goto out_vn_rele; + goto out_dput; error = ENOMEM; ops = kmalloc(size, GFP_KERNEL); if (!ops) - goto out_vn_rele; + goto out_dput; error = EFAULT; if (copy_from_user(ops, am_hreq.ops, size)) @@ -615,25 +582,28 @@ xfs_attrmulti_by_handle( switch (ops[i].am_opcode) { case ATTR_OP_GET: - ops[i].am_error = xfs_attrmulti_attr_get(inode, - attr_name, ops[i].am_attrvalue, - &ops[i].am_length, ops[i].am_flags); + ops[i].am_error = xfs_attrmulti_attr_get( + dentry->d_inode, attr_name, + ops[i].am_attrvalue, &ops[i].am_length, + ops[i].am_flags); break; case ATTR_OP_SET: ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); if (ops[i].am_error) break; - ops[i].am_error = xfs_attrmulti_attr_set(inode, - attr_name, ops[i].am_attrvalue, - ops[i].am_length, ops[i].am_flags); + ops[i].am_error = xfs_attrmulti_attr_set( + dentry->d_inode, attr_name, + ops[i].am_attrvalue, ops[i].am_length, + ops[i].am_flags); mnt_drop_write(parfilp->f_path.mnt); break; case ATTR_OP_REMOVE: ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); if (ops[i].am_error) break; - ops[i].am_error = xfs_attrmulti_attr_remove(inode, - attr_name, ops[i].am_flags); + ops[i].am_error = xfs_attrmulti_attr_remove( + dentry->d_inode, attr_name, + ops[i].am_flags); mnt_drop_write(parfilp->f_path.mnt); break; default: @@ -647,9 +617,8 @@ xfs_attrmulti_by_handle( kfree(attr_name); out_kfree_ops: kfree(ops); - out_vn_rele: - iput(inode); - out: + out_dput: + dput(dentry); return -error; } @@ -1440,23 +1409,23 @@ xfs_file_ioctl( if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) return -XFS_ERROR(EFAULT); - return xfs_open_by_handle(mp, &hreq, filp, inode); + return xfs_open_by_handle(filp, &hreq); } case XFS_IOC_FSSETDM_BY_HANDLE: - return xfs_fssetdm_by_handle(mp, arg, inode); + return xfs_fssetdm_by_handle(filp, arg); case XFS_IOC_READLINK_BY_HANDLE: { xfs_fsop_handlereq_t hreq; if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) return -XFS_ERROR(EFAULT); - return xfs_readlink_by_handle(mp, &hreq, inode); + return xfs_readlink_by_handle(filp, &hreq); } case XFS_IOC_ATTRLIST_BY_HANDLE: - return xfs_attrlist_by_handle(mp, arg, inode); + return xfs_attrlist_by_handle(filp, arg); case XFS_IOC_ATTRMULTI_BY_HANDLE: - return xfs_attrmulti_by_handle(mp, arg, filp, inode); + return xfs_attrmulti_by_handle(filp, arg); case XFS_IOC_SWAPEXT: { struct xfs_swapext sxp; diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h index 8c16bf2d7e0..7bd7c6afc1e 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.h +++ b/fs/xfs/linux-2.6/xfs_ioctl.h @@ -34,16 +34,13 @@ xfs_find_handle( extern int xfs_open_by_handle( - xfs_mount_t *mp, - xfs_fsop_handlereq_t *hreq, struct file *parfilp, - struct inode *parinode); + xfs_fsop_handlereq_t *hreq); extern int xfs_readlink_by_handle( - xfs_mount_t *mp, - xfs_fsop_handlereq_t *hreq, - struct inode *parinode); + struct file *parfilp, + xfs_fsop_handlereq_t *hreq); extern int xfs_attrmulti_attr_get( @@ -67,6 +64,12 @@ xfs_attrmulti_attr_remove( char *name, __uint32_t flags); +extern struct dentry * +xfs_handle_to_dentry( + struct file *parfilp, + void __user *uhandle, + u32 hlen); + extern long xfs_file_ioctl( struct file *filp, diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 50903ad3182..fd4362063f2 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -340,96 +340,24 @@ xfs_compat_handlereq_copyin( return 0; } -/* - * Convert userspace handle data into inode. - * - * We use the fact that all the fsop_handlereq ioctl calls have a data - * structure argument whose first component is always a xfs_fsop_handlereq_t, - * so we can pass that sub structure into this handy, shared routine. - * - * If no error, caller must always iput the returned inode. - */ -STATIC int -xfs_vget_fsop_handlereq_compat( - xfs_mount_t *mp, - struct inode *parinode, /* parent inode pointer */ - compat_xfs_fsop_handlereq_t *hreq, - struct inode **inode) +STATIC struct dentry * +xfs_compat_handlereq_to_dentry( + struct file *parfilp, + compat_xfs_fsop_handlereq_t *hreq) { - void __user *hanp; - size_t hlen; - xfs_fid_t *xfid; - xfs_handle_t *handlep; - xfs_handle_t handle; - xfs_inode_t *ip; - xfs_ino_t ino; - __u32 igen; - int error; - - /* - * Only allow handle opens under a directory. - */ - if (!S_ISDIR(parinode->i_mode)) - return XFS_ERROR(ENOTDIR); - - hanp = compat_ptr(hreq->ihandle); - hlen = hreq->ihandlen; - handlep = &handle; - - if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep)) - return XFS_ERROR(EINVAL); - if (copy_from_user(handlep, hanp, hlen)) - return XFS_ERROR(EFAULT); - if (hlen < sizeof(*handlep)) - memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen); - if (hlen > sizeof(handlep->ha_fsid)) { - if (handlep->ha_fid.fid_len != - (hlen - sizeof(handlep->ha_fsid) - - sizeof(handlep->ha_fid.fid_len)) || - handlep->ha_fid.fid_pad) - return XFS_ERROR(EINVAL); - } - - /* - * Crack the handle, obtain the inode # & generation # - */ - xfid = (struct xfs_fid *)&handlep->ha_fid; - if (xfid->fid_len == sizeof(*xfid) - sizeof(xfid->fid_len)) { - ino = xfid->fid_ino; - igen = xfid->fid_gen; - } else { - return XFS_ERROR(EINVAL); - } - - /* - * Get the XFS inode, building a Linux inode to go with it. - */ - error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0); - if (error) - return error; - if (ip == NULL) - return XFS_ERROR(EIO); - if (ip->i_d.di_gen != igen) { - xfs_iput_new(ip, XFS_ILOCK_SHARED); - return XFS_ERROR(ENOENT); - } - - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - *inode = VFS_I(ip); - return 0; + return xfs_handle_to_dentry(parfilp, + compat_ptr(hreq->ihandle), hreq->ihandlen); } STATIC int xfs_compat_attrlist_by_handle( - xfs_mount_t *mp, - void __user *arg, - struct inode *parinode) + struct file *parfilp, + void __user *arg) { int error; attrlist_cursor_kern_t *cursor; compat_xfs_fsop_attrlist_handlereq_t al_hreq; - struct inode *inode; + struct dentry *dentry; char *kbuf; if (!capable(CAP_SYS_ADMIN)) @@ -446,17 +374,17 @@ xfs_compat_attrlist_by_handle( if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) return -XFS_ERROR(EINVAL); - error = xfs_vget_fsop_handlereq_compat(mp, parinode, &al_hreq.hreq, - &inode); - if (error) - goto out; + dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + error = -ENOMEM; kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL); if (!kbuf) - goto out_vn_rele; + goto out_dput; cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; - error = xfs_attr_list(XFS_I(inode), kbuf, al_hreq.buflen, + error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, al_hreq.flags, cursor); if (error) goto out_kfree; @@ -466,22 +394,20 @@ xfs_compat_attrlist_by_handle( out_kfree: kfree(kbuf); - out_vn_rele: - iput(inode); - out: - return -error; + out_dput: + dput(dentry); + return error; } STATIC int xfs_compat_attrmulti_by_handle( - xfs_mount_t *mp, - void __user *arg, - struct inode *parinode) + struct file *parfilp, + void __user *arg) { int error; compat_xfs_attr_multiop_t *ops; compat_xfs_fsop_attrmulti_handlereq_t am_hreq; - struct inode *inode; + struct dentry *dentry; unsigned int i, size; char *attr_name; @@ -491,20 +417,19 @@ xfs_compat_attrmulti_by_handle( sizeof(compat_xfs_fsop_attrmulti_handlereq_t))) return -XFS_ERROR(EFAULT); - error = xfs_vget_fsop_handlereq_compat(mp, parinode, &am_hreq.hreq, - &inode); - if (error) - goto out; + dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); error = E2BIG; size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t); if (!size || size > 16 * PAGE_SIZE) - goto out_vn_rele; + goto out_dput; error = ENOMEM; ops = kmalloc(size, GFP_KERNEL); if (!ops) - goto out_vn_rele; + goto out_dput; error = EFAULT; if (copy_from_user(ops, compat_ptr(am_hreq.ops), size)) @@ -527,20 +452,21 @@ xfs_compat_attrmulti_by_handle( switch (ops[i].am_opcode) { case ATTR_OP_GET: - ops[i].am_error = xfs_attrmulti_attr_get(inode, - attr_name, + ops[i].am_error = xfs_attrmulti_attr_get( + dentry->d_inode, attr_name, compat_ptr(ops[i].am_attrvalue), &ops[i].am_length, ops[i].am_flags); break; case ATTR_OP_SET: - ops[i].am_error = xfs_attrmulti_attr_set(inode, - attr_name, + ops[i].am_error = xfs_attrmulti_attr_set( + dentry->d_inode, attr_name, compat_ptr(ops[i].am_attrvalue), ops[i].am_length, ops[i].am_flags); break; case ATTR_OP_REMOVE: - ops[i].am_error = xfs_attrmulti_attr_remove(inode, - attr_name, ops[i].am_flags); + ops[i].am_error = xfs_attrmulti_attr_remove( + dentry->d_inode, attr_name, + ops[i].am_flags); break; default: ops[i].am_error = EINVAL; @@ -553,22 +479,20 @@ xfs_compat_attrmulti_by_handle( kfree(attr_name); out_kfree_ops: kfree(ops); - out_vn_rele: - iput(inode); - out: + out_dput: + dput(dentry); return -error; } STATIC int xfs_compat_fssetdm_by_handle( - xfs_mount_t *mp, - void __user *arg, - struct inode *parinode) + struct file *parfilp, + void __user *arg) { int error; struct fsdmidata fsd; compat_xfs_fsop_setdm_handlereq_t dmhreq; - struct inode *inode; + struct dentry *dentry; if (!capable(CAP_MKNOD)) return -XFS_ERROR(EPERM); @@ -576,12 +500,11 @@ xfs_compat_fssetdm_by_handle( sizeof(compat_xfs_fsop_setdm_handlereq_t))) return -XFS_ERROR(EFAULT); - error = xfs_vget_fsop_handlereq_compat(mp, parinode, &dmhreq.hreq, - &inode); - if (error) - return -error; + dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { error = -XFS_ERROR(EPERM); goto out; } @@ -591,11 +514,11 @@ xfs_compat_fssetdm_by_handle( goto out; } - error = -xfs_set_dmattrs(XFS_I(inode), fsd.fsd_dmevmask, + error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, fsd.fsd_dmstate); out: - iput(inode); + dput(dentry); return error; } @@ -722,21 +645,21 @@ xfs_file_compat_ioctl( if (xfs_compat_handlereq_copyin(&hreq, arg)) return -XFS_ERROR(EFAULT); - return xfs_open_by_handle(mp, &hreq, filp, inode); + return xfs_open_by_handle(filp, &hreq); } case XFS_IOC_READLINK_BY_HANDLE_32: { struct xfs_fsop_handlereq hreq; if (xfs_compat_handlereq_copyin(&hreq, arg)) return -XFS_ERROR(EFAULT); - return xfs_readlink_by_handle(mp, &hreq, inode); + return xfs_readlink_by_handle(filp, &hreq); } case XFS_IOC_ATTRLIST_BY_HANDLE_32: - return xfs_compat_attrlist_by_handle(mp, arg, inode); + return xfs_compat_attrlist_by_handle(filp, arg); case XFS_IOC_ATTRMULTI_BY_HANDLE_32: - return xfs_compat_attrmulti_by_handle(mp, arg, inode); + return xfs_compat_attrmulti_by_handle(filp, arg); case XFS_IOC_FSSETDM_BY_HANDLE_32: - return xfs_compat_fssetdm_by_handle(mp, arg, inode); + return xfs_compat_fssetdm_by_handle(filp, arg); default: return -XFS_ERROR(ENOIOCTLCMD); } -- cgit v1.2.3 From 178eae342b34185ef7b11e62df2f74ba45daa56e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Jan 2009 02:03:03 +0100 Subject: xfs: use mnt_want_write in compat_attrmulti ioctl The compat version of the attrmulti ioctl needs to ask for and then later release write access to the mount just like the native version, otherwise we could potentially write to read-only mounts. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/linux-2.6/xfs_ioctl32.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index fd4362063f2..c70c4e3db79 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -17,6 +17,7 @@ */ #include #include +#include #include #include "xfs.h" #include "xfs_fs.h" @@ -458,15 +459,23 @@ xfs_compat_attrmulti_by_handle( &ops[i].am_length, ops[i].am_flags); break; case ATTR_OP_SET: + ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); + if (ops[i].am_error) + break; ops[i].am_error = xfs_attrmulti_attr_set( dentry->d_inode, attr_name, compat_ptr(ops[i].am_attrvalue), ops[i].am_length, ops[i].am_flags); + mnt_drop_write(parfilp->f_path.mnt); break; case ATTR_OP_REMOVE: + ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); + if (ops[i].am_error) + break; ops[i].am_error = xfs_attrmulti_attr_remove( dentry->d_inode, attr_name, ops[i].am_flags); + mnt_drop_write(parfilp->f_path.mnt); break; default: ops[i].am_error = EINVAL; -- cgit v1.2.3 From a4edd1da20af79b2e92efeee3ca94831c8024d61 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Jan 2009 02:03:11 +0100 Subject: xfs: add a separate lock class for the per-mount list of dquots We can have both a a quota hash chain and the per-mount list locked at the same time. But given that both use the same struct dqhash as list head we have to tell lockdep that they are different lock classes. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/quota/xfs_qm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 6b13960cf31..7a2beb64314 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -1070,6 +1070,13 @@ xfs_qm_sync( return 0; } +/* + * The hash chains and the mplist use the same xfs_dqhash structure as + * their list head, but we can take the mplist qh_lock and one of the + * hash qh_locks at the same time without any problem as they aren't + * related. + */ +static struct lock_class_key xfs_quota_mplist_class; /* * This initializes all the quota information that's kept in the @@ -1105,6 +1112,8 @@ xfs_qm_init_quotainfo( } xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0); + lockdep_set_class(&qinf->qi_dqlist.qh_lock, &xfs_quota_mplist_class); + qinf->qi_dqreclaims = 0; /* mutex used to serialize quotaoffs */ -- cgit v1.2.3 From 5bb87a33b2cfb8e7ef3383718274094bdff266a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Jan 2009 02:03:19 +0100 Subject: xfs: lockdep annotations for xfs_dqlock2 xfs_dqlock2 locks two xfs_dquots, which is fine as it always locks the dquot with the lower id first. Use mutex_lock_nested to tell lockdep about this fact. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/quota/xfs_dquot.c | 24 ++++++++++++++---------- fs/xfs/quota/xfs_dquot.h | 10 ++++++++++ 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index d68b4e1cf1d..ebdf3c842dc 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -1383,6 +1383,12 @@ xfs_dqunlock_nonotify( mutex_unlock(&(dqp->q_qlock)); } +/* + * Lock two xfs_dquot structures. + * + * To avoid deadlocks we always lock the quota structure with + * the lowerd id first. + */ void xfs_dqlock2( xfs_dquot_t *d1, @@ -1392,18 +1398,16 @@ xfs_dqlock2( ASSERT(d1 != d2); if (be32_to_cpu(d1->q_core.d_id) > be32_to_cpu(d2->q_core.d_id)) { - xfs_dqlock(d2); - xfs_dqlock(d1); + mutex_lock(&d2->q_qlock); + mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED); } else { - xfs_dqlock(d1); - xfs_dqlock(d2); - } - } else { - if (d1) { - xfs_dqlock(d1); - } else if (d2) { - xfs_dqlock(d2); + mutex_lock(&d1->q_qlock); + mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED); } + } else if (d1) { + mutex_lock(&d1->q_qlock); + } else if (d2) { + mutex_lock(&d2->q_qlock); } } diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index 7e455337e2b..d443e93b433 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -97,6 +97,16 @@ typedef struct xfs_dquot { #define dq_hashlist q_lists.dqm_hashlist #define dq_flags q_lists.dqm_flags +/* + * Lock hierachy for q_qlock: + * XFS_QLOCK_NORMAL is the implicit default, + * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2 + */ +enum { + XFS_QLOCK_NORMAL = 0, + XFS_QLOCK_NESTED, +}; + #define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++) #ifdef DEBUG -- cgit v1.2.3 From 98b8c7a0c42acf0d6963dbb9aabe4a2e312aae12 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Jan 2009 02:03:25 +0100 Subject: xfs: add a lock class for group/project dquots We can have both a user and a group/project dquot locked at the same time, as long as the user dquot is locked first. Tell lockdep about that fact by making the group/project dquots a different lock class. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/quota/xfs_dquot.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index ebdf3c842dc..6543c0b2975 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -73,6 +73,8 @@ int xfs_dqreq_num; int xfs_dqerror_mod = 33; #endif +static struct lock_class_key xfs_dquot_other_class; + /* * Allocate and initialize a dquot. We don't always allocate fresh memory; * we try to reclaim a free dquot if the number of incore dquots are above @@ -139,7 +141,15 @@ xfs_qm_dqinit( ASSERT(dqp->q_trace); xfs_dqtrace_entry(dqp, "DQRECLAIMED_INIT"); #endif - } + } + + /* + * In either case we need to make sure group quotas have a different + * lock class than user quotas, to make sure lockdep knows we can + * locks of one of each at the same time. + */ + if (!(type & XFS_DQ_USER)) + lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); /* * log item gets initialized later -- cgit v1.2.3 From 7884bc8617e6b8afda8cb8853cf14abfd3148d5c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Jan 2009 02:04:07 +0100 Subject: xfs: fix bad_features2 fixups for the root filesystem Currently the bad_features2 fixup and the alignment updates in the superblock are skipped if we mount a filesystem read-only. But for the root filesystem the typical case is to mount read-only first and only later remount writeable so we'll never perform this update at all. It's not a big problem but means the logs of people needing the fixup get spammed at every boot because they never happen on disk. Reported-by: Arkadiusz Miskiewicz Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/linux-2.6/xfs_super.c | 17 ++++++++++++++++- fs/xfs/xfs_mount.c | 26 +++++++++++++------------- fs/xfs/xfs_mount.h | 3 +++ 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 95a97108036..c71e226da7f 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1197,6 +1197,7 @@ xfs_fs_remount( struct xfs_mount *mp = XFS_M(sb); substring_t args[MAX_OPT_ARGS]; char *p; + int error; while ((p = strsep(&options, ",")) != NULL) { int token; @@ -1247,11 +1248,25 @@ xfs_fs_remount( } } - /* rw/ro -> rw */ + /* ro -> rw */ if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { mp->m_flags &= ~XFS_MOUNT_RDONLY; if (mp->m_flags & XFS_MOUNT_BARRIER) xfs_mountfs_check_barriers(mp); + + /* + * If this is the first remount to writeable state we + * might have some superblock changes to update. + */ + if (mp->m_update_flags) { + error = xfs_mount_log_sb(mp, mp->m_update_flags); + if (error) { + cmn_err(CE_WARN, + "XFS: failed to write sb changes"); + return error; + } + mp->m_update_flags = 0; + } } /* rw -> ro */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 3c97c6463a4..35300250e86 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -45,7 +45,6 @@ #include "xfs_fsops.h" #include "xfs_utils.h" -STATIC int xfs_mount_log_sb(xfs_mount_t *, __int64_t); STATIC int xfs_uuid_mount(xfs_mount_t *); STATIC void xfs_unmountfs_wait(xfs_mount_t *); @@ -682,7 +681,7 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount) * Update alignment values based on mount options and sb values */ STATIC int -xfs_update_alignment(xfs_mount_t *mp, __uint64_t *update_flags) +xfs_update_alignment(xfs_mount_t *mp) { xfs_sb_t *sbp = &(mp->m_sb); @@ -736,11 +735,11 @@ xfs_update_alignment(xfs_mount_t *mp, __uint64_t *update_flags) if (xfs_sb_version_hasdalign(sbp)) { if (sbp->sb_unit != mp->m_dalign) { sbp->sb_unit = mp->m_dalign; - *update_flags |= XFS_SB_UNIT; + mp->m_update_flags |= XFS_SB_UNIT; } if (sbp->sb_width != mp->m_swidth) { sbp->sb_width = mp->m_swidth; - *update_flags |= XFS_SB_WIDTH; + mp->m_update_flags |= XFS_SB_WIDTH; } } } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && @@ -905,7 +904,6 @@ xfs_mountfs( xfs_sb_t *sbp = &(mp->m_sb); xfs_inode_t *rip; __uint64_t resblks; - __int64_t update_flags = 0LL; uint quotamount, quotaflags; int uuid_mounted = 0; int error = 0; @@ -933,7 +931,7 @@ xfs_mountfs( "XFS: correcting sb_features alignment problem"); sbp->sb_features2 |= sbp->sb_bad_features2; sbp->sb_bad_features2 = sbp->sb_features2; - update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2; + mp->m_update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2; /* * Re-check for ATTR2 in case it was found in bad_features2 @@ -947,11 +945,11 @@ xfs_mountfs( if (xfs_sb_version_hasattr2(&mp->m_sb) && (mp->m_flags & XFS_MOUNT_NOATTR2)) { xfs_sb_version_removeattr2(&mp->m_sb); - update_flags |= XFS_SB_FEATURES2; + mp->m_update_flags |= XFS_SB_FEATURES2; /* update sb_versionnum for the clearing of the morebits */ if (!sbp->sb_features2) - update_flags |= XFS_SB_VERSIONNUM; + mp->m_update_flags |= XFS_SB_VERSIONNUM; } /* @@ -960,7 +958,7 @@ xfs_mountfs( * allocator alignment is within an ag, therefore ag has * to be aligned at stripe boundary. */ - error = xfs_update_alignment(mp, &update_flags); + error = xfs_update_alignment(mp); if (error) goto error1; @@ -1137,10 +1135,12 @@ xfs_mountfs( } /* - * If fs is not mounted readonly, then update the superblock changes. + * If this is a read-only mount defer the superblock updates until + * the next remount into writeable mode. Otherwise we would never + * perform the update e.g. for the root filesystem. */ - if (update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) { - error = xfs_mount_log_sb(mp, update_flags); + if (mp->m_update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) { + error = xfs_mount_log_sb(mp, mp->m_update_flags); if (error) { cmn_err(CE_WARN, "XFS: failed to write sb changes"); goto error4; @@ -1820,7 +1820,7 @@ xfs_uuid_mount( * be altered by the mount options, as well as any potential sb_features2 * fixup. Only the first superblock is updated. */ -STATIC int +int xfs_mount_log_sb( xfs_mount_t *mp, __int64_t fields) diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 9bb41a9f765..f5e9937f9bd 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -327,6 +327,8 @@ typedef struct xfs_mount { spinlock_t m_sync_lock; /* work item list lock */ int m_sync_seq; /* sync thread generation no. */ wait_queue_head_t m_wait_single_sync_task; + __int64_t m_update_flags; /* sb flags we need to update + on the next remount,rw */ } xfs_mount_t; /* @@ -512,6 +514,7 @@ extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t, int64_t, int); extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, uint, int); +extern int xfs_mount_log_sb(xfs_mount_t *, __int64_t); extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); extern int xfs_readsb(xfs_mount_t *, int); extern void xfs_freesb(xfs_mount_t *); -- cgit v1.2.3 From 2809f76afce05a73e08120f455107912aa519647 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Jan 2009 02:04:16 +0100 Subject: xfs: sanity check attr fork size Recently we have quite a few kerneloops reports about dereferencing a NULL if_data in the attribute fork. From looking over the code this can only happen if we pass a 0 size argument to xfs_iformat_local. This implies some sort of corruption and in fact the only mailinglist report about this from earlier this year was after a powerfail presumably on a system with write cache and without barriers. Add a quick sanity check for the attr fork size in xfs_iformat to catch these early and without an oops. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/xfs_inode.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index be2ca4d67b5..e7ae08d1df4 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -424,6 +424,19 @@ xfs_iformat( case XFS_DINODE_FMT_LOCAL: atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); size = be16_to_cpu(atp->hdr.totsize); + + if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { + xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, + "corrupt inode %Lu " + "(bad attr fork size %Ld).", + (unsigned long long) ip->i_ino, + (long long) size); + XFS_CORRUPTION_ERROR("xfs_iformat(8)", + XFS_ERRLEVEL_LOW, + ip->i_mount, dip); + return XFS_ERROR(EFSCORRUPTED); + } + error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); break; case XFS_DINODE_FMT_EXTENTS: -- cgit v1.2.3