From a893a84e8799270fbec5c3708d001650aab47138 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 23 Jun 2009 21:25:32 +0200 Subject: mm_for_maps: simplify, use ptrace_may_access() It would be nice to kill __ptrace_may_access(). It requires task_lock(), but this lock is only needed to read mm->flags in the middle. Convert mm_for_maps() to use ptrace_may_access(), this also simplifies the code a little bit. Also, we do not need to take ->mmap_sem in advance. In fact I think mm_for_maps() should not play with ->mmap_sem at all, the caller should take this lock. With or without this patch, without ->cred_guard_mutex held we can race with exec() and get the new ->mm but check old creds. Signed-off-by: Oleg Nesterov Reviewed-by: Serge Hallyn Signed-off-by: James Morris --- fs/proc/base.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 3ce5ae9e3d2..917f338a673 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -237,20 +237,19 @@ struct mm_struct *mm_for_maps(struct task_struct *task) struct mm_struct *mm = get_task_mm(task); if (!mm) return NULL; + if (mm != current->mm) { + /* + * task->mm can be changed before security check, + * in that case we must notice the change after. + */ + if (!ptrace_may_access(task, PTRACE_MODE_READ) || + mm != task->mm) { + mmput(mm); + return NULL; + } + } down_read(&mm->mmap_sem); - task_lock(task); - if (task->mm != mm) - goto out; - if (task->mm != current->mm && - __ptrace_may_access(task, PTRACE_MODE_READ) < 0) - goto out; - task_unlock(task); return mm; -out: - task_unlock(task); - up_read(&mm->mmap_sem); - mmput(mm); - return NULL; } static int proc_pid_cmdline(struct task_struct *task, char * buffer) -- cgit v1.2.3 From 713c0ecdb888e9ef6f085e828555455c5916b07f Mon Sep 17 00:00:00 2001 From: Sten Spans Date: Thu, 16 Jul 2009 09:41:39 +0200 Subject: security: fix security_file_lock cmd argument Pass posix-translated lock operations to security_file_lock when invoked via sys_flock. Signed-off-by: Sten Spans Signed-off-by: James Morris --- fs/locks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index b6440f52178..52366e877d7 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1591,7 +1591,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) if (can_sleep) lock->fl_flags |= FL_SLEEP; - error = security_file_lock(filp, cmd); + error = security_file_lock(filp, lock->fl_type); if (error) goto out_free; -- cgit v1.2.3 From d3c8660233d3f2801e14eaf722937ff4ed49bfb7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 10 Jul 2009 03:27:38 +0200 Subject: mm_for_maps: shift down_read(mmap_sem) to the caller mm_for_maps() takes ->mmap_sem after security checks, this looks strange and obfuscates the locking rules. Move this lock to its single caller, m_start(). Signed-off-by: Oleg Nesterov Acked-by: Serge Hallyn Signed-off-by: James Morris --- fs/proc/base.c | 8 +++----- fs/proc/task_mmu.c | 1 + fs/proc/task_nommu.c | 1 + 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 917f338a673..f3c2e4085fe 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -235,9 +235,8 @@ static int check_mem_permission(struct task_struct *task) struct mm_struct *mm_for_maps(struct task_struct *task) { struct mm_struct *mm = get_task_mm(task); - if (!mm) - return NULL; - if (mm != current->mm) { + + if (mm && mm != current->mm) { /* * task->mm can be changed before security check, * in that case we must notice the change after. @@ -245,10 +244,9 @@ struct mm_struct *mm_for_maps(struct task_struct *task) if (!ptrace_may_access(task, PTRACE_MODE_READ) || mm != task->mm) { mmput(mm); - return NULL; + mm = NULL; } } - down_read(&mm->mmap_sem); return mm; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 6f61b7cc32e..9bd8be1d235 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -119,6 +119,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) mm = mm_for_maps(priv->task); if (!mm) return NULL; + down_read(&mm->mmap_sem); tail_vma = get_gate_vma(priv->task); priv->tail_vma = tail_vma; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 64a72e2e765..8f5c05d3dbd 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -189,6 +189,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) priv->task = NULL; return NULL; } + down_read(&mm->mmap_sem); /* start from the Nth VMA */ for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) -- cgit v1.2.3 From 896a6de40ef3814525632609799af909338f50c3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 10 Jul 2009 03:27:40 +0200 Subject: mm_for_maps: take ->cred_guard_mutex to fix the race with exec The problem is minor, but without ->cred_guard_mutex held we can race with exec() and get the new ->mm but check old creds. Now we do not need to re-check task->mm after ptrace_may_access(), it can't be changed to the new mm under us. Strictly speaking, this also fixes another very minor problem. Unless security check fails or the task exits mm_for_maps() should never return NULL, the caller should get either old or new ->mm. Signed-off-by: Oleg Nesterov Acked-by: Serge Hallyn Signed-off-by: James Morris --- fs/proc/base.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index f3c2e4085fe..175db258942 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -234,19 +234,19 @@ static int check_mem_permission(struct task_struct *task) struct mm_struct *mm_for_maps(struct task_struct *task) { - struct mm_struct *mm = get_task_mm(task); + struct mm_struct *mm; - if (mm && mm != current->mm) { - /* - * task->mm can be changed before security check, - * in that case we must notice the change after. - */ - if (!ptrace_may_access(task, PTRACE_MODE_READ) || - mm != task->mm) { - mmput(mm); - mm = NULL; - } + if (mutex_lock_killable(&task->cred_guard_mutex)) + return NULL; + + mm = get_task_mm(task); + if (mm && mm != current->mm && + !ptrace_may_access(task, PTRACE_MODE_READ)) { + mmput(mm); + mm = NULL; } + mutex_unlock(&task->cred_guard_mutex); + return mm; } -- cgit v1.2.3 From 939a9421eb53d3ea83188ae13802779041caefdb Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Thu, 20 Aug 2009 19:29:03 -0700 Subject: vfs: allow file truncations when both suid and write permissions set When suid is set and the non-owner user has write permission, any writing into this file should be allowed and suid should be removed after that. However, current kernel only allows writing without truncations, when we do truncations on that file, we get EPERM. This is a bug. Steps to reproduce this bug: % ls -l rootdir/file1 -rwsrwsrwx 1 root root 3 Jun 25 15:42 rootdir/file1 % echo h > rootdir/file1 zsh: operation not permitted: rootdir/file1 % ls -l rootdir/file1 -rwsrwsrwx 1 root root 3 Jun 25 15:42 rootdir/file1 % echo h >> rootdir/file1 % ls -l rootdir/file1 -rwxrwxrwx 1 root root 5 Jun 25 16:34 rootdir/file1 Signed-off-by: WANG Cong Cc: Eric Sandeen Acked-by: Eric Paris Cc: Eugene Teo Cc: Al Viro Cc: OGAWA Hirofumi Cc: Christoph Hellwig Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: James Morris --- fs/open.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index dd98e807602..40d1fa25f5a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -199,7 +199,7 @@ out: int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, struct file *filp) { - int err; + int ret; struct iattr newattrs; /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ @@ -214,12 +214,14 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, } /* Remove suid/sgid on truncate too */ - newattrs.ia_valid |= should_remove_suid(dentry); + ret = should_remove_suid(dentry); + if (ret) + newattrs.ia_valid |= ret | ATTR_FORCE; mutex_lock(&dentry->d_inode->i_mutex); - err = notify_change(dentry, &newattrs); + ret = notify_change(dentry, &newattrs); mutex_unlock(&dentry->d_inode->i_mutex); - return err; + return ret; } static long do_sys_truncate(const char __user *pathname, loff_t length) -- cgit v1.2.3 From e0e817392b9acf2c98d3be80c233dddb1b52003d Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 2 Sep 2009 09:13:40 +0100 Subject: CRED: Add some configurable debugging [try #6] Add a config option (CONFIG_DEBUG_CREDENTIALS) to turn on some debug checking for credential management. The additional code keeps track of the number of pointers from task_structs to any given cred struct, and checks to see that this number never exceeds the usage count of the cred struct (which includes all references, not just those from task_structs). Furthermore, if SELinux is enabled, the code also checks that the security pointer in the cred struct is never seen to be invalid. This attempts to catch the bug whereby inode_has_perm() faults in an nfsd kernel thread on seeing cred->security be a NULL pointer (it appears that the credential struct has been previously released): http://www.kerneloops.org/oops.php?number=252883 Signed-off-by: David Howells Signed-off-by: James Morris --- fs/nfsd/auth.c | 4 ++++ fs/nfsd/nfssvc.c | 2 ++ fs/nfsd/vfs.c | 3 +++ fs/open.c | 2 ++ 4 files changed, 11 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 5573508f707..36fcabbf518 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -34,6 +34,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) int flags = nfsexp_flags(rqstp, exp); int ret; + validate_process_creds(); + /* discard any old override before preparing the new set */ revert_creds(get_cred(current->real_cred)); new = prepare_creds(); @@ -86,8 +88,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); + validate_process_creds(); put_cred(override_creds(new)); put_cred(new); + validate_process_creds(); return 0; oom: diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 492c79b7800..24d58adfe5f 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -496,7 +496,9 @@ nfsd(void *vrqstp) /* Lock the export hash tables for reading. */ exp_readlock(); + validate_process_creds(); svc_process(rqstp); + validate_process_creds(); /* Unlock export hash tables */ exp_readunlock(); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 23341c1063b..8fa09bfbcba 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -684,6 +684,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, __be32 err; int host_err; + validate_process_creds(); + /* * If we get here, then the client has already done an "open", * and (hopefully) checked permission - so allow OWNER_OVERRIDE @@ -740,6 +742,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, out_nfserr: err = nfserrno(host_err); out: + validate_process_creds(); return err; } diff --git a/fs/open.c b/fs/open.c index 40d1fa25f5a..31191bf513e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -959,6 +959,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, int error; struct file *f; + validate_creds(cred); + /* * We must always pass in a valid mount pointer. Historically * callers got away with not passing it, but we must enforce this at -- cgit v1.2.3 From 6c1488fd581a447ec87c4b59f0d33f95f0aa441b Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 2 Sep 2009 11:40:32 -0400 Subject: IMA: open new file for read When creating a new file, ima_path_check() assumed the new file was being opened for write. Call ima_path_check() with the appropriate acc_mode so that the read/write counters are incremented correctly. Signed-off-by: Mimi Zohar Signed-off-by: James Morris --- fs/namei.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index f3c5b278895..ee01308a01d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1533,9 +1533,11 @@ int may_open(struct path *path, int acc_mode, int flag) if (error) return error; - error = ima_path_check(path, - acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC), + error = ima_path_check(path, acc_mode ? + acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) : + ACC_MODE(flag) & (MAY_READ | MAY_WRITE), IMA_COUNT_UPDATE); + if (error) return error; /* -- cgit v1.2.3 From b1ab7e4b2a88d3ac13771463be8f302ce1616cfc Mon Sep 17 00:00:00 2001 From: "David P. Quigley" Date: Thu, 3 Sep 2009 14:25:56 -0400 Subject: VFS: Factor out part of vfs_setxattr so it can be called from the SELinux hook for inode_setsecctx. This factors out the part of the vfs_setxattr function that performs the setting of the xattr and its notification. This is needed so the SELinux implementation of inode_setsecctx can handle the setting of the xattr while maintaining the proper separation of layers. Signed-off-by: David P. Quigley Acked-by: Serge Hallyn Signed-off-by: James Morris --- fs/xattr.c | 55 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/xattr.c b/fs/xattr.c index 1c3d0af59dd..6d4f6d3449f 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -66,22 +66,28 @@ xattr_permission(struct inode *inode, const char *name, int mask) return inode_permission(inode, mask); } -int -vfs_setxattr(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags) +/** + * __vfs_setxattr_noperm - perform setxattr operation without performing + * permission checks. + * + * @dentry - object to perform setxattr on + * @name - xattr name to set + * @value - value to set @name to + * @size - size of @value + * @flags - flags to pass into filesystem operations + * + * returns the result of the internal setxattr or setsecurity operations. + * + * This function requires the caller to lock the inode's i_mutex before it + * is executed. It also assumes that the caller will make the appropriate + * permission checks. + */ +int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { struct inode *inode = dentry->d_inode; - int error; - - error = xattr_permission(inode, name, MAY_WRITE); - if (error) - return error; + int error = -EOPNOTSUPP; - mutex_lock(&inode->i_mutex); - error = security_inode_setxattr(dentry, name, value, size, flags); - if (error) - goto out; - error = -EOPNOTSUPP; if (inode->i_op->setxattr) { error = inode->i_op->setxattr(dentry, name, value, size, flags); if (!error) { @@ -97,6 +103,29 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value, if (!error) fsnotify_xattr(dentry); } + + return error; +} + + +int +vfs_setxattr(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags) +{ + struct inode *inode = dentry->d_inode; + int error; + + error = xattr_permission(inode, name, MAY_WRITE); + if (error) + return error; + + mutex_lock(&inode->i_mutex); + error = security_inode_setxattr(dentry, name, value, size, flags); + if (error) + goto out; + + error = __vfs_setxattr_noperm(dentry, name, value, size, flags); + out: mutex_unlock(&inode->i_mutex); return error; -- cgit v1.2.3 From ddd29ec6597125c830f7badb608a86c98b936b64 Mon Sep 17 00:00:00 2001 From: "David P. Quigley" Date: Wed, 9 Sep 2009 14:25:37 -0400 Subject: sysfs: Add labeling support for sysfs This patch adds a setxattr handler to the file, directory, and symlink inode_operations structures for sysfs. The patch uses hooks introduced in the previous patch to handle the getting and setting of security information for the sysfs inodes. As was suggested by Eric Biederman the struct iattr in the sysfs_dirent structure has been replaced by a structure which contains the iattr, secdata and secdata length to allow the changes to persist in the event that the inode representing the sysfs_dirent is evicted. Because sysfs only stores this information when a change is made all the optional data is moved into one dynamically allocated field. This patch addresses an issue where SELinux was denying virtd access to the PCI configuration entries in sysfs. The lack of setxattr handlers for sysfs required that a single label be assigned to all entries in sysfs. Granting virtd access to every entry in sysfs is not an acceptable solution so fine grained labeling of sysfs is required such that individual entries can be labeled appropriately. [sds: Fixed compile-time warnings, coding style, and setting of inode security init flags.] Signed-off-by: David P. Quigley Signed-off-by: Stephen D. Smalley Signed-off-by: James Morris --- fs/sysfs/dir.c | 1 + fs/sysfs/inode.c | 134 +++++++++++++++++++++++++++++++++++++++-------------- fs/sysfs/symlink.c | 2 + fs/sysfs/sysfs.h | 12 ++++- 4 files changed, 112 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 14f2d71ea3c..0050fc40e8c 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -760,6 +760,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, const struct inode_operations sysfs_dir_inode_operations = { .lookup = sysfs_lookup, .setattr = sysfs_setattr, + .setxattr = sysfs_setxattr, }; static void remove_dir(struct sysfs_dirent *sd) diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 555f0ff988d..2b6a8d9de73 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include "sysfs.h" extern struct super_block * sysfs_sb; @@ -35,6 +37,7 @@ static struct backing_dev_info sysfs_backing_dev_info = { static const struct inode_operations sysfs_inode_operations ={ .setattr = sysfs_setattr, + .setxattr = sysfs_setxattr, }; int __init sysfs_inode_init(void) @@ -42,18 +45,37 @@ int __init sysfs_inode_init(void) return bdi_init(&sysfs_backing_dev_info); } +struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd) +{ + struct sysfs_inode_attrs *attrs; + struct iattr *iattrs; + + attrs = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL); + if (!attrs) + return NULL; + iattrs = &attrs->ia_iattr; + + /* assign default attributes */ + iattrs->ia_mode = sd->s_mode; + iattrs->ia_uid = 0; + iattrs->ia_gid = 0; + iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME; + + return attrs; +} int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) { struct inode * inode = dentry->d_inode; struct sysfs_dirent * sd = dentry->d_fsdata; - struct iattr * sd_iattr; + struct sysfs_inode_attrs *sd_attrs; + struct iattr *iattrs; unsigned int ia_valid = iattr->ia_valid; int error; if (!sd) return -EINVAL; - sd_iattr = sd->s_iattr; + sd_attrs = sd->s_iattr; error = inode_change_ok(inode, iattr); if (error) @@ -65,42 +87,77 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) if (error) return error; - if (!sd_iattr) { + if (!sd_attrs) { /* setting attributes for the first time, allocate now */ - sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL); - if (!sd_iattr) + sd_attrs = sysfs_init_inode_attrs(sd); + if (!sd_attrs) return -ENOMEM; - /* assign default attributes */ - sd_iattr->ia_mode = sd->s_mode; - sd_iattr->ia_uid = 0; - sd_iattr->ia_gid = 0; - sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME; - sd->s_iattr = sd_iattr; + sd->s_iattr = sd_attrs; + } else { + /* attributes were changed at least once in past */ + iattrs = &sd_attrs->ia_iattr; + + if (ia_valid & ATTR_UID) + iattrs->ia_uid = iattr->ia_uid; + if (ia_valid & ATTR_GID) + iattrs->ia_gid = iattr->ia_gid; + if (ia_valid & ATTR_ATIME) + iattrs->ia_atime = timespec_trunc(iattr->ia_atime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_MTIME) + iattrs->ia_mtime = timespec_trunc(iattr->ia_mtime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_CTIME) + iattrs->ia_ctime = timespec_trunc(iattr->ia_ctime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_MODE) { + umode_t mode = iattr->ia_mode; + + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + mode &= ~S_ISGID; + iattrs->ia_mode = sd->s_mode = mode; + } } + return error; +} - /* attributes were changed atleast once in past */ - - if (ia_valid & ATTR_UID) - sd_iattr->ia_uid = iattr->ia_uid; - if (ia_valid & ATTR_GID) - sd_iattr->ia_gid = iattr->ia_gid; - if (ia_valid & ATTR_ATIME) - sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime, - inode->i_sb->s_time_gran); - if (ia_valid & ATTR_MTIME) - sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime, - inode->i_sb->s_time_gran); - if (ia_valid & ATTR_CTIME) - sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime, - inode->i_sb->s_time_gran); - if (ia_valid & ATTR_MODE) { - umode_t mode = iattr->ia_mode; - - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) - mode &= ~S_ISGID; - sd_iattr->ia_mode = sd->s_mode = mode; - } +int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags) +{ + struct sysfs_dirent *sd = dentry->d_fsdata; + struct sysfs_inode_attrs *iattrs; + void *secdata; + int error; + u32 secdata_len = 0; + + if (!sd) + return -EINVAL; + if (!sd->s_iattr) + sd->s_iattr = sysfs_init_inode_attrs(sd); + if (!sd->s_iattr) + return -ENOMEM; + + iattrs = sd->s_iattr; + + if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) { + const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; + error = security_inode_setsecurity(dentry->d_inode, suffix, + value, size, flags); + if (error) + goto out; + error = security_inode_getsecctx(dentry->d_inode, + &secdata, &secdata_len); + if (error) + goto out; + if (iattrs->ia_secdata) + security_release_secctx(iattrs->ia_secdata, + iattrs->ia_secdata_len); + iattrs->ia_secdata = secdata; + iattrs->ia_secdata_len = secdata_len; + } else + return -EINVAL; +out: return error; } @@ -146,6 +203,7 @@ static int sysfs_count_nlink(struct sysfs_dirent *sd) static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) { struct bin_attribute *bin_attr; + struct sysfs_inode_attrs *iattrs; inode->i_private = sysfs_get(sd); inode->i_mapping->a_ops = &sysfs_aops; @@ -154,16 +212,20 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) inode->i_ino = sd->s_ino; lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key); - if (sd->s_iattr) { + iattrs = sd->s_iattr; + if (iattrs) { /* sysfs_dirent has non-default attributes * get them for the new inode from persistent copy * in sysfs_dirent */ - set_inode_attr(inode, sd->s_iattr); + set_inode_attr(inode, &iattrs->ia_iattr); + if (iattrs->ia_secdata) + security_inode_notifysecctx(inode, + iattrs->ia_secdata, + iattrs->ia_secdata_len); } else set_default_inode_attr(inode, sd->s_mode); - /* initialize inode according to type */ switch (sysfs_type(sd)) { case SYSFS_DIR: diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 1d897ad808e..c5081ad7702 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "sysfs.h" @@ -209,6 +210,7 @@ static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *co } const struct inode_operations sysfs_symlink_inode_operations = { + .setxattr = sysfs_setxattr, .readlink = generic_readlink, .follow_link = sysfs_follow_link, .put_link = sysfs_put_link, diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 3fa0d98481e..af4c4e7482a 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -8,6 +8,8 @@ * This file is released under the GPLv2. */ +#include + struct sysfs_open_dirent; /* type-specific structures for sysfs_dirent->s_* union members */ @@ -31,6 +33,12 @@ struct sysfs_elem_bin_attr { struct hlist_head buffers; }; +struct sysfs_inode_attrs { + struct iattr ia_iattr; + void *ia_secdata; + u32 ia_secdata_len; +}; + /* * sysfs_dirent - the building block of sysfs hierarchy. Each and * every sysfs node is represented by single sysfs_dirent. @@ -56,7 +64,7 @@ struct sysfs_dirent { unsigned int s_flags; ino_t s_ino; umode_t s_mode; - struct iattr *s_iattr; + struct sysfs_inode_attrs *s_iattr; }; #define SD_DEACTIVATED_BIAS INT_MIN @@ -148,6 +156,8 @@ static inline void __sysfs_put(struct sysfs_dirent *sd) struct inode *sysfs_get_inode(struct sysfs_dirent *sd); void sysfs_delete_inode(struct inode *inode); int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); +int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags); int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); int sysfs_inode_init(void); -- cgit v1.2.3 From 9f0ab4a3f0fdb1ff404d150618ace2fa069bb2e1 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Tue, 8 Sep 2009 19:49:40 -0700 Subject: binfmt_elf: fix PT_INTERP bss handling In fs/binfmt_elf.c, load_elf_interp() calls padzero() for .bss even if the PT_LOAD has no PROT_WRITE and no .bss. This generates EFAULT. Here is a small test case. (Yes, there are other, useful PT_INTERP which have only .text and no .data/.bss.) ----- ptinterp.S _start: .globl _start nop int3 ----- $ gcc -m32 -nostartfiles -nostdlib -o ptinterp ptinterp.S $ gcc -m32 -Wl,--dynamic-linker=ptinterp -o hello hello.c $ ./hello Segmentation fault # during execve() itself After applying the patch: $ ./hello Trace trap # user-mode execution after execve() finishes If the ELF headers are actually self-inconsistent, then dying is fine. But having no PROT_WRITE segment is perfectly normal and correct if there is no segment with p_memsz > p_filesz (i.e. bss). John Reiser suggested checking for PROT_WRITE in the bss logic. I think it makes most sense to simply apply the bss logic only when there is bss. This patch looks less trivial than it is due to some reindentation. It just moves the "if (last_bss > elf_bss) {" test up to include the partial-page bss logic as well as the more-pages bss logic. Reported-by: John Reiser Signed-off-by: Roland McGrath Signed-off-by: James Morris --- fs/binfmt_elf.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b7c1603cd4b..7c1e65d5487 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -501,22 +501,22 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, } } - /* - * Now fill out the bss section. First pad the last page up - * to the page boundary, and then perform a mmap to make sure - * that there are zero-mapped pages up to and including the - * last bss page. - */ - if (padzero(elf_bss)) { - error = -EFAULT; - goto out_close; - } + if (last_bss > elf_bss) { + /* + * Now fill out the bss section. First pad the last page up + * to the page boundary, and then perform a mmap to make sure + * that there are zero-mapped pages up to and including the + * last bss page. + */ + if (padzero(elf_bss)) { + error = -EFAULT; + goto out_close; + } - /* What we have mapped so far */ - elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); + /* What we have mapped so far */ + elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); - /* Map the last of the bss segment */ - if (last_bss > elf_bss) { + /* Map the last of the bss segment */ down_write(¤t->mm->mmap_sem); error = do_brk(elf_bss, last_bss - elf_bss); up_write(¤t->mm->mmap_sem); -- cgit v1.2.3